├── .gitignore ├── LICENSE ├── README.md ├── client ├── Makefile ├── main.cc ├── p4ml_manager.cc └── p4ml_manager.h ├── common ├── CC_manager.h ├── HashTable.cc ├── HashTable.h ├── ThreadPool.h ├── dma_common.cc ├── dma_common.h ├── mlx5_defs.h ├── p4ml_struct.h ├── packet.h ├── quantize.h ├── utils.h └── window_manager.h ├── docs └── benchmark.md ├── p4src ├── includes │ ├── actions.p4 │ ├── common.p4 │ ├── headers.p4 │ ├── parser.p4 │ ├── registers.p4 │ └── tables.p4 └── p4ml.p4 ├── ptf └── ptfTest.py ├── run_pd_rpc └── setup.py └── server ├── Makefile ├── ParameterServer.cc └── ParameterServer.h /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | *.tar 4 | log 5 | *.o 6 | app 7 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 NetLabIIIS and WISR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ATP 2 | 3 | ATP is a service that performs multi-rack, multi-tenant in-network aggregation by co-designing the programmable switch and the end-host networking stack. 4 | 5 | # Benchmark 6 | To run the benchmark, please see [benchmark](docs/benchmark.md). 7 | 8 | # Publications 9 | 10 | - [NSDI'21] "[ATP: In-network Aggregation for Multi-tenant Learning](https://www.usenix.org/conference/nsdi21/presentation/lao)". ChonLam Lao, Yanfang Le, Kshiteej Mahajan, Yixi Chen, Wenfei Wu, Aditya Akella, Michael Swift. 11 | 12 | # Contact 13 | 14 | Any questions? Please feel free to reach us at inatpcontact@gmail.com. You are more likely to receive a helpful response if your question is specific, self-contained and concise.
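# Quick Start

A minimal sketch inferred from `client/Makefile` and the usage message in `client/main.cc`; see [benchmark](docs/benchmark.md) for the full procedure.

```
cd client
make                                          # builds the `app` binary (see client/Makefile)
./app [MyID] [Num of Worker] [AppID] [Num of PS]
```

`main.cc` also recognizes one optional switch per run: `-a` (max aggregators per thread), `-f` (force-forward rate), `-l` (loop count), and `-aa` (number of switch AGTRs used).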
15 | -------------------------------------------------------------------------------- /client/Makefile: -------------------------------------------------------------------------------- 1 | # CFLAGS := -O3 -g 2 | # LD := g++ 3 | # LDFLAGS := ${LDFLAGS} -lrdmacm -libverbs -lrt -lpthread -lm 4 | 5 | # ROCE_COMMON_PATH = ../common/ 6 | # INCLUDES = -I${ROCE_COMMON_PATH} 7 | # CFLAGS := ${CFLAGS} ${INCLUDES} 8 | # SOURCES := $(wildcard *.c *.h ${ROCE_COMMON_PATH}*.c ${ROCE_COMMON_PATH}*.h) 9 | 10 | 11 | # all: app 12 | # app: main.o p4ml_manager.o ${ROCE_COMMON_PATH}packet.o ${ROCE_COMMON_PATH}dma_common.o ${ROCE_COMMON_PATH}window_manager.o 13 | # ${LD} $(CFLAGS) -o $@ $^ ${LDFLAGS} 14 | 15 | 16 | # # Clean Target 17 | # clean: 18 | # rm *.o ../common/*.o 19 | # rm app 20 | 21 | all: 22 | g++ -std=c++11 -g -O3 -c -o main.o main.cc 23 | g++ -std=c++11 -g -O3 -c -o p4ml_manager.o p4ml_manager.cc -mavx 24 | g++ -std=c++11 -g -O3 -c -o ../common/HashTable.o ../common/HashTable.cc 25 | g++ -std=c++11 -g -O3 -c -o ../common/dma_common.o ../common/dma_common.cc 26 | g++ -std=c++11 -g -O3 -I../common/ -o app main.o p4ml_manager.o ../common/HashTable.o ../common/dma_common.o -lrdmacm -libverbs -lrt -lpthread -lm 27 | 28 | clean: 29 | rm *.o 30 | rm app 31 | -------------------------------------------------------------------------------- /client/main.cc: -------------------------------------------------------------------------------- 1 | #include "p4ml_manager.h" 2 | 3 | #define ENABLE_LOG true 4 | 5 | uint32_t* init_model(int size) { 6 | uint32_t* tmp = new uint32_t[size]; 7 | for (int i = 0; i < size; i++) 8 | tmp[i] = i+1; 9 | return tmp; 10 | } 11 | 12 | float* init_model_float(int size) { 13 | float* tmp = new float[size]; 14 | for (int i = 0; i < size; i++) { 15 | tmp[i] = (i+1.0) / 10000000.0; 16 | // tmp[i] = (i + 1.0) / 10000.0; 17 | // printf("%f ", tmp[i]); 18 | } 19 | // tmp[63] = 200; 20 | return tmp; 21 | } 22 | 23 | float* init_model_float_with_overflow(int size) { 24 | float* tmp = new float[size]; 25 | for (int i = 0; i < size; i++) { 26 | tmp[i] = (i+1.0) / 10000000.0; 27 | } 28 | for (int i = 0; i < 100; i++) { 29 | int rand_num = rand() % size; 30 | if (rand_num > size / 2) 31 | tmp[rand_num] = 200; 32 | else 33 | tmp[rand_num] = 100; 34 | // printf("rand!!! %d\n", rand_num); 35 | } 36 | return tmp; 37 | } 38 | 39 | 40 | std::shared_ptr _p4ml_manager; 41 | 42 | int main(int argc, char *argv[]) 43 | { 44 | bindingCPU(0); 45 | 46 | if (argc < 5) { 47 | printf("\nUsage %s [MyID] [Num of Worker] [AppID] [Num of PS]\n\n", argv[0]); 48 | exit(1); 49 | } 50 | 51 | int host = atoi(argv[1]); 52 | int num_worker = atoi(argv[2]); 53 | int appID = atoi(argv[3]); 54 | int num_PS = atoi(argv[4]); 55 | 56 | //int host = 0; 57 | // int num_worker = 2; 58 | // int appID = 1; 59 | 60 | _p4ml_manager = std::shared_ptr(new P4mlManager(host, num_worker, appID, num_PS)); 61 | 62 | /* Here for int size to send per thread */ 63 | /* ex. 
25600 = 32*800 = 1 Round */ 64 | int size = 1024000; 65 | int thread_to_use = 12; 66 | int loop_time = 1000; 67 | 68 | if (argc > 5) { 69 | std::string option = argv[5]; 70 | if (option == "-a") { 71 | int num_agtr = atoi(argv[6]); 72 | _p4ml_manager->SetMaxAgtrSizePerThread(num_agtr); 73 | } 74 | if (option == "-f") { 75 | float forward_rate = atof(argv[6]); 76 | _p4ml_manager->SetForceForward(forward_rate); 77 | } 78 | if (option == "-l") { 79 | loop_time = atof(argv[6]); 80 | } 81 | if (option == "-aa") { 82 | int num_used_agtr = atoi(argv[6]); 83 | _p4ml_manager->SetUsedSwitchAGTRcount(num_used_agtr); 84 | } 85 | } 86 | 87 | /* (40) Threads in thread pool */ 88 | /* MAX_AGTR (32000) / 40 = 800 Agtr per thread */ 89 | _p4ml_manager->init_threadPool(thread_to_use); 90 | 91 | // _p4ml_manager->SetForceForward(0.25); 92 | // _p4ml_manager->SetMaxAgtrSizePerThread(50); 93 | 94 | int finish_counter = loop_time * thread_to_use; 95 | uint32_t** tensor = new uint32_t*[thread_to_use * loop_time]; 96 | 97 | printf("\nModel initializing...\n"); 98 | // for (int i = 0; i < thread_to_use * loop_time; i++) 99 | for (int i = 0; i < 1; i++) 100 | if (FLOATING_POINT_INPUT) 101 | tensor[i] = (uint32_t*) init_model_float_with_overflow(size); 102 | else 103 | tensor[i] = init_model(size); 104 | 105 | printf("\nModel initialized completed. Start sending...\n\n"); 106 | 107 | std::chrono::time_point timer = std::chrono::high_resolution_clock::now(); 108 | 109 | std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); 110 | 111 | for (int j = 0; j < loop_time; j++) { 112 | /* thread to use */ 113 | for (int i = 0; i < thread_to_use; i++) { 114 | uint64_t key = _p4ml_manager->GetNewKey(); 115 | _p4ml_manager->PushPull(key, (char*) tensor[0], size, 1); 116 | } 117 | } 118 | 119 | 120 | int total_sent = 0; 121 | 122 | while (finish_counter > 0) { 123 | int64_t tmp_key = _p4ml_manager->GetFinishKey(); 124 | if (tmp_key >= 0) { 125 | finish_counter--; 126 | total_sent++; 127 | } 128 | 129 | if (ENABLE_LOG) { 130 | std::chrono::time_point current_time = 131 | std::chrono::high_resolution_clock::now(); 132 | std::chrono::duration time_span = 133 | std::chrono::duration_cast>(current_time - timer); 134 | std::chrono::duration total_time = 135 | std::chrono::duration_cast>(current_time - t1); 136 | if (time_span.count() >= 1) { 137 | // printf("Tensor left: %d, ", finish_counter); 138 | // printf("total send %" PRIu64 " bytes, time %lf, throughput: %lf\n", total_sent * 32000 * 194, total_time, total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0); 139 | // printf("%lf\n", total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0); 140 | // int tmp = _p4ml_manager->GetCollisionTimeAndClear(); 141 | // if (tmp) 142 | // printf("%d\n", tmp); 143 | // printf("%d\n", _p4ml_manager->GetCollisionTimeAndClear()); 144 | printf("%lf\n", (float)total_sent * (16517 * P4ML_PACKET_SIZE) / 1024 / 1024 / 1024 * 8); 145 | total_sent = 0; 146 | timer = current_time; 147 | } 148 | } 149 | } 150 | _p4ml_manager->GetLossRate(); 151 | std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); 152 | std::chrono::duration time_span = std::chrono::duration_cast>(t2 - t1); 153 | double transmit_size_in_m = (double)((double)size * loop_time * thread_to_use / (float)MAX_ENTRIES_PER_PACKET) * P4ML_PACKET_SIZE / 1024 / 1024; 154 | double total_time = time_span.count(); 155 | double throughput = (transmit_size_in_m / 1024 * 8 ) / total_time; 156 | printf("Finish all %d Tensors,\n Time = 
%lf s,\n Total Size = %lf MB,\n Throughput: %lf Gbps\n\n", thread_to_use * loop_time, total_time, transmit_size_in_m, throughput); 157 | } 158 | -------------------------------------------------------------------------------- /client/p4ml_manager.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef P4ML_MANAGER_H 3 | #define P4ML_MANAGER_H 4 | 5 | #include "../common/dma_common.h" 6 | #include "../common/packet.h" 7 | #include "../common/utils.h" 8 | #include "../common/window_manager.h" 9 | #include "../common/HashTable.h" 10 | #include "../common/quantize.h" 11 | #include "../common/p4ml_struct.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #define FLOATING_POINT_INPUT false 31 | 32 | #define ONLY_DO_QUAN false 33 | 34 | #define OVERFLOW_THRESHOLD 213 35 | #define UNDERFLOW_THRESHOLD -213 36 | 37 | #define P4ML_KEY_TOTAL 500000 38 | #define MAX_TENSOR_SIZE 1024000 39 | 40 | #define MAX_THREAD_PER_APP 20 41 | 42 | class P4mlManager { 43 | public: 44 | P4mlManager(uint32_t host, int num_worker, int appID, int num_PS); 45 | // ~P4mlManager(); 46 | 47 | void init_threadPool(int num_thread); 48 | void PushPull(uint64_t key, char* data, int len, int cmd); 49 | static void PushPullLoop(int thread_id); 50 | static void QuantizationLoop(int thread_id); 51 | 52 | void PushTaskToThread(uint64_t key, char *data, int len, int cmd, int thread_id); 53 | 54 | uint64_t GetNewKey(); 55 | int64_t GetFinishKey(); 56 | double GetLossRate(); 57 | int GetCollisionTimeAndClear(); 58 | void SetForceForward(float forward_rate); 59 | void SetMaxAgtrSizePerThread(int max_agtr_size_per_thread); 60 | void SetUsedSwitchAGTRcount(int used_agtr); 61 | 62 | private: 63 | static uint32_t host; 64 | static uint8_t num_worker; 65 | static uint8_t num_PS; 66 | static uint16_t appID; 67 | static uint64_t p4mlKey; 68 | static AppInfo* app_info; 69 | 70 | static int max_agtr_size_per_thread; 71 | static int UsedSwitchAGTRcount; 72 | static int _num_thread; 73 | static std::chrono::time_point start; 74 | static ThreadInfo** threadInfoQueue; 75 | static DMAcontext** dmaContextQueue; 76 | static std::thread** threadQueue; 77 | static std::thread** pushPullthreadQueue; 78 | static std::queue* pushPulljobQueue; 79 | static std::thread** quantizationthreadQueue; 80 | static std::queue* quantizejobQueue; 81 | static std::queue* dequantizejobQueue; 82 | 83 | static WindowManager* window_manager; 84 | static std::queue finishQueue; 85 | static std::queue* pendingQueue; 86 | static uint64_t* weightQueue; 87 | 88 | // static uint16_t* hash_map; 89 | static HashTable* hash_table; 90 | static int32_t** quantizeBuffer; 91 | static bool** isOverflow; 92 | 93 | static bool isForceForward; 94 | static int forwardFrequency; 95 | static float forwardRate; 96 | 97 | static std::mutex Resource_mutex; 98 | static std::mutex _P4MLKey_mutex; 99 | static std::mutex _print_mutex; 100 | static std::mutex _queuePush_mutex; 101 | 102 | static void main_receive_packet_loop(DMAcontext* dma_context, int32_t* data, int my_id); 103 | static void updateModel(agghdr* p4ml_header, int32_t* data, int my_id); 104 | }; 105 | 106 | inline void P4mlManager::updateModel(agghdr* p4ml_header, int32_t* data, int my_id) 107 | { 108 | uint16_t* p_seq = &p4ml_header->seq_num; 109 | uint32_t* tensor_len = 
&pushPulljobQueue[my_id].front()->len; 110 | 111 | int32_t* p_model = p4ml_header->vector; 112 | uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET; 113 | if (offset < *tensor_len) { 114 | if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len) 115 | memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET)); 116 | else 117 | memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET); 118 | } 119 | } 120 | 121 | #endif //P4ML_MANAGER_H -------------------------------------------------------------------------------- /common/CC_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef CC_MANAGER_H 2 | #define CC_MANAGER_H 3 | 4 | #define MAX_BYTES 100 * P4ML_PACKET_SIZE 5 | 6 | #include "packet.h" 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | #define do_div(n, base) ({ \ 13 | uint32_t __base = (base); \ 14 | uint32_t __rem; \ 15 | __rem = ((uint64_t)(n)) % __base; \ 16 | (n) = ((uint64_t)(n)) / __base; \ 17 | __rem; \ 18 | }) 19 | #define GET_MIN(a, b) (a < b ? a : b) 20 | #define GET_MAX(a, b) (a > b ? a : b) 21 | 22 | class CC_manager { 23 | 24 | public: 25 | CC_manager(int init_window) 26 | { 27 | cwnd_bytes = init_window * P4ML_PACKET_SIZE; 28 | } 29 | 30 | int adjustWindow(bool isECN) 31 | { 32 | if (isECN) 33 | { 34 | cwnd_bytes /= 2; 35 | } 36 | else 37 | { 38 | cwnd_bytes += 1500; 39 | } 40 | 41 | if (cwnd_bytes < P4ML_PACKET_SIZE) 42 | cwnd_bytes = P4ML_PACKET_SIZE; 43 | if (cwnd_bytes > MAX_BYTES) 44 | cwnd_bytes = MAX_BYTES; 45 | if (cwnd_bytes > P4ML_PACKET_SIZE) 46 | cwnd_bytes = (cwnd_bytes / P4ML_PACKET_SIZE) * P4ML_PACKET_SIZE; 47 | return cwnd_bytes / P4ML_PACKET_SIZE; 48 | } 49 | 50 | private: 51 | uint64_t cwnd_bytes; 52 | }; 53 | 54 | #endif -------------------------------------------------------------------------------- /common/HashTable.cc: -------------------------------------------------------------------------------- 1 | #include "HashTable.h" 2 | #define MAX_BYTES 100 * P4ML_PACKET_SIZE 3 | 4 | HashTable::HashTable(int size) 5 | { 6 | used_size = size; 7 | hash_map = new uint16_t[size]; 8 | memset(isAlreadyDeclare, 0, sizeof(bool) * size); 9 | memset(predefine_agtr_list, 0, sizeof(bool) * size); 10 | for (int i = 0; i < size; i++) { 11 | predefine_agtr_list[i] = i; 12 | // printf("[%d] %d ", i, predefine_agtr_list[i]); 13 | } 14 | int random_seed = rand(); 15 | std::shuffle(predefine_agtr_list, predefine_agtr_list + size, std::default_random_engine(random_seed)); 16 | 17 | // for (int i = 0; i < size; i++) { 18 | 19 | // printf("[%d] %d ", i, predefine_agtr_list[i]); 20 | // } 21 | hash_pos = 0; 22 | } 23 | 24 | void HashTable::HashNew_linear(int index) 25 | { 26 | // Guarantee non-repeat element generated 27 | uint16_t new_value; 28 | do { 29 | new_value = hash_function(); 30 | } while (isAlreadyDeclare[new_value]); 31 | 32 | hash_map[index] = new_value; 33 | isAlreadyDeclare[new_value] = true; 34 | } 35 | 36 | int HashTable::HashNew_predefine() 37 | { 38 | if (hash_pos >= used_size) { 39 | return -1; 40 | } 41 | 42 | // Get AGTR from predefined hash 43 | while (hash_pos < used_size) { 44 | int new_agtr = predefine_agtr_list[hash_pos]; 45 | if (isAlreadyDeclare[new_agtr]) { 46 | hash_pos++; 47 | } else { 48 | hash_pos++; 49 | isAlreadyDeclare[new_agtr] = true; 50 | return new_agtr; 51 | } 52 | } 53 | 54 | return -1; 55 | } 56 | 57 | int HashTable::HashNew_crc(uint16_t appID, uint16_t index) 58 | { 59 | // Guarantee non-repeat element generated 60 | 
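// The 6-byte CRC input below packs (appID, index) plus a 16-bit probe counter
// in bytes 4-5; the loop recomputes crc32_le(...) % used_size and bumps the
// counter until it lands on an aggregator slot not yet marked in isAlreadyDeclare[].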
uint8_t crc_input[] = {(uint8_t)(appID & 0xff), (uint8_t)(appID >> 8), (uint8_t)(index & 0xff), (uint8_t)(index >> 8), 0, 0}; 61 | 62 | uint16_t new_value; 63 | uint8_t salt = 0; 64 | do { 65 | new_value = crc32_le(0xffffffff, crc_input, 6); 66 | new_value %= used_size; 67 | crc_input[4]++; 68 | if (crc_input[4] == 255) { 69 | crc_input[4] = 0; 70 | crc_input[5]++; 71 | } 72 | } while (isAlreadyDeclare[new_value]); 73 | hash_map[index] = new_value; 74 | isAlreadyDeclare[new_value] = true; 75 | return new_value; 76 | } 77 | 78 | void HashTable::HashNew_separate(uint16_t appID, uint16_t index) 79 | { 80 | int real_index = ((appID - 1) * 2000) + index; 81 | hash_map[index] = real_index; 82 | isAlreadyDeclare[real_index] = true; 83 | } 84 | 85 | uint16_t HashTable::hash_function() 86 | { 87 | return hash_pos++; 88 | } 89 | 90 | uint32_t HashTable::crc32_le(uint32_t crc, unsigned char const* p, size_t len) 91 | { 92 | while (len--) { 93 | crc ^= *p++; 94 | for (int i = 0; i < 8; i++) 95 | crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 96 | } 97 | return ~crc; 98 | } 99 | -------------------------------------------------------------------------------- /common/HashTable.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHTABLE_H 2 | #define HASHTABLE_H 3 | #include 4 | #include "packet.h" 5 | #include "utils.h" 6 | #define CRCPOLY_LE 0xedb88320 7 | 8 | class HashTable { 9 | 10 | public: 11 | HashTable(int size); 12 | void HashNew_linear(int index); 13 | int HashNew_crc(uint16_t appID, uint16_t index); 14 | int HashNew_predefine(); 15 | void HashNew_separate(uint16_t appID, uint16_t index); 16 | uint16_t* hash_map; 17 | bool isAlreadyDeclare[MAX_AGTR_COUNT]; 18 | 19 | private: 20 | int used_size; 21 | uint32_t crc32_le(uint32_t crc, unsigned char const* p, size_t len); 22 | int predefine_agtr_list[MAX_AGTR_COUNT]; 23 | 24 | // These for predefine Hash 25 | 26 | // These two for Linear Hash 27 | uint16_t hash_function(); 28 | uint16_t hash_pos; 29 | 30 | }; 31 | 32 | #endif -------------------------------------------------------------------------------- /common/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | template ThreadPool(size_t, F callback); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | template 35 | inline ThreadPool::ThreadPool(size_t threads, F callback) 36 | : stop(false) 37 | { 38 | for(size_t i = 0;i task; 45 | 46 | { 47 | std::unique_lock lock(this->queue_mutex); 48 | this->condition.wait(lock, 49 | [this]{ return this->stop || !this->tasks.empty(); }); 50 | if(this->stop && this->tasks.empty()) 51 | return; 52 | task = std::move(this->tasks.front()); 53 | this->tasks.pop(); 54 | } 55 | 56 | task(); 57 | callback(); 58 | } 59 | } 60 | ); 61 | } 62 | 63 | // add new work item to the pool 64 | template 65 | auto ThreadPool::enqueue(F&& f, Args&&... 
args) 66 | -> std::future::type> 67 | { 68 | using return_type = typename std::result_of::type; 69 | 70 | auto task = std::make_shared< std::packaged_task >( 71 | std::bind(std::forward(f), std::forward(args)...) 72 | ); 73 | 74 | std::future res = task->get_future(); 75 | { 76 | std::unique_lock lock(queue_mutex); 77 | 78 | // don't allow enqueueing after stopping the pool 79 | if(stop) 80 | throw std::runtime_error("enqueue on stopped ThreadPool"); 81 | 82 | tasks.emplace([task](){ (*task)(); }); 83 | } 84 | condition.notify_one(); 85 | return res; 86 | } 87 | 88 | // the destructor joins all threads 89 | inline ThreadPool::~ThreadPool() 90 | { 91 | { 92 | std::unique_lock lock(queue_mutex); 93 | stop = true; 94 | } 95 | condition.notify_all(); 96 | for(std::thread &worker: workers) 97 | worker.join(); 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /common/dma_common.cc: -------------------------------------------------------------------------------- 1 | #define __USE_GNU 2 | 3 | #include "dma_common.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | std::mutex ___print_mutex; 19 | int my_send_queue_length = 2048; 20 | int my_recv_queue_length = my_send_queue_length * 8; 21 | 22 | unsigned char PS_FILTER_TEMPLATE_R[] = { 0x05, 0x04, 0x03, 0x02, 0x01, 0xFF }; 23 | unsigned char WORKER_FILTER_TEMPLATE_R[] = { 0x77, 0x77, 0x77, 0x77, 0x77, 0xFF }; 24 | 25 | DMAcontext* DMA_create(ibv_device* ib_dev, int thread_id, bool isPS) 26 | { 27 | 28 | ibv_context* context = ibv_open_device(ib_dev); 29 | if (!context) { 30 | fprintf(stderr, "Couldn't get context for %s\n", 31 | ibv_get_device_name(ib_dev)); 32 | exit(1); 33 | } 34 | ibv_pd* pd = ibv_alloc_pd(context); 35 | if (!pd) { 36 | fprintf(stderr, "Couldn't allocate PD\n"); 37 | exit(1); 38 | } 39 | 40 | struct ibv_cq* rec_cq = ibv_create_cq(context, my_recv_queue_length + 1, NULL, NULL, 0); 41 | if (!rec_cq) { 42 | fprintf(stderr, "Couldn't create CQ %d\n", errno); 43 | exit(1); 44 | } 45 | 46 | struct ibv_cq* snd_cq = ibv_create_cq(context, my_send_queue_length + 1, NULL, NULL, 0); 47 | if (!snd_cq) { 48 | fprintf(stderr, "Couldn't create CQ %d\n", errno); 49 | exit(1); 50 | } 51 | 52 | struct ibv_qp* qp; 53 | struct ibv_exp_qp_init_attr* qp_init_attr = (struct ibv_exp_qp_init_attr*)malloc(sizeof(struct ibv_exp_qp_init_attr)); 54 | 55 | memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 56 | qp_init_attr->comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER | IBV_EXP_QP_INIT_ATTR_INL_RECV; 57 | qp_init_attr->send_cq = snd_cq; 58 | qp_init_attr->recv_cq = rec_cq; 59 | qp_init_attr->qp_type = IBV_QPT_RAW_PACKET; 60 | 61 | qp_init_attr->pd = pd; 62 | qp_init_attr->cap.max_send_wr = my_send_queue_length + 1; 63 | qp_init_attr->cap.max_recv_wr = my_recv_queue_length + 1; 64 | qp_init_attr->cap.max_inline_data = 512; 65 | qp_init_attr->cap.max_send_sge = 1; 66 | qp_init_attr->cap.max_recv_sge = 1; 67 | qp_init_attr->max_tso_header = IP_ETH_UDP_HEADER_SIZE; 68 | qp_init_attr->max_inl_recv = 512; 69 | 70 | qp = ibv_exp_create_qp(context, qp_init_attr); 71 | //qp = ibv_create_qp(pd, qp_init_attr); 72 | if (!qp) { 73 | fprintf(stderr, "Couldn't create RSS QP\n"); 74 | exit(1); 75 | } 76 | 77 | struct ibv_qp_attr qp_attr; 78 | int qp_flags; 79 | int ret; 80 | memset(&qp_attr, 0, sizeof(qp_attr)); 81 | qp_flags = IBV_QP_STATE | IBV_QP_PORT; 82 | 
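// Bring-up of the raw-packet data QP follows the usual verbs state machine:
// the three ibv_modify_qp() calls below move it RESET -> INIT -> RTR -> RTS,
// after which sends can be posted on qp.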
qp_attr.qp_state = IBV_QPS_INIT; 83 | qp_attr.port_num = 1; 84 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 85 | if (ret < 0) { 86 | fprintf(stderr, "failed modify qp to init\n"); 87 | exit(1); 88 | } 89 | memset(&qp_attr, 0, sizeof(qp_attr)); 90 | 91 | /* a. Move ring state to ready to receive, this is needed to be able to move ring to ready to send even if receive queue is not enabled */ 92 | 93 | qp_flags = IBV_QP_STATE; 94 | qp_attr.qp_state = IBV_QPS_RTR; 95 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 96 | if (ret < 0) { 97 | fprintf(stderr, "failed modify qp to receive\n"); 98 | exit(1); 99 | } 100 | 101 | /* b. Move the ring to ready to send */ 102 | 103 | qp_flags = IBV_QP_STATE; 104 | qp_attr.qp_state = IBV_QPS_RTS; 105 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 106 | if (ret < 0) { 107 | fprintf(stderr, "failed modify qp to send\n"); 108 | exit(1); 109 | } 110 | 111 | int send_buf_size = P4ML_PACKET_SIZE * my_send_queue_length; 112 | 113 | void* send_buf; 114 | 115 | //send_buf = malloc(send_buf_size); 116 | // send_buf = alloc_raw_pages(send_buf_size / EACH_HUGEPAGE_SIZE + 1, EACH_HUGEPAGE_SIZE); 117 | ib_malloc(&send_buf, send_buf_size); 118 | if (!send_buf) { 119 | fprintf(stderr, "Coudln't allocate send memory\n"); 120 | exit(1); 121 | } 122 | 123 | struct ibv_mr* send_mr; 124 | send_mr = ibv_reg_mr(pd, send_buf, send_buf_size, IBV_ACCESS_LOCAL_WRITE); 125 | if (!send_mr) { 126 | fprintf(stderr, "Couldn't register recv mr\n"); 127 | exit(1); 128 | } 129 | 130 | // Init CQ. Its size MUST be one so that we get two CQEs in mlx5. 131 | struct ibv_exp_cq_init_attr cq_init_attr; 132 | memset(&cq_init_attr, 0, sizeof(cq_init_attr)); 133 | struct ibv_cq* mp_recv_cq = ibv_exp_create_cq(context, kAppRecvCQDepth / 2, nullptr, nullptr, 0, &cq_init_attr); 134 | assert(mp_recv_cq != nullptr); 135 | 136 | // Modify the RECV CQ to ignore overrun 137 | struct ibv_exp_cq_attr cq_attr; 138 | memset(&cq_attr, 0, sizeof(cq_attr)); 139 | cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS; 140 | cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN; 141 | rt_assert(ibv_exp_modify_cq(mp_recv_cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS) == 0); 142 | 143 | struct ibv_exp_wq_init_attr wq_init_attr; 144 | memset(&wq_init_attr, 0, sizeof(wq_init_attr)); 145 | 146 | wq_init_attr.wq_type = IBV_EXP_WQT_RQ; 147 | wq_init_attr.max_recv_wr = kAppRQDepth; 148 | wq_init_attr.max_recv_sge = 1; 149 | wq_init_attr.pd = pd; 150 | wq_init_attr.cq = mp_recv_cq; 151 | 152 | wq_init_attr.comp_mask |= IBV_EXP_CREATE_WQ_MP_RQ; 153 | wq_init_attr.mp_rq.use_shift = IBV_EXP_MP_RQ_NO_SHIFT; 154 | wq_init_attr.mp_rq.single_wqe_log_num_of_strides = kAppLogNumStrides; 155 | wq_init_attr.mp_rq.single_stride_log_num_of_bytes = kAppLogStrideBytes; 156 | struct ibv_exp_wq* mp_wq = ibv_exp_create_wq(context, &wq_init_attr); 157 | assert(mp_wq != nullptr); 158 | 159 | // Change WQ to ready state 160 | struct ibv_exp_wq_attr wq_attr; 161 | memset(&wq_attr, 0, sizeof(wq_attr)); 162 | wq_attr.attr_mask = IBV_EXP_WQ_ATTR_STATE; 163 | wq_attr.wq_state = IBV_EXP_WQS_RDY; 164 | rt_assert(ibv_exp_modify_wq(mp_wq, &wq_attr) == 0); 165 | 166 | // Get the RQ burst function 167 | enum ibv_exp_query_intf_status intf_status = IBV_EXP_INTF_STAT_OK; 168 | struct ibv_exp_query_intf_params query_intf_params; 169 | memset(&query_intf_params, 0, sizeof(query_intf_params)); 170 | query_intf_params.intf_scope = IBV_EXP_INTF_GLOBAL; 171 | query_intf_params.intf = IBV_EXP_INTF_WQ; 172 | query_intf_params.obj = mp_wq; 173 | struct ibv_exp_wq_family* mp_wq_family = 
reinterpret_cast( 174 | ibv_exp_query_intf(context, &query_intf_params, &intf_status)); 175 | assert(mp_wq_family != nullptr); 176 | 177 | // Create indirect table 178 | struct ibv_exp_rwq_ind_table_init_attr rwq_ind_table_init_attr; 179 | memset(&rwq_ind_table_init_attr, 0, sizeof(rwq_ind_table_init_attr)); 180 | rwq_ind_table_init_attr.pd = pd; 181 | rwq_ind_table_init_attr.log_ind_tbl_size = 0; // Ignore hash 182 | rwq_ind_table_init_attr.ind_tbl = &mp_wq; // Pointer to RECV work queue 183 | rwq_ind_table_init_attr.comp_mask = 0; 184 | struct ibv_exp_rwq_ind_table* mp_ind_tbl = ibv_exp_create_rwq_ind_table(context, &rwq_ind_table_init_attr); 185 | assert(mp_ind_tbl != nullptr); 186 | 187 | // Create rx_hash_conf and indirection table for the QP 188 | uint8_t toeplitz_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 189 | 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 190 | 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 191 | 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 192 | 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; 193 | const int TOEPLITZ_RX_HASH_KEY_LEN = sizeof(toeplitz_key) / sizeof(toeplitz_key[0]); 194 | 195 | struct ibv_exp_rx_hash_conf rx_hash_conf; 196 | memset(&rx_hash_conf, 0, sizeof(rx_hash_conf)); 197 | rx_hash_conf.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ; 198 | rx_hash_conf.rx_hash_key_len = TOEPLITZ_RX_HASH_KEY_LEN; 199 | rx_hash_conf.rx_hash_key = toeplitz_key; 200 | rx_hash_conf.rx_hash_fields_mask = IBV_EXP_RX_HASH_DST_PORT_UDP; 201 | rx_hash_conf.rwq_ind_tbl = mp_ind_tbl; 202 | 203 | struct ibv_exp_qp_init_attr mp_qp_init_attr; 204 | memset(&mp_qp_init_attr, 0, sizeof(mp_qp_init_attr)); 205 | mp_qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_RX_HASH; 206 | mp_qp_init_attr.rx_hash_conf = &rx_hash_conf; 207 | mp_qp_init_attr.pd = pd; 208 | mp_qp_init_attr.qp_type = IBV_QPT_RAW_PACKET; 209 | 210 | // Create the QP 211 | struct ibv_qp* mp_recv_qp = ibv_exp_create_qp(context, &mp_qp_init_attr); 212 | assert(mp_recv_qp != nullptr); 213 | 214 | size_t tx_ring_size = P4ML_LAYER_SIZE * kAppMaxPostlist; 215 | uint8_t* mp_send_ring; 216 | ib_malloc((void **)&mp_send_ring, tx_ring_size); 217 | rt_assert(mp_send_ring != nullptr); 218 | memset(mp_send_ring, 0, tx_ring_size); 219 | 220 | struct ibv_mr* mp_send_mr = ibv_reg_mr(pd, mp_send_ring, tx_ring_size, IBV_ACCESS_LOCAL_WRITE); 221 | rt_assert(mp_send_mr != nullptr); 222 | 223 | // Register RX ring memory 224 | uint8_t* mp_recv_ring; 225 | ib_malloc((void **)&mp_recv_ring, kAppRingSize); 226 | rt_assert(mp_recv_ring != nullptr); 227 | memset(mp_recv_ring, 0, kAppRingSize); 228 | 229 | struct ibv_mr* mp_mr = ibv_reg_mr(pd, mp_recv_ring, kAppRingSize, IBV_ACCESS_LOCAL_WRITE); 230 | rt_assert(mp_mr != nullptr); 231 | ///////////////////////////////////////////////////////////////////////////////////// 232 | // install_flow_rule(mp_recv_qp, 30720 + thread_id); 233 | install_flow_rule(mp_recv_qp, thread_id, isPS); 234 | // This cast works for mlx5 where ibv_cq is the first member of mlx5_cq. 235 | auto* _mlx5_cq = reinterpret_cast(mp_recv_cq); 236 | rt_assert(kAppRecvCQDepth == std::pow(2, _mlx5_cq->cq_log_size)); 237 | rt_assert(_mlx5_cq->buf_a.buf != nullptr); 238 | 239 | auto* mp_cqe_arr = reinterpret_cast(_mlx5_cq->buf_a.buf); 240 | 241 | // Initialize the CQEs as if we received the last (kAppRecvCQDepth) packets 242 | // in the CQE cycle. 
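// Each snapshot maps to a position in a cycle of kAppCQESnapshotCycle
// (= 65536 * kAppStridesPerWQE) strides: cycle_idx = wqe_id * kAppStridesPerWQE + wqe_counter
// (see cqe_snapshot_t::get_cqe_snapshot_cycle_idx() in dma_common.h). Seeding the
// CQEs below leaves the last one at cycle index kAppCQESnapshotCycle - 1, so
// get_cycle_delta() measures forward progress from a well-defined starting point
// on the first real completion.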
243 | static_assert(kAppStridesPerWQE >= kAppRecvCQDepth, ""); 244 | for (size_t i = 0; i < kAppRecvCQDepth; i++) { 245 | mp_cqe_arr[i].wqe_id = htons(std::numeric_limits::max()); 246 | // Last CQE gets 247 | // * wqe_counter = (kAppStridesPerWQE - 1) 248 | // * snapshot_cycle_idx = (kAppCQESnapshotCycle - 1) 249 | mp_cqe_arr[i].wqe_counter = htons(kAppStridesPerWQE - (kAppRecvCQDepth - i)); 250 | 251 | cqe_snapshot_t snapshot; 252 | snapshot_cqe(&mp_cqe_arr[i], snapshot); 253 | rt_assert(snapshot.get_cqe_snapshot_cycle_idx() == kAppCQESnapshotCycle - (kAppRecvCQDepth - i)); 254 | } 255 | 256 | // The multi-packet RECVs. This must be done after we've initialized the CQE. 257 | struct ibv_sge* mp_sge = reinterpret_cast(malloc(sizeof(struct ibv_sge) * kAppRQDepth)); 258 | for (size_t i = 0; i < kAppRQDepth; i++) { 259 | size_t mpwqe_offset = i * (kAppRingMbufSize * kAppStridesPerWQE); 260 | mp_sge[i].addr = reinterpret_cast(&mp_recv_ring[mpwqe_offset]); 261 | mp_sge[i].lkey = mp_mr->lkey; 262 | mp_sge[i].length = kAppRingMbufSize * kAppStridesPerWQE; //kAppRingSize; 263 | mp_wq_family->recv_burst(mp_wq, &mp_sge[i], 1); 264 | } 265 | 266 | printf("[Thread %d] Finish created QP - ", thread_id); 267 | printf("kAppRingMbufSize=%lu, kAppStridesPerWQE=%lu, kAppRingSize=%lu, kAppRQDepth=%lu\n", kAppRingMbufSize, kAppStridesPerWQE, kAppRingSize, kAppRQDepth); 268 | auto* cqe_arr = mp_cqe_arr; 269 | cqe_snapshot_t prev_snapshot; 270 | snapshot_cqe(&cqe_arr[kAppRecvCQDepth - 1], prev_snapshot); 271 | 272 | return new DMAcontext{ 273 | .pd = pd, 274 | .ctx = context, 275 | .receive_cq = rec_cq, 276 | .send_cq = snd_cq, 277 | .send_mr = send_mr, 278 | .send_region = send_buf, 279 | .data_qp = qp, 280 | 281 | .mp_recv_qp = mp_recv_qp, 282 | .mp_recv_cq = mp_recv_cq, 283 | .mp_wq = mp_wq, 284 | .mp_wq_family = mp_wq_family, 285 | .mp_ind_tbl = mp_ind_tbl, 286 | .mp_cqe_arr = mp_cqe_arr, 287 | .mp_sge = mp_sge, 288 | .mp_recv_ring = mp_recv_ring, 289 | .mp_send_ring = mp_send_ring, 290 | .mp_send_mr = mp_send_mr, 291 | 292 | .id = thread_id, 293 | .total_received = 0, 294 | .total_sent = 0, 295 | .my_send_queue_length = my_send_queue_length, 296 | .my_recv_queue_length = my_recv_queue_length, 297 | 298 | .ring_head = 0, 299 | .nb_rx_rolling = 0, 300 | .sge_idx = 0, 301 | .cqe_idx = 0, 302 | .prev_snapshot = prev_snapshot, 303 | .isPS = isPS, 304 | .isMarkTimeStamp = false, 305 | }; 306 | } 307 | 308 | void send_packet(DMAcontext* dma_context, int chunk_size, uint64_t offset) 309 | { 310 | int ret; 311 | 312 | struct ibv_sge sg; 313 | struct ibv_exp_send_wr wr, *bad_wr; 314 | // struct ibv_send_wr wr; 315 | // struct ibv_send_wr *bad_wr; 316 | 317 | memset(&sg, 0, sizeof(sg)); 318 | sg.addr = (uintptr_t)((char*)dma_context->send_region + offset * P4ML_LAYER_SIZE); 319 | // printf("%d\n", sg.addr); 320 | sg.length = chunk_size; 321 | sg.lkey = dma_context->send_mr->lkey; 322 | 323 | memset(&wr, 0, sizeof(wr)); 324 | wr.wr_id = 0; 325 | wr.sg_list = &sg; 326 | wr.num_sge = 1; 327 | // wr.opcode = IBV_WR_SEND; 328 | wr.exp_opcode = IBV_EXP_WR_TSO; 329 | wr.tso.mss = P4ML_LAYER_SIZE; // Maximum Segment Size example 330 | wr.tso.hdr_sz = IP_ETH_UDP_HEADER_SIZE; // ETH/IPv4/TCP header example 331 | char hdr[IP_ETH_UDP_HEADER_SIZE]; // ETH/IPv4/TCP header example 332 | if (dma_context->isPS) 333 | memcpy(hdr, PS_IP_ETH_UDP_HEADER, IP_ETH_UDP_HEADER_SIZE); // Assuming that the header buffer was define before. 
334 | else 335 | memcpy(hdr, WORKER_IP_ETH_UDP_HEADER, IP_ETH_UDP_HEADER_SIZE); // Assuming that the header buffer was define before. 336 | 337 | hdr[5] = dma_context->id; 338 | // hdr[37] = dma_context->id; 339 | wr.tso.hdr = hdr; // There is no need to use malloc operation in this case, local definition of hdr is ok. 340 | //wr.exp_send_flags = IBV_SEND_INLINE; 341 | wr.exp_send_flags |= IBV_SEND_SIGNALED; 342 | 343 | if (DEBUG_PRINT_ALL_SENDING_PACKET) 344 | for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) 345 | p4ml_header_print_h((agghdr*)((char *)sg.addr + i * P4ML_LAYER_SIZE), "SEND"); 346 | 347 | // mark first time sending timestamp 348 | if (dma_context->isMarkTimeStamp) { 349 | std::chrono::high_resolution_clock::time_point current_time = std::chrono::high_resolution_clock::now(); 350 | for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) { 351 | agghdr* p4ml_header = (agghdr*)((char*)sg.addr + i * P4ML_LAYER_SIZE); 352 | if (!dma_context->isSent[ntohs(p4ml_header->seq_num)]) { 353 | dma_context->isSent[ntohs(p4ml_header->seq_num)] = true; 354 | dma_context->first_send_time[ntohs(p4ml_header->seq_num)] = current_time; 355 | } else { 356 | /* Resend may trigger */ 357 | } 358 | } 359 | } 360 | 361 | // we dont need to wait cq cause received represent sent 362 | ret = ibv_exp_post_send(dma_context->data_qp, &wr, &bad_wr); 363 | if (ret < 0) { 364 | fprintf(stderr, "failed in post send\n"); 365 | exit(1); 366 | } 367 | 368 | struct ibv_wc wc_send_cq[POLLING_SIZE]; 369 | ibv_poll_cq(dma_context->send_cq, POLLING_SIZE, wc_send_cq); 370 | if (DEBUG_CHECK_SEND_RECEIVE_TOTAL) 371 | dma_context->total_sent += chunk_size / P4ML_LAYER_SIZE; 372 | } 373 | 374 | size_t receive_packet(DMAcontext *dma_context, cqe_snapshot_t* new_snapshot) 375 | { 376 | // cqe_snapshot_t new_snapshot; 377 | // cur_snapshot = new_snapshot; 378 | snapshot_cqe(&dma_context->mp_cqe_arr[dma_context->cqe_idx], *new_snapshot); 379 | const size_t delta = get_cycle_delta(dma_context->prev_snapshot, *new_snapshot); 380 | 381 | if (!(delta == 0 || delta >= kAppNumRingEntries)) { 382 | if (DEBUG_CHECK_SEND_RECEIVE_TOTAL) 383 | dma_context->total_received += delta; 384 | return delta; 385 | } 386 | else 387 | return 0; 388 | // return delta; 389 | } 390 | 391 | void dma_postback(DMAcontext *dma_context) 392 | { 393 | dma_context->ring_head = (dma_context->ring_head + 1) % kAppNumRingEntries; 394 | dma_context->nb_rx_rolling++; 395 | if (dma_context->nb_rx_rolling == kAppStridesPerWQE) 396 | { 397 | dma_context->nb_rx_rolling = 0; 398 | int ret = dma_context->mp_wq_family->recv_burst(dma_context->mp_wq, &dma_context->mp_sge[dma_context->sge_idx], 1); 399 | rt_assert(ret == 0); 400 | dma_context->sge_idx = (dma_context->sge_idx + 1) % kAppRQDepth; 401 | } 402 | } 403 | 404 | void dma_update_snapshot(DMAcontext *dma_context, cqe_snapshot_t new_snapshot) 405 | { 406 | dma_context->prev_snapshot = new_snapshot; 407 | dma_context->cqe_idx = (dma_context->cqe_idx + 1) % kAppRecvCQDepth; 408 | } 409 | 410 | const char* ibv_wc_opcode_str(enum ibv_wc_opcode opcode) 411 | { 412 | switch (opcode) { 413 | case IBV_EXP_WC_SEND: 414 | return "IBV_WC_SEND"; 415 | case IBV_EXP_WC_RDMA_WRITE: 416 | return "IBV_WC_RDMA_WRITE"; 417 | case IBV_EXP_WC_RDMA_READ: 418 | return "IBV_WC_RDMA_READ"; 419 | case IBV_WC_COMP_SWAP: 420 | return "IBV_WC_COMP_SWAP"; 421 | case IBV_WC_FETCH_ADD: 422 | return "IBV_WC_FETCH_ADD"; 423 | case IBV_WC_BIND_MW: 424 | return "IBV_WC_BIND_MW"; 425 | /* receive-side: inbound completion */ 426 | case 
IBV_EXP_WC_RECV: 427 | return "IBV_WC_RECV"; 428 | case IBV_EXP_WC_RECV_RDMA_WITH_IMM: 429 | return "IBV_WC_RECV_RDMA_WITH_IMM"; 430 | default: 431 | return "IBV_WC_UNKNOWN"; 432 | } 433 | } 434 | 435 | // Install a flow rule 436 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id, bool isPS) 437 | { 438 | static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext); 439 | 440 | uint8_t* flow_rule = new uint8_t[rule_sz]; 441 | memset(flow_rule, 0, rule_sz); 442 | uint8_t* buf = flow_rule; 443 | 444 | auto* flow_attr = reinterpret_cast(flow_rule); 445 | flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL; 446 | flow_attr->size = rule_sz; 447 | flow_attr->priority = 0; 448 | flow_attr->num_of_specs = 1; 449 | flow_attr->port = 1; 450 | flow_attr->flags = 0; 451 | flow_attr->reserved = 0; 452 | buf += sizeof(struct ibv_exp_flow_attr); 453 | 454 | // Ethernet - all wildcard 455 | auto* eth_spec = reinterpret_cast(buf); 456 | eth_spec->type = IBV_EXP_FLOW_SPEC_ETH; 457 | eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth); 458 | buf += sizeof(struct ibv_exp_flow_spec_eth); 459 | 460 | const unsigned char R_SRC_MAC[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 461 | unsigned char R_DST_MAC[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 462 | if (isPS) 463 | memcpy(R_DST_MAC, PS_FILTER_TEMPLATE_R, sizeof(R_DST_MAC)); 464 | else 465 | memcpy(R_DST_MAC, WORKER_FILTER_TEMPLATE_R, sizeof(R_DST_MAC)); 466 | 467 | R_DST_MAC[5] = thread_id; 468 | 469 | const unsigned char R_SRC_MAC_MASK[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 470 | const unsigned char R_DST_MAC_MASK[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 471 | memcpy(eth_spec->val.dst_mac, R_DST_MAC, sizeof(R_DST_MAC)); 472 | memcpy(eth_spec->val.src_mac, R_SRC_MAC, sizeof(R_SRC_MAC)); 473 | memcpy(eth_spec->mask.dst_mac, R_DST_MAC_MASK, sizeof(R_DST_MAC_MASK)); 474 | memcpy(eth_spec->mask.src_mac, R_SRC_MAC_MASK, sizeof(R_SRC_MAC_MASK)); 475 | eth_spec->val.vlan_tag = 0; 476 | eth_spec->mask.ether_type = 0; 477 | 478 | rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr); 479 | } 480 | 481 | // Install a UDP destination port--based flow rule 482 | void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port) 483 | { 484 | static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext) + sizeof(ibv_exp_flow_spec_tcp_udp); 485 | 486 | uint8_t* flow_rule = new uint8_t[rule_sz]; 487 | memset(flow_rule, 0, rule_sz); 488 | uint8_t* buf = flow_rule; 489 | 490 | auto* flow_attr = reinterpret_cast(flow_rule); 491 | flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL; 492 | flow_attr->size = rule_sz; 493 | flow_attr->priority = 0; 494 | flow_attr->num_of_specs = 1; 495 | flow_attr->port = 1; 496 | flow_attr->flags = 0; 497 | flow_attr->reserved = 0; 498 | buf += sizeof(struct ibv_exp_flow_attr); 499 | 500 | // Ethernet - all wildcard 501 | auto* eth_spec = reinterpret_cast(buf); 502 | eth_spec->type = IBV_EXP_FLOW_SPEC_ETH; 503 | eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth); 504 | buf += sizeof(struct ibv_exp_flow_spec_eth); 505 | 506 | // IPv4 - all wildcard 507 | auto* spec_ipv4 = reinterpret_cast(buf); 508 | spec_ipv4->type = IBV_EXP_FLOW_SPEC_IPV4_EXT; 509 | spec_ipv4->size = sizeof(struct ibv_exp_flow_spec_ipv4_ext); 510 | buf += sizeof(struct ibv_exp_flow_spec_ipv4_ext); 511 | 512 | // UDP - match dst port 513 | auto* udp_spec = reinterpret_cast(buf); 514 | udp_spec->type = IBV_EXP_FLOW_SPEC_UDP; 515 | 
udp_spec->size = sizeof(struct ibv_exp_flow_spec_tcp_udp); 516 | udp_spec->val.dst_port = htons(dst_port); 517 | udp_spec->mask.dst_port = 0xffffu; 518 | udp_spec->mask.dst_port = 0; 519 | 520 | rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr); 521 | } 522 | 523 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot) 524 | { 525 | while (true) { 526 | uint16_t wqe_id_0 = cqe->wqe_id; 527 | uint16_t wqe_counter_0 = cqe->wqe_counter; 528 | memory_barrier(); 529 | uint16_t wqe_id_1 = cqe->wqe_id; 530 | 531 | if (likely(wqe_id_0 == wqe_id_1)) { 532 | cqe_snapshot.wqe_id = ntohs(wqe_id_0); 533 | cqe_snapshot.wqe_counter = ntohs(wqe_counter_0); 534 | return; 535 | } 536 | } 537 | } 538 | 539 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur) 540 | { 541 | size_t prev_idx = prev.get_cqe_snapshot_cycle_idx(); 542 | size_t cur_idx = cur.get_cqe_snapshot_cycle_idx(); 543 | assert(prev_idx < kAppCQESnapshotCycle && cur_idx < kAppCQESnapshotCycle); 544 | 545 | return ((cur_idx + kAppCQESnapshotCycle) - prev_idx) % kAppCQESnapshotCycle; 546 | } 547 | -------------------------------------------------------------------------------- /common/dma_common.h: -------------------------------------------------------------------------------- 1 | #ifndef DMA_COMMON_H 2 | #define DMA_COMMON_H 3 | 4 | #include "mlx5_defs.h" 5 | #include "packet.h" 6 | #include "utils.h" 7 | #include 8 | #include 9 | #include 10 | #include //ifreq 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define POLLING_SIZE 400 25 | #define ENTRY_SIZE 256 /* maximum size of each buffer */ 26 | #define PORT_NUM 1 27 | 28 | #define DEBUG_PRINT_ALL_SENDING_PACKET false 29 | #define DEBUG_PRINT_ALL_RECEIVING_PACKET false 30 | 31 | #define DEBUG_CHECK_SEND_RECEIVE_TOTAL false 32 | 33 | static constexpr size_t kAppRecvCQDepth = 8; 34 | static constexpr size_t kAppRQDepth = 4; // Multi-packet RQ depth 35 | 36 | static constexpr size_t kAppLogNumStrides = 9; 37 | static constexpr size_t kAppLogStrideBytes = 9; 38 | static constexpr size_t kAppMaxPostlist = 512; 39 | 40 | static constexpr bool kAppVerbose = false; 41 | static constexpr bool kAppCheckContents = true; // Check buffer contents 42 | 43 | /// Size of one ring message buffer 44 | static constexpr size_t kAppRingMbufSize = (1ull << kAppLogStrideBytes); 45 | 46 | /// Number of strides in one multi-packet RECV WQE 47 | static constexpr size_t kAppStridesPerWQE = (1ull << kAppLogNumStrides); 48 | 49 | /// Packets after which the CQE snapshot cycles 50 | static constexpr size_t kAppCQESnapshotCycle = 65536 * kAppStridesPerWQE; 51 | 52 | /// Total number of entries in the RX ring 53 | static constexpr size_t kAppNumRingEntries = (kAppStridesPerWQE * kAppRQDepth); 54 | 55 | static constexpr size_t kAppRingSize = (kAppNumRingEntries * kAppRingMbufSize); 56 | 57 | /// A consistent snapshot of CQE fields in host endian format 58 | struct cqe_snapshot_t { 59 | uint16_t wqe_id; 60 | uint16_t wqe_counter; 61 | 62 | /// Return this packet's index in the CQE snapshot cycle 63 | size_t get_cqe_snapshot_cycle_idx() const 64 | { 65 | return wqe_id * kAppStridesPerWQE + wqe_counter; 66 | } 67 | 68 | std::string to_string() 69 | { 70 | std::ostringstream ret; 71 | ret << "[ID " << std::to_string(wqe_id) << ", counter " 72 | << std::to_string(wqe_counter) << "]"; 73 | return ret.str(); 74 | } 75 | }; 76 | 77 | struct DMAcontext 
{ 78 | struct ibv_pd* pd; 79 | struct ibv_context* ctx; 80 | struct ibv_cq* receive_cq; 81 | struct ibv_cq* send_cq; 82 | struct ibv_mr* send_mr; 83 | void* send_region; 84 | struct ibv_qp* data_qp; 85 | 86 | struct ibv_qp* mp_recv_qp; 87 | struct ibv_cq* mp_recv_cq; 88 | struct ibv_exp_wq* mp_wq; 89 | struct ibv_exp_wq_family* mp_wq_family; 90 | struct ibv_exp_rwq_ind_table* mp_ind_tbl; 91 | volatile mlx5_cqe64* mp_cqe_arr; 92 | struct ibv_sge* mp_sge; 93 | uint8_t* mp_recv_ring; 94 | uint8_t* mp_send_ring; 95 | struct ibv_mr* mp_send_mr; 96 | 97 | // for connection 98 | int id; 99 | int total_received; 100 | int total_sent; 101 | int my_send_queue_length; 102 | int my_recv_queue_length; 103 | 104 | size_t ring_head; 105 | size_t nb_rx_rolling; 106 | size_t sge_idx; 107 | size_t cqe_idx; 108 | 109 | cqe_snapshot_t prev_snapshot; 110 | 111 | bool isPS; 112 | 113 | // // For window adjustment 114 | bool isMarkTimeStamp; 115 | bool* isSent; 116 | std::chrono::high_resolution_clock::time_point* first_send_time; 117 | std::chrono::high_resolution_clock::time_point* first_receive_time; 118 | }; 119 | 120 | DMAcontext* DMA_create(ibv_device* ib_dev, int thread_id, bool isPS); 121 | const char* ibv_wc_opcode_str(enum ibv_wc_opcode opcode); 122 | void send_packet(DMAcontext* dma_context, int packet_size, uint64_t offset); 123 | size_t receive_packet(DMAcontext *dma_context, cqe_snapshot_t* new_snapshot); 124 | void dma_postback(DMAcontext *dma_context); 125 | void dma_update_snapshot(DMAcontext *dma_context, cqe_snapshot_t new_snapshot); 126 | void dma_context_print(DMAcontext* dma_context, const char* caption); 127 | 128 | // Install a UDP destination port--based flow rule 129 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id, bool isPS); 130 | void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port); 131 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot); 132 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur); 133 | #endif 134 | -------------------------------------------------------------------------------- /common/mlx5_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef MLX5_DEFS_H 2 | #define MLX5_DEFS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum mlx5_alloc_type { 10 | MLX5_ALLOC_TYPE_ANON, 11 | MLX5_ALLOC_TYPE_HUGE, 12 | MLX5_ALLOC_TYPE_CONTIG, 13 | MLX5_ALLOC_TYPE_PEER_DIRECT, 14 | MLX5_ALLOC_TYPE_PREFER_HUGE, 15 | MLX5_ALLOC_TYPE_PREFER_CONTIG, 16 | MLX5_ALLOC_TYPE_ALL 17 | }; 18 | 19 | enum mlx5_lock_type { 20 | MLX5_SPIN_LOCK = 0, 21 | MLX5_MUTEX = 1, 22 | }; 23 | 24 | enum mlx5_lock_state { MLX5_USE_LOCK, 25 | MLX5_LOCKED, 26 | MLX5_UNLOCKED }; 27 | 28 | struct mlx5_lock { 29 | pthread_mutex_t mutex; 30 | pthread_spinlock_t slock; 31 | enum mlx5_lock_state state; 32 | enum mlx5_lock_type type; 33 | }; 34 | 35 | struct mlx5_numa_req { 36 | int valid; 37 | int numa_id; 38 | }; 39 | 40 | struct mlx5_peer_direct_mem { 41 | uint32_t dir; 42 | uint64_t va_id; 43 | struct ibv_exp_peer_buf* pb; 44 | struct ibv_exp_peer_direct_attr* ctx; 45 | }; 46 | 47 | struct mlx5_buf { 48 | void* buf; 49 | size_t length; 50 | int base; 51 | struct mlx5_hugetlb_mem* hmem; 52 | struct mlx5_peer_direct_mem peer; 53 | enum mlx5_alloc_type type; 54 | struct mlx5_numa_req numa_req; 55 | int numa_alloc; 56 | }; 57 | 58 | struct mlx5_mini_cqe8 { 59 | union { 60 | uint32_t rx_hash_result; 61 | uint32_t checksum; 62 | struct { 63 | uint16_t wqe_counter; 64 | uint8_t 
s_wqe_opcode; 65 | uint8_t reserved; 66 | } s_wqe_info; 67 | }; 68 | uint32_t byte_cnt; 69 | }; 70 | 71 | enum { MLX5_MINI_ARR_SIZE = 8 }; 72 | 73 | struct mlx5_tm_cqe { 74 | uint32_t success; 75 | uint32_t hw_phase_cnt; 76 | uint8_t rsvd0[10]; 77 | }; 78 | 79 | struct mlx5_cqe64 { 80 | uint8_t rsvd0[2]; 81 | /* 82 | * wqe_id is valid only for 83 | * Striding RQ (Multi-Packet RQ). 84 | * It provides the WQE index inside the RQ. 85 | */ 86 | uint16_t wqe_id; 87 | uint8_t rsvd4[8]; 88 | uint32_t rx_hash_res; 89 | uint8_t rx_hash_type; 90 | uint8_t ml_path; 91 | uint8_t rsvd20[2]; 92 | uint16_t checksum; 93 | uint16_t slid; 94 | uint32_t flags_rqpn; 95 | uint8_t hds_ip_ext; 96 | uint8_t l4_hdr_type_etc; 97 | __be16 vlan_info; 98 | uint32_t srqn_uidx; 99 | uint32_t imm_inval_pkey; 100 | uint8_t app; 101 | uint8_t app_op; 102 | uint16_t app_info; 103 | uint32_t byte_cnt; 104 | __be64 timestamp; 105 | union { 106 | uint32_t sop_drop_qpn; 107 | struct { 108 | uint8_t sop; 109 | uint8_t qpn[3]; 110 | } sop_qpn; 111 | }; 112 | /* 113 | * In Striding RQ (Multi-Packet RQ) wqe_counter provides 114 | * the WQE stride index (to calc pointer to start of the message) 115 | */ 116 | uint16_t wqe_counter; 117 | uint8_t signature; 118 | uint8_t op_own; 119 | }; 120 | 121 | struct mlx5_cq { 122 | struct ibv_cq ibv_cq; 123 | uint32_t creation_flags; 124 | uint32_t pattern; 125 | struct mlx5_buf buf_a; 126 | struct mlx5_buf buf_b; 127 | struct mlx5_buf* active_buf; 128 | struct mlx5_buf* resize_buf; 129 | int resize_cqes; 130 | int active_cqes; 131 | struct mlx5_lock lock; 132 | uint32_t cqn; 133 | uint32_t cons_index; 134 | uint32_t wait_index; 135 | uint32_t wait_count; 136 | volatile uint32_t* dbrec; 137 | int arm_sn; 138 | int cqe_sz; 139 | int resize_cqe_sz; 140 | int stall_next_poll; 141 | int stall_enable; 142 | uint64_t stall_last_count; 143 | int stall_adaptive_enable; 144 | int stall_cycles; 145 | uint8_t model_flags; /* use mlx5_cq_model_flags */ 146 | uint16_t cqe_comp_max_num; 147 | uint8_t cq_log_size; 148 | /* Compressed CQE data */ 149 | struct mlx5_cqe64 next_decomp_cqe64; 150 | struct mlx5_resource* compressed_rsc; 151 | uint16_t compressed_left; 152 | uint16_t compressed_wqe_cnt; 153 | uint8_t compressed_req; 154 | uint8_t compressed_mp_rq; 155 | uint8_t mini_arr_idx; 156 | struct mlx5_mini_cqe8 mini_array[MLX5_MINI_ARR_SIZE]; 157 | /* peer-direct data */ 158 | int peer_enabled; 159 | struct ibv_exp_peer_direct_attr* peer_ctx; 160 | struct mlx5_buf peer_buf; 161 | struct mlx5_peek_entry** peer_peek_table; 162 | struct mlx5_peek_entry* peer_peek_free; 163 | }; 164 | 165 | #endif // MLX5_DEFS_H 166 | -------------------------------------------------------------------------------- /common/p4ml_struct.h: -------------------------------------------------------------------------------- 1 | #ifndef P4ML_STRUCT_H 2 | #define P4ML_STRUCT_H 3 | #include 4 | 5 | #include "packet.h" 6 | 7 | struct ThreadInfo 8 | { 9 | int thread_id; 10 | int agtr_start_pos; 11 | }; 12 | 13 | struct Job 14 | { 15 | uint64_t key; 16 | float *float_data; 17 | int32_t *int_data; 18 | uint32_t len; 19 | int cmd; 20 | }; 21 | 22 | struct AppInfo 23 | { 24 | uint32_t host; 25 | uint16_t appID; 26 | uint8_t num_worker; 27 | uint8_t num_PS; 28 | }; 29 | 30 | #endif -------------------------------------------------------------------------------- /common/packet.h: -------------------------------------------------------------------------------- 1 | #ifndef PACKET_P4ML_H 2 | #define PACKET_P4ML_H 3 | #include 4 | #include 5 | 
#include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "utils.h" 15 | #include "p4ml_struct.h" 16 | 17 | #define PS_FILTER_TEMPLATE 0x05, 0x04, 0x03, 0x02, 0x01, 0xFF 18 | #define WORKER_FILTER_TEMPLATE 0x77, 0x77, 0x77, 0x77, 0x77, 0xFF 19 | 20 | // #define SRC_MAC 0xb8, 0x59, 0x9f, 0x1d, 0x04, 0xf2 21 | #define SRC_MAC 0xe4, 0x1d, 0x2d, 0xf3, 0xdd, 0xcc 22 | // #define DST_MAC 0xb8, 0x59, 0x9f, 0x0b, 0x30, 0x72 23 | 24 | #define ETH_TYPE 0x07, 0x00 25 | 26 | #define IP_HDRS 0x45, 0x00, 0x00, 0x54, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01, 0xaf, 0xb6 27 | 28 | #define SRC_IP 0x0d, 0x07, 0x38, 0x66 29 | 30 | #define DST_IP 0x0d, 0x07, 0x38, 0x7f 31 | 32 | #define SRC_PORT 0x67, 0x67 33 | 34 | #define DST_PORT 0x78, 0x78 35 | 36 | #define UDP_HDRS 0x00, 0x00, 0x00, 0x00 37 | 38 | // Only a template, DST_IP will be modified soon 39 | // This one is for sending 40 | const unsigned char PS_IP_ETH_UDP_HEADER[] = { WORKER_FILTER_TEMPLATE, SRC_MAC, ETH_TYPE, IP_HDRS, SRC_IP, DST_IP }; 41 | const unsigned char WORKER_IP_ETH_UDP_HEADER[] = { PS_FILTER_TEMPLATE, SRC_MAC, ETH_TYPE, IP_HDRS, SRC_IP, DST_IP }; 42 | 43 | // P4ML_PACKET_SIZE = IP_ETH_HEADER_SIZE + P4ML_HEADER_SIZE + P4ML_DATA_SIZE 44 | #define P4ML_PACKET_SIZE 308 45 | #define P4ML_DATA_SIZE 248 46 | #define P4ML_HEADER_SIZE 26 47 | #define P4ML_LAYER_SIZE 274 48 | #define IP_ETH_UDP_HEADER_SIZE 34 49 | 50 | #define MAX_ENTRIES_PER_PACKET 62 51 | 52 | #define BYTE_TO_BINARY_PATTERN "%c%c%c%c%c%c%c%c" 53 | #define BYTE_TO_BINARY(byte) \ 54 | (byte & 0x80 ? '1' : '0'), \ 55 | (byte & 0x40 ? '1' : '0'), \ 56 | (byte & 0x20 ? '1' : '0'), \ 57 | (byte & 0x10 ? '1' : '0'), \ 58 | (byte & 0x08 ? '1' : '0'), \ 59 | (byte & 0x04 ? '1' : '0'), \ 60 | (byte & 0x02 ? '1' : '0'), \ 61 | (byte & 0x01 ? 
'1' : '0') 62 | 63 | #pragma pack(push, 1) 64 | struct agghdr { 65 | uint32_t bitmap; 66 | uint8_t num_worker; 67 | uint8_t flag; 68 | // reserved : 2; 69 | // isForceFoward : 1; 70 | 71 | /* Current version 72 | overflow : 1; 73 | PSIndex : 2; 74 | dataIndex : 1; 75 | ECN : 1; 76 | isResend : 1; 77 | isSWCollision : 1; 78 | isACK : 1; 79 | */ 80 | 81 | uint16_t appID; 82 | uint16_t seq_num; 83 | uint16_t agtr; 84 | uint16_t agtr2; 85 | int32_t vector[MAX_ENTRIES_PER_PACKET]; 86 | uint64_t key; 87 | uint32_t len_tensor; 88 | }; 89 | #pragma pack(pop) 90 | 91 | static std::mutex _packet_print_mutex; 92 | 93 | void inline make_p4ml_layer_and_copy_to(void* payload, Job* job_info, AppInfo* app_info, uint16_t* agtr, uint16_t* seq_num, int* offset, bool isResend, bool isForceForward, bool isOverflow) 94 | { 95 | agghdr* agg_header = (agghdr*)payload; 96 | agghdr* p4ml_header = agg_header; 97 | agg_header->key = job_info->key; 98 | agg_header->len_tensor = htonl(job_info->len); 99 | agg_header->bitmap = htonl(1 << (app_info->host)); 100 | agg_header->num_worker = app_info->num_worker; 101 | agg_header->appID = htons(app_info->appID); 102 | agg_header->flag = 0; 103 | agg_header->agtr = htons(*agtr); 104 | //TODO: clarify this and UsedSwitchAGTRcount 105 | agg_header->agtr2 = htons(*agtr + MAX_AGTR_COUNT); 106 | agg_header->seq_num = htons(*seq_num); 107 | 108 | agg_header->flag = ((job_info->key % app_info->num_PS)) << 5; 109 | 110 | if (isResend) 111 | agg_header->flag |= 4; 112 | 113 | if (isForceForward) 114 | agg_header->flag |= 32; 115 | 116 | if (isOverflow) 117 | agg_header->flag |= 128; 118 | // PS Index 119 | // agg_header->flag |= (*num_PS << 5); 120 | // printf("to PS: %d\n", ((*key % *num_PS)+1)); 121 | 122 | int32_t* used_data; 123 | if (isOverflow) { 124 | used_data = (int32_t*) job_info->float_data; 125 | } 126 | else 127 | used_data = (int32_t*) job_info->int_data; 128 | 129 | int32_t* send_data; 130 | if (*offset + MAX_ENTRIES_PER_PACKET > job_info->len) { 131 | int32_t* tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 132 | memcpy(tmp, used_data + *offset, sizeof(int32_t) * (job_info->len % MAX_ENTRIES_PER_PACKET)); 133 | send_data = tmp; 134 | delete tmp; 135 | } else { 136 | send_data = used_data + *offset; 137 | } 138 | 139 | // p4ml_header_print_h(agg_header, "Make"); 140 | } 141 | 142 | // void inline make_packet_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* num_worker, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isResend, bool isForceForward, uint8_t* num_PS, int thread_id) 143 | // { 144 | // char* eth_ip_header = (char*)payload; 145 | // memcpy(payload, IP_ETH_UDP_HEADER, sizeof(IP_ETH_UDP_HEADER)); 146 | // eth_ip_header[5] = thread_id; 147 | // make_p4ml_layer_and_copy_to((char*)payload + sizeof(IP_ETH_UDP_HEADER), key, len_tensor, workerID, num_worker, appID, agtr, seq_num, data, isResend, isForceForward, num_PS); 148 | // } 149 | 150 | void inline p4ml_header_ntoh(agghdr* p_p4ml) 151 | { 152 | p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor); 153 | p_p4ml->bitmap = ntohl(p_p4ml->bitmap); 154 | p_p4ml->seq_num = ntohs(p_p4ml->seq_num); 155 | p_p4ml->agtr = ntohs(p_p4ml->agtr); 156 | p_p4ml->agtr2 = ntohs(p_p4ml->agtr2); 157 | p_p4ml->appID = ntohs(p_p4ml->appID); 158 | int32_t* p_model = p_p4ml->vector; 159 | 160 | /* if not float */ 161 | if (!(p_p4ml->flag & 0x80)) { 162 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 163 | p_model[i] = ntohl(p_model[i]); 164 | } 165 | } 166 | 167 | void inline 
p4ml_header_ntoh_without_data(agghdr* p_p4ml) 168 | { 169 | p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor); 170 | p_p4ml->bitmap = ntohl(p_p4ml->bitmap); 171 | p_p4ml->seq_num = ntohs(p_p4ml->seq_num); 172 | p_p4ml->agtr = ntohs(p_p4ml->agtr); 173 | p_p4ml->agtr2 = ntohs(p_p4ml->agtr2); 174 | p_p4ml->appID = ntohs(p_p4ml->appID); 175 | // // p_p4ml->last_ack = ntohl(p_p4ml->last_ack); 176 | int32_t* p_model = p_p4ml->vector; 177 | } 178 | 179 | void inline p4ml_header_hton_without_data(agghdr* p_p4ml) 180 | { 181 | p_p4ml->len_tensor = htonl(p_p4ml->len_tensor); 182 | p_p4ml->bitmap = htonl(p_p4ml->bitmap); 183 | p_p4ml->seq_num = htons(p_p4ml->seq_num); 184 | p_p4ml->agtr = htons(p_p4ml->agtr); 185 | p_p4ml->agtr2 = htons(p_p4ml->agtr2); 186 | p_p4ml->appID = htons(p_p4ml->appID); 187 | // // p_p4ml->last_ack = htonl(p_p4ml->last_ack); 188 | } 189 | 190 | void inline p4ml_header_setACK(agghdr* p4ml_header) 191 | { 192 | p4ml_header->flag |= 1; 193 | } 194 | 195 | void inline p4ml_header_setOverflow(agghdr* p4ml_header) 196 | { 197 | p4ml_header->flag |= 128; 198 | } 199 | 200 | void inline p4ml_header_setOverflowRequest(agghdr* p4ml_header) 201 | { 202 | p4ml_header->flag |= 128; 203 | p4ml_header->flag &= ~(4); 204 | } 205 | 206 | void inline p4ml_header_setCollisionBit(agghdr* p4ml_header) 207 | { 208 | p4ml_header->flag |= 2; 209 | } 210 | 211 | void inline p4ml_header_setLengthFieldToAgtr(agghdr* p4ml_header, uint16_t new_agtr) 212 | { 213 | p4ml_header->len_tensor = new_agtr; 214 | } 215 | 216 | void inline p4ml_header_resetIndex(agghdr* p4ml_header) 217 | { 218 | p4ml_header->flag &= ~(16); 219 | } 220 | 221 | void inline p4ml_header_resetCollisionBit(agghdr* p4ml_header) 222 | { 223 | p4ml_header->flag &= ~(2); 224 | } 225 | 226 | void inline p4ml_header_print(agghdr *p4ml_header, const char *caption) 227 | { 228 | std::lock_guard lock(_packet_print_mutex); 229 | printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, " 230 | "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, " 231 | "agtr: %u, agtr2: %u, seq_num: %u, isACK: %d, dataIndex: %d," 232 | "isResend: %d, isOverflow: %d, data: ", 233 | caption, p4ml_header->key, p4ml_header->len_tensor, 234 | BYTE_TO_BINARY(p4ml_header->bitmap), p4ml_header->num_worker, p4ml_header->appID, 235 | p4ml_header->agtr, p4ml_header->agtr2, p4ml_header->seq_num, 236 | p4ml_header->flag & 1 ? 1 : 0, p4ml_header->flag & 16 ? 1 : 0, p4ml_header->flag & 4 ? 1 : 0, 237 | p4ml_header->flag & 128 ? 1 : 0); 238 | 239 | // is Overflow? 240 | if (p4ml_header->flag & 128) 241 | // is ACK? isn't Resend? 
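/* Flag bit masks (MSB to LSB, matching the "Current version" layout comment
 * in struct agghdr above):
 *   0x80 overflow (float payload)   0x60 PSIndex, set as (key % num_PS) << 5
 *   0x10 dataIndex                  0x08 ECN
 *   0x04 isResend                   0x02 isSWCollision   0x01 isACK
 * The checks below and the set/reset helpers in this file use these masks
 * directly. */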
242 | if (p4ml_header->flag & 1 && !(p4ml_header->flag & 4)) 243 | printf("REQUEST - CARELESS."); 244 | else 245 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 246 | printf("%.7f ", ntohf((p4ml_header->vector)[i])); 247 | else 248 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 249 | printf("%d ", p4ml_header->vector[i]); 250 | printf("\n"); 251 | } 252 | 253 | void inline p4ml_header_print_h(agghdr *p4ml_header, const char *caption) 254 | { 255 | std::lock_guard lock(_packet_print_mutex); 256 | printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, " 257 | "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, " 258 | "agtr: %u, agtr2: %u, seq_num: %u, isACK: %d, dataIndex: %d," 259 | "isResend: %d, isOverflow: %d, data: ", 260 | caption, p4ml_header->key, ntohl(p4ml_header->len_tensor), 261 | BYTE_TO_BINARY(ntohl(p4ml_header->bitmap)), p4ml_header->num_worker, ntohs(p4ml_header->appID), 262 | ntohs(p4ml_header->agtr), ntohs(p4ml_header->agtr2), ntohs(p4ml_header->seq_num), 263 | p4ml_header->flag & 1 ? 1 : 0, p4ml_header->flag & 16 ? 1 : 0, p4ml_header->flag & 4 ? 1 : 0, 264 | p4ml_header->flag & 128 ? 1 : 0); 265 | 266 | // is Overflow? 267 | if (p4ml_header->flag & 128) 268 | // is ACK? isn't Resend? 269 | if (p4ml_header->flag & 1 && !(p4ml_header->flag & 4)) 270 | printf("REQUEST - CARELESS."); 271 | else 272 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 273 | printf("%.7f ", ((float *)(p4ml_header->vector))[i]); 274 | else 275 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 276 | printf("%d ", ntohl(p4ml_header->vector[i])); 277 | printf("\n"); 278 | } 279 | 280 | #endif 281 | -------------------------------------------------------------------------------- /common/quantize.h: -------------------------------------------------------------------------------- 1 | #ifndef QUAN_P4ML_H 2 | #define QUAN_P4ML_H 3 | #include 4 | #include 5 | 6 | // scale up float then translate it to int 7 | // without any further optimization 8 | inline static void quantizeNaive(char *data_ptr, uint32_t size) 9 | { 10 | int factor = 1000000; 11 | int *int_data_ptr = (int *)data_ptr; 12 | float *float_data_ptr = (float *)data_ptr; 13 | for (uint32_t i = 0; i < size; i++) 14 | { 15 | int_data_ptr[i] = (int)(float_data_ptr[i] * factor); 16 | } 17 | } 18 | 19 | // translate back to float and scale down 20 | // without any further optimization 21 | inline static void dequantizeNaive(char *data_ptr, uint32_t size) 22 | { 23 | float factor = 1000000.0; 24 | int *int_data_ptr = (int *)data_ptr; 25 | float *float_data_ptr = (float *)data_ptr; 26 | for (uint32_t i = 0; i < size; i++) 27 | { 28 | float_data_ptr[i] = (float)(int_data_ptr[i] / factor); 29 | } 30 | } 31 | 32 | // functioned the same as quantizeNaive 33 | // boost with avx 256 instructions 34 | inline static void quantizeAVX2(char *data_ptr, uint32_t size) 35 | { 36 | // check alignment 37 | 38 | __m256 input; 39 | __m256i output; 40 | 41 | int unaligned_size = size % 8; 42 | int aligned_size = size / 8; 43 | 44 | const float factor = 1000000.0; 45 | float *float_data_ptr = (float *)data_ptr; 46 | int *int_data_ptr = (int *)data_ptr; 47 | 48 | // 0xF4240 is 1000000 in hex 49 | __m256 factor_in_avx = _mm256_broadcast_ss(&factor); 50 | 51 | for (uint32_t i = 0; i < aligned_size; i++) 52 | { 53 | float *current_pos = float_data_ptr + i * 8; 54 | input = _mm256_loadu_ps(current_pos); 55 | input = _mm256_mul_ps(input, factor_in_avx); 56 | output = _mm256_cvtps_epi32(input); 57 | _mm256_storeu_si256((__m256i *)current_pos, output); 58 | } 59 | 
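/* Scalar tail: convert the remaining (size % 8) elements that do not fill a
   full 256-bit lane, exactly as quantizeNaive does. */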
60 | for (uint32_t i = 0; i < unaligned_size; i++) 61 | { 62 | int_data_ptr[aligned_size * 8 + i] = 63 | (int)(float_data_ptr[aligned_size * 8 + i] * factor); 64 | } 65 | } 66 | 67 | // functioned the same as dequantizeNaive 68 | // boost with avx 256 instructions 69 | inline static void dequantizeAVX2(char *data_ptr, uint32_t size) 70 | { 71 | __m256i input; 72 | __m256 output; 73 | 74 | int unaligned_size = size % 8; 75 | int aligned_size = size / 8; 76 | 77 | const float factor = 1000000.0; 78 | int *int_data_ptr = (int *)data_ptr; 79 | float *float_data_ptr = (float *)data_ptr; 80 | 81 | // __m256i* input_avx = (__m256i*) data_ptr; 82 | __m256 factor_in_avx = _mm256_broadcast_ss(&factor); 83 | 84 | for (uint32_t i = 0; i < aligned_size; i++) 85 | { 86 | float *current_pos = float_data_ptr + i * 8; 87 | input = _mm256_loadu_si256((__m256i *)current_pos); 88 | output = _mm256_cvtepi32_ps(input); 89 | output = _mm256_div_ps(output, factor_in_avx); 90 | _mm256_storeu_ps(current_pos, output); 91 | } 92 | 93 | for (uint32_t i = 0; i < unaligned_size; i++) 94 | { 95 | float_data_ptr[aligned_size * 8 + i] = 96 | (float)(int_data_ptr[aligned_size * 8 + i] / factor); 97 | } 98 | } 99 | 100 | // functioned the same as quantizeNaive 101 | // boost with avx 256 instructions 102 | inline static void quantizeAVX2to(char *dst_ptr, char *src_ptr, uint32_t size) 103 | { 104 | // check alignment 105 | 106 | __m256 input; 107 | __m256i output; 108 | 109 | int unaligned_size = size % 8; 110 | int aligned_size = size / 8; 111 | 112 | const float factor = 1000000.0; 113 | float *float_data_ptr = (float *)src_ptr; 114 | int *int_data_ptr = (int *)src_ptr; 115 | 116 | float *dst_float_data_ptr = (float *)dst_ptr; 117 | int *dst_int_data_ptr = (int *)dst_ptr; 118 | 119 | // 0xF4240 is 1000000 in hex 120 | __m256 factor_in_avx = _mm256_broadcast_ss(&factor); 121 | 122 | for (uint32_t i = 0; i < aligned_size; i++) 123 | { 124 | float *current_pos = float_data_ptr + i * 8; 125 | float *current_dst_pos = dst_float_data_ptr + i * 8; 126 | 127 | input = _mm256_loadu_ps(current_pos); 128 | input = _mm256_mul_ps(input, factor_in_avx); 129 | output = _mm256_cvtps_epi32(input); 130 | _mm256_storeu_si256((__m256i *)current_dst_pos, output); 131 | } 132 | 133 | for (uint32_t i = 0; i < unaligned_size; i++) 134 | { 135 | dst_int_data_ptr[aligned_size * 8 + i] = 136 | (int)(float_data_ptr[aligned_size * 8 + i] * factor); 137 | } 138 | } 139 | 140 | // functioned the same as dequantizeNaive 141 | // boost with avx 256 instructions 142 | inline static void dequantizeAVX2to(char *dst_ptr, char *src_ptr, 143 | uint32_t size) 144 | { 145 | __m256i input; 146 | __m256 output; 147 | 148 | int unaligned_size = size % 8; 149 | int aligned_size = size / 8; 150 | 151 | const float factor = 1000000.0; 152 | int *int_data_ptr = (int *)src_ptr; 153 | float *float_data_ptr = (float *)src_ptr; 154 | 155 | int *dst_int_data_ptr = (int *)dst_ptr; 156 | float *dst_float_data_ptr = (float *)dst_ptr; 157 | 158 | // __m256i* input_avx = (__m256i*) src_ptr; 159 | __m256 factor_in_avx = _mm256_broadcast_ss(&factor); 160 | 161 | for (uint32_t i = 0; i < aligned_size; i++) 162 | { 163 | float *current_pos = float_data_ptr + i * 8; 164 | float *current_dst_pos = dst_float_data_ptr + i * 8; 165 | 166 | input = _mm256_loadu_si256((__m256i *)current_pos); 167 | output = _mm256_cvtepi32_ps(input); 168 | output = _mm256_div_ps(output, factor_in_avx); 169 | _mm256_storeu_ps(current_dst_pos, output); 170 | } 171 | 172 | for (uint32_t i = 0; i < 
unaligned_size; i++) 173 | { 174 | dst_float_data_ptr[aligned_size * 8 + i] = 175 | (float)(int_data_ptr[aligned_size * 8 + i] / factor); 176 | } 177 | } 178 | 179 | #endif -------------------------------------------------------------------------------- /common/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | // Because here we use 2 agtr for one packet, so /2 16 | #define MAX_AGTR_COUNT 20000 17 | #define AGTR_TO_USE_PER_APPLICATION 2800 18 | 19 | #define EACH_HUGEPAGE_SIZE (2048*1024) 20 | 21 | #define likely(x) __builtin_expect(!!(x), 1) 22 | #define unlikely(x) __builtin_expect(!!(x), 0) 23 | 24 | 25 | #define DIVUP(x, y) (((x)+(y)-1)/(y)) 26 | #define ROUNDUP(x, y) (DIVUP((x), (y))*(y)) 27 | 28 | template 29 | static inline T align_floor(T v, T align) { 30 | return v - (v % align); 31 | } 32 | 33 | template 34 | static inline T align_ceil(T v, T align) { 35 | return align_floor(v + align - 1, align); 36 | } 37 | 38 | static inline void ib_malloc(void** ptr, size_t size) { 39 | size_t page_size = sysconf(_SC_PAGESIZE); 40 | void* p; 41 | int size_aligned = ROUNDUP(size, page_size); 42 | int ret = posix_memalign(&p, page_size, size_aligned); 43 | if (ret != 0) { 44 | printf("posix_memalign error.\n"); 45 | exit(1); 46 | } 47 | memset(p, 0, size); 48 | *ptr = p; 49 | } 50 | 51 | #define KB(x) (static_cast(x) << 10) 52 | #define KB_(x) (KB(x) - 1) 53 | #define MB(x) (static_cast(x) << 20) 54 | #define MB_(x) (MB(x) - 1) 55 | 56 | static void memory_barrier() { asm volatile("" ::: "memory"); } 57 | static void lfence() { asm volatile("lfence" ::: "memory"); } 58 | static void sfence() { asm volatile("sfence" ::: "memory"); } 59 | static void mfence() { asm volatile("mfence" ::: "memory"); } 60 | static void clflush(volatile void* p) { asm volatile("clflush (%0)" ::"r"(p)); } 61 | static void cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx, 62 | unsigned int* edx) { 63 | asm volatile("cpuid" 64 | : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) 65 | : "0"(*eax), "2"(*ecx)); 66 | } 67 | 68 | inline void bindingCPU(int num) { 69 | int result; 70 | cpu_set_t mask; 71 | CPU_ZERO(&mask); 72 | CPU_SET(num, &mask); 73 | result = sched_setaffinity(0, sizeof(mask), &mask); 74 | if (result < 0) { 75 | printf("binding CPU fails\n"); 76 | exit(1); 77 | } 78 | } 79 | 80 | /// Check a condition at runtime. If the condition is false, throw exception. 81 | static inline void rt_assert(bool condition) { 82 | if (unlikely(!condition)) throw std::runtime_error(""); 83 | } 84 | 85 | 86 | /* allocate the huge pages. */ 87 | inline char *alloc_raw_pages(int cnt, int size) { 88 | /* 89 | * Don't touch the page since then allocator would not allocate the page 90 | * right now. 
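 * (Pages are faulted in lazily on first access; MAP_HUGETLB is requested
 * only when `size` equals EACH_HUGEPAGE_SIZE, i.e. a 2 MB huge page.)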
91 | */ 92 | int flag = MAP_SHARED | MAP_ANONYMOUS; 93 | if (size == EACH_HUGEPAGE_SIZE) flag |= MAP_HUGETLB; 94 | char *ptr = 95 | (char *)mmap(NULL, (int64_t)cnt * size, PROT_READ | PROT_WRITE, flag, -1, 0); 96 | if (ptr == (char *)-1) { 97 | perror("alloc_raw_pages"); 98 | return NULL; 99 | } 100 | return ptr; 101 | } 102 | 103 | union { 104 | float f; 105 | uint32_t u; 106 | } if_value; 107 | 108 | inline float ntohf(uint32_t net32) 109 | { 110 | if_value.u = ntohl(net32); 111 | return if_value.f; 112 | } 113 | 114 | // /* Returns the MAC Address Params: int iNetType - 0: ethernet, 1: Wifi char chMAC[6] - MAC Address in binary format Returns: 0: success -1: Failure */ 115 | // int getMACAddress(char chMAC[6]) 116 | // { 117 | // struct ifreq ifr; 118 | // int sock; 119 | // char* ifname = "enp178s0f0"; 120 | // sock = socket(AF_INET, SOCK_DGRAM, 0); 121 | // strcpy(ifr.ifr_name, ifname); 122 | // ifr.ifr_addr.sa_family = AF_INET; 123 | // if (ioctl(sock, SIOCGIFHWADDR, &ifr) < 0) { 124 | // return -1; 125 | // } 126 | // memcpy(chMAC, ifr.ifr_hwaddr.sa_data, 6); 127 | // close(sock); 128 | // return 0; 129 | // } 130 | 131 | // /* Returns the interface IP Address Params: int iNetType - 0: ethernet, 1: Wifi char *chIP - IP Address string Return: 0: success / -1: Failure */ 132 | // int getIpAddress(char chIP[16]) 133 | // { 134 | // struct ifreq ifr; 135 | // int sock = 0; 136 | // sock = socket(AF_INET, SOCK_DGRAM, 0); 137 | // strcpy(ifr.ifr_name, "enp178s0f0"); 138 | // if (ioctl(sock, SIOCGIFADDR, &ifr) < 0) { 139 | // strcpy(chIP, "0.0.0.0"); 140 | // return -1; 141 | // } 142 | // sprintf(chIP, "%s", inet_ntoa(((struct sockaddr_in*)&(ifr.ifr_addr))->sin_addr)); 143 | // close(sock); 144 | // return 0; 145 | // } 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /common/window_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef SLIDING_W_H 2 | #define SLIDING_W_H 3 | 4 | #include "packet.h" 5 | #include "CC_manager.h" 6 | #define RESEND_TRIGGER 1 7 | 8 | class WindowManager { 9 | public: 10 | bool* isACKed; 11 | /* This three variable is completely useless, but 12 | when deleting it, the performance will drop from 46Gbps to 40Gbps.. */ 13 | bool* isSent; 14 | std::chrono::high_resolution_clock::time_point* send_time; 15 | std::chrono::high_resolution_clock::time_point* receive_time; 16 | /* */ 17 | int total_ACK; 18 | int last_ACK; 19 | 20 | WindowManager() { 21 | last_ACK = 0; 22 | } 23 | 24 | bool inline UpdateWindow(uint16_t* seq_num) 25 | { 26 | bool isLastAckUpdated = false; 27 | isACKed[*seq_num] = true; 28 | while (isACKed[last_ACK + 1]) { 29 | last_ACK++; 30 | isLastAckUpdated = true; 31 | } 32 | return isLastAckUpdated; 33 | } 34 | 35 | int inline Reset(int packet_total) 36 | { 37 | last_ACK = 0; 38 | total_ACK = packet_total; 39 | memset(isACKed, 0, sizeof(bool) * packet_total + 1); 40 | } 41 | }; 42 | 43 | #endif -------------------------------------------------------------------------------- /docs/benchmark.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | ## Requirment - Run with Signal Switch 4 | 5 | In this experiment, 2 physical workers, 1 physical PS (Parameter Server) and 1 switch is used. 
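
The commands below assume the standard Tofino SDE environment variables are already set; for illustration only (the paths are placeholders, and `$TOOLS` here stands for the SDE directory that provides `p4_build.sh` and `run_pd_rpc.py`, adjust to your installation):
```
# Illustrative only - adjust to your environment
$ export SDE=<path to the Barefoot/Tofino SDE>
$ export TOOLS=$SDE/../tools
$ export ATP_REPO=<path to this ATP checkout>
```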
6 | 7 | 8 | ## Getting Started 9 | ``` 10 | $ git clone https://github.com/ATP-NSDI/ATP.git 11 | ``` 12 | 13 | ### Run Tofino Switch 14 | 15 | #### Compile P4 Program and Start the Tofino Model (Terminal1) 16 | If you are using physical switch, compile the switch program then jump to Terminal 2 directly. 17 | ``` 18 | $ cd $SDE 19 | ``` 20 | ``` 21 | $ $TOOLS/p4_build.sh ~/git/p4ml/p4src/p4ml.p4 22 | ``` 23 | ``` 24 | # (Optional) for software Tofino behavior model 25 | $ ./run_tofino_model.sh -p p4ml 26 | ``` 27 | #### Load Specified Switch Program (Terminal2) 28 | ``` 29 | $ cd $SDE 30 | ``` 31 | ``` 32 | $ ./run_switchd.sh -p p4ml 33 | ``` 34 | #### Enable Ports and Install Entries (Terminal3) 35 | ``` 36 | $ $SDE/run_p4_tests.sh -t $ATP_REPO/ptf/ -p p4ml 37 | ``` 38 | ``` 39 | $ $TOOLS/run_pd_rpc.py -p p4ml $ATP_REPO/run_pd_rpc/setup.py 40 | ``` 41 | 42 | ### Run Parameter Server 43 | #### Compile and Run Server (Terminal4) 44 | ``` 45 | $ cd $ATP_REPO/server/ 46 | ``` 47 | ``` 48 | $ make 49 | ``` 50 | ``` 51 | # Usage: ./app [AppID] 52 | sudo ./app 1 53 | ``` 54 | 55 | wait until all threads finish their QP creation. 56 | 57 | ### Compile and Run Workers 58 | ``` 59 | $ cd $ATP_REPO/client/ 60 | ``` 61 | ``` 62 | $ make 63 | ``` 64 | #### Run Worker1 (Terminal5) 65 | ``` 66 | # Usage: ./app [MyID] [Num of Worker] [AppID] [Num of PS] 67 | $ sudo ./app 0 2 1 1 68 | ``` 69 | #### Run Worker2 (Terminal6) 70 | ``` 71 | # Usage: ./app [MyID] [Num of Worker] [AppID] [Num of PS] 72 | $ sudo ./app 1 2 1 1 73 | ``` 74 | 75 | Then you can switch to Terminal 5/6 to the see bandwidth report. 76 | -------------------------------------------------------------------------------- /p4src/includes/actions.p4: -------------------------------------------------------------------------------- 1 | action processentry1() { 2 | write_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 3 | } 4 | 5 | action noequ0_processentry1() { 6 | noequ0_write_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 7 | } 8 | 9 | action processentry1andWriteToPacket() { 10 | write_read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 11 | } 12 | 13 | action noequ0_processentry1andWriteToPacket() { 14 | noequ0_write_read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 15 | } 16 | 17 | action do_cleanEntry1() { 18 | clean_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 19 | } 20 | 21 | action entry1WriteToPacket() { 22 | read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr); 23 | } 24 | 25 | action processentry2() { 26 | write_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 27 | } 28 | 29 | action noequ0_processentry2() { 30 | noequ0_write_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 31 | } 32 | 33 | action processentry2andWriteToPacket() { 34 | write_read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 35 | } 36 | 37 | action noequ0_processentry2andWriteToPacket() { 38 | noequ0_write_read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 39 | } 40 | 41 | action do_cleanEntry2() { 42 | clean_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 43 | } 44 | 45 | action entry2WriteToPacket() { 46 | read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr); 47 | } 48 | 49 | action processentry3() { 50 | write_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 51 | } 52 | 53 | action noequ0_processentry3() { 54 | noequ0_write_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 55 | } 56 | 57 | action processentry3andWriteToPacket() { 58 | 
write_read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 59 | } 60 | 61 | action noequ0_processentry3andWriteToPacket() { 62 | noequ0_write_read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 63 | } 64 | 65 | action do_cleanEntry3() { 66 | clean_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 67 | } 68 | 69 | action entry3WriteToPacket() { 70 | read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr); 71 | } 72 | 73 | action processentry4() { 74 | write_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 75 | } 76 | 77 | action noequ0_processentry4() { 78 | noequ0_write_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 79 | } 80 | 81 | action processentry4andWriteToPacket() { 82 | write_read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 83 | } 84 | 85 | action noequ0_processentry4andWriteToPacket() { 86 | noequ0_write_read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 87 | } 88 | 89 | action do_cleanEntry4() { 90 | clean_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 91 | } 92 | 93 | action entry4WriteToPacket() { 94 | read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr); 95 | } 96 | 97 | action processentry5() { 98 | write_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 99 | } 100 | 101 | action noequ0_processentry5() { 102 | noequ0_write_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 103 | } 104 | 105 | action processentry5andWriteToPacket() { 106 | write_read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 107 | } 108 | 109 | action noequ0_processentry5andWriteToPacket() { 110 | noequ0_write_read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 111 | } 112 | 113 | action do_cleanEntry5() { 114 | clean_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 115 | } 116 | 117 | action entry5WriteToPacket() { 118 | read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr); 119 | } 120 | 121 | action processentry6() { 122 | write_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 123 | } 124 | 125 | action noequ0_processentry6() { 126 | noequ0_write_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 127 | } 128 | 129 | action processentry6andWriteToPacket() { 130 | write_read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 131 | } 132 | 133 | action noequ0_processentry6andWriteToPacket() { 134 | noequ0_write_read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 135 | } 136 | 137 | action do_cleanEntry6() { 138 | clean_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 139 | } 140 | 141 | action entry6WriteToPacket() { 142 | read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr); 143 | } 144 | 145 | action processentry7() { 146 | write_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 147 | } 148 | 149 | action noequ0_processentry7() { 150 | noequ0_write_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 151 | } 152 | 153 | action processentry7andWriteToPacket() { 154 | write_read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 155 | } 156 | 157 | action noequ0_processentry7andWriteToPacket() { 158 | noequ0_write_read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 159 | } 160 | 161 | action do_cleanEntry7() { 162 | clean_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 163 | } 164 | 165 | action entry7WriteToPacket() { 166 | read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr); 167 | } 168 | 169 | action processentry8() { 170 | write_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 171 | } 172 | 173 | action 
noequ0_processentry8() { 174 | noequ0_write_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 175 | } 176 | 177 | action processentry8andWriteToPacket() { 178 | write_read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 179 | } 180 | 181 | action noequ0_processentry8andWriteToPacket() { 182 | noequ0_write_read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 183 | } 184 | 185 | action do_cleanEntry8() { 186 | clean_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 187 | } 188 | 189 | action entry8WriteToPacket() { 190 | read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr); 191 | } 192 | 193 | action processentry9() { 194 | write_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 195 | } 196 | 197 | action noequ0_processentry9() { 198 | noequ0_write_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 199 | } 200 | 201 | action processentry9andWriteToPacket() { 202 | write_read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 203 | } 204 | 205 | action noequ0_processentry9andWriteToPacket() { 206 | noequ0_write_read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 207 | } 208 | 209 | action do_cleanEntry9() { 210 | clean_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 211 | } 212 | 213 | action entry9WriteToPacket() { 214 | read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr); 215 | } 216 | 217 | action processentry10() { 218 | write_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 219 | } 220 | 221 | action noequ0_processentry10() { 222 | noequ0_write_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 223 | } 224 | 225 | action processentry10andWriteToPacket() { 226 | write_read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 227 | } 228 | 229 | action noequ0_processentry10andWriteToPacket() { 230 | noequ0_write_read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 231 | } 232 | 233 | action do_cleanEntry10() { 234 | clean_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 235 | } 236 | 237 | action entry10WriteToPacket() { 238 | read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr); 239 | } 240 | 241 | action processentry11() { 242 | write_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 243 | } 244 | 245 | action noequ0_processentry11() { 246 | noequ0_write_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 247 | } 248 | 249 | action processentry11andWriteToPacket() { 250 | write_read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 251 | } 252 | 253 | action noequ0_processentry11andWriteToPacket() { 254 | noequ0_write_read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 255 | } 256 | 257 | action do_cleanEntry11() { 258 | clean_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 259 | } 260 | 261 | action entry11WriteToPacket() { 262 | read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr); 263 | } 264 | 265 | action processentry12() { 266 | write_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 267 | } 268 | 269 | action noequ0_processentry12() { 270 | noequ0_write_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 271 | } 272 | 273 | action processentry12andWriteToPacket() { 274 | write_read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 275 | } 276 | 277 | action noequ0_processentry12andWriteToPacket() { 278 | noequ0_write_read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 279 | } 280 | 281 | action do_cleanEntry12() { 282 | clean_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 283 | } 284 | 285 | action 
entry12WriteToPacket() { 286 | read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr); 287 | } 288 | 289 | action processentry13() { 290 | write_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 291 | } 292 | 293 | action noequ0_processentry13() { 294 | noequ0_write_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 295 | } 296 | 297 | action processentry13andWriteToPacket() { 298 | write_read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 299 | } 300 | 301 | action noequ0_processentry13andWriteToPacket() { 302 | noequ0_write_read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 303 | } 304 | 305 | action do_cleanEntry13() { 306 | clean_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 307 | } 308 | 309 | action entry13WriteToPacket() { 310 | read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr); 311 | } 312 | 313 | action processentry14() { 314 | write_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 315 | } 316 | 317 | action noequ0_processentry14() { 318 | noequ0_write_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 319 | } 320 | 321 | action processentry14andWriteToPacket() { 322 | write_read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 323 | } 324 | 325 | action noequ0_processentry14andWriteToPacket() { 326 | noequ0_write_read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 327 | } 328 | 329 | action do_cleanEntry14() { 330 | clean_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 331 | } 332 | 333 | action entry14WriteToPacket() { 334 | read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr); 335 | } 336 | 337 | action processentry15() { 338 | write_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 339 | } 340 | 341 | action noequ0_processentry15() { 342 | noequ0_write_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 343 | } 344 | 345 | action processentry15andWriteToPacket() { 346 | write_read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 347 | } 348 | 349 | action noequ0_processentry15andWriteToPacket() { 350 | noequ0_write_read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 351 | } 352 | 353 | action do_cleanEntry15() { 354 | clean_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 355 | } 356 | 357 | action entry15WriteToPacket() { 358 | read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr); 359 | } 360 | 361 | action processentry16() { 362 | write_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 363 | } 364 | 365 | action noequ0_processentry16() { 366 | noequ0_write_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 367 | } 368 | 369 | action processentry16andWriteToPacket() { 370 | write_read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 371 | } 372 | 373 | action noequ0_processentry16andWriteToPacket() { 374 | noequ0_write_read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 375 | } 376 | 377 | action do_cleanEntry16() { 378 | clean_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 379 | } 380 | 381 | action entry16WriteToPacket() { 382 | read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr); 383 | } 384 | 385 | action processentry17() { 386 | write_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 387 | } 388 | 389 | action noequ0_processentry17() { 390 | noequ0_write_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 391 | } 392 | 393 | action processentry17andWriteToPacket() { 394 | write_read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 395 | } 396 | 397 | action 
noequ0_processentry17andWriteToPacket() { 398 | noequ0_write_read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 399 | } 400 | 401 | action do_cleanEntry17() { 402 | clean_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 403 | } 404 | 405 | action entry17WriteToPacket() { 406 | read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr); 407 | } 408 | 409 | action processentry18() { 410 | write_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 411 | } 412 | 413 | action noequ0_processentry18() { 414 | noequ0_write_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 415 | } 416 | 417 | action processentry18andWriteToPacket() { 418 | write_read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 419 | } 420 | 421 | action noequ0_processentry18andWriteToPacket() { 422 | noequ0_write_read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 423 | } 424 | 425 | action do_cleanEntry18() { 426 | clean_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 427 | } 428 | 429 | action entry18WriteToPacket() { 430 | read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr); 431 | } 432 | 433 | action processentry19() { 434 | write_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 435 | } 436 | 437 | action noequ0_processentry19() { 438 | noequ0_write_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 439 | } 440 | 441 | action processentry19andWriteToPacket() { 442 | write_read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 443 | } 444 | 445 | action noequ0_processentry19andWriteToPacket() { 446 | noequ0_write_read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 447 | } 448 | 449 | action do_cleanEntry19() { 450 | clean_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 451 | } 452 | 453 | action entry19WriteToPacket() { 454 | read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr); 455 | } 456 | 457 | action processentry20() { 458 | write_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 459 | } 460 | 461 | action noequ0_processentry20() { 462 | noequ0_write_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 463 | } 464 | 465 | action processentry20andWriteToPacket() { 466 | write_read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 467 | } 468 | 469 | action noequ0_processentry20andWriteToPacket() { 470 | noequ0_write_read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 471 | } 472 | 473 | action do_cleanEntry20() { 474 | clean_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 475 | } 476 | 477 | action entry20WriteToPacket() { 478 | read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr); 479 | } 480 | 481 | action processentry21() { 482 | write_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 483 | } 484 | 485 | action noequ0_processentry21() { 486 | noequ0_write_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 487 | } 488 | 489 | action processentry21andWriteToPacket() { 490 | write_read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 491 | } 492 | 493 | action noequ0_processentry21andWriteToPacket() { 494 | noequ0_write_read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 495 | } 496 | 497 | action do_cleanEntry21() { 498 | clean_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 499 | } 500 | 501 | action entry21WriteToPacket() { 502 | read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr); 503 | } 504 | 505 | action processentry22() { 506 | write_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 507 | } 508 | 509 | action 
noequ0_processentry22() { 510 | noequ0_write_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 511 | } 512 | 513 | action processentry22andWriteToPacket() { 514 | write_read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 515 | } 516 | 517 | action noequ0_processentry22andWriteToPacket() { 518 | noequ0_write_read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 519 | } 520 | 521 | action do_cleanEntry22() { 522 | clean_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 523 | } 524 | 525 | action entry22WriteToPacket() { 526 | read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr); 527 | } 528 | 529 | action processentry23() { 530 | write_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 531 | } 532 | 533 | action noequ0_processentry23() { 534 | noequ0_write_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 535 | } 536 | 537 | action processentry23andWriteToPacket() { 538 | write_read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 539 | } 540 | 541 | action noequ0_processentry23andWriteToPacket() { 542 | noequ0_write_read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 543 | } 544 | 545 | action do_cleanEntry23() { 546 | clean_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 547 | } 548 | 549 | action entry23WriteToPacket() { 550 | read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr); 551 | } 552 | 553 | action processentry24() { 554 | write_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 555 | } 556 | 557 | action noequ0_processentry24() { 558 | noequ0_write_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 559 | } 560 | 561 | action processentry24andWriteToPacket() { 562 | write_read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 563 | } 564 | 565 | action noequ0_processentry24andWriteToPacket() { 566 | noequ0_write_read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 567 | } 568 | 569 | action do_cleanEntry24() { 570 | clean_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 571 | } 572 | 573 | action entry24WriteToPacket() { 574 | read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr); 575 | } 576 | 577 | action processentry25() { 578 | write_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 579 | } 580 | 581 | action noequ0_processentry25() { 582 | noequ0_write_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 583 | } 584 | 585 | action processentry25andWriteToPacket() { 586 | write_read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 587 | } 588 | 589 | action noequ0_processentry25andWriteToPacket() { 590 | noequ0_write_read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 591 | } 592 | 593 | action do_cleanEntry25() { 594 | clean_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 595 | } 596 | 597 | action entry25WriteToPacket() { 598 | read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr); 599 | } 600 | 601 | action processentry26() { 602 | write_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 603 | } 604 | 605 | action noequ0_processentry26() { 606 | noequ0_write_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 607 | } 608 | 609 | action processentry26andWriteToPacket() { 610 | write_read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 611 | } 612 | 613 | action noequ0_processentry26andWriteToPacket() { 614 | noequ0_write_read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 615 | } 616 | 617 | action do_cleanEntry26() { 618 | clean_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 619 | } 620 
| 621 | action entry26WriteToPacket() { 622 | read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr); 623 | } 624 | 625 | action processentry27() { 626 | write_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 627 | } 628 | 629 | action noequ0_processentry27() { 630 | noequ0_write_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 631 | } 632 | 633 | action processentry27andWriteToPacket() { 634 | write_read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 635 | } 636 | 637 | action noequ0_processentry27andWriteToPacket() { 638 | noequ0_write_read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 639 | } 640 | 641 | action do_cleanEntry27() { 642 | clean_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 643 | } 644 | 645 | action entry27WriteToPacket() { 646 | read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr); 647 | } 648 | 649 | action processentry28() { 650 | write_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 651 | } 652 | 653 | action noequ0_processentry28() { 654 | noequ0_write_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 655 | } 656 | 657 | action processentry28andWriteToPacket() { 658 | write_read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 659 | } 660 | 661 | action noequ0_processentry28andWriteToPacket() { 662 | noequ0_write_read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 663 | } 664 | 665 | action do_cleanEntry28() { 666 | clean_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 667 | } 668 | 669 | action entry28WriteToPacket() { 670 | read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr); 671 | } 672 | 673 | action processentry29() { 674 | write_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 675 | } 676 | 677 | action noequ0_processentry29() { 678 | noequ0_write_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 679 | } 680 | 681 | action processentry29andWriteToPacket() { 682 | write_read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 683 | } 684 | 685 | action noequ0_processentry29andWriteToPacket() { 686 | noequ0_write_read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 687 | } 688 | 689 | action do_cleanEntry29() { 690 | clean_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 691 | } 692 | 693 | action entry29WriteToPacket() { 694 | read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr); 695 | } 696 | 697 | action processentry30() { 698 | write_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 699 | } 700 | 701 | action noequ0_processentry30() { 702 | noequ0_write_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 703 | } 704 | 705 | action processentry30andWriteToPacket() { 706 | write_read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 707 | } 708 | 709 | action noequ0_processentry30andWriteToPacket() { 710 | noequ0_write_read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 711 | } 712 | 713 | action do_cleanEntry30() { 714 | clean_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 715 | } 716 | 717 | action entry30WriteToPacket() { 718 | read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr); 719 | } 720 | 721 | action processentry31() { 722 | write_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 723 | } 724 | 725 | action noequ0_processentry31() { 726 | noequ0_write_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 727 | } 728 | 729 | action processentry31andWriteToPacket() { 730 | write_read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 731 | } 732 | 733 | 
action noequ0_processentry31andWriteToPacket() { 734 | noequ0_write_read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 735 | } 736 | 737 | action do_cleanEntry31() { 738 | clean_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 739 | } 740 | 741 | action entry31WriteToPacket() { 742 | read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr); 743 | } 744 | 745 | //action processentry32() { 746 | // write_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 747 | //} 748 | 749 | //actionoequ0_n processentry32() { 750 | // noequ0_ write_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 751 | //} 752 | // 753 | //action processentry32andWriteToPacket() { 754 | // write_read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 755 | //} 756 | 757 | //actionoequ0_n processentry32andWriteToPacket() { 758 | // noequ0_ write_read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 759 | //} 760 | 761 | //action do_cleanEntryry32() { 762 | // clean_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 763 | 764 | 765 | //action entry32WriteToPacket() { 766 | // read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr); 767 | //} 768 | // 769 | -------------------------------------------------------------------------------- /p4src/includes/common.p4: -------------------------------------------------------------------------------- 1 | /* 2 | * P4PS 3 | * / 4 | 5 | /************************************************************************* 6 | *********************** R E G I S T E R ******************************* 7 | *************************************************************************/ 8 | 9 | blackbox stateful_alu cleaning_agtr_time { 10 | reg: agtr_time; 11 | 12 | update_lo_1_value : 0; 13 | } 14 | 15 | blackbox stateful_alu cleaning_ecn { 16 | reg: ecn_register; 17 | 18 | update_lo_1_value : 0; 19 | } 20 | 21 | blackbox stateful_alu cleaning_bitmap { 22 | reg: bitmap; 23 | 24 | update_lo_1_value : 0; 25 | } 26 | 27 | blackbox stateful_alu read_write_bitmap { 28 | reg: bitmap; 29 | 30 | output_dst : mdata.bitmap; 31 | 32 | output_value : register_lo; 33 | 34 | update_lo_1_value : register_lo | p4ml.bitmap; 35 | } 36 | 37 | blackbox stateful_alu read_write_bitmap_resend { 38 | reg: bitmap; 39 | 40 | output_dst : mdata.bitmap; 41 | 42 | output_value : register_lo; 43 | 44 | update_lo_1_value : 0; 45 | } 46 | 47 | // if same application, output appID, if not, not output (zero) 48 | blackbox stateful_alu check_app_id_and_seq { 49 | reg: appID_and_Seq; 50 | 51 | condition_lo : p4ml.appIDandSeqNum == register_lo; 52 | // The agtr is empty 53 | condition_hi : register_lo == 0; 54 | 55 | update_lo_1_predicate : condition_lo or condition_hi; 56 | update_lo_1_value : p4ml.appIDandSeqNum; 57 | 58 | output_predicate : condition_lo or condition_hi; 59 | output_dst : mdata.isMyAppIDandMyCurrentSeq; 60 | output_value : p4ml.appIDandSeqNum; 61 | } 62 | 63 | blackbox stateful_alu check_app_id_and_seq_resend { 64 | reg: appID_and_Seq; 65 | 66 | condition_lo : p4ml.appIDandSeqNum == register_lo; 67 | 68 | update_lo_1_predicate : condition_lo; 69 | update_lo_1_value : 0; 70 | 71 | output_predicate : condition_lo; 72 | output_dst : mdata.isMyAppIDandMyCurrentSeq; 73 | output_value : register_lo; 74 | } 75 | 76 | blackbox stateful_alu clean_app_id_and_seq { 77 | reg: appID_and_Seq; 78 | 79 | condition_lo : p4ml.appIDandSeqNum == register_lo; 80 | 81 | update_lo_1_predicate : condition_lo; 82 | update_lo_1_value : 0; 83 | 84 | output_predicate : condition_lo; 85 | output_dst : 
mdata.isMyAppIDandMyCurrentSeq; 86 | output_value : p4ml.appIDandSeqNum; 87 | } 88 | 89 | blackbox stateful_alu check_agtrTime { 90 | reg: agtr_time; 91 | 92 | condition_lo : mdata.isAggregate != 0; 93 | output_dst : mdata.current_agtr_time; 94 | 95 | update_lo_1_predicate : condition_lo; 96 | update_lo_1_value : register_lo + 1; 97 | 98 | update_lo_2_predicate : not condition_lo; 99 | update_lo_2_value : register_lo; 100 | 101 | output_value : alu_lo; 102 | } 103 | 104 | blackbox stateful_alu check_resend_agtrTime { 105 | reg: agtr_time; 106 | 107 | condition_lo : mdata.isAggregate != 0; 108 | // fake, force forward 109 | output_dst : mdata.current_agtr_time; 110 | 111 | update_lo_1_predicate : condition_lo; 112 | update_lo_1_value : 0; 113 | 114 | update_lo_2_predicate : not condition_lo; 115 | update_lo_2_value : 0; 116 | 117 | output_value : p4ml.agtr_time; 118 | } 119 | 120 | blackbox stateful_alu do_comp_qdepth { 121 | reg: dqueue_alert_threshold; 122 | 123 | condition_lo : eg_intr_md.deq_qdepth >= register_lo; 124 | // fake, force forward 125 | output_predicate : condition_lo; 126 | output_dst : mdata.qdepth; 127 | output_value : eg_intr_md.deq_qdepth; 128 | initial_register_lo_value : 1000; 129 | } 130 | 131 | blackbox stateful_alu do_check_ecn { 132 | reg: ecn_register; 133 | 134 | condition_lo : register_lo == 1; 135 | 136 | update_lo_1_value : register_lo | mdata.is_ecn; 137 | 138 | output_predicate : condition_lo; 139 | output_value : mdata.value_one; 140 | output_dst : p4ml.ECN; 141 | } 142 | 143 | /************************************************************************* 144 | ************** I N G R E S S P R O C E S S I N G ******************* 145 | *************************************************************************/ 146 | 147 | /* 148 | * Actions 149 | */ 150 | 151 | action process_bitmap() { 152 | read_write_bitmap.execute_stateful_alu(p4ml_agtr_index.agtr); 153 | } 154 | 155 | action process_bitmap_resend() { 156 | read_write_bitmap_resend.execute_stateful_alu(p4ml_agtr_index.agtr); 157 | } 158 | 159 | 160 | action check_aggregate_and_forward() { 161 | // this is is for aggregation needed checking 162 | bit_andcb(mdata.isAggregate, p4ml.bitmap, mdata.bitmap); 163 | bit_or(mdata.integrated_bitmap, p4ml.bitmap, mdata.bitmap); 164 | } 165 | 166 | action clean_agtr_time() { 167 | cleaning_agtr_time.execute_stateful_alu(p4ml_agtr_index.agtr); 168 | } 169 | 170 | action clean_ecn() { 171 | cleaning_ecn.execute_stateful_alu(p4ml_agtr_index.agtr); 172 | } 173 | 174 | action clean_bitmap() { 175 | cleaning_bitmap.execute_stateful_alu(p4ml_agtr_index.agtr); 176 | } 177 | 178 | action multicast(group) { 179 | modify_field(ig_intr_md_for_tm.mcast_grp_a, group); 180 | } 181 | 182 | action check_appID_and_seq() { 183 | check_app_id_and_seq.execute_stateful_alu(p4ml_agtr_index.agtr); 184 | //modify_field(mdata.qdepth, 0); 185 | } 186 | 187 | action check_appID_and_seq_resend() { 188 | check_app_id_and_seq_resend.execute_stateful_alu(p4ml_agtr_index.agtr); 189 | // modify_field(mdata.qdepth, 0); 190 | } 191 | 192 | action clean_appID_and_seq() { 193 | clean_app_id_and_seq.execute_stateful_alu(p4ml_agtr_index.agtr); 194 | } 195 | 196 | action check_agtr_time() { 197 | check_agtrTime.execute_stateful_alu(p4ml_agtr_index.agtr); 198 | } 199 | 200 | action check_resend_agtr_time() { 201 | check_resend_agtrTime.execute_stateful_alu(p4ml_agtr_index.agtr); 202 | } 203 | 204 | action modify_packet_bitmap() { 205 | modify_field(p4ml.bitmap, mdata.integrated_bitmap); 206 | } 207 | 208 | 
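/* ECN path: do_comp_qdepth (below) exports the egress queue depth into
 * mdata.qdepth once it reaches dqueue_alert_threshold (initialised to 1000);
 * mark_ecn / setup_ecn fold that into mdata.is_ecn, do_check_ecn ORs it into
 * the per-aggregator ecn_register and reflects an already-latched mark back
 * in p4ml.ECN, and modify_ecn / modify_ipv4_ecn set the corresponding bits on
 * outgoing packets (ipv4.ecn = 3 marks Congestion Experienced). */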
action do_qdepth() { 209 | do_comp_qdepth.execute_stateful_alu(0); 210 | } 211 | 212 | action modify_ecn() { 213 | modify_field(p4ml.ECN, 1); 214 | } 215 | 216 | action mark_ecn() { 217 | bit_or(mdata.is_ecn, mdata.qdepth, mdata.is_ecn); 218 | } 219 | 220 | action modify_ipv4_ecn() { 221 | modify_field(ipv4.ecn, 3); 222 | } 223 | 224 | action check_ecn() { 225 | do_check_ecn.execute_stateful_alu(p4ml_agtr_index.agtr); 226 | } 227 | 228 | action setup_ecn() { 229 | modify_field(mdata.is_ecn, 1); 230 | } 231 | 232 | action tag_collision_incoming() { 233 | modify_field(p4ml.isSWCollision, 1); 234 | // modify_field(p4ml.bitmap, mdata.isMyAppIDandMyCurrentSeq); 235 | } 236 | 237 | action set_egr(egress_spec) { 238 | modify_field(ig_intr_md_for_tm.ucast_egress_port, egress_spec); 239 | // increase_p4ml_counter.execute_stateful_alu(ig_intr_md.ingress_port); 240 | } 241 | 242 | action set_egr_and_set_index(egress_spec) { 243 | modify_field(ig_intr_md_for_tm.ucast_egress_port, egress_spec); 244 | modify_field(p4ml.dataIndex, 1); 245 | // increase_p4ml_counter.execute_stateful_alu(ig_intr_md.ingress_port); 246 | } 247 | 248 | action nop() 249 | { 250 | } 251 | 252 | action drop_pkt() { 253 | drop(); 254 | } 255 | 256 | action increase_counter() { 257 | increase_p4ml_counter.execute_stateful_alu(0); 258 | } 259 | 260 | table bitmap_table { 261 | actions { 262 | process_bitmap; 263 | } 264 | default_action: process_bitmap(); 265 | size : 1; 266 | } 267 | 268 | table bitmap_resend_table { 269 | actions { 270 | process_bitmap_resend; 271 | } 272 | default_action: process_bitmap_resend(); 273 | size : 1; 274 | } 275 | 276 | table bitmap_aggregate_table { 277 | actions { 278 | check_aggregate_and_forward; 279 | } 280 | default_action: check_aggregate_and_forward(); 281 | size : 1; 282 | } 283 | 284 | table agtr_time_table { 285 | actions { 286 | check_agtr_time; 287 | } 288 | default_action: check_agtr_time(); 289 | size : 1; 290 | } 291 | 292 | table agtr_time_resend_table { 293 | actions { 294 | check_resend_agtr_time; 295 | } 296 | default_action: check_resend_agtr_time(); 297 | size : 1; 298 | } 299 | 300 | table immd_outPort_table { 301 | reads { 302 | p4ml.appIDandSeqNum mask 0xFFFF0000: exact; 303 | } 304 | actions { 305 | set_egr; 306 | } 307 | } 308 | 309 | table outPort_table { 310 | reads { 311 | p4ml.appIDandSeqNum mask 0xFFFF0000: exact; 312 | ig_intr_md.ingress_port: exact; 313 | p4ml.dataIndex: exact; 314 | p4ml.PSIndex: exact; 315 | } 316 | actions { 317 | nop; 318 | set_egr; 319 | set_egr_and_set_index; 320 | drop_pkt; 321 | } 322 | default_action: drop_pkt(); 323 | } 324 | 325 | table bg_outPort_table { 326 | reads { 327 | // useless here, just can't use default action for variable 328 | p4ml_bg.isACK : exact; 329 | } 330 | actions { 331 | set_egr; 332 | nop; 333 | } 334 | } 335 | 336 | table multicast_table { 337 | reads { 338 | p4ml.isACK: exact; 339 | p4ml.appIDandSeqNum mask 0xFFFF0000: exact; 340 | ig_intr_md.ingress_port: exact; 341 | p4ml.dataIndex: exact; 342 | } 343 | actions { 344 | multicast; drop_pkt; set_egr_and_set_index; 345 | } 346 | default_action: drop_pkt(); 347 | } 348 | 349 | @pragma stage 3 350 | table clean_agtr_time_table { 351 | actions { 352 | clean_agtr_time; 353 | } 354 | default_action: clean_agtr_time(); 355 | size : 1; 356 | } 357 | 358 | table clean_ecn_table { 359 | actions { 360 | clean_ecn; 361 | } 362 | default_action: clean_ecn(); 363 | size : 1; 364 | } 365 | 366 | 367 | table clean_bitmap_table { 368 | actions { 369 | clean_bitmap; 370 | } 371 | 
default_action: clean_bitmap(); 372 | size : 1; 373 | } 374 | 375 | /* Counter */ 376 | register p4ml_counter { 377 | width : 32; 378 | instance_count :1; 379 | } 380 | 381 | blackbox stateful_alu increase_p4ml_counter { 382 | reg: p4ml_counter; 383 | 384 | update_lo_1_value : register_lo + 1 ; 385 | } 386 | 387 | table forward_counter_table { 388 | actions { 389 | increase_counter; 390 | } 391 | default_action: increase_counter(); 392 | } 393 | 394 | table appID_and_seq_table { 395 | actions { 396 | check_appID_and_seq; 397 | } 398 | default_action: check_appID_and_seq(); 399 | } 400 | 401 | table appID_and_seq_resend_table { 402 | actions { 403 | check_appID_and_seq_resend; 404 | } 405 | default_action: check_appID_and_seq_resend(); 406 | } 407 | 408 | table clean_appID_and_seq_table { 409 | actions { 410 | clean_appID_and_seq; 411 | } 412 | default_action: clean_appID_and_seq(); 413 | } 414 | 415 | table modify_packet_bitmap_table { 416 | reads { 417 | p4ml.dataIndex: exact; 418 | } 419 | actions { 420 | modify_packet_bitmap; nop; 421 | } 422 | default_action: nop(); 423 | } 424 | 425 | table qdepth_table { 426 | actions { 427 | do_qdepth; 428 | } 429 | default_action: do_qdepth(); 430 | } 431 | 432 | table modify_ecn_table { 433 | actions { 434 | modify_ecn; 435 | } 436 | default_action: modify_ecn(); 437 | } 438 | 439 | table mark_ecn_ipv4_table { 440 | actions { 441 | modify_ipv4_ecn; 442 | } 443 | default_action: modify_ipv4_ecn(); 444 | } 445 | 446 | table ecn_mark_table { 447 | actions { 448 | mark_ecn; 449 | } 450 | default_action: mark_ecn(); 451 | } 452 | 453 | table ecn_register_table { 454 | actions { 455 | check_ecn; 456 | } 457 | default_action: check_ecn(); 458 | } 459 | 460 | table setup_ecn_table { 461 | actions { 462 | setup_ecn; 463 | } 464 | default_action: setup_ecn(); 465 | } 466 | 467 | table forward { 468 | reads { 469 | ethernet.dstAddr : exact; 470 | } 471 | actions { 472 | set_egr; nop; drop_pkt; 473 | } 474 | default_action: drop_pkt(); 475 | } 476 | 477 | table drop_table { 478 | reads { 479 | ig_intr_md.ingress_port: exact; 480 | p4ml.dataIndex : exact; 481 | } 482 | actions { 483 | drop_pkt; set_egr; set_egr_and_set_index; 484 | } 485 | default_action: drop_pkt(); 486 | } 487 | 488 | table tag_collision_incoming_table { 489 | actions { 490 | tag_collision_incoming; 491 | } 492 | default_action: tag_collision_incoming(); 493 | } 494 | -------------------------------------------------------------------------------- /p4src/includes/headers.p4: -------------------------------------------------------------------------------- 1 | #define MAX_ENTRIES_PER_PACKET 32 2 | /************************************************************************* 3 | *********************** H E A D E R S ********************************* 4 | *************************************************************************/ 5 | 6 | // 14Byte 7 | header_type ethernet_t { 8 | fields { 9 | dstAddr : 48; 10 | srcAddr : 48; 11 | etherType : 16; 12 | } 13 | } 14 | 15 | // 20Byte 16 | header_type ipv4_t { 17 | fields { 18 | version : 4; 19 | ihl : 4; 20 | dscp : 6; 21 | ecn : 2; 22 | totalLen : 16; 23 | identification : 16; 24 | flags : 3; 25 | fragOffset : 13; 26 | ttl : 8; 27 | protocol : 8; 28 | hdrChecksum : 16; 29 | srcAddr : 32; 30 | dstAddr : 32; 31 | } 32 | } 33 | 34 | header_type udp_t { 35 | fields { 36 | srcPort : 16; 37 | dstPort : 16; 38 | length_ : 16; 39 | checksum : 16; 40 | } 41 | } 42 | 43 | // 12Byte * 2 44 | header_type p4ml_t { 45 | fields { 46 | bitmap : 32; 47 | agtr_time : 8; 
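/* The seven fields below (8 bits in total) correspond to the `flag` byte of
 * struct agghdr in common/packet.h: overflow is the most-significant bit
 * (0x80) and isACK the least-significant (0x01). */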
48 | overflow : 1; 49 | /* For multiple PS */ 50 | PSIndex : 2; 51 | /* For signle PS */ 52 | // reserved : 2; 53 | // isForceFoward : 1; 54 | dataIndex : 1; 55 | ECN : 1; 56 | isResend : 1; 57 | isSWCollision : 1; 58 | isACK : 1; 59 | appIDandSeqNum : 32; //in switchml.p4: this is used to find the bit location 60 | } 61 | } 62 | 63 | header_type p4ml_agtr_index_t { 64 | fields{ 65 | agtr :16; 66 | } 67 | } 68 | 69 | header_type bg_p4ml_t { 70 | fields { 71 | key : 64; 72 | len_tensor : 32; 73 | bitmap : 32; 74 | agtr_time : 8; 75 | reserved : 4; 76 | ECN : 1; 77 | isResend : 1; 78 | isSWCollision : 1; 79 | isACK : 1; 80 | agtr : 16; 81 | appIDandSeqNum : 32; //in switchml.p4: this is used to find the bit location 82 | } 83 | } 84 | 85 | // 108Byte * 2 86 | header_type entry_t { 87 | fields { 88 | data0 : 32 (signed); 89 | data1 : 32 (signed); 90 | data2 : 32 (signed); 91 | data3 : 32 (signed); 92 | data4 : 32 (signed); 93 | data5 : 32 (signed); 94 | data6 : 32 (signed); 95 | data7 : 32 (signed); 96 | data8 : 32 (signed); 97 | data9 : 32 (signed); 98 | data10 : 32 (signed); 99 | data11 : 32 (signed); 100 | data12 : 32 (signed); 101 | data13 : 32 (signed); 102 | data14 : 32 (signed); 103 | data15 : 32 (signed); 104 | data16 : 32 (signed); 105 | data17 : 32 (signed); 106 | data18 : 32 (signed); 107 | data19 : 32 (signed); 108 | data20 : 32 (signed); 109 | data21 : 32 (signed); 110 | data22 : 32 (signed); 111 | data23 : 32 (signed); 112 | data24 : 32 (signed); 113 | data25 : 32 (signed); 114 | data26 : 32 (signed); 115 | data27 : 32 (signed); 116 | data28 : 32 (signed); 117 | data29 : 32 (signed); 118 | data30 : 32 (signed); 119 | // data31 : 32 (signed); 120 | } 121 | } 122 | 123 | //12Byte * 2 124 | // header_type entry2_t { 125 | // fields { 126 | // data27 : 32 (signed); 127 | // data28 : 32 (signed); 128 | // data29 : 32 (signed); 129 | // data30 : 32 (signed); 130 | // data31 : 32 (signed); 131 | // } 132 | // } 133 | 134 | /************************************************************************* 135 | *********************** M E T A D A T A ******************************* 136 | *************************************************************************/ 137 | 138 | header_type p4ml_meta_t { 139 | fields { 140 | // P4ML 141 | isMyAppIDandMyCurrentSeq : 16; 142 | bitmap : 32; 143 | isAggregate : 32; 144 | agtr_time : 8; 145 | integrated_bitmap : 32; 146 | current_agtr_time : 8; 147 | agtr_index : 32; 148 | isDrop : 32; 149 | inside_appID_and_Seq : 1; 150 | value_one : 1; 151 | qdepth : 16; 152 | seen_bitmap0 : 8; 153 | seen_isAggregate : 8; 154 | is_ecn : 32; 155 | } 156 | } 157 | 158 | header_type p4ml_constant_t { 159 | fields{ 160 | bitmap :32; 161 | agtr_time :8; 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /p4src/includes/parser.p4: -------------------------------------------------------------------------------- 1 | 2 | 3 | metadata p4ml_meta_t mdata; 4 | metadata p4ml_constant_t p4ml_constant; 5 | 6 | header ethernet_t ethernet; 7 | header ipv4_t ipv4; 8 | header udp_t udp; 9 | header p4ml_agtr_index_t p4ml_agtr_index; 10 | header p4ml_agtr_index_t p4ml_agtr_index_useless; 11 | header p4ml_agtr_index_t p4ml_agtr_index_useless2; 12 | 13 | header p4ml_t p4ml; 14 | header entry_t p4ml_entries; 15 | header entry_t p4ml_entries_useless; 16 | 17 | header bg_p4ml_t p4ml_bg; 18 | // header blank3_t blank3; 19 | /************************************************************************* 20 | *********************** P A R S E R 
*********************************** 21 | *************************************************************************/ 22 | 23 | parser start { 24 | extract(ethernet); 25 | set_metadata(mdata.value_one, 1); 26 | return select(ethernet.etherType) { 27 | 0x0700 : parse_ipv4; 28 | 0x0800 : parse_rdma; 29 | 0x0900 : parse_bg; 30 | default : ingress; 31 | } 32 | // return parse_ipv4; 33 | 34 | } 35 | 36 | parser parse_ipv4 { 37 | extract(ipv4); 38 | return parse_p4ml; 39 | } 40 | 41 | parser parse_p4ml { 42 | extract(p4ml); 43 | return select(p4ml.dataIndex) { 44 | 0x0 : check_if_resubmit; 45 | 0x1 : use_second_p4ml_agtr_index_recirculate; 46 | default : ingress; 47 | } 48 | } 49 | 50 | parser check_if_resubmit { 51 | return select(ig_intr_md.resubmit_flag) { 52 | // 0x0 : parse_p4ml_agtr_index; 53 | 0x0 : use_first_p4ml_agtr_index_recirculate; 54 | // 0x1 : skip_first_p4ml_agtr_index; 55 | 0x1 : use_second_p4ml_agtr_index_recirculate; 56 | default : ingress; 57 | } 58 | } 59 | 60 | /// resubmit 0x0 61 | 62 | parser parse_p4ml_agtr_index { 63 | extract(p4ml_agtr_index); 64 | return skip_second_p4ml_agtr_index; 65 | } 66 | 67 | @pragma force_shift ingress 16 /* 2 bytes */ 68 | parser skip_second_p4ml_agtr_index { 69 | return parse_entry; 70 | } 71 | 72 | parser parse_entry { 73 | extract(p4ml_entries); 74 | return ingress; 75 | } 76 | 77 | /// resubmit 0x1 78 | 79 | parser parse_p4ml_agtr_index2 { 80 | extract(p4ml_agtr_index); 81 | return skip_header_c_0_31; 82 | } 83 | 84 | @pragma force_shift ingress 16 /* 2 bytes */ 85 | parser skip_first_p4ml_agtr_index { 86 | return parse_p4ml_agtr_index2; 87 | } 88 | 89 | /// recirculate 2 90 | 91 | parser use_second_p4ml_agtr_index_recirculate { 92 | extract(p4ml_agtr_index_useless2); 93 | return parse_p4ml_agtr_index_recirculate; 94 | } 95 | 96 | parser parse_p4ml_agtr_index_recirculate { 97 | extract(p4ml_agtr_index); 98 | return parse_entry2; 99 | } 100 | 101 | parser parse_entry2 { 102 | extract(p4ml_entries_useless); 103 | return parse_entry; 104 | } 105 | 106 | /// recirculate 1 107 | 108 | parser use_first_p4ml_agtr_index_recirculate { 109 | extract(p4ml_agtr_index); 110 | return useless_second_p4ml_agtr_index_recirculate; 111 | } 112 | 113 | parser useless_second_p4ml_agtr_index_recirculate { 114 | extract(p4ml_agtr_index_useless); 115 | return parse_entry; 116 | } 117 | /// 118 | 119 | @pragma force_shift ingress 256 /* 32 bytes */ 120 | parser skip_header_c_0_31 { 121 | return skip_header_c_32_63; 122 | } 123 | 124 | @pragma force_shift ingress 256 /* 32 bytes */ 125 | parser skip_header_c_32_63 { 126 | return skip_header_c_64_95; 127 | } 128 | 129 | @pragma force_shift ingress 256 /* 32 bytes */ 130 | parser skip_header_c_64_95 { 131 | return skip_header_c_96_127; 132 | } 133 | 134 | @pragma force_shift ingress 256 /* 32 bytes */ 135 | parser skip_header_c_96_127 { 136 | return parse_entry; 137 | } 138 | 139 | 140 | // /* RDMA */ 141 | parser parse_rdma { 142 | extract(ipv4); 143 | return ingress; 144 | } 145 | 146 | // /* BG */ 147 | parser parse_bg { 148 | extract(ipv4); 149 | return parse_udp_bg; 150 | } 151 | 152 | parser parse_udp_bg { 153 | extract(udp); 154 | return parse_p4ml_bg; 155 | } 156 | 157 | parser parse_p4ml_bg { 158 | extract(p4ml_bg); 159 | //set_metadata(mdata.qdepth, 0); 160 | // return ingress; 161 | return ingress; 162 | } 163 | -------------------------------------------------------------------------------- /p4src/includes/tables.p4: -------------------------------------------------------------------------------- 1 | 
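The headers.p4 and parser.p4 listings above fix the on-wire layout of a P4ML packet: per the declared field widths, a 10-byte p4ml header (32-bit bitmap, 8-bit agtr_time, one byte of flag bits, 32-bit appIDandSeqNum), a 2-byte aggregator index, and 31 signed 32-bit data entries. As a rough, hypothetical illustration (not part of this repository), a host-side sender could pack that layout as follows; the placement of the seven flag bits inside a single byte is assumed to follow the declaration order.

import struct

# Hypothetical sender-side helper: pack one P4ML payload following the field
# widths declared in headers.p4 (p4ml_t + p4ml_agtr_index_t + entry_t).
def pack_p4ml(bitmap, agtr_time, app_id_seq, agtr, values,
              overflow=0, ps_index=0, data_index=0, ecn=0,
              is_resend=0, is_sw_collision=0, is_ack=0):
    assert len(values) == 31                      # entry_t carries data0 .. data30
    flags = ((overflow & 0x1) << 7 | (ps_index & 0x3) << 5 |
             (data_index & 0x1) << 4 | (ecn & 0x1) << 3 |
             (is_resend & 0x1) << 2 | (is_sw_collision & 0x1) << 1 |
             (is_ack & 0x1))
    hdr = struct.pack("!IBBIH", bitmap, agtr_time, flags, app_id_seq, agtr)
    return hdr + struct.pack("!31i", *values)

# Example: worker bit 0, expecting 2 workers, appID 1 / seq 42, aggregator slot 7.
pkt = pack_p4ml(bitmap=1 << 0, agtr_time=2, app_id_seq=(1 << 16) | 42,
                agtr=7, values=[0] * 31)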
@pragma stage 4 2 | table processEntry1 { 3 | reads { 4 | mdata.bitmap : ternary; 5 | } 6 | actions { 7 | processentry1; 8 | noequ0_processentry1; 9 | } 10 | // default_action : noequ0_processentry1; 11 | size : 2; 12 | } 13 | 14 | @pragma stage 4 15 | table noequ0_processEntry1 { 16 | actions { 17 | noequ0_processentry1; 18 | } 19 | default_action : noequ0_processentry1(); 20 | size : 1; 21 | } 22 | 23 | @pragma stage 4 24 | table Entry1WriteToPacket { 25 | actions { 26 | entry1WriteToPacket; 27 | } 28 | default_action : entry1WriteToPacket(); 29 | size : 1; 30 | } 31 | 32 | @pragma stage 4 33 | table processEntry1andWriteToPacket { 34 | actions { 35 | processentry1andWriteToPacket; 36 | } 37 | size : 1; 38 | } 39 | 40 | @pragma stage 4 41 | table noequ0_processEntry1andWriteToPacket { 42 | actions { 43 | noequ0_processentry1andWriteToPacket; 44 | } 45 | size : 1; 46 | } 47 | 48 | @pragma stage 4 49 | table cleanEntry1 { 50 | actions { 51 | do_cleanEntry1; 52 | } 53 | default_action : do_cleanEntry1(); 54 | size : 1; 55 | } 56 | 57 | table processEntry2 { 58 | reads { 59 | mdata.bitmap : ternary; 60 | } 61 | actions { 62 | processentry2; 63 | noequ0_processentry2; 64 | } 65 | // default_action : noequ0_processentry2; 66 | size : 2; 67 | } 68 | 69 | table noequ0_processEntry2 { 70 | actions { 71 | noequ0_processentry2; 72 | } 73 | default_action : noequ0_processentry2(); 74 | size : 1; 75 | } 76 | 77 | table Entry2WriteToPacket { 78 | actions { 79 | entry2WriteToPacket; 80 | } 81 | default_action : entry2WriteToPacket(); 82 | size : 1; 83 | } 84 | 85 | table processEntry2andWriteToPacket { 86 | actions { 87 | processentry2andWriteToPacket; 88 | } 89 | size : 1; 90 | } 91 | 92 | table noequ0_processEntry2andWriteToPacket { 93 | actions { 94 | noequ0_processentry2andWriteToPacket; 95 | } 96 | size : 1; 97 | } 98 | 99 | table cleanEntry2 { 100 | actions { 101 | do_cleanEntry2; 102 | } 103 | default_action : do_cleanEntry2(); 104 | size : 1; 105 | } 106 | 107 | table processEntry3 { 108 | reads { 109 | mdata.bitmap : ternary; 110 | } 111 | actions { 112 | processentry3; 113 | noequ0_processentry3; 114 | } 115 | // default_action : noequ0_processentry3; 116 | size : 2; 117 | } 118 | 119 | table noequ0_processEntry3 { 120 | actions { 121 | noequ0_processentry3; 122 | } 123 | default_action : noequ0_processentry3(); 124 | size : 1; 125 | } 126 | 127 | table Entry3WriteToPacket { 128 | actions { 129 | entry3WriteToPacket; 130 | } 131 | default_action : entry3WriteToPacket(); 132 | size : 1; 133 | } 134 | 135 | table processEntry3andWriteToPacket { 136 | actions { 137 | processentry3andWriteToPacket; 138 | } 139 | size : 1; 140 | } 141 | 142 | table noequ0_processEntry3andWriteToPacket { 143 | actions { 144 | noequ0_processentry3andWriteToPacket; 145 | } 146 | size : 1; 147 | } 148 | 149 | table cleanEntry3 { 150 | actions { 151 | do_cleanEntry3; 152 | } 153 | default_action : do_cleanEntry3(); 154 | size : 1; 155 | } 156 | 157 | table processEntry4 { 158 | reads { 159 | mdata.bitmap : ternary; 160 | } 161 | actions { 162 | processentry4; 163 | noequ0_processentry4; 164 | } 165 | // default_action : noequ0_processentry4; 166 | size : 2; 167 | } 168 | 169 | table noequ0_processEntry4 { 170 | actions { 171 | noequ0_processentry4; 172 | } 173 | default_action : noequ0_processentry4(); 174 | size : 1; 175 | } 176 | 177 | table Entry4WriteToPacket { 178 | actions { 179 | entry4WriteToPacket; 180 | } 181 | default_action : entry4WriteToPacket(); 182 | size : 1; 183 | } 184 | 185 | table 
processEntry4andWriteToPacket { 186 | actions { 187 | processentry4andWriteToPacket; 188 | } 189 | size : 1; 190 | } 191 | 192 | table noequ0_processEntry4andWriteToPacket { 193 | actions { 194 | noequ0_processentry4andWriteToPacket; 195 | } 196 | size : 1; 197 | } 198 | 199 | table cleanEntry4 { 200 | actions { 201 | do_cleanEntry4; 202 | } 203 | default_action : do_cleanEntry4(); 204 | size : 1; 205 | } 206 | 207 | table processEntry5 { 208 | reads { 209 | mdata.bitmap : ternary; 210 | } 211 | actions { 212 | processentry5; 213 | noequ0_processentry5; 214 | } 215 | // default_action : noequ0_processentry5; 216 | size : 2; 217 | } 218 | 219 | table noequ0_processEntry5 { 220 | actions { 221 | noequ0_processentry5; 222 | } 223 | default_action : noequ0_processentry5(); 224 | size : 1; 225 | } 226 | 227 | table Entry5WriteToPacket { 228 | actions { 229 | entry5WriteToPacket; 230 | } 231 | default_action : entry5WriteToPacket(); 232 | size : 1; 233 | } 234 | 235 | table processEntry5andWriteToPacket { 236 | actions { 237 | processentry5andWriteToPacket; 238 | } 239 | size : 1; 240 | } 241 | 242 | table noequ0_processEntry5andWriteToPacket { 243 | actions { 244 | noequ0_processentry5andWriteToPacket; 245 | } 246 | size : 1; 247 | } 248 | 249 | table cleanEntry5 { 250 | actions { 251 | do_cleanEntry5; 252 | } 253 | default_action : do_cleanEntry5(); 254 | size : 1; 255 | } 256 | 257 | table processEntry6 { 258 | reads { 259 | mdata.bitmap : ternary; 260 | } 261 | actions { 262 | processentry6; 263 | noequ0_processentry6; 264 | } 265 | // default_action : noequ0_processentry6; 266 | size : 2; 267 | } 268 | 269 | table noequ0_processEntry6 { 270 | actions { 271 | noequ0_processentry6; 272 | } 273 | default_action : noequ0_processentry6(); 274 | size : 1; 275 | } 276 | 277 | table Entry6WriteToPacket { 278 | actions { 279 | entry6WriteToPacket; 280 | } 281 | default_action : entry6WriteToPacket(); 282 | size : 1; 283 | } 284 | 285 | table processEntry6andWriteToPacket { 286 | actions { 287 | processentry6andWriteToPacket; 288 | } 289 | size : 1; 290 | } 291 | 292 | table noequ0_processEntry6andWriteToPacket { 293 | actions { 294 | noequ0_processentry6andWriteToPacket; 295 | } 296 | size : 1; 297 | } 298 | 299 | table cleanEntry6 { 300 | actions { 301 | do_cleanEntry6; 302 | } 303 | default_action : do_cleanEntry6(); 304 | size : 1; 305 | } 306 | 307 | table processEntry7 { 308 | reads { 309 | mdata.bitmap : ternary; 310 | } 311 | actions { 312 | processentry7; 313 | noequ0_processentry7; 314 | } 315 | // default_action : noequ0_processentry7; 316 | size : 2; 317 | } 318 | 319 | table noequ0_processEntry7 { 320 | actions { 321 | noequ0_processentry7; 322 | } 323 | default_action : noequ0_processentry7(); 324 | size : 1; 325 | } 326 | 327 | table Entry7WriteToPacket { 328 | actions { 329 | entry7WriteToPacket; 330 | } 331 | default_action : entry7WriteToPacket(); 332 | size : 1; 333 | } 334 | 335 | table processEntry7andWriteToPacket { 336 | actions { 337 | processentry7andWriteToPacket; 338 | } 339 | size : 1; 340 | } 341 | 342 | table noequ0_processEntry7andWriteToPacket { 343 | actions { 344 | noequ0_processentry7andWriteToPacket; 345 | } 346 | size : 1; 347 | } 348 | 349 | table cleanEntry7 { 350 | actions { 351 | do_cleanEntry7; 352 | } 353 | default_action : do_cleanEntry7(); 354 | size : 1; 355 | } 356 | 357 | table processEntry8 { 358 | reads { 359 | mdata.bitmap : ternary; 360 | } 361 | actions { 362 | processentry8; 363 | noequ0_processentry8; 364 | } 365 | // default_action : 
noequ0_processentry8; 366 | size : 2; 367 | } 368 | 369 | table noequ0_processEntry8 { 370 | actions { 371 | noequ0_processentry8; 372 | } 373 | default_action : noequ0_processentry8(); 374 | size : 1; 375 | } 376 | 377 | table Entry8WriteToPacket { 378 | actions { 379 | entry8WriteToPacket; 380 | } 381 | default_action : entry8WriteToPacket(); 382 | size : 1; 383 | } 384 | 385 | table processEntry8andWriteToPacket { 386 | actions { 387 | processentry8andWriteToPacket; 388 | } 389 | size : 1; 390 | } 391 | 392 | table noequ0_processEntry8andWriteToPacket { 393 | actions { 394 | noequ0_processentry8andWriteToPacket; 395 | } 396 | size : 1; 397 | } 398 | 399 | table cleanEntry8 { 400 | actions { 401 | do_cleanEntry8; 402 | } 403 | default_action : do_cleanEntry8(); 404 | size : 1; 405 | } 406 | 407 | table processEntry9 { 408 | reads { 409 | mdata.bitmap : ternary; 410 | } 411 | actions { 412 | processentry9; 413 | noequ0_processentry9; 414 | } 415 | // default_action : noequ0_processentry9; 416 | size : 2; 417 | } 418 | 419 | table noequ0_processEntry9 { 420 | actions { 421 | noequ0_processentry9; 422 | } 423 | default_action : noequ0_processentry9(); 424 | size : 1; 425 | } 426 | 427 | table Entry9WriteToPacket { 428 | actions { 429 | entry9WriteToPacket; 430 | } 431 | default_action : entry9WriteToPacket(); 432 | size : 1; 433 | } 434 | 435 | table processEntry9andWriteToPacket { 436 | actions { 437 | processentry9andWriteToPacket; 438 | } 439 | size : 1; 440 | } 441 | 442 | table noequ0_processEntry9andWriteToPacket { 443 | actions { 444 | noequ0_processentry9andWriteToPacket; 445 | } 446 | size : 1; 447 | } 448 | 449 | table cleanEntry9 { 450 | actions { 451 | do_cleanEntry9; 452 | } 453 | default_action : do_cleanEntry9(); 454 | size : 1; 455 | } 456 | 457 | table processEntry10 { 458 | reads { 459 | mdata.bitmap : ternary; 460 | } 461 | actions { 462 | processentry10; 463 | noequ0_processentry10; 464 | } 465 | // default_action : noequ0_processentry10; 466 | size : 2; 467 | } 468 | 469 | table noequ0_processEntry10 { 470 | actions { 471 | noequ0_processentry10; 472 | } 473 | default_action : noequ0_processentry10(); 474 | size : 1; 475 | } 476 | 477 | table Entry10WriteToPacket { 478 | actions { 479 | entry10WriteToPacket; 480 | } 481 | default_action : entry10WriteToPacket(); 482 | size : 1; 483 | } 484 | 485 | table processEntry10andWriteToPacket { 486 | actions { 487 | processentry10andWriteToPacket; 488 | } 489 | size : 1; 490 | } 491 | 492 | table noequ0_processEntry10andWriteToPacket { 493 | actions { 494 | noequ0_processentry10andWriteToPacket; 495 | } 496 | size : 1; 497 | } 498 | 499 | table cleanEntry10 { 500 | actions { 501 | do_cleanEntry10; 502 | } 503 | default_action : do_cleanEntry10(); 504 | size : 1; 505 | } 506 | 507 | table processEntry11 { 508 | reads { 509 | mdata.bitmap : ternary; 510 | } 511 | actions { 512 | processentry11; 513 | noequ0_processentry11; 514 | } 515 | // default_action : noequ0_processentry11; 516 | size : 2; 517 | } 518 | 519 | table noequ0_processEntry11 { 520 | actions { 521 | noequ0_processentry11; 522 | } 523 | default_action : noequ0_processentry11(); 524 | size : 1; 525 | } 526 | 527 | table Entry11WriteToPacket { 528 | actions { 529 | entry11WriteToPacket; 530 | } 531 | default_action : entry11WriteToPacket(); 532 | size : 1; 533 | } 534 | 535 | table processEntry11andWriteToPacket { 536 | actions { 537 | processentry11andWriteToPacket; 538 | } 539 | size : 1; 540 | } 541 | 542 | table noequ0_processEntry11andWriteToPacket { 543 | 
actions { 544 | noequ0_processentry11andWriteToPacket; 545 | } 546 | size : 1; 547 | } 548 | 549 | table cleanEntry11 { 550 | actions { 551 | do_cleanEntry11; 552 | } 553 | default_action : do_cleanEntry11(); 554 | size : 1; 555 | } 556 | 557 | table processEntry12 { 558 | reads { 559 | mdata.bitmap : ternary; 560 | } 561 | actions { 562 | processentry12; 563 | noequ0_processentry12; 564 | } 565 | // default_action : noequ0_processentry12; 566 | size : 2; 567 | } 568 | 569 | table noequ0_processEntry12 { 570 | actions { 571 | noequ0_processentry12; 572 | } 573 | default_action : noequ0_processentry12(); 574 | size : 1; 575 | } 576 | 577 | table Entry12WriteToPacket { 578 | actions { 579 | entry12WriteToPacket; 580 | } 581 | default_action : entry12WriteToPacket(); 582 | size : 1; 583 | } 584 | 585 | table processEntry12andWriteToPacket { 586 | actions { 587 | processentry12andWriteToPacket; 588 | } 589 | size : 1; 590 | } 591 | 592 | table noequ0_processEntry12andWriteToPacket { 593 | actions { 594 | noequ0_processentry12andWriteToPacket; 595 | } 596 | size : 1; 597 | } 598 | 599 | table cleanEntry12 { 600 | actions { 601 | do_cleanEntry12; 602 | } 603 | default_action : do_cleanEntry12(); 604 | size : 1; 605 | } 606 | 607 | table processEntry13 { 608 | reads { 609 | mdata.bitmap : ternary; 610 | } 611 | actions { 612 | processentry13; 613 | noequ0_processentry13; 614 | } 615 | // default_action : noequ0_processentry13; 616 | size : 2; 617 | } 618 | 619 | table noequ0_processEntry13 { 620 | actions { 621 | noequ0_processentry13; 622 | } 623 | default_action : noequ0_processentry13(); 624 | size : 1; 625 | } 626 | 627 | table Entry13WriteToPacket { 628 | actions { 629 | entry13WriteToPacket; 630 | } 631 | default_action : entry13WriteToPacket(); 632 | size : 1; 633 | } 634 | 635 | table processEntry13andWriteToPacket { 636 | actions { 637 | processentry13andWriteToPacket; 638 | } 639 | size : 1; 640 | } 641 | 642 | table noequ0_processEntry13andWriteToPacket { 643 | actions { 644 | noequ0_processentry13andWriteToPacket; 645 | } 646 | size : 1; 647 | } 648 | 649 | table cleanEntry13 { 650 | actions { 651 | do_cleanEntry13; 652 | } 653 | default_action : do_cleanEntry13(); 654 | size : 1; 655 | } 656 | 657 | table processEntry14 { 658 | reads { 659 | mdata.bitmap : ternary; 660 | } 661 | actions { 662 | processentry14; 663 | noequ0_processentry14; 664 | } 665 | // default_action : noequ0_processentry14; 666 | size : 2; 667 | } 668 | 669 | table noequ0_processEntry14 { 670 | actions { 671 | noequ0_processentry14; 672 | } 673 | default_action : noequ0_processentry14(); 674 | size : 1; 675 | } 676 | 677 | table Entry14WriteToPacket { 678 | actions { 679 | entry14WriteToPacket; 680 | } 681 | default_action : entry14WriteToPacket(); 682 | size : 1; 683 | } 684 | 685 | table processEntry14andWriteToPacket { 686 | actions { 687 | processentry14andWriteToPacket; 688 | } 689 | size : 1; 690 | } 691 | 692 | table noequ0_processEntry14andWriteToPacket { 693 | actions { 694 | noequ0_processentry14andWriteToPacket; 695 | } 696 | size : 1; 697 | } 698 | 699 | table cleanEntry14 { 700 | actions { 701 | do_cleanEntry14; 702 | } 703 | default_action : do_cleanEntry14(); 704 | size : 1; 705 | } 706 | 707 | table processEntry15 { 708 | reads { 709 | mdata.bitmap : ternary; 710 | } 711 | actions { 712 | processentry15; 713 | noequ0_processentry15; 714 | } 715 | // default_action : noequ0_processentry15; 716 | size : 2; 717 | } 718 | 719 | table noequ0_processEntry15 { 720 | actions { 721 | noequ0_processentry15; 
722 | } 723 | default_action : noequ0_processentry15(); 724 | size : 1; 725 | } 726 | 727 | table Entry15WriteToPacket { 728 | actions { 729 | entry15WriteToPacket; 730 | } 731 | default_action : entry15WriteToPacket(); 732 | size : 1; 733 | } 734 | 735 | table processEntry15andWriteToPacket { 736 | actions { 737 | processentry15andWriteToPacket; 738 | } 739 | size : 1; 740 | } 741 | 742 | table noequ0_processEntry15andWriteToPacket { 743 | actions { 744 | noequ0_processentry15andWriteToPacket; 745 | } 746 | size : 1; 747 | } 748 | 749 | table cleanEntry15 { 750 | actions { 751 | do_cleanEntry15; 752 | } 753 | default_action : do_cleanEntry15(); 754 | size : 1; 755 | } 756 | 757 | table processEntry16 { 758 | reads { 759 | mdata.bitmap : ternary; 760 | } 761 | actions { 762 | processentry16; 763 | noequ0_processentry16; 764 | } 765 | // default_action : noequ0_processentry16; 766 | size : 2; 767 | } 768 | 769 | table noequ0_processEntry16 { 770 | actions { 771 | noequ0_processentry16; 772 | } 773 | default_action : noequ0_processentry16(); 774 | size : 1; 775 | } 776 | 777 | table Entry16WriteToPacket { 778 | actions { 779 | entry16WriteToPacket; 780 | } 781 | default_action : entry16WriteToPacket(); 782 | size : 1; 783 | } 784 | 785 | table processEntry16andWriteToPacket { 786 | actions { 787 | processentry16andWriteToPacket; 788 | } 789 | size : 1; 790 | } 791 | 792 | table noequ0_processEntry16andWriteToPacket { 793 | actions { 794 | noequ0_processentry16andWriteToPacket; 795 | } 796 | size : 1; 797 | } 798 | 799 | table cleanEntry16 { 800 | actions { 801 | do_cleanEntry16; 802 | } 803 | default_action : do_cleanEntry16(); 804 | size : 1; 805 | } 806 | 807 | table processEntry17 { 808 | reads { 809 | mdata.bitmap : ternary; 810 | } 811 | actions { 812 | processentry17; 813 | noequ0_processentry17; 814 | } 815 | // default_action : noequ0_processentry17; 816 | size : 2; 817 | } 818 | 819 | table noequ0_processEntry17 { 820 | actions { 821 | noequ0_processentry17; 822 | } 823 | default_action : noequ0_processentry17(); 824 | size : 1; 825 | } 826 | 827 | table Entry17WriteToPacket { 828 | actions { 829 | entry17WriteToPacket; 830 | } 831 | default_action : entry17WriteToPacket(); 832 | size : 1; 833 | } 834 | 835 | table processEntry17andWriteToPacket { 836 | actions { 837 | processentry17andWriteToPacket; 838 | } 839 | size : 1; 840 | } 841 | 842 | table noequ0_processEntry17andWriteToPacket { 843 | actions { 844 | noequ0_processentry17andWriteToPacket; 845 | } 846 | size : 1; 847 | } 848 | 849 | table cleanEntry17 { 850 | actions { 851 | do_cleanEntry17; 852 | } 853 | default_action : do_cleanEntry17(); 854 | size : 1; 855 | } 856 | 857 | table processEntry18 { 858 | reads { 859 | mdata.bitmap : ternary; 860 | } 861 | actions { 862 | processentry18; 863 | noequ0_processentry18; 864 | } 865 | // default_action : noequ0_processentry18; 866 | size : 2; 867 | } 868 | 869 | table noequ0_processEntry18 { 870 | actions { 871 | noequ0_processentry18; 872 | } 873 | default_action : noequ0_processentry18(); 874 | size : 1; 875 | } 876 | 877 | table Entry18WriteToPacket { 878 | actions { 879 | entry18WriteToPacket; 880 | } 881 | default_action : entry18WriteToPacket(); 882 | size : 1; 883 | } 884 | 885 | table processEntry18andWriteToPacket { 886 | actions { 887 | processentry18andWriteToPacket; 888 | } 889 | size : 1; 890 | } 891 | 892 | table noequ0_processEntry18andWriteToPacket { 893 | actions { 894 | noequ0_processentry18andWriteToPacket; 895 | } 896 | size : 1; 897 | } 898 | 899 | table 
cleanEntry18 { 900 | actions { 901 | do_cleanEntry18; 902 | } 903 | default_action : do_cleanEntry18(); 904 | size : 1; 905 | } 906 | 907 | table processEntry19 { 908 | reads { 909 | mdata.bitmap : ternary; 910 | } 911 | actions { 912 | processentry19; 913 | noequ0_processentry19; 914 | } 915 | // default_action : noequ0_processentry19; 916 | size : 2; 917 | } 918 | 919 | table noequ0_processEntry19 { 920 | actions { 921 | noequ0_processentry19; 922 | } 923 | default_action : noequ0_processentry19(); 924 | size : 1; 925 | } 926 | 927 | table Entry19WriteToPacket { 928 | actions { 929 | entry19WriteToPacket; 930 | } 931 | default_action : entry19WriteToPacket(); 932 | size : 1; 933 | } 934 | 935 | table processEntry19andWriteToPacket { 936 | actions { 937 | processentry19andWriteToPacket; 938 | } 939 | size : 1; 940 | } 941 | 942 | table noequ0_processEntry19andWriteToPacket { 943 | actions { 944 | noequ0_processentry19andWriteToPacket; 945 | } 946 | size : 1; 947 | } 948 | 949 | table cleanEntry19 { 950 | actions { 951 | do_cleanEntry19; 952 | } 953 | default_action : do_cleanEntry19(); 954 | size : 1; 955 | } 956 | 957 | table processEntry20 { 958 | reads { 959 | mdata.bitmap : ternary; 960 | } 961 | actions { 962 | processentry20; 963 | noequ0_processentry20; 964 | } 965 | // default_action : noequ0_processentry20; 966 | size : 2; 967 | } 968 | 969 | table noequ0_processEntry20 { 970 | actions { 971 | noequ0_processentry20; 972 | } 973 | default_action : noequ0_processentry20(); 974 | size : 1; 975 | } 976 | 977 | table Entry20WriteToPacket { 978 | actions { 979 | entry20WriteToPacket; 980 | } 981 | default_action : entry20WriteToPacket(); 982 | size : 1; 983 | } 984 | 985 | table processEntry20andWriteToPacket { 986 | actions { 987 | processentry20andWriteToPacket; 988 | } 989 | size : 1; 990 | } 991 | 992 | table noequ0_processEntry20andWriteToPacket { 993 | actions { 994 | noequ0_processentry20andWriteToPacket; 995 | } 996 | size : 1; 997 | } 998 | 999 | table cleanEntry20 { 1000 | actions { 1001 | do_cleanEntry20; 1002 | } 1003 | default_action : do_cleanEntry20(); 1004 | size : 1; 1005 | } 1006 | 1007 | table processEntry21 { 1008 | reads { 1009 | mdata.bitmap : ternary; 1010 | } 1011 | actions { 1012 | processentry21; 1013 | noequ0_processentry21; 1014 | } 1015 | // default_action : noequ0_processentry21; 1016 | size : 2; 1017 | } 1018 | 1019 | table noequ0_processEntry21 { 1020 | actions { 1021 | noequ0_processentry21; 1022 | } 1023 | default_action : noequ0_processentry21(); 1024 | size : 1; 1025 | } 1026 | 1027 | table Entry21WriteToPacket { 1028 | actions { 1029 | entry21WriteToPacket; 1030 | } 1031 | default_action : entry21WriteToPacket(); 1032 | size : 1; 1033 | } 1034 | 1035 | table processEntry21andWriteToPacket { 1036 | actions { 1037 | processentry21andWriteToPacket; 1038 | } 1039 | size : 1; 1040 | } 1041 | 1042 | table noequ0_processEntry21andWriteToPacket { 1043 | actions { 1044 | noequ0_processentry21andWriteToPacket; 1045 | } 1046 | size : 1; 1047 | } 1048 | 1049 | table cleanEntry21 { 1050 | actions { 1051 | do_cleanEntry21; 1052 | } 1053 | default_action : do_cleanEntry21(); 1054 | size : 1; 1055 | } 1056 | 1057 | table processEntry22 { 1058 | reads { 1059 | mdata.bitmap : ternary; 1060 | } 1061 | actions { 1062 | processentry22; 1063 | noequ0_processentry22; 1064 | } 1065 | // default_action : noequ0_processentry22; 1066 | size : 2; 1067 | } 1068 | 1069 | table noequ0_processEntry22 { 1070 | actions { 1071 | noequ0_processentry22; 1072 | } 1073 | default_action : 
noequ0_processentry22(); 1074 | size : 1; 1075 | } 1076 | 1077 | table Entry22WriteToPacket { 1078 | actions { 1079 | entry22WriteToPacket; 1080 | } 1081 | default_action : entry22WriteToPacket(); 1082 | size : 1; 1083 | } 1084 | 1085 | table processEntry22andWriteToPacket { 1086 | actions { 1087 | processentry22andWriteToPacket; 1088 | } 1089 | size : 1; 1090 | } 1091 | 1092 | table noequ0_processEntry22andWriteToPacket { 1093 | actions { 1094 | noequ0_processentry22andWriteToPacket; 1095 | } 1096 | size : 1; 1097 | } 1098 | 1099 | table cleanEntry22 { 1100 | actions { 1101 | do_cleanEntry22; 1102 | } 1103 | default_action : do_cleanEntry22(); 1104 | size : 1; 1105 | } 1106 | 1107 | table processEntry23 { 1108 | reads { 1109 | mdata.bitmap : ternary; 1110 | } 1111 | actions { 1112 | processentry23; 1113 | noequ0_processentry23; 1114 | } 1115 | // default_action : noequ0_processentry23; 1116 | size : 2; 1117 | } 1118 | 1119 | table noequ0_processEntry23 { 1120 | actions { 1121 | noequ0_processentry23; 1122 | } 1123 | default_action : noequ0_processentry23(); 1124 | size : 1; 1125 | } 1126 | 1127 | table Entry23WriteToPacket { 1128 | actions { 1129 | entry23WriteToPacket; 1130 | } 1131 | default_action : entry23WriteToPacket(); 1132 | size : 1; 1133 | } 1134 | 1135 | table processEntry23andWriteToPacket { 1136 | actions { 1137 | processentry23andWriteToPacket; 1138 | } 1139 | size : 1; 1140 | } 1141 | 1142 | table noequ0_processEntry23andWriteToPacket { 1143 | actions { 1144 | noequ0_processentry23andWriteToPacket; 1145 | } 1146 | size : 1; 1147 | } 1148 | 1149 | table cleanEntry23 { 1150 | actions { 1151 | do_cleanEntry23; 1152 | } 1153 | default_action : do_cleanEntry23(); 1154 | size : 1; 1155 | } 1156 | 1157 | table processEntry24 { 1158 | reads { 1159 | mdata.bitmap : ternary; 1160 | } 1161 | actions { 1162 | processentry24; 1163 | noequ0_processentry24; 1164 | } 1165 | // default_action : noequ0_processentry24; 1166 | size : 2; 1167 | } 1168 | 1169 | table noequ0_processEntry24 { 1170 | actions { 1171 | noequ0_processentry24; 1172 | } 1173 | default_action : noequ0_processentry24(); 1174 | size : 1; 1175 | } 1176 | 1177 | table Entry24WriteToPacket { 1178 | actions { 1179 | entry24WriteToPacket; 1180 | } 1181 | default_action : entry24WriteToPacket(); 1182 | size : 1; 1183 | } 1184 | 1185 | table processEntry24andWriteToPacket { 1186 | actions { 1187 | processentry24andWriteToPacket; 1188 | } 1189 | size : 1; 1190 | } 1191 | 1192 | table noequ0_processEntry24andWriteToPacket { 1193 | actions { 1194 | noequ0_processentry24andWriteToPacket; 1195 | } 1196 | size : 1; 1197 | } 1198 | 1199 | table cleanEntry24 { 1200 | actions { 1201 | do_cleanEntry24; 1202 | } 1203 | default_action : do_cleanEntry24(); 1204 | size : 1; 1205 | } 1206 | 1207 | table processEntry25 { 1208 | reads { 1209 | mdata.bitmap : ternary; 1210 | } 1211 | actions { 1212 | processentry25; 1213 | noequ0_processentry25; 1214 | } 1215 | // default_action : noequ0_processentry25; 1216 | size : 2; 1217 | } 1218 | 1219 | table noequ0_processEntry25 { 1220 | actions { 1221 | noequ0_processentry25; 1222 | } 1223 | default_action : noequ0_processentry25(); 1224 | size : 1; 1225 | } 1226 | 1227 | table Entry25WriteToPacket { 1228 | actions { 1229 | entry25WriteToPacket; 1230 | } 1231 | default_action : entry25WriteToPacket(); 1232 | size : 1; 1233 | } 1234 | 1235 | table processEntry25andWriteToPacket { 1236 | actions { 1237 | processentry25andWriteToPacket; 1238 | } 1239 | size : 1; 1240 | } 1241 | 1242 | table 
noequ0_processEntry25andWriteToPacket { 1243 | actions { 1244 | noequ0_processentry25andWriteToPacket; 1245 | } 1246 | size : 1; 1247 | } 1248 | 1249 | table cleanEntry25 { 1250 | actions { 1251 | do_cleanEntry25; 1252 | } 1253 | default_action : do_cleanEntry25(); 1254 | size : 1; 1255 | } 1256 | 1257 | table processEntry26 { 1258 | reads { 1259 | mdata.bitmap : ternary; 1260 | } 1261 | actions { 1262 | processentry26; 1263 | noequ0_processentry26; 1264 | } 1265 | // default_action : noequ0_processentry26; 1266 | size : 2; 1267 | } 1268 | 1269 | table noequ0_processEntry26 { 1270 | actions { 1271 | noequ0_processentry26; 1272 | } 1273 | default_action : noequ0_processentry26(); 1274 | size : 1; 1275 | } 1276 | 1277 | table Entry26WriteToPacket { 1278 | actions { 1279 | entry26WriteToPacket; 1280 | } 1281 | default_action : entry26WriteToPacket(); 1282 | size : 1; 1283 | } 1284 | 1285 | table processEntry26andWriteToPacket { 1286 | actions { 1287 | processentry26andWriteToPacket; 1288 | } 1289 | size : 1; 1290 | } 1291 | 1292 | table noequ0_processEntry26andWriteToPacket { 1293 | actions { 1294 | noequ0_processentry26andWriteToPacket; 1295 | } 1296 | size : 1; 1297 | } 1298 | 1299 | table cleanEntry26 { 1300 | actions { 1301 | do_cleanEntry26; 1302 | } 1303 | default_action : do_cleanEntry26(); 1304 | size : 1; 1305 | } 1306 | 1307 | table processEntry27 { 1308 | reads { 1309 | mdata.bitmap : ternary; 1310 | } 1311 | actions { 1312 | processentry27; 1313 | noequ0_processentry27; 1314 | } 1315 | // default_action : noequ0_processentry27; 1316 | size : 2; 1317 | } 1318 | 1319 | table noequ0_processEntry27 { 1320 | actions { 1321 | noequ0_processentry27; 1322 | } 1323 | default_action : noequ0_processentry27(); 1324 | size : 1; 1325 | } 1326 | 1327 | table Entry27WriteToPacket { 1328 | actions { 1329 | entry27WriteToPacket; 1330 | } 1331 | default_action : entry27WriteToPacket(); 1332 | size : 1; 1333 | } 1334 | 1335 | table processEntry27andWriteToPacket { 1336 | actions { 1337 | processentry27andWriteToPacket; 1338 | } 1339 | size : 1; 1340 | } 1341 | 1342 | table noequ0_processEntry27andWriteToPacket { 1343 | actions { 1344 | noequ0_processentry27andWriteToPacket; 1345 | } 1346 | size : 1; 1347 | } 1348 | 1349 | table cleanEntry27 { 1350 | actions { 1351 | do_cleanEntry27; 1352 | } 1353 | default_action : do_cleanEntry27(); 1354 | size : 1; 1355 | } 1356 | 1357 | table processEntry28 { 1358 | reads { 1359 | mdata.bitmap : ternary; 1360 | } 1361 | actions { 1362 | processentry28; 1363 | noequ0_processentry28; 1364 | } 1365 | // default_action : noequ0_processentry28; 1366 | size : 2; 1367 | } 1368 | 1369 | table noequ0_processEntry28 { 1370 | actions { 1371 | noequ0_processentry28; 1372 | } 1373 | default_action : noequ0_processentry28(); 1374 | size : 1; 1375 | } 1376 | 1377 | table Entry28WriteToPacket { 1378 | actions { 1379 | entry28WriteToPacket; 1380 | } 1381 | default_action : entry28WriteToPacket(); 1382 | size : 1; 1383 | } 1384 | 1385 | table processEntry28andWriteToPacket { 1386 | actions { 1387 | processentry28andWriteToPacket; 1388 | } 1389 | size : 1; 1390 | } 1391 | 1392 | table noequ0_processEntry28andWriteToPacket { 1393 | actions { 1394 | noequ0_processentry28andWriteToPacket; 1395 | } 1396 | size : 1; 1397 | } 1398 | 1399 | table cleanEntry28 { 1400 | actions { 1401 | do_cleanEntry28; 1402 | } 1403 | default_action : do_cleanEntry28(); 1404 | size : 1; 1405 | } 1406 | 1407 | table processEntry29 { 1408 | reads { 1409 | mdata.bitmap : ternary; 1410 | } 1411 | actions { 
1412 | processentry29; 1413 | noequ0_processentry29; 1414 | } 1415 | // default_action : noequ0_processentry29; 1416 | size : 2; 1417 | } 1418 | 1419 | table noequ0_processEntry29 { 1420 | actions { 1421 | noequ0_processentry29; 1422 | } 1423 | default_action : noequ0_processentry29(); 1424 | size : 1; 1425 | } 1426 | 1427 | table Entry29WriteToPacket { 1428 | actions { 1429 | entry29WriteToPacket; 1430 | } 1431 | default_action : entry29WriteToPacket(); 1432 | size : 1; 1433 | } 1434 | 1435 | table processEntry29andWriteToPacket { 1436 | actions { 1437 | processentry29andWriteToPacket; 1438 | } 1439 | size : 1; 1440 | } 1441 | 1442 | table noequ0_processEntry29andWriteToPacket { 1443 | actions { 1444 | noequ0_processentry29andWriteToPacket; 1445 | } 1446 | size : 1; 1447 | } 1448 | 1449 | table cleanEntry29 { 1450 | actions { 1451 | do_cleanEntry29; 1452 | } 1453 | default_action : do_cleanEntry29(); 1454 | size : 1; 1455 | } 1456 | 1457 | table processEntry30 { 1458 | reads { 1459 | mdata.bitmap : ternary; 1460 | } 1461 | actions { 1462 | processentry30; 1463 | noequ0_processentry30; 1464 | } 1465 | // default_action : noequ0_processentry30; 1466 | size : 2; 1467 | } 1468 | 1469 | table noequ0_processEntry30 { 1470 | actions { 1471 | noequ0_processentry30; 1472 | } 1473 | default_action : noequ0_processentry30(); 1474 | size : 1; 1475 | } 1476 | 1477 | table Entry30WriteToPacket { 1478 | actions { 1479 | entry30WriteToPacket; 1480 | } 1481 | default_action : entry30WriteToPacket(); 1482 | size : 1; 1483 | } 1484 | 1485 | table processEntry30andWriteToPacket { 1486 | actions { 1487 | processentry30andWriteToPacket; 1488 | } 1489 | size : 1; 1490 | } 1491 | 1492 | table noequ0_processEntry30andWriteToPacket { 1493 | actions { 1494 | noequ0_processentry30andWriteToPacket; 1495 | } 1496 | size : 1; 1497 | } 1498 | 1499 | table cleanEntry30 { 1500 | actions { 1501 | do_cleanEntry30; 1502 | } 1503 | default_action : do_cleanEntry30(); 1504 | size : 1; 1505 | } 1506 | 1507 | table processEntry31 { 1508 | reads { 1509 | mdata.bitmap : ternary; 1510 | } 1511 | actions { 1512 | processentry31; 1513 | noequ0_processentry31; 1514 | } 1515 | // default_action : noequ0_processentry31; 1516 | size : 2; 1517 | } 1518 | 1519 | table noequ0_processEntry31 { 1520 | actions { 1521 | noequ0_processentry31; 1522 | } 1523 | default_action : noequ0_processentry31(); 1524 | size : 1; 1525 | } 1526 | 1527 | table Entry31WriteToPacket { 1528 | actions { 1529 | entry31WriteToPacket; 1530 | } 1531 | default_action : entry31WriteToPacket(); 1532 | size : 1; 1533 | } 1534 | 1535 | table processEntry31andWriteToPacket { 1536 | actions { 1537 | processentry31andWriteToPacket; 1538 | } 1539 | size : 1; 1540 | } 1541 | 1542 | table noequ0_processEntry31andWriteToPacket { 1543 | actions { 1544 | noequ0_processentry31andWriteToPacket; 1545 | } 1546 | size : 1; 1547 | } 1548 | 1549 | table cleanEntry31 { 1550 | actions { 1551 | do_cleanEntry31; 1552 | } 1553 | default_action : do_cleanEntry31(); 1554 | size : 1; 1555 | } 1556 | 1557 | //table processEntry32 { 1558 | // actions { 1559 | // processentry32; 1560 | // } 1561 | // default_action : processentry32(); 1562 | // size : 1; 1563 | // / 1564 | 1565 | //tablnoequ0_e processEntry32 { 1566 | // actions { 1567 | // noequ0_ processentry32; 1568 | // } 1569 | // default_action noequ0_: processentry32(); 1570 | // size : 1; 1571 | //} 1572 | // 1573 | //table Entry32WriteToPacket { 1574 | // actions { 1575 | // entry32WriteToPacket; 1576 | // } 1577 | // default_action : 
entry32WriteToPacket(); 1578 | // size : 1; 1579 | //} 1580 | // 1581 | //table processEntry32andWriteToPacket { 1582 | // default_action : processentry32andWriteToPacket(); 1583 | // size : 1; 1584 | 1585 | //tablnoequ0_e processEntry32andWriteToPacket { 1586 | // default_action noequ0_: processentry32andWriteToPacket(); 1587 | // size : 1; 1588 | 1589 | //table cleanry3Entry2 { 1590 | // // actions { 1591 | // / 1592 | // //table processEntry32andWriteToPacket { 1593 | // // default_action : processentry32andWriteToPacket(); 1594 | // // size : 1; 1595 | 1596 | // //noequ0_tablnoequ0_e processEntry32andWriteToPacket { 1597 | // // default_action noequ0_: processentry32andWriteToPacket(); 1598 | // noequ0_// size : 1; 1599 | 1600 | // //table cleanry3Entry2 { 1601 | // / 1602 | // / do_cleanEntry32; 1603 | // } 1604 | // default_action : do_cleanEntry32(); 1605 | // size : 1; 1606 | //} 1607 | -------------------------------------------------------------------------------- /p4src/p4ml.p4: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "includes/headers.p4" 5 | #include "includes/parser.p4" 6 | 7 | #include "includes/registers.p4" 8 | #include "includes/tables.p4" 9 | #include "includes/actions.p4" 10 | #include "includes/common.p4" 11 | 12 | field_list p4ml_resubmit_list{ 13 | mdata.agtr_time; 14 | } 15 | 16 | action do_resubmit(){ 17 | resubmit(p4ml_resubmit_list); 18 | } 19 | 20 | table p4ml_resubmit{ 21 | actions{ 22 | do_resubmit; 23 | } 24 | default_action: do_resubmit(); 25 | size: 1; 26 | 27 | } 28 | control ingress 29 | { 30 | 31 | if (valid(p4ml_entries)) { 32 | 33 | if (ipv4.ecn == 3 or p4ml.ECN == 1) { 34 | apply(setup_ecn_table); 35 | } 36 | // ack packet 37 | if (p4ml.isACK == 1) { 38 | 39 | if (p4ml.overflow == 1 and p4ml.isResend == 0) { 40 | 41 | } else { 42 | apply(clean_appID_and_seq_table); 43 | 44 | if (mdata.isMyAppIDandMyCurrentSeq != 0) { 45 | /* Clean */ 46 | apply(clean_bitmap_table); 47 | apply(clean_ecn_table); 48 | apply(clean_agtr_time_table); 49 | // apply(cleanEntry1); 50 | } 51 | } 52 | 53 | /* Multicast Back */ 54 | if(ig_intr_md.resubmit_flag == 1) { 55 | apply(multicast_table); 56 | } else { 57 | apply(p4ml_resubmit); 58 | } 59 | 60 | } else { 61 | 62 | if (p4ml.overflow == 1) { 63 | apply(outPort_table); 64 | } else { 65 | if (p4ml.isResend == 1) { 66 | apply(appID_and_seq_resend_table); 67 | } else { 68 | apply(appID_and_seq_table); 69 | } 70 | // Correct ID and Seq 71 | if (mdata.isMyAppIDandMyCurrentSeq != 0) { 72 | 73 | if (p4ml.isResend == 1) { 74 | // Clean the bitmap also 75 | apply(bitmap_resend_table); 76 | } else { 77 | apply(bitmap_table); 78 | } 79 | 80 | apply(ecn_register_table); 81 | 82 | apply(bitmap_aggregate_table); 83 | 84 | if (p4ml.isResend == 1) { 85 | // Force forward and clean 86 | apply(agtr_time_resend_table); 87 | } else { 88 | apply(agtr_time_table); 89 | } 90 | 91 | // bitmap correct 92 | if (mdata.isAggregate != 0) { 93 | if (mdata.current_agtr_time == p4ml.agtr_time) { 94 | apply(noequ0_processEntry1andWriteToPacket); 95 | apply(noequ0_processEntry2andWriteToPacket); 96 | apply(noequ0_processEntry3andWriteToPacket); 97 | apply(noequ0_processEntry4andWriteToPacket); 98 | apply(noequ0_processEntry5andWriteToPacket); 99 | apply(noequ0_processEntry6andWriteToPacket); 100 | apply(noequ0_processEntry7andWriteToPacket); 101 | apply(noequ0_processEntry8andWriteToPacket); 102 | apply(noequ0_processEntry9andWriteToPacket); 103 | 
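The 31 processEntryN / noequ0_processEntryN / EntryNWriteToPacket / cleanEntryN blocks in tables.p4, and the long apply() chains in this ingress control, repeat one template per 32-bit data slot in the packet; P4_14 has no loops, so the unrolling is literal. A throwaway generator along these lines — hypothetical, not shipped with the repository — reproduces the tables.p4 boilerplate (entry 1 additionally carries the "@pragma stage 4" annotations in the real file):

# Hypothetical generator for the per-entry table boilerplate in tables.p4.
TEMPLATE = """\
table processEntry{i} {{
    reads   {{ mdata.bitmap : ternary; }}
    actions {{ processentry{i}; noequ0_processentry{i}; }}
    size : 2;
}}

table noequ0_processEntry{i} {{
    actions {{ noequ0_processentry{i}; }}
    default_action : noequ0_processentry{i}();
    size : 1;
}}

table Entry{i}WriteToPacket {{
    actions {{ entry{i}WriteToPacket; }}
    default_action : entry{i}WriteToPacket();
    size : 1;
}}

table processEntry{i}andWriteToPacket {{
    actions {{ processentry{i}andWriteToPacket; }}
    size : 1;
}}

table noequ0_processEntry{i}andWriteToPacket {{
    actions {{ noequ0_processentry{i}andWriteToPacket; }}
    size : 1;
}}

table cleanEntry{i} {{
    actions {{ do_cleanEntry{i}; }}
    default_action : do_cleanEntry{i}();
    size : 1;
}}
"""

def emit_entry_tables(n_entries=31):
    return "\n".join(TEMPLATE.format(i=i) for i in range(1, n_entries + 1))

if __name__ == "__main__":
    print(emit_entry_tables())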
apply(noequ0_processEntry10andWriteToPacket); 104 | apply(noequ0_processEntry11andWriteToPacket); 105 | apply(noequ0_processEntry12andWriteToPacket); 106 | apply(noequ0_processEntry13andWriteToPacket); 107 | apply(noequ0_processEntry14andWriteToPacket); 108 | apply(noequ0_processEntry15andWriteToPacket); 109 | apply(noequ0_processEntry16andWriteToPacket); 110 | apply(noequ0_processEntry17andWriteToPacket); 111 | apply(noequ0_processEntry18andWriteToPacket); 112 | apply(noequ0_processEntry19andWriteToPacket); 113 | apply(noequ0_processEntry20andWriteToPacket); 114 | apply(noequ0_processEntry21andWriteToPacket); 115 | apply(noequ0_processEntry22andWriteToPacket); 116 | apply(noequ0_processEntry23andWriteToPacket); 117 | apply(noequ0_processEntry24andWriteToPacket); 118 | apply(noequ0_processEntry25andWriteToPacket); 119 | apply(noequ0_processEntry26andWriteToPacket); 120 | apply(noequ0_processEntry27andWriteToPacket); 121 | apply(noequ0_processEntry28andWriteToPacket); 122 | apply(noequ0_processEntry29andWriteToPacket); 123 | apply(noequ0_processEntry30andWriteToPacket); 124 | apply(noequ0_processEntry31andWriteToPacket); 125 | //apply(noequ0_processEntry32andWriteToPacket); 126 | // set output port 127 | // if(ig_intr_md.resubmit_flag == 1) { 128 | apply(modify_packet_bitmap_table); 129 | apply(outPort_table); 130 | // } else { 131 | // apply(p4ml_resubmit); 132 | // } 133 | } else { 134 | apply(processEntry1); 135 | apply(processEntry2); 136 | apply(processEntry3); 137 | apply(processEntry4); 138 | apply(processEntry5); 139 | apply(processEntry6); 140 | apply(processEntry7); 141 | apply(processEntry8); 142 | apply(processEntry9); 143 | apply(processEntry10); 144 | apply(processEntry11); 145 | apply(processEntry12); 146 | apply(processEntry13); 147 | apply(processEntry14); 148 | apply(processEntry15); 149 | apply(processEntry16); 150 | apply(processEntry17); 151 | apply(processEntry18); 152 | apply(processEntry19); 153 | apply(processEntry20); 154 | apply(processEntry21); 155 | apply(processEntry22); 156 | apply(processEntry23); 157 | apply(processEntry24); 158 | apply(processEntry25); 159 | apply(processEntry26); 160 | apply(processEntry27); 161 | apply(processEntry28); 162 | apply(processEntry29); 163 | apply(processEntry30); 164 | apply(processEntry31); 165 | //apply(processEntry32); 166 | 167 | if (ig_intr_md.resubmit_flag == 1) { 168 | apply(drop_table); 169 | } else { 170 | apply(p4ml_resubmit); 171 | } 172 | 173 | } 174 | } else { 175 | if (mdata.current_agtr_time == p4ml.agtr_time) { 176 | apply(Entry1WriteToPacket); 177 | apply(Entry2WriteToPacket); 178 | apply(Entry3WriteToPacket); 179 | apply(Entry4WriteToPacket); 180 | apply(Entry5WriteToPacket); 181 | apply(Entry6WriteToPacket); 182 | apply(Entry7WriteToPacket); 183 | apply(Entry8WriteToPacket); 184 | apply(Entry9WriteToPacket); 185 | apply(Entry10WriteToPacket); 186 | apply(Entry11WriteToPacket); 187 | apply(Entry12WriteToPacket); 188 | apply(Entry13WriteToPacket); 189 | apply(Entry14WriteToPacket); 190 | apply(Entry15WriteToPacket); 191 | apply(Entry16WriteToPacket); 192 | apply(Entry17WriteToPacket); 193 | apply(Entry18WriteToPacket); 194 | apply(Entry19WriteToPacket); 195 | apply(Entry20WriteToPacket); 196 | apply(Entry21WriteToPacket); 197 | apply(Entry22WriteToPacket); 198 | apply(Entry23WriteToPacket); 199 | apply(Entry24WriteToPacket); 200 | apply(Entry25WriteToPacket); 201 | apply(Entry26WriteToPacket); 202 | apply(Entry27WriteToPacket); 203 | apply(Entry28WriteToPacket); 204 | apply(Entry29WriteToPacket); 205 | 
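In plain terms, the ingress control around these apply chains keeps, per aggregator slot, a worker bitmap, a packet counter (agtr_time), and 31 running sums: the first packet for a slot overwrites the registers (processentryN), later packets add into them (noequ0_processentryN), and once the expected number of workers has contributed the aggregate is written back into the packet and forwarded, with the registers cleared later on the ACK path via the clean_* tables. The following is a much-simplified, hypothetical software model of one slot (not from this repository), with the listed simplifications called out in comments:

# Hypothetical, simplified model of one aggregator slot's bitmap / agtr_time /
# register state, mirroring the behavior driven by the apply chains above.
class AggregatorSlot(object):
    def __init__(self):
        self.bitmap = 0          # which workers have contributed (bitmap register)
        self.count = 0           # packets folded in so far (agtr_time register)
        self.values = [0] * 31   # register1 .. register31

    def process(self, worker_bit, expected_workers, values):
        # A packet whose bit is already set is a retransmit: the real pipeline
        # replays the stored result (EntryNWriteToPacket); here we just ignore it.
        if self.bitmap & worker_bit:
            return None
        first = (self.bitmap == 0)
        self.bitmap |= worker_bit
        self.count += 1
        if first:
            self.values = list(values)                                   # processentryN
        else:
            self.values = [a + b for a, b in zip(self.values, values)]   # noequ0_processentryN
        if self.count == expected_workers:
            # Aggregate complete: write back into the packet and forward. The
            # switch keeps its registers until the PS ACK reaches the clean_*
            # tables; resetting here only keeps the sketch short.
            out, self.bitmap, self.count, self.values = self.values, 0, 0, [0] * 31
            return out
        return None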
apply(Entry30WriteToPacket); 206 | apply(Entry31WriteToPacket); 207 | //apply(Entry32WriteToPacket); 208 | // set output port 209 | // if(ig_intr_md.resubmit_flag == 1) { 210 | apply(modify_packet_bitmap_table); 211 | apply(outPort_table); 212 | // } else { 213 | // apply(p4ml_resubmit); 214 | // } 215 | } 216 | } 217 | } else { 218 | /* tag collision bit in incoming one */ 219 | // if not empty 220 | if (p4ml.isResend == 0) { 221 | apply(tag_collision_incoming_table); 222 | } 223 | apply(outPort_table); 224 | } 225 | } 226 | } 227 | } else { 228 | // // BG traffic doesn't have data layer 229 | // if (valid(p4ml_bg)){ 230 | // apply(bg_outPort_table); 231 | // } else { 232 | apply(forward); 233 | // } 234 | } 235 | } 236 | 237 | control egress 238 | { 239 | apply(qdepth_table); 240 | if (valid(ipv4)) { 241 | if (mdata.qdepth != 0) { 242 | apply(mark_ecn_ipv4_table); 243 | } 244 | } 245 | if (valid(p4ml_entries)) { 246 | if (mdata.qdepth != 0) { 247 | apply(modify_ecn_table); 248 | } 249 | } 250 | } 251 | 252 | -------------------------------------------------------------------------------- /ptf/ptfTest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pd_base_tests 4 | import pltfm_pm_rpc 5 | import pal_rpc 6 | import random 7 | import sys 8 | import time 9 | import unittest 10 | 11 | from pltfm_pm_rpc.ttypes import * 12 | from pal_rpc.ttypes import * 13 | from ptf import config 14 | from ptf.testutils import * 15 | from ptf.thriftutils import * 16 | from res_pd_rpc.ttypes import * 17 | from ptf import config 18 | from ptf.thriftutils import * 19 | 20 | from res_pd_rpc.ttypes import * 21 | from port_mapping import * 22 | 23 | from tm_api_rpc.ttypes import * 24 | 25 | this_dir = os.path.dirname(os.path.abspath(__file__)) 26 | 27 | fp_ports = ["9/0","10/0","11/0","12/0","13/0","14/0","15/0","16/0","17/0","18/0","19/0"] 28 | # fp_ports = ["13/0","14/0", "11/0"] 29 | loopback_ports = ["20/0"] 30 | # loopback_ports = ["1/0", "2/0", "3/0", "4/0", "5/0", "6/0", "7/0", "8/0", "25/0"] 31 | def toInt8(n): 32 | n = n & 0xff 33 | return (n ^ 0x80) - 0x80 34 | 35 | class L2Test(pd_base_tests.ThriftInterfaceDataPlane): 36 | def __init__(self): 37 | pd_base_tests.ThriftInterfaceDataPlane.__init__(self, 38 | ["basic_switching"]) 39 | 40 | # The setUp() method is used to prepare the test fixture. Typically 41 | # you would use it to establich connection to the Thrift server. 42 | # 43 | # You can also put the initial device configuration there. 
However, 44 | # if during this process an error is encountered, it will be considered 45 | # as a test error (meaning the test is incorrect), 46 | # rather than a test failure 47 | def setUp(self): 48 | # initialize the connection 49 | pd_base_tests.ThriftInterfaceDataPlane.setUp(self) 50 | self.sess_hdl = self.conn_mgr.client_init() 51 | self.dev_tgt = DevTarget_t(0, hex_to_i16(0xFFFF)) 52 | self.devPorts = [] 53 | self.LPPorts = [] 54 | self.dev = 0 55 | self.platform_type = "mavericks" 56 | board_type = self.pltfm_pm.pltfm_pm_board_type_get() 57 | if re.search("0x0234|0x1234|0x4234|0x5234", hex(board_type)): 58 | self.platform_type = "mavericks" 59 | elif re.search("0x2234|0x3234", hex(board_type)): 60 | self.platform_type = "montara" 61 | 62 | # get the device ports from front panel ports 63 | try: 64 | for fpPort in fp_ports: 65 | port, chnl = fpPort.split("/") 66 | devPort = \ 67 | self.pal.pal_port_front_panel_port_to_dev_port_get(0, 68 | int(port), 69 | int(chnl)) 70 | self.devPorts.append(devPort) 71 | 72 | if test_param_get('setup') == True or (test_param_get('setup') != True 73 | and test_param_get('cleanup') != True): 74 | 75 | # add and enable the platform ports 76 | for i in self.devPorts: 77 | self.pal.pal_port_add(0, i, 78 | pal_port_speed_t.BF_SPEED_100G, 79 | pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON) 80 | self.pal.pal_port_an_set(0, i, 2); 81 | self.pal.pal_port_enable(0, i) 82 | 83 | ####################### LOOPBACK ########################### 84 | for lbPort in loopback_ports: 85 | port, chnl = lbPort.split("/") 86 | devPort = \ 87 | self.pal.pal_port_front_panel_port_to_dev_port_get(0, 88 | int(port), 89 | int(chnl)) 90 | self.LPPorts.append(devPort) 91 | 92 | # add and enable the platform ports 93 | for i in self.LPPorts: 94 | self.pal.pal_port_add(0, i, 95 | pal_port_speed_t.BF_SPEED_100G, 96 | pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON) 97 | 98 | self.pal.pal_port_loopback_mode_set(0, i, 99 | pal_loopback_mod_t.BF_LPBK_MAC_NEAR) 100 | self.pal.pal_port_an_set(0, i, 2); 101 | self.pal.pal_port_enable(0, i) 102 | 103 | self.conn_mgr.complete_operations(self.sess_hdl) 104 | 105 | except Exception as e: 106 | print "Some Error in port init" 107 | 108 | # # flow control setting, follow "Barefoot Network Tofino Fixed Function API Guide" 109 | # for i in range(len(self.devPorts)): 110 | # # step 1: Map loessless traffice to a PPG handle with a buffer limit 111 | # ppg_cells = 2000 112 | # self.ppg_handler = self.tm.tm_allocate_ppg(self.dev, self.devPorts[i]) 113 | # self.tm.tm_set_ppg_guaranteed_min_limit(self.dev, self.ppg_handler, ppg_cells) 114 | 115 | # # step 2: Map traffic to an iCos 116 | # icos_bmap = toInt8(0x01) 117 | # self.tm.tm_set_ppg_icos_mapping(self.dev, self.ppg_handler, icos_bmap) 118 | 119 | # # step 3: Provision skid buffer set up pasue PFC generation 120 | # skid_cells = 4000 121 | # self.tm.tm_set_ppg_skid_limit(self.dev, self.ppg_handler, skid_cells) 122 | # self.tm.tm_enable_lossless_treatment(self.dev, self.ppg_handler) 123 | # # link-level flow control 124 | # fctype = 1 # BF_TM_PAUSE_PORT 125 | # self.tm.tm_set_port_flowcontrol_mode(self.dev, self.devPorts[i], fctype) 126 | # # iCos to Cos 127 | # icos_cos_map = tm_pfc_cos_map_t(CoS0_to_iCos=0) 128 | # self.tm.tm_set_port_pfc_cos_mapping(self.dev, self.devPorts[i], icos_cos_map) 129 | 130 | # ########################################## 131 | # for i in range(len(self.devPorts)): 132 | # #step 4: Apply buffering 133 | # queue_id = 0 134 | # queue_cells = 25000 135 | # 
self.tm.tm_set_q_guaranteed_min_limit(self.dev, self.devPorts[i], queue_id, queue_cells) 136 | 137 | # # step 5: Allocate queues 138 | # q_count = 8 139 | # q_map = tm_q_map_t(0,1,2,3,4,5,6,7) 140 | # self.tm.tm_set_port_q_mapping(self.dev, self.devPorts[i], q_count, q_map) 141 | # # step 6: Apply weighting if needed (skip, no use) 142 | 143 | # # step 7: Honor pause/PFC event 144 | # cos = 0 145 | # self.tm.tm_set_q_pfc_cos_mapping(self.dev, self.devPorts[i], queue_id, cos) 146 | 147 | # # Can not find below API 148 | # # self.tm.tm_set_port_flowcontrol_rx(self.dev, self.devPorts, fctype) 149 | # self.tm.tm_complete_operations(self.dev) 150 | 151 | # for i in range(len(self.devPorts)): 152 | # # For MAC 153 | # self.pal.pal_port_flow_control_pfc_set(self.dev, self.devPorts[i], 1, 1) 154 | # print("Done with PFC") 155 | 156 | return 157 | 158 | def runTest(self): 159 | print "runTest" 160 | # self.conn_mgr.complete_operations(self.sess_hdl) 161 | 162 | def tearDown(self): 163 | return 164 | # try: 165 | # print("Clearing table entries") 166 | # for table in self.entries.keys(): 167 | # delete_func = "self.client." + table + "_table_delete" 168 | # for entry in self.entries[table]: 169 | # exec delete_func + "(self.sess_hdl, self.dev, entry)" 170 | # except: 171 | # print("Error while cleaning up. ") 172 | # print("You might need to restart the driver") 173 | # finally: 174 | # self.conn_mgr.complete_operations(self.sess_hdl) 175 | # self.conn_mgr.client_cleanup(self.sess_hdl) 176 | # print("Closed Session %d" % self.sess_hdl) 177 | # self.tm.tm_free_ppg(self.dev, self.ppg_handler) 178 | # print("Free ppg handler %d" % self.ppg_handler) 179 | # pd_base_tests.ThriftInterfaceDataPlane.tearDown(self) 180 | -------------------------------------------------------------------------------- /run_pd_rpc/setup.py: -------------------------------------------------------------------------------- 1 | clear_all() 2 | 3 | p4_pd.register_reset_all_agtr_time() 4 | p4_pd.register_reset_all_appID_and_Seq() 5 | p4_pd.register_reset_all_bitmap() 6 | p4_pd.register_reset_all_register1() 7 | p4_pd.register_reset_all_register2() 8 | p4_pd.register_reset_all_register3() 9 | p4_pd.register_reset_all_register4() 10 | p4_pd.register_reset_all_register5() 11 | p4_pd.register_reset_all_register6() 12 | p4_pd.register_reset_all_register7() 13 | p4_pd.register_reset_all_register8() 14 | p4_pd.register_reset_all_register9() 15 | p4_pd.register_reset_all_register10() 16 | p4_pd.register_reset_all_register11() 17 | p4_pd.register_reset_all_register12() 18 | p4_pd.register_reset_all_register13() 19 | p4_pd.register_reset_all_register14() 20 | p4_pd.register_reset_all_register15() 21 | p4_pd.register_reset_all_register16() 22 | p4_pd.register_reset_all_register17() 23 | p4_pd.register_reset_all_register18() 24 | p4_pd.register_reset_all_register19() 25 | p4_pd.register_reset_all_register20() 26 | p4_pd.register_reset_all_register21() 27 | p4_pd.register_reset_all_register22() 28 | p4_pd.register_reset_all_register23() 29 | p4_pd.register_reset_all_register24() 30 | p4_pd.register_reset_all_register25() 31 | p4_pd.register_reset_all_register26() 32 | p4_pd.register_reset_all_register27() 33 | p4_pd.register_reset_all_register28() 34 | p4_pd.register_reset_all_register29() 35 | p4_pd.register_reset_all_register30() 36 | p4_pd.register_reset_all_register31() 37 | # p4_pd.register_reset_all_register32() 38 | 39 | 40 | # These are background traffic 41 | # p4_pd.bg_outPort_table_table_add_with_set_egr( 42 | # 
p4_pd.bg_outPort_table_match_spec_t(0), 43 | # p4_pd.set_egr_action_spec_t(4) 44 | # ) 45 | 46 | # p4_pd.bg_outPort_table_table_add_with_set_egr( 47 | # p4_pd.bg_outPort_table_match_spec_t(1), 48 | # p4_pd.set_egr_action_spec_t(0) 49 | # ) 50 | 51 | # first Zero for pending 52 | port_of_worker = [0, 56, 48, 40, 32, 24, 16, 8, 0, 4] 53 | single_loopback_port = 20 54 | 55 | MAC_address_of_worker = [ "0", 56 | "b8:59:9f:1d:04:f2" 57 | , "b8:59:9f:0b:30:72" 58 | , "98:03:9b:03:46:50" 59 | , "b8:59:9f:02:0d:14" 60 | , "b8:59:9f:b0:2d:50" 61 | , "b8:59:9f:b0:2b:b0" 62 | , "b8:59:9f:b0:2b:b8" 63 | , "b8:59:9f:b0:2d:18" 64 | , "b8:59:9f:b0:2d:58" ] 65 | 66 | # first Zero for pending 67 | # PSs = [0, 9, 8] 68 | PSs = [0, 9] 69 | 70 | len_workers = len(port_of_worker) 71 | len_PS = len(PSs) 72 | 73 | # Normal Switch traffic 74 | for i in range(1, len_workers): 75 | p4_pd.forward_table_add_with_set_egr( 76 | p4_pd.forward_match_spec_t(macAddr_to_string(MAC_address_of_worker[i])), 77 | p4_pd.set_egr_action_spec_t(port_of_worker[i]) 78 | ) 79 | 80 | 81 | # P4ML Traffic 82 | 83 | # No Pending packet, First time enter switch 84 | for i in range(1, len_workers - 1): 85 | for j in range(1, len_PS): 86 | p4_pd.outPort_table_table_add_with_set_egr_and_set_index( 87 | p4_pd.outPort_table_match_spec_t( 88 | 1 << 16, 89 | port_of_worker[i], 90 | 0, 91 | j-1), 92 | # app1 -> worker3 93 | p4_pd.set_egr_and_set_index_action_spec_t(single_loopback_port)) 94 | 95 | # Not Pending packet, Second time enter switch 96 | for j in range(1, len_PS): 97 | print(j, PSs[j]) 98 | p4_pd.outPort_table_table_add_with_set_egr( 99 | p4_pd.outPort_table_match_spec_t( 100 | 1 << 16, 101 | single_loopback_port, 102 | 1, 103 | j-1), 104 | # app1 -> worker3 105 | p4_pd.set_egr_action_spec_t(port_of_worker[PSs[j]])) 106 | 107 | # INGRESSPORT, Index 108 | # Worker1 to Worker8 109 | for i in range(1, len_workers - 1): 110 | p4_pd.drop_table_table_add_with_drop_pkt( 111 | p4_pd.drop_table_match_spec_t( 112 | port_of_worker[i], 113 | 1) 114 | ) 115 | 116 | ####### Server ######## 117 | for j in range(1, len_PS): 118 | p4_pd.multicast_table_table_add_with_multicast( 119 | p4_pd.multicast_table_match_spec_t( 120 | 1, 121 | 1 << 16, 122 | port_of_worker[PSs[j]], 123 | 0), 124 | # multicast app1 -> worker1, 2 125 | p4_pd.multicast_action_spec_t(999) 126 | ) 127 | 128 | 129 | p4_pd.modify_packet_bitmap_table_table_add_with_modify_packet_bitmap( 130 | p4_pd.modify_packet_bitmap_table_match_spec_t(1) 131 | ) 132 | 133 | p4_pd.modify_packet_bitmap_table_table_add_with_nop( 134 | p4_pd.modify_packet_bitmap_table_match_spec_t(0) 135 | ) 136 | 137 | p4_pd.processEntry1_table_add_with_processentry1( 138 | p4_pd.processEntry1_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 139 | ) 140 | p4_pd.processEntry1_table_add_with_noequ0_processentry1( 141 | p4_pd.processEntry1_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1, 142 | ) 143 | p4_pd.processEntry2_table_add_with_processentry2( 144 | p4_pd.processEntry2_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 145 | ) 146 | p4_pd.processEntry2_table_add_with_noequ0_processentry2( 147 | p4_pd.processEntry2_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 148 | ) 149 | p4_pd.processEntry3_table_add_with_processentry3( 150 | p4_pd.processEntry3_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 151 | ) 152 | p4_pd.processEntry3_table_add_with_noequ0_processentry3( 153 | p4_pd.processEntry3_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 154 | ) 155 | 
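The processEntryN entries installed above and below all follow the same pair of ternary rules, reading the match spec as (value, mask): an exact match on mdata.bitmap == 0 (mask 0xFFFFFFFF) that selects processentryN, and a wildcard entry (mask 0x00000000) that selects noequ0_processentryN, both at priority 1. A loop like the following — hypothetical, but intended to be equivalent to the unrolled calls, and assuming the same run_pd_rpc environment as this script so that p4_pd and hex_to_i32 are in scope — would install all 31 pairs:

# Hypothetical shorthand for the unrolled processEntryN_table_add_with_* calls.
for n in range(1, 32):
    spec = getattr(p4_pd, "processEntry%d_match_spec_t" % n)
    hit = getattr(p4_pd, "processEntry%d_table_add_with_processentry%d" % (n, n))
    miss = getattr(p4_pd, "processEntry%d_table_add_with_noequ0_processentry%d" % (n, n))
    hit(spec(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1)    # bitmap == 0 -> processentryN
    miss(spec(hex_to_i32(0), hex_to_i32(0x00000000)), 1)   # anything else -> noequ0_processentryN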
p4_pd.processEntry4_table_add_with_processentry4( 156 | p4_pd.processEntry4_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 157 | ) 158 | p4_pd.processEntry4_table_add_with_noequ0_processentry4( 159 | p4_pd.processEntry4_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 160 | ) 161 | p4_pd.processEntry5_table_add_with_processentry5( 162 | p4_pd.processEntry5_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 163 | ) 164 | p4_pd.processEntry5_table_add_with_noequ0_processentry5( 165 | p4_pd.processEntry5_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 166 | ) 167 | p4_pd.processEntry6_table_add_with_processentry6( 168 | p4_pd.processEntry6_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 169 | ) 170 | p4_pd.processEntry6_table_add_with_noequ0_processentry6( 171 | p4_pd.processEntry6_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 172 | ) 173 | p4_pd.processEntry7_table_add_with_processentry7( 174 | p4_pd.processEntry7_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 175 | ) 176 | p4_pd.processEntry7_table_add_with_noequ0_processentry7( 177 | p4_pd.processEntry7_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 178 | ) 179 | p4_pd.processEntry8_table_add_with_processentry8( 180 | p4_pd.processEntry8_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 181 | ) 182 | p4_pd.processEntry8_table_add_with_noequ0_processentry8( 183 | p4_pd.processEntry8_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 184 | ) 185 | p4_pd.processEntry9_table_add_with_processentry9( 186 | p4_pd.processEntry9_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 187 | ) 188 | p4_pd.processEntry9_table_add_with_noequ0_processentry9( 189 | p4_pd.processEntry9_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 190 | ) 191 | p4_pd.processEntry10_table_add_with_processentry10( 192 | p4_pd.processEntry10_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 193 | ) 194 | p4_pd.processEntry10_table_add_with_noequ0_processentry10( 195 | p4_pd.processEntry10_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 196 | ) 197 | p4_pd.processEntry11_table_add_with_processentry11( 198 | p4_pd.processEntry11_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 199 | ) 200 | p4_pd.processEntry11_table_add_with_noequ0_processentry11( 201 | p4_pd.processEntry11_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 202 | ) 203 | p4_pd.processEntry12_table_add_with_processentry12( 204 | p4_pd.processEntry12_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 205 | ) 206 | p4_pd.processEntry12_table_add_with_noequ0_processentry12( 207 | p4_pd.processEntry12_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 208 | ) 209 | p4_pd.processEntry13_table_add_with_processentry13( 210 | p4_pd.processEntry13_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 211 | ) 212 | p4_pd.processEntry13_table_add_with_noequ0_processentry13( 213 | p4_pd.processEntry13_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 214 | ) 215 | p4_pd.processEntry14_table_add_with_processentry14( 216 | p4_pd.processEntry14_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 217 | ) 218 | p4_pd.processEntry14_table_add_with_noequ0_processentry14( 219 | p4_pd.processEntry14_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 220 | ) 221 | p4_pd.processEntry15_table_add_with_processentry15( 222 | p4_pd.processEntry15_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 223 | ) 224 | p4_pd.processEntry15_table_add_with_noequ0_processentry15( 225 | 
p4_pd.processEntry15_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 226 | ) 227 | p4_pd.processEntry16_table_add_with_processentry16( 228 | p4_pd.processEntry16_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 229 | ) 230 | p4_pd.processEntry16_table_add_with_noequ0_processentry16( 231 | p4_pd.processEntry16_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 232 | ) 233 | p4_pd.processEntry17_table_add_with_processentry17( 234 | p4_pd.processEntry17_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 235 | ) 236 | p4_pd.processEntry17_table_add_with_noequ0_processentry17( 237 | p4_pd.processEntry17_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 238 | ) 239 | p4_pd.processEntry18_table_add_with_processentry18( 240 | p4_pd.processEntry18_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 241 | ) 242 | p4_pd.processEntry18_table_add_with_noequ0_processentry18( 243 | p4_pd.processEntry18_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 244 | ) 245 | p4_pd.processEntry19_table_add_with_processentry19( 246 | p4_pd.processEntry19_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 247 | ) 248 | p4_pd.processEntry19_table_add_with_noequ0_processentry19( 249 | p4_pd.processEntry19_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 250 | ) 251 | p4_pd.processEntry20_table_add_with_processentry20( 252 | p4_pd.processEntry20_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 253 | ) 254 | p4_pd.processEntry20_table_add_with_noequ0_processentry20( 255 | p4_pd.processEntry20_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 256 | ) 257 | p4_pd.processEntry21_table_add_with_processentry21( 258 | p4_pd.processEntry21_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 259 | ) 260 | p4_pd.processEntry21_table_add_with_noequ0_processentry21( 261 | p4_pd.processEntry21_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 262 | ) 263 | p4_pd.processEntry22_table_add_with_processentry22( 264 | p4_pd.processEntry22_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 265 | ) 266 | p4_pd.processEntry22_table_add_with_noequ0_processentry22( 267 | p4_pd.processEntry22_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 268 | ) 269 | p4_pd.processEntry23_table_add_with_processentry23( 270 | p4_pd.processEntry23_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 271 | ) 272 | p4_pd.processEntry23_table_add_with_noequ0_processentry23( 273 | p4_pd.processEntry23_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 274 | ) 275 | p4_pd.processEntry24_table_add_with_processentry24( 276 | p4_pd.processEntry24_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 277 | ) 278 | p4_pd.processEntry24_table_add_with_noequ0_processentry24( 279 | p4_pd.processEntry24_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 280 | ) 281 | p4_pd.processEntry25_table_add_with_processentry25( 282 | p4_pd.processEntry25_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 283 | ) 284 | p4_pd.processEntry25_table_add_with_noequ0_processentry25( 285 | p4_pd.processEntry25_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 286 | ) 287 | p4_pd.processEntry26_table_add_with_processentry26( 288 | p4_pd.processEntry26_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 289 | ) 290 | p4_pd.processEntry26_table_add_with_noequ0_processentry26( 291 | p4_pd.processEntry26_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 292 | ) 293 | p4_pd.processEntry27_table_add_with_processentry27( 294 | p4_pd.processEntry27_match_spec_t(hex_to_i32(0), 
hex_to_i32(0xFFFFFFFF)), 1, 295 | ) 296 | p4_pd.processEntry27_table_add_with_noequ0_processentry27( 297 | p4_pd.processEntry27_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 298 | ) 299 | p4_pd.processEntry28_table_add_with_processentry28( 300 | p4_pd.processEntry28_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 301 | ) 302 | p4_pd.processEntry28_table_add_with_noequ0_processentry28( 303 | p4_pd.processEntry28_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 304 | ) 305 | p4_pd.processEntry29_table_add_with_processentry29( 306 | p4_pd.processEntry29_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 307 | ) 308 | p4_pd.processEntry29_table_add_with_noequ0_processentry29( 309 | p4_pd.processEntry29_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 310 | ) 311 | p4_pd.processEntry30_table_add_with_processentry30( 312 | p4_pd.processEntry30_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 313 | ) 314 | p4_pd.processEntry30_table_add_with_noequ0_processentry30( 315 | p4_pd.processEntry30_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 316 | ) 317 | p4_pd.processEntry31_table_add_with_processentry31( 318 | p4_pd.processEntry31_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1, 319 | ) 320 | p4_pd.processEntry31_table_add_with_noequ0_processentry31( 321 | p4_pd.processEntry31_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1 322 | ) 323 | try: 324 | # TODO: understand it 325 | # don't know why, but if the multicast group ID equals an input port, 326 | # then the packet following that packet will execute multicast; 327 | # therefore make it 20, since no 20th port is used. 328 | mcg_all = mc.mgrp_create(999) 329 | mcg1 = mc.mgrp_create(998) 330 | mcg2 = mc.mgrp_create(997) 331 | # mcg3 = mc.mgrp_create(996) 332 | except: 333 | print """ 334 | clean_all() does not yet support cleaning the PRE programming.
335 | You need to restart the driver before running this script for the second time 336 | """ 337 | quit() 338 | 339 | node_all = mc.node_create( 340 | rid=999, 341 | port_map=devports_to_mcbitmap([56,48,40,32,24,16,8,0]), 342 | # port_map=devports_to_mcbitmap([port_of_worker[2], port_of_worker[3], port_of_worker[4],]), 343 | lag_map=lags_to_mcbitmap(([])) 344 | ) 345 | mc.associate_node(mcg_all, node_all, xid=0, xid_valid=False) 346 | 347 | node1 = mc.node_create( 348 | rid=998, 349 | # Not multicast to "0" ( 0 as bg traffic ) 350 | port_map=devports_to_mcbitmap([56,48,40,32,24,16,8]), 351 | # port_map=devports_to_mcbitmap([56,48,40]), 352 | lag_map=lags_to_mcbitmap(([])) 353 | ) 354 | mc.associate_node(mcg1, node1, xid=0, xid_valid=False) 355 | 356 | node2 = mc.node_create( 357 | rid=997, 358 | # Not multicast to "0" ( 0 as bg traffic ) 359 | # port_map=devports_to_mcbitmap([56,48,40,32,24,16,8]), 360 | port_map=devports_to_mcbitmap([24,16,8]), 361 | lag_map=lags_to_mcbitmap(([])) 362 | ) 363 | mc.associate_node(mcg2, node2, xid=0, xid_valid=False) 364 | 365 | 366 | conn_mgr.complete_operations() 367 | 368 | def hex_to_i32(h): 369 | x = int(h, 0) 370 | if (x > 0xFFFFFFFF): 371 | raise UIn_Error("Integer cannot fit within 32 bits") 372 | if (x > 0x7FFFFFFF): x-= 0x100000000 373 | return x -------------------------------------------------------------------------------- /server/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # All Target 3 | all: 4 | g++ -std=c++11 -O3 -g -c -o ParameterServer.o ParameterServer.cc 5 | g++ -std=c++11 -O3 -g -c -o ../common/dma_common.o ../common/dma_common.cc 6 | g++ -std=c++11 -O3 -g -c -o ../common/HashTable.o ../common/HashTable.cc 7 | g++ -std=c++11 -O3 -g -o app ParameterServer.o ../common/HashTable.o ../common/dma_common.o -lpthread -libverbs 8 | 9 | 10 | # Clean Target 11 | clean: 12 | rm *.o 13 | rm app 14 | -------------------------------------------------------------------------------- /server/ParameterServer.cc: -------------------------------------------------------------------------------- 1 | #include "ParameterServer.h" 2 | 3 | tensor_context *tensors; 4 | 5 | int max_agtr_size_per_thread; 6 | int UsedSwitchAGTRcount = MAX_AGTR_COUNT; 7 | std::mutex _dma_mutex; 8 | struct ibv_device **dev_list; 9 | struct ibv_device *ib_dev; 10 | ThreadPool* workQueue; 11 | std::mutex __print_mutex; 12 | std::mutex _init_mutex; 13 | int num_thread; 14 | int print_count = 0; 15 | int appID; 16 | 17 | long long int receive_in_sec[20] = {0}; 18 | bool receive_byte_reset_flag[20] = {0}; 19 | 20 | bool is_completed_p4ml_key[1024000] = {0}; 21 | 22 | int next_agtr[MAX_AGTR_COUNT] = {-1}; 23 | HashTable* hash_table; 24 | 25 | int packet_full_count = 0; 26 | int packet_partial_count = 0; 27 | int packet_all_forward_count = 0; 28 | int packet_partial_total_count = 0; 29 | 30 | #define MAX_MEASUREMENT_KEY 12000 31 | int full_packet_count[MAX_MEASUREMENT_KEY][16518] = { 0 }; 32 | int resend_packet_count[MAX_MEASUREMENT_KEY][16518] = { 0 }; 33 | 34 | 35 | DMAcontext** global_dma_contexts; 36 | 37 | void main_receive_packet_loop(DMAcontext* dma_context, int thread_id) { 38 | int msgs_completed = 0; 39 | int this_pos_to_send = 0; 40 | int total_last_tensor_packet = 0; 41 | int imm_pos_to_send = dma_context->my_send_queue_length / 2; 42 | bool app_init[MAX_APP_PER_THREAD] = {0}; 43 | 44 | /* Loss */ 45 | int loss = 0; 46 | 47 | int rand_index = 0; 48 | int total_loss = 0; 49 | 50 | // app start from 1 51 | int* 
tensors_pos_of_app = new int[MAX_APP_PER_THREAD + 1]; 52 | for (int i = 1; i <= MAX_APP_PER_THREAD; i++) { 53 | tensors_pos_of_app[i] = thread_id * MAX_STORAGE_PER_APP_PER_THREAD * MAX_APP_PER_THREAD + (i - 1) * MAX_STORAGE_PER_APP_PER_THREAD; 54 | } 55 | 56 | 57 | while (1) { 58 | 59 | cqe_snapshot_t cur_snapshot; 60 | msgs_completed = 0; 61 | 62 | std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); 63 | while(1) { 64 | 65 | // if (receive_byte_reset_flag[thread_id]) { 66 | // receive_in_sec[thread_id] = 0; 67 | // receive_byte_reset_flag[thread_id] = false; 68 | // } 69 | 70 | std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); 71 | std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1); 72 | 73 | msgs_completed = receive_packet(dma_context, &cur_snapshot); 74 | if (msgs_completed) { 75 | break; 76 | } 77 | if (time_span.count() > 20.0 && msgs_completed == 0 && dma_context->total_received > 0) { 78 | std::lock_guard<std::mutex> lock(_dma_mutex); 79 | fprintf(stderr, "Timeout happened at this thread_id=%d, total_received=%d, total_sent=%d, last_ACK=%d, total_last_tensor_packet_recv=%d\n", 80 | thread_id, global_dma_contexts[thread_id]->total_received, global_dma_contexts[thread_id]->total_sent, tensors[tensors_pos_of_app[1]].window_manager[0].last_ACK, total_last_tensor_packet); 81 | for (int i = 0; i < num_thread; i++) 82 | fprintf(stderr, "Timeout happened at thread_id=%d, total_received=%d, total_sent=%d\n", i, global_dma_contexts[i]->total_received, global_dma_contexts[i]->total_sent); 83 | 84 | for (uint64_t i = 0; i < MAX_MEASUREMENT_KEY; i++) { 85 | for (uint16_t j = 1; j <= ceil((float)MAX_TENSOR_SIZE/MAX_ENTRIES_PER_PACKET); j++) { 86 | if (full_packet_count[i][j]) { 87 | packet_full_count++; 88 | } else if (resend_packet_count[i][j]) { 89 | packet_partial_count++; 90 | packet_partial_total_count += resend_packet_count[i][j]; 91 | } else { 92 | packet_all_forward_count++; 93 | // printf("i:%d, j:%d\n", i, j); 94 | } 95 | } 96 | } 97 | printf("%d, %d, %d, %d\n", packet_full_count, packet_partial_count, packet_all_forward_count, packet_partial_total_count); 98 | 99 | int seen_agtrs = 0; 100 | for (int i = 0; i < MAX_AGTR_COUNT; i++) 101 | if (hash_table->isAlreadyDeclare[i]) 102 | seen_agtrs++; 103 | printf("Seen agtrs: %d\n", seen_agtrs); 104 | 105 | exit(-1); 106 | } 107 | } 108 | 109 | int to_be_sent = 0; 110 | if (this_pos_to_send + max_agtr_size_per_thread + max_agtr_size_per_thread > dma_context->my_send_queue_length / 2) 111 | this_pos_to_send = 0; 112 | 113 | // printf("%d packets received.\n", msgs_completed); 114 | for(int msg=0; msg < msgs_completed; msg++) { 115 | // std::chrono::high_resolution_clock::time_point packet_start = std::chrono::high_resolution_clock::now(); 116 | uint8_t* buf = &dma_context->mp_recv_ring[dma_context->ring_head * kAppRingMbufSize]; 117 | 118 | agghdr* p4ml_header = reinterpret_cast<agghdr*>(buf + IP_ETH_UDP_HEADER_SIZE); 119 | 120 | // check ECN mark 121 | // bool is_ecn_mark_packet = p4ml_header->flag & 0x08; 122 | // if (is_ecn_mark_packet) 123 | // printf("ECN mark found.\n"); 124 | if (DEBUG_PRINT_ALL_RECEIVING_PACKET) 125 | p4ml_header_print_h(p4ml_header, "Receive"); 126 | 127 | bool isTerminated_packet = p4ml_header->flag & 0x02; 128 | bool isResend_packet = p4ml_header->flag & 0x04; 129 | bool isOverflow_packet = p4ml_header->flag & 0x80; 130 | 131 | // exit(1); 132 | p4ml_header_ntoh(p4ml_header); 133 | /* Move AppID index */ 134 | int appID =
p4ml_header->appID; 135 | if (!app_init[appID]) { 136 | app_init[appID] = true; 137 | } else { 138 | if (p4ml_header->key != tensors[tensors_pos_of_app[appID]].key && tensors[tensors_pos_of_app[appID]].isCompleted) { 139 | // p4ml_header_print(p4ml_header, "ERROR PACKET"); 140 | // printf("tensors_pos_of_app[appID] from %d to %d\n", tensors_pos_of_app[appID], tensors_pos_of_app[appID]+1); 141 | tensors_pos_of_app[appID]++; 142 | if (tensors_pos_of_app[appID] == thread_id * MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD + MAX_STORAGE_PER_APP_PER_THREAD * (appID)) 143 | tensors_pos_of_app[appID] = tensors_pos_of_app[appID] - MAX_STORAGE_PER_APP_PER_THREAD; 144 | } 145 | } 146 | 147 | if (!hash_table->isAlreadyDeclare[p4ml_header->agtr]) 148 | hash_table->isAlreadyDeclare[p4ml_header->agtr] = true; 149 | 150 | /* Check if Collision packet */ 151 | bool is_collision_packet = p4ml_header->flag & 0x02; 152 | 153 | if (is_collision_packet) { 154 | tensors[tensors_pos_of_app[appID]].isCollision[p4ml_header->seq_num] = true; 155 | // p4ml_header_print(p4ml_header, "COLLISION PACKET"); 156 | // exit(1); 157 | } 158 | 159 | int my_tensors_pos = tensors_pos_of_app[appID]; 160 | 161 | check_tensor_available(&tensors[my_tensors_pos], p4ml_header, thread_id); 162 | 163 | // char * eth_ip_header = (char*) dma_context->send_region + wc_recv_id * ENTRY_SIZE; 164 | // uint8_t swap[6]; 165 | // for (int i = 0; i < 6; i++) { 166 | // swap[i] = eth_ip_header[i]; 167 | // eth_ip_header[i] = eth_ip_header[i+6]; 168 | // eth_ip_header[i+6] = swap[i]; 169 | // } 170 | 171 | if (OVERFLOW_HANDLE) { 172 | // Check Switch Overflow but not Host Overflow 173 | if (!isOverflow_packet) 174 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 175 | if (p4ml_header->vector[i] == INT32_MAX || p4ml_header->vector[i] == INT32_MIN) 176 | { 177 | if (p4ml_header->vector[i] == INT32_MIN) 178 | p4ml_header_print(p4ml_header, "Switch Overflow"); 179 | isOverflow_packet = true; 180 | } 181 | 182 | // p4ml_header_print(p4ml_header, "Receive"); 183 | if (isOverflow_packet) { 184 | /* Clean Integer Data */ 185 | if (!tensors[my_tensors_pos].isFloat[p4ml_header->seq_num]) { 186 | // printf("ReadyForFloat\n"); 187 | makeTensorReadyforFloat(p4ml_header, &tensors[my_tensors_pos]); 188 | tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] = true; 189 | } 190 | } 191 | 192 | /* Floating point request packet */ 193 | bool sendFloatRequest = false; 194 | if (isOverflow_packet && !isResend_packet) 195 | sendFloatRequest = true; 196 | if (!isOverflow_packet && isResend_packet && tensors[my_tensors_pos].isFloat[p4ml_header->seq_num]) 197 | sendFloatRequest = true; 198 | 199 | if (sendFloatRequest) { 200 | /* Do floating point request */ 201 | /* Send back request to everyone immediately */ 202 | p4ml_header_hton_without_data(p4ml_header); 203 | memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE), (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_LAYER_SIZE); 204 | /* then send ACK */ 205 | p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE))); 206 | p4ml_header_setOverflowRequest((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE))); 207 | p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE))); 208 | 209 | // p4ml_header_print_h((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)), "Overflow Sendback PACKET"); 210 | send_packet(dma_context, P4ML_LAYER_SIZE, 
imm_pos_to_send); 211 | imm_pos_to_send++; 212 | if (imm_pos_to_send == dma_context->my_send_queue_length - 1) 213 | imm_pos_to_send = dma_context->my_send_queue_length / 2 + 1; 214 | 215 | /* Push Back */ 216 | dma_postback(dma_context); 217 | continue; 218 | } 219 | } 220 | 221 | /* Check Full Packet */ 222 | bool isFullPacket = (1 << p4ml_header->num_worker) - 1 == p4ml_header->bitmap? 1:0; 223 | 224 | 225 | if (receive_byte_reset_flag[thread_id]) { 226 | receive_in_sec[thread_id] = 0; 227 | receive_byte_reset_flag[thread_id] = false; 228 | } 229 | 230 | /* if full packet, update directly. */ 231 | if (isFullPacket) { 232 | // printf("%d: full packet - seq %d update model.\n", p4ml_header->key, p4ml_header->seq_num); 233 | updateModel_force(p4ml_header, &tensors[my_tensors_pos]); 234 | for (int i = 0; i < p4ml_header->num_worker; i++) 235 | tensors[my_tensors_pos].window_manager[i].UpdateWindow(&p4ml_header->seq_num); 236 | 237 | if (p4ml_header->key < MAX_MEASUREMENT_KEY) { 238 | if (isResend_packet) { 239 | resend_packet_count[p4ml_header->key][p4ml_header->seq_num]++; 240 | } else { 241 | full_packet_count[p4ml_header->key][p4ml_header->seq_num]++; 242 | } 243 | } 244 | } else { 245 | 246 | 247 | bool type_consistent = false; 248 | if (tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] && isOverflow_packet) 249 | type_consistent = true; 250 | if (!tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] && !isOverflow_packet) 251 | type_consistent = true; 252 | 253 | if (type_consistent) { 254 | 255 | if (p4ml_header->key < MAX_MEASUREMENT_KEY) { 256 | if (isResend_packet) 257 | resend_packet_count[p4ml_header->key][p4ml_header->seq_num]++; 258 | } 259 | // printf("seq %d Partial packet receive.\n", p4ml_header->seq_num); 260 | // p4ml_header_print(p4ml_header, "Partial PACKET"); 261 | int valid_bit = 1; 262 | bool need_to_update = true; 263 | // check if update is needed 264 | for (int i = 0; i < p4ml_header->num_worker; i++) { 265 | if (valid_bit & p4ml_header->bitmap) { 266 | if (tensors[my_tensors_pos].window_manager[i].isACKed[p4ml_header->seq_num]) { 267 | // p4ml_header_print(p4ml_header, "ERROR PACKET"); 268 | // printf("[thread %d][worker %d]'s gredient is already integrated in PS, %d.\n", thread_id, i, p4ml_header->seq_num); 269 | need_to_update = false; 270 | break; 271 | } 272 | } 273 | valid_bit <<= 1; 274 | } 275 | 276 | if (need_to_update) { 277 | // printf("need to update\n"); 278 | int valid_bit = 1; 279 | for (int i = 0; i < p4ml_header->num_worker; i++) { 280 | if (valid_bit & p4ml_header->bitmap) { 281 | // TODO: Update Window will cause BUG, to be fix (floating point need reset ACK) 282 | tensors[my_tensors_pos].window_manager[i].UpdateWindow(&p4ml_header->seq_num); 283 | } 284 | valid_bit <<= 1; 285 | } 286 | updateModel(p4ml_header, &tensors[my_tensors_pos], isOverflow_packet); 287 | } 288 | 289 | } 290 | } 291 | // if any of the worker doesn't complete slot 292 | bool is_slot_completed = true; 293 | for (int i = 0; i < p4ml_header->num_worker; i++) 294 | if (!tensors[my_tensors_pos].window_manager[i].isACKed[p4ml_header->seq_num]) 295 | is_slot_completed = false; 296 | // printf("packet receive %d\n", p4ml_header->seq_num); 297 | if (is_slot_completed) { 298 | p4ml_header->bitmap = 1; 299 | 300 | uint16_t new_agtr; 301 | 302 | // if collsiion is happened. 
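// The collision-handling path below remaps a completed slot whose aggregator
// index collided: if no replacement AGTR has been chosen for this index yet,
// the PS asks the hash table for an unused one (keeping the original when the
// pool is exhausted) and records the mapping in next_agtr; otherwise it reuses
// the recorded mapping. The chosen AGTR is then written into the length field
// of the ACK and the collision bit is set so workers switch to the new index.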
303 | if (tensors[my_tensors_pos].isCollision[p4ml_header->seq_num] == true) { 304 | // Check if new agtr is already hashed 305 | if (next_agtr[p4ml_header->agtr] == -1) { 306 | int new_hash_agtr = hash_table->HashNew_predefine(); 307 | // if get any of AGTR from hash 308 | if (new_hash_agtr != -1) { 309 | new_agtr = new_hash_agtr; 310 | next_agtr[p4ml_header->agtr] = new_agtr; 311 | hash_table->hash_map[p4ml_header->agtr] = new_agtr; 312 | // printf("old: %d -> new: %d\n", p4ml_header->agtr, new_agtr); 313 | } else { 314 | // if all of the AGTR is used, full 315 | // keep original AGTR 316 | // printf("Change Agtr fail, full.\n"); 317 | new_agtr = p4ml_header->agtr; 318 | } 319 | } else { 320 | //TODO: Separate APP 321 | new_agtr = next_agtr[p4ml_header->agtr]; 322 | // printf("New hash - already: %d\n", new_agtr); 323 | // printf("[hashed] old: %d -> new: %d\n", p4ml_header->agtr, new_agtr); 324 | } 325 | 326 | p4ml_header_setLengthFieldToAgtr(p4ml_header, new_agtr); 327 | p4ml_header_setCollisionBit(p4ml_header); 328 | } else { 329 | p4ml_header_resetCollisionBit(p4ml_header); 330 | } 331 | 332 | int offset = (p4ml_header->seq_num - 1) * MAX_ENTRIES_PER_PACKET; 333 | 334 | p4ml_header_hton_without_data(p4ml_header); 335 | 336 | if (!isOverflow_packet) 337 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 338 | tensors[my_tensors_pos].data.data_int[offset + i] = htonl(tensors[my_tensors_pos].data.data_int[offset + i]); 339 | 340 | // /* Give higher priority to Resend packet */ 341 | if (isResend_packet) { 342 | // TODO: PACKET LOSS HANDLING FOR DOUBLE PACKET 343 | // printf("Immediately send back Resend packet %d\n", ntohl(p4ml_header->seq_num)); 344 | memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE), (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_HEADER_SIZE - 12); 345 | memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE) + P4ML_HEADER_SIZE - 12, tensors[my_tensors_pos].data.data_int + offset, P4ML_DATA_SIZE); 346 | memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE) + 14 + P4ML_DATA_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE + P4ML_DATA_SIZE + 14, 12); 347 | /* then send ACK */ 348 | p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE))); 349 | p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE))); 350 | 351 | send_packet(dma_context, P4ML_LAYER_SIZE, imm_pos_to_send); 352 | imm_pos_to_send++; 353 | if (imm_pos_to_send == dma_context->my_send_queue_length - 1) 354 | imm_pos_to_send = dma_context->my_send_queue_length / 2 + 1; 355 | 356 | } else { 357 | memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_HEADER_SIZE - 12); 358 | memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE + P4ML_HEADER_SIZE - 12, tensors[my_tensors_pos].data.data_int + offset, P4ML_DATA_SIZE); 359 | memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE + 14 + P4ML_DATA_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE + P4ML_DATA_SIZE + 14, 12); 360 | /* then send ACK */ 361 | p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE)); 362 | p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE)); 363 | 364 | to_be_sent++; 365 | } 366 | // printf("to_be_sent: %d\n", 
to_be_sent); 367 | 368 | if (tensors[tensors_pos_of_app[appID]].num_worker > 0) { 369 | bool this_tensor_finished = true; 370 | for (int i = 0; i < tensors[tensors_pos_of_app[appID]].num_worker; i++) 371 | if (tensors[tensors_pos_of_app[appID]].window_manager[i].last_ACK < tensors[tensors_pos_of_app[appID]].window_manager[i].total_ACK) 372 | this_tensor_finished = false; 373 | 374 | if (this_tensor_finished && !tensors[tensors_pos_of_app[appID]].isCompleted) { 375 | // printf("[Thread %d] Tensor %d at %d Completed.\n", thread_id, tensors[tensors_pos_of_app[appID]].key, tensors_pos_of_app[appID]); 376 | tensors[tensors_pos_of_app[appID]].isCompleted = true; 377 | rand_index = 0; 378 | // dma_context->total_received = 0; 379 | // dma_context->total_sent = 0; 380 | } 381 | } 382 | } 383 | 384 | /* Push Back */ 385 | dma_postback(dma_context); 386 | } 387 | 388 | dma_update_snapshot(dma_context, cur_snapshot); 389 | 390 | if (msgs_completed < 0) { 391 | printf("Polling error\n"); 392 | exit(1); 393 | } 394 | 395 | if (msgs_completed > 0) { 396 | dma_context->total_received += msgs_completed; 397 | if (receive_byte_reset_flag[thread_id]) { 398 | receive_in_sec[thread_id] = msgs_completed; 399 | receive_byte_reset_flag[thread_id] = false; 400 | } 401 | else 402 | receive_in_sec[thread_id] += msgs_completed; 403 | if (to_be_sent > 0) { 404 | send_packet(dma_context, P4ML_LAYER_SIZE * to_be_sent, this_pos_to_send); 405 | } 406 | this_pos_to_send += to_be_sent; 407 | // Let assume the last packet will not loss 408 | } 409 | 410 | } 411 | } 412 | 413 | 414 | void Start(int thread_id) { 415 | bindingCPU(thread_id + 16); 416 | DMAcontext* dma_context; 417 | { 418 | std::lock_guard lock(_dma_mutex); 419 | 420 | dma_context = DMA_create(ib_dev, thread_id + ((appID - 1) * MAX_THREAD_PER_APP), true); 421 | // dma_context->isSent = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 422 | // dma_context->send_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 423 | // dma_context->receive_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 424 | global_dma_contexts[thread_id] = dma_context; 425 | } 426 | 427 | main_receive_packet_loop(dma_context, thread_id); 428 | 429 | sleep(1000); 430 | } 431 | 432 | int main(int argc, char *argv[]) { 433 | bindingCPU(15); 434 | srand(time(NULL)); 435 | // num_thread = atoi(argv[1]); 436 | 437 | appID = atoi(argv[1]); 438 | // Lam: this one is for experiment, disable temporary 439 | // if (argv[1]) 440 | // UsedSwitchAGTRcount = atoi(argv[1]); 441 | // else 442 | // UsedSwitchAGTRcount = MAX_AGTR_COUNT; 443 | num_thread = 12; 444 | 445 | dev_list = ibv_get_device_list(NULL); 446 | if (!dev_list) { 447 | perror("Failed to get devices list"); 448 | exit(1); 449 | } 450 | 451 | ib_dev = dev_list[1]; 452 | if (!ib_dev) { 453 | fprintf(stderr, "IB device not found\n"); 454 | exit(1); 455 | } 456 | 457 | /* Init Thread */ 458 | workQueue = new ThreadPool(num_thread, [](){}); 459 | max_agtr_size_per_thread = 250; 460 | global_dma_contexts = new DMAcontext*[num_thread]; 461 | printf("\nUsedSwitchAGTRcount: %d\n\n", UsedSwitchAGTRcount); 462 | printf("max_agtr_size_per_thread: %d\n\n", max_agtr_size_per_thread); 463 | 464 | printf("Overflow Handled: %s\n\n", OVERFLOW_HANDLE? 
"TRUE":"FALSE"); 465 | /* Init tensors capacity */ 466 | tensors = new tensor_context[MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD * num_thread]; 467 | printf("\nTensors memory pre-allocate...\n"); 468 | for (int i = 0; i < MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD * num_thread; i++) 469 | init_tensor(&tensors[i], MAX_TENSOR_SIZE); 470 | 471 | hash_table = new HashTable(UsedSwitchAGTRcount); 472 | printf("\nHash table creating...\n\n"); 473 | memset(next_agtr, -1, sizeof(int) * MAX_AGTR_COUNT); 474 | 475 | for (int i = 0; i < num_thread; i++) 476 | workQueue->enqueue(Start, i); 477 | 478 | std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); 479 | std::chrono::time_point timer = std::chrono::high_resolution_clock::now(); 480 | while (1) { 481 | std::chrono::time_point current_time = std::chrono::high_resolution_clock::now(); 482 | std::chrono::duration time_span = std::chrono::duration_cast>(current_time - timer); 483 | std::chrono::duration total_time = std::chrono::duration_cast>(current_time - t1); 484 | if (time_span.count() >= 1) { 485 | // printf("############################################\n"); 486 | double total_bandwidth = 0.0; 487 | for (int i = 0; i < num_thread; i++) { 488 | // printf("[thread %d] %lf Gbps.\n", i, receive_in_sec[i] * 194.0 / 1024.0 / 1024.0 / 1024.0 * 8.0); 489 | total_bandwidth += receive_in_sec[i] * 194.0 / 1024.0 / 1024.0 / 1024.0 * 8.0; 490 | receive_byte_reset_flag[i] = true; 491 | // receive_in_sec[i] = 0; 492 | } 493 | 494 | 495 | // total_sent = 0; 496 | timer = current_time; 497 | } 498 | } 499 | 500 | sleep(10000000); 501 | 502 | } 503 | -------------------------------------------------------------------------------- /server/ParameterServer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "../common/packet.h" 16 | #include "../common/dma_common.h" 17 | #include "../common/ThreadPool.h" 18 | #include "../common/utils.h" 19 | #include "../common/window_manager.h" 20 | #include "../common/HashTable.h" 21 | 22 | #define MAX_TENSOR_SIZE 1024000 23 | // Lam: this one is useless since a PS can only handle 1app, to be mod. 
24 | #define MAX_APP_PER_THREAD 5 25 | #define MAX_STORAGE_PER_APP_PER_THREAD 10 26 | #define MAX_WORKER 16 27 | 28 | #define MAX_THREAD_PER_APP 20 29 | 30 | #define OVERFLOW_HANDLE false 31 | 32 | 33 | union data_t { 34 | int32_t *data_int; 35 | float *data_float; 36 | }; 37 | 38 | struct tensor_context { 39 | bool* isOccupy; 40 | bool* isCollision; 41 | bool* isFloat; 42 | bool isCompleted; 43 | data_t data; 44 | uint32_t len; 45 | uint64_t key; 46 | uint8_t num_worker; 47 | WindowManager* window_manager; 48 | std::chrono::time_point start_time; 49 | }; 50 | 51 | void inline init_tensor(tensor_context* tensor, uint32_t len) { 52 | tensor->data.data_int = new int32_t[len](); 53 | tensor->isCompleted = true; 54 | tensor->isOccupy = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1](); 55 | tensor->isCollision = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1](); 56 | tensor->isFloat = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1](); 57 | tensor->len = 0; 58 | tensor->num_worker = 0; 59 | tensor->key = 0xffffffffffffffff; 60 | tensor->window_manager = new WindowManager[MAX_WORKER]; 61 | for (int i = 0; i < MAX_WORKER; i++) { 62 | tensor->window_manager[i].isACKed = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1](); 63 | tensor->window_manager[i].total_ACK = MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1; 64 | } 65 | } 66 | 67 | int inline check_tensor_available(tensor_context* tensor, agghdr* p4ml_header, int thread_id) { 68 | // printf("*skey: %d, seq: %d\n", *skey, p4ml_header->seq_num); 69 | 70 | // Already have completed model and not retrieve 71 | if (tensor->isCompleted && p4ml_header->key != tensor->key) { 72 | int total_ACK = ceil((float)p4ml_header->len_tensor / MAX_ENTRIES_PER_PACKET); 73 | for (int i = 0; i < p4ml_header->num_worker; i++) 74 | tensor->window_manager[i].Reset(total_ACK); 75 | // if (thread_id == 0) 76 | // printf("Reset tensors[%d] LAST_ACK: %d\n", *skey, tensor->window_manager[0].last_ACK); 77 | memset(tensor->data.data_int, 0, sizeof(int32_t) * p4ml_header->len_tensor); 78 | memset(tensor->isOccupy, 0, sizeof(bool) * (total_ACK + 1)); 79 | memset(tensor->isCollision, 0, sizeof(bool) * (total_ACK + 1)); 80 | memset(tensor->isFloat, 0, sizeof(bool) * (total_ACK + 1)); 81 | tensor->num_worker = p4ml_header->num_worker; 82 | tensor->len = p4ml_header->len_tensor; 83 | tensor->isCompleted = false; 84 | tensor->key = p4ml_header->key; 85 | // printf("Place %d available, real key = %d\n", *skey, tensors[*skey].key); 86 | return 1; 87 | } 88 | return 0; 89 | } 90 | 91 | void inline makeTensorReadyforFloat(agghdr *p4ml_header, tensor_context *tensor_cnt) { 92 | int32_t* data = tensor_cnt->data.data_int; 93 | uint16_t *p_seq = &p4ml_header->seq_num; 94 | int32_t *p_model = p4ml_header->vector; 95 | uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET; 96 | 97 | /* Reset Data */ 98 | memset(data + offset, 0, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET); 99 | tensor_cnt->isOccupy[*p_seq] = false; 100 | 101 | /* Reset Bitmap */ 102 | for (int i = 0; i < p4ml_header->num_worker; i++) { 103 | tensor_cnt->window_manager[i].isACKed[p4ml_header->seq_num] = 0; 104 | } 105 | } 106 | 107 | void inline updateModel(agghdr *p4ml_header, tensor_context *dst_place, bool isFloat) { 108 | int32_t* data = dst_place->data.data_int; 109 | uint16_t *p_seq = &p4ml_header->seq_num; 110 | uint32_t *tensor_len = &p4ml_header->len_tensor; 111 | int32_t *p_model = p4ml_header->vector; 112 | uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET; 113 | // 
printf("dst_place->isOccupy[%d]: %d\n", *p_seq - 1, dst_place->isOccupy[*p_seq - 1]); 114 | if (!dst_place->isOccupy[*p_seq]) { 115 | // printf("replace\n"); 116 | if (offset < *tensor_len) { 117 | if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len) 118 | memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET)); 119 | else 120 | memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET); 121 | } else { 122 | printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len); 123 | } 124 | dst_place->isOccupy[*p_seq] = true; 125 | } else { 126 | // printf("addition\n"); 127 | if (isFloat) { 128 | float* data = dst_place->data.data_float; 129 | float* p_model = (float*) p4ml_header->vector; 130 | 131 | if (offset < *tensor_len) { 132 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 133 | data[offset + i] += p_model[i]; 134 | } else { 135 | printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len); 136 | } 137 | } else { 138 | if (offset < *tensor_len) { 139 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 140 | data[offset + i] += p_model[i]; 141 | } else { 142 | printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len); 143 | } 144 | } 145 | } 146 | } 147 | 148 | void inline updateModel_force(agghdr *p4ml_header, tensor_context *dst_place) { 149 | int32_t* data = dst_place->data.data_int; 150 | uint16_t *p_seq = &p4ml_header->seq_num; 151 | uint32_t *tensor_len = &p4ml_header->len_tensor; 152 | int32_t *p_model = p4ml_header->vector; 153 | uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET; 154 | 155 | if (offset < *tensor_len) { 156 | if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len) 157 | memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET)); 158 | else 159 | memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET); 160 | } else { 161 | printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len); 162 | } 163 | dst_place->isOccupy[*p_seq] = true; 164 | } --------------------------------------------------------------------------------