├── .gitignore
├── LICENSE
├── README.md
├── client
    ├── Makefile
    ├── main.cc
    ├── p4ml_manager.cc
    └── p4ml_manager.h
├── common
    ├── CC_manager.h
    ├── HashTable.cc
    ├── HashTable.h
    ├── ThreadPool.h
    ├── dma_common.cc
    ├── dma_common.h
    ├── mlx5_defs.h
    ├── p4ml_struct.h
    ├── packet.h
    ├── quantize.h
    ├── utils.h
    └── window_manager.h
├── docs
    └── benchmark.md
├── p4src
    ├── includes
    │   ├── actions.p4
    │   ├── common.p4
    │   ├── headers.p4
    │   ├── parser.p4
    │   ├── registers.p4
    │   └── tables.p4
    └── p4ml.p4
├── ptf
    └── ptfTest.py
├── run_pd_rpc
    └── setup.py
└── server
    ├── Makefile
    ├── ParameterServer.cc
    └── ParameterServer.h


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .vscode
3 | *.tar
4 | log
5 | *.o
6 | app
7 | .DS_Store


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 NetLabIIIS and WISR
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ATP
 2 | 
 3 | ATP is a service that performs multi-rack multi-tenant in-network aggregation via co-designing with programmable switch and end-host networking stack.
 4 | 
 5 | # Benchmark
 6 | To run the benchmark, please see [benchmark](docs/benchmark.md).
 7 | 
 8 | # Publications
 9 | 
10 | - [NSDI'21] "[ATP: In-network Aggregation for Multi-tenant Learning](https://www.usenix.org/conference/nsdi21/presentation/lao)". ChonLam Lao, Yanfang Le, Kshiteej Mahajan, Yixi Chen, Wenfei Wu, Aditya Akella, Michael Swift.
11 | 
12 | # Contact
13 | 
14 | Any questions? Please feel free to reach us at inatpcontact@gmail.com. You are more likely to receive a helpful response if your question is specific, self-contained and concise.
15 | 


--------------------------------------------------------------------------------
/client/Makefile:
--------------------------------------------------------------------------------
 1 | # CFLAGS  := -O3 -g
 2 | # LD      := g++
 3 | # LDFLAGS := ${LDFLAGS} -lrdmacm -libverbs -lrt -lpthread  -lm
 4 | 
 5 | # ROCE_COMMON_PATH = ../common/
 6 | # INCLUDES  = -I${ROCE_COMMON_PATH}
 7 | # CFLAGS := ${CFLAGS} ${INCLUDES}
 8 | # SOURCES := $(wildcard *.c *.h ${ROCE_COMMON_PATH}*.c ${ROCE_COMMON_PATH}*.h)
 9 | 
10 | 
11 | # all: app 
12 | # app: main.o  p4ml_manager.o ${ROCE_COMMON_PATH}packet.o ${ROCE_COMMON_PATH}dma_common.o ${ROCE_COMMON_PATH}window_manager.o
13 | # 	${LD} $(CFLAGS) -o $@ $^ ${LDFLAGS}
14 | 
15 | 
16 | # # Clean Target
17 | # clean:
18 | # 	rm *.o ../common/*.o
19 | # 	rm app
20 | 
21 | all:
22 | 	g++ -std=c++11 -g -O3 -c -o main.o main.cc
23 | 	g++ -std=c++11 -g -O3 -c -o p4ml_manager.o p4ml_manager.cc  -mavx
24 | 	g++ -std=c++11 -g -O3 -c -o ../common/HashTable.o ../common/HashTable.cc
25 | 	g++ -std=c++11 -g -O3 -c -o ../common/dma_common.o ../common/dma_common.cc
26 | 	g++ -std=c++11 -g -O3 -I../common/ -o app main.o p4ml_manager.o ../common/HashTable.o ../common/dma_common.o -lrdmacm -libverbs -lrt -lpthread  -lm 
27 | 
28 | clean:
29 | 	rm *.o
30 | 	rm app
31 | 


--------------------------------------------------------------------------------
/client/main.cc:
--------------------------------------------------------------------------------
  1 | #include "p4ml_manager.h"
  2 | 
  3 | #define ENABLE_LOG true
  4 | 
  5 | uint32_t* init_model(int size) {
  6 |     uint32_t* tmp = new uint32_t[size];
  7 |     for (int i = 0; i < size; i++)
  8 |         tmp[i] = i+1;
  9 |     return tmp;
 10 | }
 11 | 
 12 | float* init_model_float(int size) {
 13 |     float* tmp = new float[size];
 14 |     for (int i = 0; i < size; i++) {
 15 |         tmp[i] = (i+1.0) / 10000000.0;
 16 |         // tmp[i] = (i + 1.0) / 10000.0;
 17 |         // printf("%f ", tmp[i]);
 18 |     }
 19 |     // tmp[63] = 200;
 20 |     return tmp;
 21 | }
 22 | 
 23 | float* init_model_float_with_overflow(int size) {
 24 |     float* tmp = new float[size];
 25 |     for (int i = 0; i < size; i++) {
 26 |         tmp[i] = (i+1.0) / 10000000.0;
 27 |     }
 28 |     for (int i = 0; i < 100; i++) {
 29 |         int rand_num = rand() % size;
 30 |         if (rand_num > size / 2)
 31 |             tmp[rand_num] = 200;
 32 |         else 
 33 |             tmp[rand_num] = 100;
 34 |         // printf("rand!!! %d\n", rand_num);
 35 |     }
 36 |     return tmp;
 37 | }
 38 | 
 39 | 
 40 | std::shared_ptr<P4mlManager> _p4ml_manager;
 41 | 
 42 | int main(int argc, char *argv[])
 43 | {
 44 |     bindingCPU(0);
 45 | 
 46 |     if (argc < 5) {
 47 |         printf("\nUsage %s [MyID] [Num of Worker] [AppID] [Num of PS]\n\n", argv[0]);
 48 |         exit(1);
 49 |     }
 50 | 
 51 |     int host = atoi(argv[1]); 
 52 |     int num_worker = atoi(argv[2]); 
 53 |     int appID = atoi(argv[3]); 
 54 |     int num_PS = atoi(argv[4]);
 55 | 
 56 |     //int host = 0;
 57 |     // int num_worker = 2;
 58 |     // int appID = 1;
 59 | 
 60 |     _p4ml_manager = std::shared_ptr<P4mlManager>(new P4mlManager(host, num_worker, appID, num_PS));
 61 |     
 62 |     /* Here for int size to send per thread */
 63 |     /* ex. 25600 = 32*800 = 1 Round */
 64 |     int size = 1024000;
 65 |     int thread_to_use = 12;
 66 |     int loop_time = 1000;
 67 | 
 68 |     if (argc > 5) {
 69 |         std::string option = argv[5];
 70 |         if (option == "-a") {
 71 |             int num_agtr = atoi(argv[6]);
 72 |             _p4ml_manager->SetMaxAgtrSizePerThread(num_agtr);
 73 |         } 
 74 |         if (option == "-f") {
 75 |             float forward_rate = atof(argv[6]);
 76 |             _p4ml_manager->SetForceForward(forward_rate);
 77 |         }
 78 |         if (option == "-l") {
 79 |             loop_time = atof(argv[6]);
 80 |         }
 81 |         if (option == "-aa") {
 82 |             int num_used_agtr = atoi(argv[6]);
 83 |             _p4ml_manager->SetUsedSwitchAGTRcount(num_used_agtr);
 84 |         }
 85 |     }
 86 | 
 87 |     /* (40) Threads in thread pool */
 88 |     /* MAX_AGTR (32000) / 40 = 800 Agtr per thread */
 89 |     _p4ml_manager->init_threadPool(thread_to_use);
 90 | 
 91 |     // _p4ml_manager->SetForceForward(0.25);
 92 |     // _p4ml_manager->SetMaxAgtrSizePerThread(50);
 93 | 
 94 |     int finish_counter = loop_time * thread_to_use;
 95 |     uint32_t** tensor = new uint32_t*[thread_to_use * loop_time];
 96 | 
 97 |     printf("\nModel initializing...\n");
 98 |     // for (int i = 0; i < thread_to_use * loop_time; i++)
 99 |     for (int i = 0; i < 1; i++)
100 |         if (FLOATING_POINT_INPUT)
101 |             tensor[i] = (uint32_t*) init_model_float_with_overflow(size);
102 |         else 
103 |             tensor[i] = init_model(size);
104 |         
105 |     printf("\nModel initialized completed. Start sending...\n\n");
106 | 
107 |     std::chrono::time_point<std::chrono::system_clock> timer = std::chrono::high_resolution_clock::now();
108 | 
109 |     std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
110 |     
111 |     for (int j = 0; j < loop_time; j++) {
112 |     /* thread to use */
113 |         for (int i = 0; i < thread_to_use; i++) {
114 |             uint64_t key = _p4ml_manager->GetNewKey();
115 |             _p4ml_manager->PushPull(key, (char*) tensor[0], size, 1);
116 |         }
117 |     }
118 | 
119 | 
120 |     int total_sent = 0;
121 | 
122 |     while (finish_counter > 0) {
123 |         int64_t tmp_key = _p4ml_manager->GetFinishKey();
124 |         if (tmp_key >= 0) {
125 |             finish_counter--;
126 |             total_sent++;
127 |         }
128 | 
129 |         if (ENABLE_LOG) {
130 |             std::chrono::time_point<std::chrono::system_clock> current_time =
131 |                 std::chrono::high_resolution_clock::now();
132 |             std::chrono::duration<double> time_span =
133 |                 std::chrono::duration_cast<std::chrono::duration<double>>(current_time - timer);
134 |             std::chrono::duration<double> total_time =
135 |                 std::chrono::duration_cast<std::chrono::duration<double>>(current_time - t1);
136 |             if (time_span.count() >= 1) {
137 |                 // printf("Tensor left: %d, ", finish_counter);
138 |                 // printf("total send %" PRIu64 " bytes, time %lf, throughput: %lf\n", total_sent * 32000 * 194, total_time, total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0);
139 |                 // printf("%lf\n", total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0);
140 |                 // int tmp = _p4ml_manager->GetCollisionTimeAndClear();
141 |                 // if (tmp)
142 |                 //     printf("%d\n", tmp);
143 |                 // printf("%d\n", _p4ml_manager->GetCollisionTimeAndClear());
144 |                 printf("%lf\n", (float)total_sent * (16517 * P4ML_PACKET_SIZE) / 1024 / 1024 / 1024 * 8);
145 |                 total_sent = 0;
146 |                 timer = current_time;
147 |             }
148 |         }
149 |     }
150 |     _p4ml_manager->GetLossRate();
151 |     std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();    
152 |     std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
153 |     double transmit_size_in_m = (double)((double)size * loop_time * thread_to_use / (float)MAX_ENTRIES_PER_PACKET) * P4ML_PACKET_SIZE / 1024 / 1024;
154 |     double total_time = time_span.count();
155 |     double throughput = (transmit_size_in_m / 1024 * 8 ) / total_time;
156 |     printf("Finish all %d Tensors,\n  Time = %lf s,\n  Total Size = %lf MB,\n  Throughput: %lf Gbps\n\n", thread_to_use * loop_time, total_time, transmit_size_in_m, throughput);
157 | }
158 | 


--------------------------------------------------------------------------------
/client/p4ml_manager.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef P4ML_MANAGER_H
  3 | #define P4ML_MANAGER_H
  4 | 
  5 | #include "../common/dma_common.h"
  6 | #include "../common/packet.h"
  7 | #include "../common/utils.h"
  8 | #include "../common/window_manager.h"
  9 | #include "../common/HashTable.h"
 10 | #include "../common/quantize.h"
 11 | #include "../common/p4ml_struct.h"
 12 | #include <arpa/inet.h>
 13 | #include <assert.h>
 14 | #include <bits/stdc++.h>
 15 | #include <chrono>
 16 | #include <condition_variable>
 17 | #include <cstring>
 18 | #include <ctime>
 19 | #include <iostream>
 20 | #include <limits.h>
 21 | #include <netinet/ip.h>
 22 | #include <queue>
 23 | #include <random>
 24 | #include <stdlib.h>
 25 | #include <string>
 26 | #include <thread>
 27 | #include <unistd.h>
 28 | #include <vector>
 29 | 
 30 | #define FLOATING_POINT_INPUT false
 31 | 
 32 | #define ONLY_DO_QUAN false
 33 | 
 34 | #define OVERFLOW_THRESHOLD 213
 35 | #define UNDERFLOW_THRESHOLD -213
 36 | 
 37 | #define P4ML_KEY_TOTAL 500000
 38 | #define MAX_TENSOR_SIZE 1024000
 39 | 
 40 | #define MAX_THREAD_PER_APP 20
 41 | 
 42 | class P4mlManager {
 43 | public:
 44 |     P4mlManager(uint32_t host, int num_worker, int appID, int num_PS);
 45 |     // ~P4mlManager();
 46 | 
 47 |     void init_threadPool(int num_thread);
 48 |     void PushPull(uint64_t key, char* data, int len, int cmd);
 49 |     static void PushPullLoop(int thread_id);
 50 |     static void QuantizationLoop(int thread_id);
 51 | 
 52 |     void PushTaskToThread(uint64_t key, char *data, int len, int cmd, int thread_id);
 53 | 
 54 |     uint64_t GetNewKey();
 55 |     int64_t GetFinishKey();
 56 |     double GetLossRate();
 57 |     int GetCollisionTimeAndClear();
 58 |     void SetForceForward(float forward_rate);
 59 |     void SetMaxAgtrSizePerThread(int max_agtr_size_per_thread);
 60 |     void SetUsedSwitchAGTRcount(int used_agtr);
 61 | 
 62 | private:
 63 |     static uint32_t host;
 64 |     static uint8_t num_worker;
 65 |     static uint8_t num_PS;
 66 |     static uint16_t appID;
 67 |     static uint64_t p4mlKey;
 68 |     static AppInfo* app_info;
 69 |     
 70 |     static int max_agtr_size_per_thread;
 71 |     static int UsedSwitchAGTRcount;
 72 |     static int _num_thread;
 73 |     static std::chrono::time_point<std::chrono::system_clock> start;
 74 |     static ThreadInfo** threadInfoQueue;
 75 |     static DMAcontext** dmaContextQueue;
 76 |     static std::thread** threadQueue;
 77 |     static std::thread** pushPullthreadQueue;
 78 |     static std::queue<Job*>* pushPulljobQueue;
 79 |     static std::thread** quantizationthreadQueue;
 80 |     static std::queue<Job*>* quantizejobQueue;
 81 |     static std::queue<Job*>* dequantizejobQueue;
 82 | 
 83 |     static WindowManager* window_manager;
 84 |     static std::queue<Job*> finishQueue;
 85 |     static std::queue<agghdr*>* pendingQueue;
 86 |     static uint64_t* weightQueue;
 87 | 
 88 |     // static uint16_t* hash_map;
 89 |     static HashTable* hash_table;
 90 |     static int32_t** quantizeBuffer;
 91 |     static bool** isOverflow;
 92 | 
 93 |     static bool isForceForward;
 94 |     static int forwardFrequency;
 95 |     static float forwardRate;
 96 | 
 97 |     static std::mutex Resource_mutex;
 98 |     static std::mutex _P4MLKey_mutex;
 99 |     static std::mutex _print_mutex;
100 |     static std::mutex _queuePush_mutex;
101 | 
102 |     static void main_receive_packet_loop(DMAcontext* dma_context, int32_t* data, int my_id);
103 |     static void updateModel(agghdr* p4ml_header, int32_t* data, int my_id);
104 | };
105 | 
106 | inline void P4mlManager::updateModel(agghdr* p4ml_header, int32_t* data, int my_id)
107 | {
108 |     uint16_t* p_seq = &p4ml_header->seq_num;
109 |     uint32_t* tensor_len = &pushPulljobQueue[my_id].front()->len;
110 | 
111 |     int32_t* p_model = p4ml_header->vector;
112 |     uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET;
113 |     if (offset < *tensor_len) {
114 |         if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len)
115 |             memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET));
116 |         else
117 |             memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET);
118 |     }
119 | }
120 | 
121 | #endif //P4ML_MANAGER_H


--------------------------------------------------------------------------------
/common/CC_manager.h:
--------------------------------------------------------------------------------
 1 | #ifndef CC_MANAGER_H
 2 | #define CC_MANAGER_H
 3 | 
 4 | #define MAX_BYTES 100 * P4ML_PACKET_SIZE
 5 | 
 6 | #include "packet.h"
 7 | #include <iostream>
 8 | #include <stdint.h>
 9 | #include <stdio.h>
10 | 
11 | using namespace std;
12 | #define do_div(n, base) ({            \
13 |     uint32_t __base = (base);         \
14 |     uint32_t __rem;                   \
15 |     __rem = ((uint64_t)(n)) % __base; \
16 |     (n) = ((uint64_t)(n)) / __base;   \
17 |     __rem;                            \
18 | })
19 | #define GET_MIN(a, b) (a < b ? a : b)
20 | #define GET_MAX(a, b) (a > b ? a : b)
21 | 
22 | class CC_manager {
23 | 
24 | public:
25 |     CC_manager(int init_window)
26 |     {
27 |         cwnd_bytes = init_window * P4ML_PACKET_SIZE;
28 |     }
29 | 
30 |     int adjustWindow(bool isECN)
31 |     {
32 |         if (isECN)
33 |         {
34 |             cwnd_bytes /= 2;
35 |         }
36 |         else
37 |         {
38 |             cwnd_bytes += 1500;
39 |         }
40 | 
41 |         if (cwnd_bytes < P4ML_PACKET_SIZE)
42 |             cwnd_bytes = P4ML_PACKET_SIZE;
43 |         if (cwnd_bytes > MAX_BYTES)
44 |             cwnd_bytes = MAX_BYTES;
45 |         if (cwnd_bytes > P4ML_PACKET_SIZE)
46 |             cwnd_bytes = (cwnd_bytes / P4ML_PACKET_SIZE) * P4ML_PACKET_SIZE;
47 |         return cwnd_bytes / P4ML_PACKET_SIZE;
48 |     }
49 | 
50 | private:
51 |     uint64_t cwnd_bytes;
52 | };
53 | 
54 | #endif


--------------------------------------------------------------------------------
/common/HashTable.cc:
--------------------------------------------------------------------------------
 1 | #include "HashTable.h"
 2 | #define MAX_BYTES 100 * P4ML_PACKET_SIZE
 3 | 
 4 | HashTable::HashTable(int size)
 5 | {
 6 |     used_size = size;
 7 |     hash_map = new uint16_t[size];
 8 |     memset(isAlreadyDeclare, 0, sizeof(bool) * size);
 9 |     memset(predefine_agtr_list, 0, sizeof(bool) * size);
10 |     for (int i = 0; i < size; i++) {
11 |         predefine_agtr_list[i] = i;
12 |         // printf("[%d] %d  ", i, predefine_agtr_list[i]);
13 |     }
14 |     int random_seed = rand();
15 |     std::shuffle(predefine_agtr_list, predefine_agtr_list + size, std::default_random_engine(random_seed));
16 |     
17 |     // for (int i = 0; i < size; i++) {
18 | 
19 |     //     printf("[%d] %d ", i, predefine_agtr_list[i]);
20 |     // }
21 |     hash_pos = 0;
22 | }
23 | 
24 | void HashTable::HashNew_linear(int index)
25 | {
26 |     // Guarantee non-repeat element generated
27 |     uint16_t new_value;
28 |     do {
29 |         new_value = hash_function();
30 |     } while (isAlreadyDeclare[new_value]);
31 | 
32 |     hash_map[index] = new_value;
33 |     isAlreadyDeclare[new_value] = true;
34 | }
35 | 
36 | int HashTable::HashNew_predefine()
37 | {
38 |     if (hash_pos >= used_size) {
39 |         return -1;
40 |     }
41 | 
42 |     // Get AGTR from predefined hash
43 |     while (hash_pos < used_size) {
44 |         int new_agtr = predefine_agtr_list[hash_pos];
45 |         if (isAlreadyDeclare[new_agtr]) {
46 |             hash_pos++;
47 |         } else {
48 |             hash_pos++;
49 |             isAlreadyDeclare[new_agtr] = true;
50 |             return new_agtr;
51 |         }
52 |     }
53 | 
54 |     return -1;
55 | }
56 | 
57 | int HashTable::HashNew_crc(uint16_t appID, uint16_t index)
58 | {
59 |     // Guarantee non-repeat element generated
60 |     uint8_t crc_input[] = {(uint8_t)(appID & 0xff), (uint8_t)(appID >> 8), (uint8_t)(index & 0xff), (uint8_t)(index >> 8), 0, 0};
61 | 
62 |     uint16_t new_value;
63 |     uint8_t salt = 0;
64 |     do {
65 |         new_value = crc32_le(0xffffffff, crc_input, 6);
66 |         new_value %= used_size;
67 |         crc_input[4]++;
68 |         if (crc_input[4] == 255) {
69 |             crc_input[4] = 0;
70 |             crc_input[5]++;
71 |         }
72 |     } while (isAlreadyDeclare[new_value]);
73 |     hash_map[index] = new_value;
74 |     isAlreadyDeclare[new_value] = true;
75 |     return new_value;
76 | }
77 | 
78 | void HashTable::HashNew_separate(uint16_t appID, uint16_t index)
79 | {
80 |     int real_index = ((appID - 1) * 2000) + index;
81 |     hash_map[index] = real_index;
82 |     isAlreadyDeclare[real_index] = true;
83 | }
84 | 
85 | uint16_t HashTable::hash_function()
86 | {
87 |     return hash_pos++;
88 | }
89 | 
90 | uint32_t HashTable::crc32_le(uint32_t crc, unsigned char const* p, size_t len)
91 | {
92 |     while (len--) {
93 |         crc ^= *p++;
94 |         for (int i = 0; i < 8; i++)
95 |             crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
96 |     }
97 |     return ~crc;
98 | }
99 | 


--------------------------------------------------------------------------------
/common/HashTable.h:
--------------------------------------------------------------------------------
 1 | #ifndef HASHTABLE_H
 2 | #define HASHTABLE_H
 3 | #include <bits/stdc++.h>
 4 | #include "packet.h"
 5 | #include "utils.h"
 6 | #define CRCPOLY_LE 0xedb88320
 7 | 
 8 | class HashTable {
 9 | 
10 | public:
11 |     HashTable(int size);
12 |     void HashNew_linear(int index);
13 |     int HashNew_crc(uint16_t appID, uint16_t index);
14 |     int HashNew_predefine();
15 |     void HashNew_separate(uint16_t appID, uint16_t index);
16 |     uint16_t* hash_map;
17 |     bool isAlreadyDeclare[MAX_AGTR_COUNT];
18 | 
19 | private:
20 |     int used_size;
21 |     uint32_t crc32_le(uint32_t crc, unsigned char const* p, size_t len);
22 |     int predefine_agtr_list[MAX_AGTR_COUNT];
23 | 
24 |     // These for predefine Hash
25 | 
26 |     // These two for Linear Hash
27 |     uint16_t hash_function();
28 |     uint16_t hash_pos;
29 | 
30 | };
31 | 
32 | #endif


--------------------------------------------------------------------------------
/common/ThreadPool.h:
--------------------------------------------------------------------------------
  1 | #ifndef THREAD_POOL_H
  2 | #define THREAD_POOL_H
  3 | 
  4 | #include <vector>
  5 | #include <queue>
  6 | #include <memory>
  7 | #include <thread>
  8 | #include <mutex>
  9 | #include <condition_variable>
 10 | #include <future>
 11 | #include <functional>
 12 | #include <stdexcept>
 13 | 
 14 | class ThreadPool {
 15 | public:
 16 |     template<class F> ThreadPool(size_t, F callback);
 17 |     template<class F, class... Args>
 18 |     auto enqueue(F&& f, Args&&... args) 
 19 |         -> std::future<typename std::result_of<F(Args...)>::type>;
 20 |     ~ThreadPool();
 21 | private:
 22 |     // need to keep track of threads so we can join them
 23 |     std::vector< std::thread > workers;
 24 |     // the task queue
 25 |     std::queue< std::function<void()> > tasks;
 26 |     
 27 |     // synchronization
 28 |     std::mutex queue_mutex;
 29 |     std::condition_variable condition;
 30 |     bool stop;
 31 | };
 32 |  
 33 | // the constructor just launches some amount of workers
 34 | template<class F>
 35 | inline ThreadPool::ThreadPool(size_t threads, F callback)
 36 |     :   stop(false)
 37 | {
 38 |     for(size_t i = 0;i<threads;++i)
 39 |         workers.emplace_back(
 40 |             [this, callback]
 41 |             {
 42 |                 for(;;)
 43 |                 {
 44 |                     std::function<void()> task;
 45 | 
 46 |                     {
 47 |                         std::unique_lock<std::mutex> lock(this->queue_mutex);
 48 |                         this->condition.wait(lock,
 49 |                             [this]{ return this->stop || !this->tasks.empty(); });
 50 |                         if(this->stop && this->tasks.empty())
 51 |                             return;
 52 |                         task = std::move(this->tasks.front());
 53 |                         this->tasks.pop();
 54 |                     }
 55 | 
 56 |                     task();
 57 |                     callback();
 58 |                 }
 59 |             }
 60 |         );
 61 | }
 62 | 
 63 | // add new work item to the pool
 64 | template<class F, class... Args>
 65 | auto ThreadPool::enqueue(F&& f, Args&&... args) 
 66 |     -> std::future<typename std::result_of<F(Args...)>::type>
 67 | {
 68 |     using return_type = typename std::result_of<F(Args...)>::type;
 69 | 
 70 |     auto task = std::make_shared< std::packaged_task<return_type()> >(
 71 |             std::bind(std::forward<F>(f), std::forward<Args>(args)...)
 72 |         );
 73 |         
 74 |     std::future<return_type> res = task->get_future();
 75 |     {
 76 |         std::unique_lock<std::mutex> lock(queue_mutex);
 77 | 
 78 |         // don't allow enqueueing after stopping the pool
 79 |         if(stop)
 80 |             throw std::runtime_error("enqueue on stopped ThreadPool");
 81 | 
 82 |         tasks.emplace([task](){ (*task)(); });
 83 |     }
 84 |     condition.notify_one();
 85 |     return res;
 86 | }
 87 | 
 88 | // the destructor joins all threads
 89 | inline ThreadPool::~ThreadPool()
 90 | {
 91 |     {
 92 |         std::unique_lock<std::mutex> lock(queue_mutex);
 93 |         stop = true;
 94 |     }
 95 |     condition.notify_all();
 96 |     for(std::thread &worker: workers)
 97 |         worker.join();
 98 | }
 99 | 
100 | #endif
101 | 


--------------------------------------------------------------------------------
/common/dma_common.cc:
--------------------------------------------------------------------------------
  1 | #define __USE_GNU
  2 | 
  3 | #include "dma_common.h"
  4 | #include <infiniband/verbs_exp.h>
  5 | #include <inttypes.h>
  6 | #include <linux/if_ether.h>
  7 | #include <netdb.h>
  8 | #include <netinet/in.h>
  9 | #include <rdma/rdma_cma.h>
 10 | #include <sched.h>
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <string.h>
 14 | #include <sys/socket.h>
 15 | #include <unistd.h>
 16 | 
 17 | 
 18 | std::mutex ___print_mutex;
 19 | int my_send_queue_length = 2048;
 20 | int my_recv_queue_length = my_send_queue_length * 8;
 21 | 
 22 | unsigned char PS_FILTER_TEMPLATE_R[] = { 0x05, 0x04, 0x03, 0x02, 0x01, 0xFF };
 23 | unsigned char WORKER_FILTER_TEMPLATE_R[] = { 0x77, 0x77, 0x77, 0x77, 0x77, 0xFF };
 24 | 
 25 | DMAcontext* DMA_create(ibv_device* ib_dev, int thread_id, bool isPS)
 26 | {
 27 | 
 28 |     ibv_context* context = ibv_open_device(ib_dev);
 29 |     if (!context) {
 30 |         fprintf(stderr, "Couldn't get context for %s\n",
 31 |             ibv_get_device_name(ib_dev));
 32 |         exit(1);
 33 |     }
 34 |     ibv_pd* pd = ibv_alloc_pd(context);
 35 |     if (!pd) {
 36 |         fprintf(stderr, "Couldn't allocate PD\n");
 37 |         exit(1);
 38 |     }
 39 | 
 40 |     struct ibv_cq* rec_cq = ibv_create_cq(context, my_recv_queue_length + 1, NULL, NULL, 0);
 41 |     if (!rec_cq) {
 42 |         fprintf(stderr, "Couldn't create CQ %d\n", errno);
 43 |         exit(1);
 44 |     }
 45 | 
 46 |     struct ibv_cq* snd_cq = ibv_create_cq(context, my_send_queue_length + 1, NULL, NULL, 0);
 47 |     if (!snd_cq) {
 48 |         fprintf(stderr, "Couldn't create CQ %d\n", errno);
 49 |         exit(1);
 50 |     }
 51 | 
 52 |     struct ibv_qp* qp;
 53 |     struct ibv_exp_qp_init_attr* qp_init_attr = (struct ibv_exp_qp_init_attr*)malloc(sizeof(struct ibv_exp_qp_init_attr));
 54 | 
 55 |     memset(qp_init_attr, 0, sizeof(*qp_init_attr));
 56 |     qp_init_attr->comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER | IBV_EXP_QP_INIT_ATTR_INL_RECV;
 57 |     qp_init_attr->send_cq = snd_cq;
 58 |     qp_init_attr->recv_cq = rec_cq;
 59 |     qp_init_attr->qp_type = IBV_QPT_RAW_PACKET;
 60 | 
 61 |     qp_init_attr->pd = pd;
 62 |     qp_init_attr->cap.max_send_wr = my_send_queue_length + 1;
 63 |     qp_init_attr->cap.max_recv_wr = my_recv_queue_length + 1;
 64 |     qp_init_attr->cap.max_inline_data = 512;
 65 |     qp_init_attr->cap.max_send_sge = 1;
 66 |     qp_init_attr->cap.max_recv_sge = 1;
 67 |     qp_init_attr->max_tso_header = IP_ETH_UDP_HEADER_SIZE;
 68 |     qp_init_attr->max_inl_recv = 512;
 69 | 
 70 |     qp = ibv_exp_create_qp(context, qp_init_attr);
 71 |     //qp = ibv_create_qp(pd, qp_init_attr);
 72 |     if (!qp) {
 73 |         fprintf(stderr, "Couldn't create RSS QP\n");
 74 |         exit(1);
 75 |     }
 76 | 
 77 |     struct ibv_qp_attr qp_attr;
 78 |     int qp_flags;
 79 |     int ret;
 80 |     memset(&qp_attr, 0, sizeof(qp_attr));
 81 |     qp_flags = IBV_QP_STATE | IBV_QP_PORT;
 82 |     qp_attr.qp_state = IBV_QPS_INIT;
 83 |     qp_attr.port_num = 1;
 84 |     ret = ibv_modify_qp(qp, &qp_attr, qp_flags);
 85 |     if (ret < 0) {
 86 |         fprintf(stderr, "failed modify qp to init\n");
 87 |         exit(1);
 88 |     }
 89 |     memset(&qp_attr, 0, sizeof(qp_attr));
 90 | 
 91 |     /* a. Move ring state to ready to receive, this is needed to be able to move ring to ready to send even if receive queue is not enabled */
 92 | 
 93 |     qp_flags = IBV_QP_STATE;
 94 |     qp_attr.qp_state = IBV_QPS_RTR;
 95 |     ret = ibv_modify_qp(qp, &qp_attr, qp_flags);
 96 |     if (ret < 0) {
 97 |         fprintf(stderr, "failed modify qp to receive\n");
 98 |         exit(1);
 99 |     }
100 | 
101 |     /* b. Move the ring to ready to send */
102 | 
103 |     qp_flags = IBV_QP_STATE;
104 |     qp_attr.qp_state = IBV_QPS_RTS;
105 |     ret = ibv_modify_qp(qp, &qp_attr, qp_flags);
106 |     if (ret < 0) {
107 |         fprintf(stderr, "failed modify qp to send\n");
108 |         exit(1);
109 |     }
110 | 
111 |     int send_buf_size = P4ML_PACKET_SIZE * my_send_queue_length;
112 | 
113 |     void* send_buf;
114 | 
115 |     //send_buf = malloc(send_buf_size);
116 |     // send_buf = alloc_raw_pages(send_buf_size / EACH_HUGEPAGE_SIZE + 1, EACH_HUGEPAGE_SIZE);
117 |     ib_malloc(&send_buf, send_buf_size);
118 |     if (!send_buf) {
119 |         fprintf(stderr, "Coudln't allocate send memory\n");
120 |         exit(1);
121 |     }
122 | 
123 |     struct ibv_mr* send_mr;
124 |     send_mr = ibv_reg_mr(pd, send_buf, send_buf_size, IBV_ACCESS_LOCAL_WRITE);
125 |     if (!send_mr) {
126 |         fprintf(stderr, "Couldn't register recv mr\n");
127 |         exit(1);
128 |     }
129 | 
130 |     // Init CQ. Its size MUST be one so that we get two CQEs in mlx5.
131 |     struct ibv_exp_cq_init_attr cq_init_attr;
132 |     memset(&cq_init_attr, 0, sizeof(cq_init_attr));
133 |     struct ibv_cq* mp_recv_cq = ibv_exp_create_cq(context, kAppRecvCQDepth / 2, nullptr, nullptr, 0, &cq_init_attr);
134 |     assert(mp_recv_cq != nullptr);
135 | 
136 |     // Modify the RECV CQ to ignore overrun
137 |     struct ibv_exp_cq_attr cq_attr;
138 |     memset(&cq_attr, 0, sizeof(cq_attr));
139 |     cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS;
140 |     cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN;
141 |     rt_assert(ibv_exp_modify_cq(mp_recv_cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS) == 0);
142 | 
143 |     struct ibv_exp_wq_init_attr wq_init_attr;
144 |     memset(&wq_init_attr, 0, sizeof(wq_init_attr));
145 | 
146 |     wq_init_attr.wq_type = IBV_EXP_WQT_RQ;
147 |     wq_init_attr.max_recv_wr = kAppRQDepth;
148 |     wq_init_attr.max_recv_sge = 1;
149 |     wq_init_attr.pd = pd;
150 |     wq_init_attr.cq = mp_recv_cq;
151 | 
152 |     wq_init_attr.comp_mask |= IBV_EXP_CREATE_WQ_MP_RQ;
153 |     wq_init_attr.mp_rq.use_shift = IBV_EXP_MP_RQ_NO_SHIFT;
154 |     wq_init_attr.mp_rq.single_wqe_log_num_of_strides = kAppLogNumStrides;
155 |     wq_init_attr.mp_rq.single_stride_log_num_of_bytes = kAppLogStrideBytes;
156 |     struct ibv_exp_wq* mp_wq = ibv_exp_create_wq(context, &wq_init_attr);
157 |     assert(mp_wq != nullptr);
158 | 
159 |     // Change WQ to ready state
160 |     struct ibv_exp_wq_attr wq_attr;
161 |     memset(&wq_attr, 0, sizeof(wq_attr));
162 |     wq_attr.attr_mask = IBV_EXP_WQ_ATTR_STATE;
163 |     wq_attr.wq_state = IBV_EXP_WQS_RDY;
164 |     rt_assert(ibv_exp_modify_wq(mp_wq, &wq_attr) == 0);
165 | 
166 |     // Get the RQ burst function
167 |     enum ibv_exp_query_intf_status intf_status = IBV_EXP_INTF_STAT_OK;
168 |     struct ibv_exp_query_intf_params query_intf_params;
169 |     memset(&query_intf_params, 0, sizeof(query_intf_params));
170 |     query_intf_params.intf_scope = IBV_EXP_INTF_GLOBAL;
171 |     query_intf_params.intf = IBV_EXP_INTF_WQ;
172 |     query_intf_params.obj = mp_wq;
173 |     struct ibv_exp_wq_family* mp_wq_family = reinterpret_cast<struct ibv_exp_wq_family*>(
174 |         ibv_exp_query_intf(context, &query_intf_params, &intf_status));
175 |     assert(mp_wq_family != nullptr);
176 | 
177 |     // Create indirect table
178 |     struct ibv_exp_rwq_ind_table_init_attr rwq_ind_table_init_attr;
179 |     memset(&rwq_ind_table_init_attr, 0, sizeof(rwq_ind_table_init_attr));
180 |     rwq_ind_table_init_attr.pd = pd;
181 |     rwq_ind_table_init_attr.log_ind_tbl_size = 0; // Ignore hash
182 |     rwq_ind_table_init_attr.ind_tbl = &mp_wq; // Pointer to RECV work queue
183 |     rwq_ind_table_init_attr.comp_mask = 0;
184 |     struct ibv_exp_rwq_ind_table* mp_ind_tbl = ibv_exp_create_rwq_ind_table(context, &rwq_ind_table_init_attr);
185 |     assert(mp_ind_tbl != nullptr);
186 | 
187 |     // Create rx_hash_conf and indirection table for the QP
188 |     uint8_t toeplitz_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
189 |         0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
190 |         0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
191 |         0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
192 |         0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
193 |     const int TOEPLITZ_RX_HASH_KEY_LEN = sizeof(toeplitz_key) / sizeof(toeplitz_key[0]);
194 | 
195 |     struct ibv_exp_rx_hash_conf rx_hash_conf;
196 |     memset(&rx_hash_conf, 0, sizeof(rx_hash_conf));
197 |     rx_hash_conf.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ;
198 |     rx_hash_conf.rx_hash_key_len = TOEPLITZ_RX_HASH_KEY_LEN;
199 |     rx_hash_conf.rx_hash_key = toeplitz_key;
200 |     rx_hash_conf.rx_hash_fields_mask = IBV_EXP_RX_HASH_DST_PORT_UDP;
201 |     rx_hash_conf.rwq_ind_tbl = mp_ind_tbl;
202 | 
203 |     struct ibv_exp_qp_init_attr mp_qp_init_attr;
204 |     memset(&mp_qp_init_attr, 0, sizeof(mp_qp_init_attr));
205 |     mp_qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_RX_HASH;
206 |     mp_qp_init_attr.rx_hash_conf = &rx_hash_conf;
207 |     mp_qp_init_attr.pd = pd;
208 |     mp_qp_init_attr.qp_type = IBV_QPT_RAW_PACKET;
209 | 
210 |     // Create the QP
211 |     struct ibv_qp* mp_recv_qp = ibv_exp_create_qp(context, &mp_qp_init_attr);
212 |     assert(mp_recv_qp != nullptr);
213 | 
214 |     size_t tx_ring_size = P4ML_LAYER_SIZE * kAppMaxPostlist;
215 |     uint8_t* mp_send_ring;
216 |     ib_malloc((void **)&mp_send_ring, tx_ring_size);
217 |     rt_assert(mp_send_ring != nullptr);
218 |     memset(mp_send_ring, 0, tx_ring_size);
219 | 
220 |     struct ibv_mr* mp_send_mr = ibv_reg_mr(pd, mp_send_ring, tx_ring_size, IBV_ACCESS_LOCAL_WRITE);
221 |     rt_assert(mp_send_mr != nullptr);
222 | 
223 |     // Register RX ring memory
224 |     uint8_t* mp_recv_ring;
225 |     ib_malloc((void **)&mp_recv_ring, kAppRingSize);
226 |     rt_assert(mp_recv_ring != nullptr);
227 |     memset(mp_recv_ring, 0, kAppRingSize);
228 | 
229 |     struct ibv_mr* mp_mr = ibv_reg_mr(pd, mp_recv_ring, kAppRingSize, IBV_ACCESS_LOCAL_WRITE);
230 |     rt_assert(mp_mr != nullptr);
231 |     /////////////////////////////////////////////////////////////////////////////////////
232 |     // install_flow_rule(mp_recv_qp, 30720 + thread_id);
233 |     install_flow_rule(mp_recv_qp, thread_id, isPS);
234 |     // This cast works for mlx5 where ibv_cq is the first member of mlx5_cq.
235 |     auto* _mlx5_cq = reinterpret_cast<mlx5_cq*>(mp_recv_cq);
236 |     rt_assert(kAppRecvCQDepth == std::pow(2, _mlx5_cq->cq_log_size));
237 |     rt_assert(_mlx5_cq->buf_a.buf != nullptr);
238 | 
239 |     auto* mp_cqe_arr = reinterpret_cast<volatile mlx5_cqe64*>(_mlx5_cq->buf_a.buf);
240 | 
241 |     // Initialize the CQEs as if we received the last (kAppRecvCQDepth) packets
242 |     // in the CQE cycle.
243 |     static_assert(kAppStridesPerWQE >= kAppRecvCQDepth, "");
244 |     for (size_t i = 0; i < kAppRecvCQDepth; i++) {
245 |         mp_cqe_arr[i].wqe_id = htons(std::numeric_limits<uint16_t>::max());
246 |         // Last CQE gets
247 |         // * wqe_counter = (kAppStridesPerWQE - 1)
248 |         // * snapshot_cycle_idx = (kAppCQESnapshotCycle - 1)
249 |         mp_cqe_arr[i].wqe_counter = htons(kAppStridesPerWQE - (kAppRecvCQDepth - i));
250 | 
251 |         cqe_snapshot_t snapshot;
252 |         snapshot_cqe(&mp_cqe_arr[i], snapshot);
253 |         rt_assert(snapshot.get_cqe_snapshot_cycle_idx() == kAppCQESnapshotCycle - (kAppRecvCQDepth - i));
254 |     }
255 | 
256 |     // The multi-packet RECVs. This must be done after we've initialized the CQE.
257 |     struct ibv_sge* mp_sge = reinterpret_cast<struct ibv_sge*>(malloc(sizeof(struct ibv_sge) * kAppRQDepth));
258 |     for (size_t i = 0; i < kAppRQDepth; i++) {
259 |         size_t mpwqe_offset = i * (kAppRingMbufSize * kAppStridesPerWQE);
260 |         mp_sge[i].addr = reinterpret_cast<uint64_t>(&mp_recv_ring[mpwqe_offset]);
261 |         mp_sge[i].lkey = mp_mr->lkey;
262 |         mp_sge[i].length = kAppRingMbufSize * kAppStridesPerWQE; //kAppRingSize;
263 |         mp_wq_family->recv_burst(mp_wq, &mp_sge[i], 1);
264 |     }
265 | 
266 |     printf("[Thread %d] Finish created QP - ", thread_id);
267 |     printf("kAppRingMbufSize=%lu, kAppStridesPerWQE=%lu, kAppRingSize=%lu, kAppRQDepth=%lu\n", kAppRingMbufSize, kAppStridesPerWQE, kAppRingSize, kAppRQDepth);
268 |     auto* cqe_arr = mp_cqe_arr;
269 |     cqe_snapshot_t prev_snapshot;
270 |     snapshot_cqe(&cqe_arr[kAppRecvCQDepth - 1], prev_snapshot);
271 | 
272 |     return new DMAcontext{
273 |         .pd = pd,
274 |         .ctx = context,
275 |         .receive_cq = rec_cq,
276 |         .send_cq = snd_cq,
277 |         .send_mr = send_mr,
278 |         .send_region = send_buf,
279 |         .data_qp = qp,
280 | 
281 |         .mp_recv_qp = mp_recv_qp,
282 |         .mp_recv_cq = mp_recv_cq,
283 |         .mp_wq = mp_wq,
284 |         .mp_wq_family = mp_wq_family,
285 |         .mp_ind_tbl = mp_ind_tbl,
286 |         .mp_cqe_arr = mp_cqe_arr,
287 |         .mp_sge = mp_sge,
288 |         .mp_recv_ring = mp_recv_ring,
289 |         .mp_send_ring = mp_send_ring,
290 |         .mp_send_mr = mp_send_mr,
291 | 
292 |         .id = thread_id,
293 |         .total_received = 0,
294 |         .total_sent = 0,
295 |         .my_send_queue_length = my_send_queue_length,
296 |         .my_recv_queue_length = my_recv_queue_length,
297 | 
298 |         .ring_head = 0,
299 |         .nb_rx_rolling = 0,
300 |         .sge_idx = 0,
301 |         .cqe_idx = 0,
302 |         .prev_snapshot = prev_snapshot,
303 |         .isPS = isPS,
304 |         .isMarkTimeStamp = false,
305 |     };
306 | }
307 | 
308 | void send_packet(DMAcontext* dma_context, int chunk_size, uint64_t offset)
309 | {
310 |     int ret;
311 | 
312 |     struct ibv_sge sg;
313 |     struct ibv_exp_send_wr wr, *bad_wr;
314 |     // struct ibv_send_wr wr;
315 |     // struct ibv_send_wr *bad_wr;
316 | 
317 |     memset(&sg, 0, sizeof(sg));
318 |     sg.addr = (uintptr_t)((char*)dma_context->send_region + offset * P4ML_LAYER_SIZE);
319 |     // printf("%d\n", sg.addr);
320 |     sg.length = chunk_size;
321 |     sg.lkey = dma_context->send_mr->lkey;
322 | 
323 |     memset(&wr, 0, sizeof(wr));
324 |     wr.wr_id = 0;
325 |     wr.sg_list = &sg;
326 |     wr.num_sge = 1;
327 |     // wr.opcode     = IBV_WR_SEND;
328 |     wr.exp_opcode = IBV_EXP_WR_TSO;
329 |     wr.tso.mss = P4ML_LAYER_SIZE; // Maximum Segment Size example
330 |     wr.tso.hdr_sz = IP_ETH_UDP_HEADER_SIZE; // ETH/IPv4/TCP header example
331 |     char hdr[IP_ETH_UDP_HEADER_SIZE]; // ETH/IPv4/TCP header example
332 |     if (dma_context->isPS)
333 |         memcpy(hdr, PS_IP_ETH_UDP_HEADER, IP_ETH_UDP_HEADER_SIZE); // Assuming that the header buffer was define before.
334 |     else
335 |         memcpy(hdr, WORKER_IP_ETH_UDP_HEADER, IP_ETH_UDP_HEADER_SIZE); // Assuming that the header buffer was define before.
336 | 
337 |     hdr[5] = dma_context->id;
338 |     // hdr[37] = dma_context->id;
339 |     wr.tso.hdr = hdr; // There is no need to use malloc operation in this case, local definition of hdr is ok.
340 |         //wr.exp_send_flags = IBV_SEND_INLINE;
341 |     wr.exp_send_flags |= IBV_SEND_SIGNALED;
342 | 
343 |     if (DEBUG_PRINT_ALL_SENDING_PACKET)
344 |         for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) 
345 |             p4ml_header_print_h((agghdr*)((char *)sg.addr + i * P4ML_LAYER_SIZE), "SEND");
346 | 
347 |     // mark first time sending timestamp
348 |     if (dma_context->isMarkTimeStamp) {
349 |         std::chrono::high_resolution_clock::time_point current_time = std::chrono::high_resolution_clock::now();
350 |         for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) {
351 |             agghdr* p4ml_header = (agghdr*)((char*)sg.addr + i * P4ML_LAYER_SIZE);
352 |             if (!dma_context->isSent[ntohs(p4ml_header->seq_num)]) {
353 |                 dma_context->isSent[ntohs(p4ml_header->seq_num)] = true;
354 |                 dma_context->first_send_time[ntohs(p4ml_header->seq_num)] = current_time;
355 |             } else {
356 |                 /* Resend may trigger */
357 |             }
358 |         }
359 |     }
360 | 
361 |     // we dont need to wait cq cause received represent sent
362 |     ret = ibv_exp_post_send(dma_context->data_qp, &wr, &bad_wr);
363 |     if (ret < 0) {
364 |         fprintf(stderr, "failed in post send\n");
365 |         exit(1);
366 |     }
367 | 
368 |     struct ibv_wc wc_send_cq[POLLING_SIZE];
369 |     ibv_poll_cq(dma_context->send_cq, POLLING_SIZE, wc_send_cq);
370 |     if (DEBUG_CHECK_SEND_RECEIVE_TOTAL)
371 |         dma_context->total_sent += chunk_size / P4ML_LAYER_SIZE;
372 | }
373 | 
374 | size_t receive_packet(DMAcontext *dma_context, cqe_snapshot_t* new_snapshot)
375 | {
376 |     // cqe_snapshot_t new_snapshot;
377 |     // cur_snapshot = new_snapshot;
378 |     snapshot_cqe(&dma_context->mp_cqe_arr[dma_context->cqe_idx], *new_snapshot);
379 |     const size_t delta = get_cycle_delta(dma_context->prev_snapshot, *new_snapshot);
380 | 
381 |     if (!(delta == 0 || delta >= kAppNumRingEntries)) {
382 |         if (DEBUG_CHECK_SEND_RECEIVE_TOTAL)
383 |             dma_context->total_received += delta;
384 |         return delta;
385 |     }
386 |     else 
387 |         return 0;
388 |     // return delta;
389 | }
390 | 
391 | void dma_postback(DMAcontext *dma_context)
392 | {
393 |     dma_context->ring_head = (dma_context->ring_head + 1) % kAppNumRingEntries;
394 |     dma_context->nb_rx_rolling++;
395 |     if (dma_context->nb_rx_rolling == kAppStridesPerWQE)
396 |     {
397 |         dma_context->nb_rx_rolling = 0;
398 |         int ret = dma_context->mp_wq_family->recv_burst(dma_context->mp_wq, &dma_context->mp_sge[dma_context->sge_idx], 1);
399 |         rt_assert(ret == 0);
400 |         dma_context->sge_idx = (dma_context->sge_idx + 1) % kAppRQDepth;
401 |     }
402 | }
403 | 
404 | void dma_update_snapshot(DMAcontext *dma_context, cqe_snapshot_t new_snapshot)
405 | {
406 |     dma_context->prev_snapshot = new_snapshot;
407 |     dma_context->cqe_idx = (dma_context->cqe_idx + 1) % kAppRecvCQDepth;
408 | }
409 | 
410 | const char* ibv_wc_opcode_str(enum ibv_wc_opcode opcode)
411 | {
412 |     switch (opcode) {
413 |     case IBV_EXP_WC_SEND:
414 |         return "IBV_WC_SEND";
415 |     case IBV_EXP_WC_RDMA_WRITE:
416 |         return "IBV_WC_RDMA_WRITE";
417 |     case IBV_EXP_WC_RDMA_READ:
418 |         return "IBV_WC_RDMA_READ";
419 |     case IBV_WC_COMP_SWAP:
420 |         return "IBV_WC_COMP_SWAP";
421 |     case IBV_WC_FETCH_ADD:
422 |         return "IBV_WC_FETCH_ADD";
423 |     case IBV_WC_BIND_MW:
424 |         return "IBV_WC_BIND_MW";
425 |         /* receive-side: inbound completion */
426 |     case IBV_EXP_WC_RECV:
427 |         return "IBV_WC_RECV";
428 |     case IBV_EXP_WC_RECV_RDMA_WITH_IMM:
429 |         return "IBV_WC_RECV_RDMA_WITH_IMM";
430 |     default:
431 |         return "IBV_WC_UNKNOWN";
432 |     }
433 | }
434 | 
435 | // Install a flow rule
436 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id, bool isPS)
437 | {
438 |     static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext);
439 | 
440 |     uint8_t* flow_rule = new uint8_t[rule_sz];
441 |     memset(flow_rule, 0, rule_sz);
442 |     uint8_t* buf = flow_rule;
443 | 
444 |     auto* flow_attr = reinterpret_cast<struct ibv_exp_flow_attr*>(flow_rule);
445 |     flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL;
446 |     flow_attr->size = rule_sz;
447 |     flow_attr->priority = 0;
448 |     flow_attr->num_of_specs = 1;
449 |     flow_attr->port = 1;
450 |     flow_attr->flags = 0;
451 |     flow_attr->reserved = 0;
452 |     buf += sizeof(struct ibv_exp_flow_attr);
453 | 
454 |     // Ethernet - all wildcard
455 |     auto* eth_spec = reinterpret_cast<struct ibv_exp_flow_spec_eth*>(buf);
456 |     eth_spec->type = IBV_EXP_FLOW_SPEC_ETH;
457 |     eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth);
458 |     buf += sizeof(struct ibv_exp_flow_spec_eth);
459 | 
460 |     const unsigned char R_SRC_MAC[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
461 |     unsigned char R_DST_MAC[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
462 |     if (isPS)
463 |         memcpy(R_DST_MAC, PS_FILTER_TEMPLATE_R, sizeof(R_DST_MAC));
464 |     else
465 |         memcpy(R_DST_MAC, WORKER_FILTER_TEMPLATE_R, sizeof(R_DST_MAC));
466 | 
467 |     R_DST_MAC[5] = thread_id;
468 |     
469 |     const unsigned char R_SRC_MAC_MASK[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
470 |     const unsigned char R_DST_MAC_MASK[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
471 |     memcpy(eth_spec->val.dst_mac, R_DST_MAC, sizeof(R_DST_MAC));
472 |     memcpy(eth_spec->val.src_mac, R_SRC_MAC, sizeof(R_SRC_MAC));
473 |     memcpy(eth_spec->mask.dst_mac, R_DST_MAC_MASK, sizeof(R_DST_MAC_MASK));
474 |     memcpy(eth_spec->mask.src_mac, R_SRC_MAC_MASK, sizeof(R_SRC_MAC_MASK));
475 |     eth_spec->val.vlan_tag = 0;
476 |     eth_spec->mask.ether_type = 0;
477 | 
478 |     rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr);
479 | }
480 | 
481 | // Install a UDP destination port--based flow rule
482 | void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port)
483 | {
484 |     static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext) + sizeof(ibv_exp_flow_spec_tcp_udp);
485 | 
486 |     uint8_t* flow_rule = new uint8_t[rule_sz];
487 |     memset(flow_rule, 0, rule_sz);
488 |     uint8_t* buf = flow_rule;
489 | 
490 |     auto* flow_attr = reinterpret_cast<struct ibv_exp_flow_attr*>(flow_rule);
491 |     flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL;
492 |     flow_attr->size = rule_sz;
493 |     flow_attr->priority = 0;
494 |     flow_attr->num_of_specs = 1;
495 |     flow_attr->port = 1;
496 |     flow_attr->flags = 0;
497 |     flow_attr->reserved = 0;
498 |     buf += sizeof(struct ibv_exp_flow_attr);
499 | 
500 |     // Ethernet - all wildcard
501 |     auto* eth_spec = reinterpret_cast<struct ibv_exp_flow_spec_eth*>(buf);
502 |     eth_spec->type = IBV_EXP_FLOW_SPEC_ETH;
503 |     eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth);
504 |     buf += sizeof(struct ibv_exp_flow_spec_eth);
505 | 
506 |     // IPv4 - all wildcard
507 |     auto* spec_ipv4 = reinterpret_cast<struct ibv_exp_flow_spec_ipv4_ext*>(buf);
508 |     spec_ipv4->type = IBV_EXP_FLOW_SPEC_IPV4_EXT;
509 |     spec_ipv4->size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
510 |     buf += sizeof(struct ibv_exp_flow_spec_ipv4_ext);
511 | 
512 |     // UDP - match dst port
513 |     auto* udp_spec = reinterpret_cast<struct ibv_exp_flow_spec_tcp_udp*>(buf);
514 |     udp_spec->type = IBV_EXP_FLOW_SPEC_UDP;
515 |     udp_spec->size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
516 |     udp_spec->val.dst_port = htons(dst_port);
517 |     udp_spec->mask.dst_port = 0xffffu;
518 |     udp_spec->mask.dst_port = 0;
519 | 
520 |     rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr);
521 | }
522 | 
523 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot)
524 | {
525 |     while (true) {
526 |         uint16_t wqe_id_0 = cqe->wqe_id;
527 |         uint16_t wqe_counter_0 = cqe->wqe_counter;
528 |         memory_barrier();
529 |         uint16_t wqe_id_1 = cqe->wqe_id;
530 | 
531 |         if (likely(wqe_id_0 == wqe_id_1)) {
532 |             cqe_snapshot.wqe_id = ntohs(wqe_id_0);
533 |             cqe_snapshot.wqe_counter = ntohs(wqe_counter_0);
534 |             return;
535 |         }
536 |     }
537 | }
538 | 
539 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur)
540 | {
541 |     size_t prev_idx = prev.get_cqe_snapshot_cycle_idx();
542 |     size_t cur_idx = cur.get_cqe_snapshot_cycle_idx();
543 |     assert(prev_idx < kAppCQESnapshotCycle && cur_idx < kAppCQESnapshotCycle);
544 | 
545 |     return ((cur_idx + kAppCQESnapshotCycle) - prev_idx) % kAppCQESnapshotCycle;
546 | }
547 | 


--------------------------------------------------------------------------------
/common/dma_common.h:
--------------------------------------------------------------------------------
  1 | #ifndef DMA_COMMON_H
  2 | #define DMA_COMMON_H
  3 | 
  4 | #include "mlx5_defs.h"
  5 | #include "packet.h"
  6 | #include "utils.h"
  7 | #include <assert.h>
  8 | #include <cmath>
  9 | #include <inttypes.h>
 10 | #include <net/if.h> //ifreq
 11 | #include <netdb.h>
 12 | #include <netinet/in.h>
 13 | #include <rdma/rdma_cma.h>
 14 | #include <sstream>
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <sys/ioctl.h>
 19 | #include <sys/ipc.h>
 20 | #include <sys/mman.h>
 21 | #include <sys/shm.h>
 22 | #include <sys/socket.h>
 23 | 
 24 | #define POLLING_SIZE 400
 25 | #define ENTRY_SIZE 256 /* maximum size of each buffer */
 26 | #define PORT_NUM 1
 27 | 
 28 | #define DEBUG_PRINT_ALL_SENDING_PACKET false
 29 | #define DEBUG_PRINT_ALL_RECEIVING_PACKET false
 30 | 
 31 | #define DEBUG_CHECK_SEND_RECEIVE_TOTAL false
 32 | 
 33 | static constexpr size_t kAppRecvCQDepth = 8;
 34 | static constexpr size_t kAppRQDepth = 4; // Multi-packet RQ depth
 35 | 
 36 | static constexpr size_t kAppLogNumStrides = 9;
 37 | static constexpr size_t kAppLogStrideBytes = 9;
 38 | static constexpr size_t kAppMaxPostlist = 512;
 39 | 
 40 | static constexpr bool kAppVerbose = false;
 41 | static constexpr bool kAppCheckContents = true; // Check buffer contents
 42 | 
 43 | /// Size of one ring message buffer
 44 | static constexpr size_t kAppRingMbufSize = (1ull << kAppLogStrideBytes);
 45 | 
 46 | /// Number of strides in one multi-packet RECV WQE
 47 | static constexpr size_t kAppStridesPerWQE = (1ull << kAppLogNumStrides);
 48 | 
 49 | /// Packets after which the CQE snapshot cycles
 50 | static constexpr size_t kAppCQESnapshotCycle = 65536 * kAppStridesPerWQE;
 51 | 
 52 | /// Total number of entries in the RX ring
 53 | static constexpr size_t kAppNumRingEntries = (kAppStridesPerWQE * kAppRQDepth);
 54 | 
 55 | static constexpr size_t kAppRingSize = (kAppNumRingEntries * kAppRingMbufSize);
 56 | 
 57 | /// A consistent snapshot of CQE fields in host endian format
 58 | struct cqe_snapshot_t {
 59 |     uint16_t wqe_id;
 60 |     uint16_t wqe_counter;
 61 | 
 62 |     /// Return this packet's index in the CQE snapshot cycle
 63 |     size_t get_cqe_snapshot_cycle_idx() const
 64 |     {
 65 |         return wqe_id * kAppStridesPerWQE + wqe_counter;
 66 |     }
 67 | 
 68 |     std::string to_string()
 69 |     {
 70 |         std::ostringstream ret;
 71 |         ret << "[ID " << std::to_string(wqe_id) << ", counter "
 72 |             << std::to_string(wqe_counter) << "]";
 73 |         return ret.str();
 74 |     }
 75 | };
 76 | 
 77 | struct DMAcontext {
 78 |     struct ibv_pd* pd;
 79 |     struct ibv_context* ctx;
 80 |     struct ibv_cq* receive_cq;
 81 |     struct ibv_cq* send_cq;
 82 |     struct ibv_mr* send_mr;
 83 |     void* send_region;
 84 |     struct ibv_qp* data_qp;
 85 | 
 86 |     struct ibv_qp* mp_recv_qp;
 87 |     struct ibv_cq* mp_recv_cq;
 88 |     struct ibv_exp_wq* mp_wq;
 89 |     struct ibv_exp_wq_family* mp_wq_family;
 90 |     struct ibv_exp_rwq_ind_table* mp_ind_tbl;
 91 |     volatile mlx5_cqe64* mp_cqe_arr;
 92 |     struct ibv_sge* mp_sge;
 93 |     uint8_t* mp_recv_ring;
 94 |     uint8_t* mp_send_ring;
 95 |     struct ibv_mr* mp_send_mr;
 96 | 
 97 |     // for connection
 98 |     int id;
 99 |     int total_received;
100 |     int total_sent;
101 |     int my_send_queue_length;
102 |     int my_recv_queue_length;
103 | 
104 |     size_t ring_head;
105 |     size_t nb_rx_rolling;
106 |     size_t sge_idx;
107 |     size_t cqe_idx;
108 | 
109 |     cqe_snapshot_t prev_snapshot;
110 | 
111 |     bool isPS;
112 | 
113 |     // // For window adjustment
114 |     bool isMarkTimeStamp;
115 |     bool* isSent;
116 |     std::chrono::high_resolution_clock::time_point* first_send_time;
117 |     std::chrono::high_resolution_clock::time_point* first_receive_time;
118 | };
119 | 
120 | DMAcontext* DMA_create(ibv_device* ib_dev, int thread_id, bool isPS);
121 | const char* ibv_wc_opcode_str(enum ibv_wc_opcode opcode);
122 | void send_packet(DMAcontext* dma_context, int packet_size, uint64_t offset);
123 | size_t receive_packet(DMAcontext *dma_context, cqe_snapshot_t* new_snapshot);
124 | void dma_postback(DMAcontext *dma_context);
125 | void dma_update_snapshot(DMAcontext *dma_context, cqe_snapshot_t new_snapshot);
126 | void dma_context_print(DMAcontext* dma_context, const char* caption);
127 | 
128 | // Install a UDP destination port--based flow rule
129 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id, bool isPS);
130 | void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port);
131 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot);
132 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur);
133 | #endif
134 | 


--------------------------------------------------------------------------------
/common/mlx5_defs.h:
--------------------------------------------------------------------------------
  1 | #ifndef MLX5_DEFS_H
  2 | #define MLX5_DEFS_H
  3 | 
  4 | #include <infiniband/verbs_exp.h>
  5 | #include <inttypes.h>
  6 | #include <linux/types.h>
  7 | #include <stdint.h>
  8 | 
  9 | enum mlx5_alloc_type {
 10 |     MLX5_ALLOC_TYPE_ANON,
 11 |     MLX5_ALLOC_TYPE_HUGE,
 12 |     MLX5_ALLOC_TYPE_CONTIG,
 13 |     MLX5_ALLOC_TYPE_PEER_DIRECT,
 14 |     MLX5_ALLOC_TYPE_PREFER_HUGE,
 15 |     MLX5_ALLOC_TYPE_PREFER_CONTIG,
 16 |     MLX5_ALLOC_TYPE_ALL
 17 | };
 18 | 
 19 | enum mlx5_lock_type {
 20 |     MLX5_SPIN_LOCK = 0,
 21 |     MLX5_MUTEX = 1,
 22 | };
 23 | 
 24 | enum mlx5_lock_state { MLX5_USE_LOCK,
 25 |     MLX5_LOCKED,
 26 |     MLX5_UNLOCKED };
 27 | 
 28 | struct mlx5_lock {
 29 |     pthread_mutex_t mutex;
 30 |     pthread_spinlock_t slock;
 31 |     enum mlx5_lock_state state;
 32 |     enum mlx5_lock_type type;
 33 | };
 34 | 
 35 | struct mlx5_numa_req {
 36 |     int valid;
 37 |     int numa_id;
 38 | };
 39 | 
 40 | struct mlx5_peer_direct_mem {
 41 |     uint32_t dir;
 42 |     uint64_t va_id;
 43 |     struct ibv_exp_peer_buf* pb;
 44 |     struct ibv_exp_peer_direct_attr* ctx;
 45 | };
 46 | 
 47 | struct mlx5_buf {
 48 |     void* buf;
 49 |     size_t length;
 50 |     int base;
 51 |     struct mlx5_hugetlb_mem* hmem;
 52 |     struct mlx5_peer_direct_mem peer;
 53 |     enum mlx5_alloc_type type;
 54 |     struct mlx5_numa_req numa_req;
 55 |     int numa_alloc;
 56 | };
 57 | 
 58 | struct mlx5_mini_cqe8 {
 59 |     union {
 60 |         uint32_t rx_hash_result;
 61 |         uint32_t checksum;
 62 |         struct {
 63 |             uint16_t wqe_counter;
 64 |             uint8_t s_wqe_opcode;
 65 |             uint8_t reserved;
 66 |         } s_wqe_info;
 67 |     };
 68 |     uint32_t byte_cnt;
 69 | };
 70 | 
 71 | enum { MLX5_MINI_ARR_SIZE = 8 };
 72 | 
 73 | struct mlx5_tm_cqe {
 74 |     uint32_t success;
 75 |     uint32_t hw_phase_cnt;
 76 |     uint8_t rsvd0[10];
 77 | };
 78 | 
 79 | struct mlx5_cqe64 {
 80 |     uint8_t rsvd0[2];
 81 |     /*
 82 |    * wqe_id is valid only for
 83 |    * Striding RQ (Multi-Packet RQ).
 84 |    * It provides the WQE index inside the RQ.
 85 |    */
 86 |     uint16_t wqe_id;
 87 |     uint8_t rsvd4[8];
 88 |     uint32_t rx_hash_res;
 89 |     uint8_t rx_hash_type;
 90 |     uint8_t ml_path;
 91 |     uint8_t rsvd20[2];
 92 |     uint16_t checksum;
 93 |     uint16_t slid;
 94 |     uint32_t flags_rqpn;
 95 |     uint8_t hds_ip_ext;
 96 |     uint8_t l4_hdr_type_etc;
 97 |     __be16 vlan_info;
 98 |     uint32_t srqn_uidx;
 99 |     uint32_t imm_inval_pkey;
100 |     uint8_t app;
101 |     uint8_t app_op;
102 |     uint16_t app_info;
103 |     uint32_t byte_cnt;
104 |     __be64 timestamp;
105 |     union {
106 |         uint32_t sop_drop_qpn;
107 |         struct {
108 |             uint8_t sop;
109 |             uint8_t qpn[3];
110 |         } sop_qpn;
111 |     };
112 |     /*
113 |    * In Striding RQ (Multi-Packet RQ) wqe_counter provides
114 |    * the WQE stride index (to calc pointer to start of the message)
115 |    */
116 |     uint16_t wqe_counter;
117 |     uint8_t signature;
118 |     uint8_t op_own;
119 | };
120 | 
121 | struct mlx5_cq {
122 |     struct ibv_cq ibv_cq;
123 |     uint32_t creation_flags;
124 |     uint32_t pattern;
125 |     struct mlx5_buf buf_a;
126 |     struct mlx5_buf buf_b;
127 |     struct mlx5_buf* active_buf;
128 |     struct mlx5_buf* resize_buf;
129 |     int resize_cqes;
130 |     int active_cqes;
131 |     struct mlx5_lock lock;
132 |     uint32_t cqn;
133 |     uint32_t cons_index;
134 |     uint32_t wait_index;
135 |     uint32_t wait_count;
136 |     volatile uint32_t* dbrec;
137 |     int arm_sn;
138 |     int cqe_sz;
139 |     int resize_cqe_sz;
140 |     int stall_next_poll;
141 |     int stall_enable;
142 |     uint64_t stall_last_count;
143 |     int stall_adaptive_enable;
144 |     int stall_cycles;
145 |     uint8_t model_flags; /* use mlx5_cq_model_flags */
146 |     uint16_t cqe_comp_max_num;
147 |     uint8_t cq_log_size;
148 |     /* Compressed CQE data */
149 |     struct mlx5_cqe64 next_decomp_cqe64;
150 |     struct mlx5_resource* compressed_rsc;
151 |     uint16_t compressed_left;
152 |     uint16_t compressed_wqe_cnt;
153 |     uint8_t compressed_req;
154 |     uint8_t compressed_mp_rq;
155 |     uint8_t mini_arr_idx;
156 |     struct mlx5_mini_cqe8 mini_array[MLX5_MINI_ARR_SIZE];
157 |     /* peer-direct data */
158 |     int peer_enabled;
159 |     struct ibv_exp_peer_direct_attr* peer_ctx;
160 |     struct mlx5_buf peer_buf;
161 |     struct mlx5_peek_entry** peer_peek_table;
162 |     struct mlx5_peek_entry* peer_peek_free;
163 | };
164 | 
165 | #endif // MLX5_DEFS_H
166 | 


--------------------------------------------------------------------------------
/common/p4ml_struct.h:
--------------------------------------------------------------------------------
 1 | #ifndef P4ML_STRUCT_H
 2 | #define P4ML_STRUCT_H
 3 | #include <inttypes.h>
 4 | 
 5 | #include "packet.h"
 6 | 
 7 | struct ThreadInfo
 8 | {
 9 |     int thread_id;
10 |     int agtr_start_pos;
11 | };
12 | 
13 | struct Job
14 | {
15 |     uint64_t key;
16 |     float *float_data;
17 |     int32_t *int_data;
18 |     uint32_t len;
19 |     int cmd;
20 | };
21 | 
22 | struct AppInfo
23 | {
24 |     uint32_t host;
25 |     uint16_t appID;
26 |     uint8_t num_worker;
27 |     uint8_t num_PS;
28 | };
29 | 
30 | #endif


--------------------------------------------------------------------------------
/common/packet.h:
--------------------------------------------------------------------------------
  1 | #ifndef PACKET_P4ML_H
  2 | #define PACKET_P4ML_H
  3 | #include <stdint.h>
  4 | #include <net/ethernet.h>
  5 | #include <arpa/inet.h>
  6 | #include <cstring>
  7 | #include <cstdio>
  8 | #include <thread>
  9 | #include <mutex>
 10 | #include <inttypes.h>
 11 | #include <iostream>
 12 | #include <bitset>
 13 | #include <chrono>
 14 | #include "utils.h"
 15 | #include "p4ml_struct.h"
 16 | 
 17 | #define PS_FILTER_TEMPLATE 0x05, 0x04, 0x03, 0x02, 0x01, 0xFF
 18 | #define WORKER_FILTER_TEMPLATE 0x77, 0x77, 0x77, 0x77, 0x77, 0xFF
 19 | 
 20 | // #define SRC_MAC 0xb8, 0x59, 0x9f, 0x1d, 0x04, 0xf2 
 21 | #define SRC_MAC 0xe4, 0x1d, 0x2d, 0xf3, 0xdd, 0xcc
 22 | // #define DST_MAC 0xb8, 0x59, 0x9f, 0x0b, 0x30, 0x72
 23 | 
 24 | #define ETH_TYPE 0x07, 0x00
 25 | 
 26 | #define IP_HDRS 0x45, 0x00, 0x00, 0x54, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01, 0xaf, 0xb6
 27 | 
 28 | #define SRC_IP 0x0d, 0x07, 0x38, 0x66
 29 | 
 30 | #define DST_IP 0x0d, 0x07, 0x38, 0x7f
 31 | 
 32 | #define SRC_PORT 0x67, 0x67
 33 | 
 34 | #define DST_PORT 0x78, 0x78
 35 | 
 36 | #define UDP_HDRS 0x00, 0x00, 0x00, 0x00
 37 | 
 38 | // Only a template, DST_IP will be modified soon
 39 | // This one is for sending
 40 | const unsigned char PS_IP_ETH_UDP_HEADER[] = { WORKER_FILTER_TEMPLATE, SRC_MAC, ETH_TYPE, IP_HDRS, SRC_IP, DST_IP };
 41 | const unsigned char WORKER_IP_ETH_UDP_HEADER[] = { PS_FILTER_TEMPLATE, SRC_MAC, ETH_TYPE, IP_HDRS, SRC_IP, DST_IP };
 42 | 
 43 | // P4ML_PACKET_SIZE = IP_ETH_HEADER_SIZE + P4ML_HEADER_SIZE + P4ML_DATA_SIZE
 44 | #define P4ML_PACKET_SIZE 308
 45 | #define P4ML_DATA_SIZE 248
 46 | #define P4ML_HEADER_SIZE 26
 47 | #define P4ML_LAYER_SIZE 274
 48 | #define IP_ETH_UDP_HEADER_SIZE 34
 49 | 
 50 | #define MAX_ENTRIES_PER_PACKET 62
 51 | 
 52 | #define BYTE_TO_BINARY_PATTERN "%c%c%c%c%c%c%c%c"
 53 | #define BYTE_TO_BINARY(byte)  \
 54 |   (byte & 0x80 ? '1' : '0'), \
 55 |   (byte & 0x40 ? '1' : '0'), \
 56 |   (byte & 0x20 ? '1' : '0'), \
 57 |   (byte & 0x10 ? '1' : '0'), \
 58 |   (byte & 0x08 ? '1' : '0'), \
 59 |   (byte & 0x04 ? '1' : '0'), \
 60 |   (byte & 0x02 ? '1' : '0'), \
 61 |   (byte & 0x01 ? '1' : '0') 
 62 | 
 63 | #pragma pack(push, 1)
 64 |     struct agghdr {
 65 |         uint32_t bitmap;
 66 |         uint8_t num_worker;
 67 |         uint8_t flag;
 68 |         // reserved       :  2;
 69 |         // isForceFoward  :  1;
 70 | 
 71 |         /* Current version
 72 |         overflow       :  1;
 73 |         PSIndex        :  2;
 74 |         dataIndex      :  1;
 75 |         ECN            :  1;
 76 |         isResend       :  1;
 77 |         isSWCollision  :  1;
 78 |         isACK          :  1;
 79 |         */
 80 | 
 81 |         uint16_t appID;
 82 |         uint16_t seq_num;
 83 |         uint16_t agtr;
 84 |         uint16_t agtr2;
 85 |         int32_t vector[MAX_ENTRIES_PER_PACKET];
 86 |         uint64_t key;
 87 |         uint32_t len_tensor;
 88 | };
 89 | #pragma pack(pop)
 90 | 
 91 | static std::mutex _packet_print_mutex;
 92 | 
 93 | void inline make_p4ml_layer_and_copy_to(void* payload, Job* job_info, AppInfo* app_info, uint16_t* agtr, uint16_t* seq_num, int* offset, bool isResend, bool isForceForward, bool isOverflow)
 94 | {
 95 |     agghdr* agg_header = (agghdr*)payload;
 96 |     agghdr* p4ml_header = agg_header;
 97 |     agg_header->key = job_info->key;
 98 |     agg_header->len_tensor = htonl(job_info->len);
 99 |     agg_header->bitmap = htonl(1 << (app_info->host));
100 |     agg_header->num_worker = app_info->num_worker;
101 |     agg_header->appID = htons(app_info->appID);
102 |     agg_header->flag = 0;
103 |     agg_header->agtr = htons(*agtr);
104 |     //TODO: clarify this and UsedSwitchAGTRcount
105 |     agg_header->agtr2 = htons(*agtr + MAX_AGTR_COUNT);
106 |     agg_header->seq_num = htons(*seq_num);
107 | 
108 |     agg_header->flag = ((job_info->key % app_info->num_PS)) << 5;
109 | 
110 |     if (isResend) 
111 |         agg_header->flag |= 4;
112 |         
113 |     if (isForceForward)
114 |         agg_header->flag |= 32;
115 | 
116 |     if (isOverflow) 
117 |         agg_header->flag |= 128;
118 |     // PS Index
119 |     // agg_header->flag |= (*num_PS << 5);
120 |     // printf("to PS: %d\n", ((*key % *num_PS)+1));
121 | 
122 |     int32_t* used_data;
123 |     if (isOverflow) {
124 |         used_data = (int32_t*) job_info->float_data;
125 |     }
126 |     else 
127 |         used_data = (int32_t*) job_info->int_data;
128 | 
129 |     int32_t* send_data;
130 |     if (*offset + MAX_ENTRIES_PER_PACKET > job_info->len) {
131 |         int32_t* tmp = new int32_t[MAX_ENTRIES_PER_PACKET]();
132 |         memcpy(tmp, used_data + *offset, sizeof(int32_t) * (job_info->len % MAX_ENTRIES_PER_PACKET));
133 |         send_data = tmp;
134 |         delete tmp;
135 |     } else {
136 |         send_data = used_data + *offset;
137 |     }
138 | 
139 |     // p4ml_header_print_h(agg_header, "Make");
140 | }
141 | 
142 | // void inline make_packet_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* num_worker, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isResend, bool isForceForward, uint8_t* num_PS, int thread_id)
143 | // {
144 | //     char* eth_ip_header = (char*)payload;
145 | //     memcpy(payload, IP_ETH_UDP_HEADER, sizeof(IP_ETH_UDP_HEADER));
146 | //     eth_ip_header[5] = thread_id;
147 | //     make_p4ml_layer_and_copy_to((char*)payload + sizeof(IP_ETH_UDP_HEADER), key, len_tensor, workerID, num_worker, appID, agtr, seq_num, data, isResend, isForceForward, num_PS);
148 | // }
149 | 
150 | void inline p4ml_header_ntoh(agghdr* p_p4ml)
151 | {
152 |     p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor);
153 |     p_p4ml->bitmap = ntohl(p_p4ml->bitmap);
154 |     p_p4ml->seq_num = ntohs(p_p4ml->seq_num);
155 |     p_p4ml->agtr = ntohs(p_p4ml->agtr);
156 |     p_p4ml->agtr2 = ntohs(p_p4ml->agtr2);
157 |     p_p4ml->appID = ntohs(p_p4ml->appID);
158 |     int32_t* p_model = p_p4ml->vector;
159 | 
160 |     /* if not float */
161 |     if (!(p_p4ml->flag & 0x80)) {
162 |         for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
163 |             p_model[i] = ntohl(p_model[i]);
164 |     }
165 | }
166 | 
167 | void inline p4ml_header_ntoh_without_data(agghdr* p_p4ml)
168 | {
169 |     p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor);
170 |     p_p4ml->bitmap = ntohl(p_p4ml->bitmap);
171 |     p_p4ml->seq_num = ntohs(p_p4ml->seq_num);
172 |     p_p4ml->agtr = ntohs(p_p4ml->agtr);
173 |     p_p4ml->agtr2 = ntohs(p_p4ml->agtr2);
174 |     p_p4ml->appID = ntohs(p_p4ml->appID);
175 |     // // p_p4ml->last_ack = ntohl(p_p4ml->last_ack);
176 |     int32_t* p_model = p_p4ml->vector;
177 | }
178 | 
179 | void inline p4ml_header_hton_without_data(agghdr* p_p4ml)
180 | {
181 |     p_p4ml->len_tensor = htonl(p_p4ml->len_tensor);
182 |     p_p4ml->bitmap = htonl(p_p4ml->bitmap);
183 |     p_p4ml->seq_num = htons(p_p4ml->seq_num);
184 |     p_p4ml->agtr = htons(p_p4ml->agtr);
185 |     p_p4ml->agtr2 = htons(p_p4ml->agtr2);
186 |     p_p4ml->appID = htons(p_p4ml->appID);
187 |     // // p_p4ml->last_ack = htonl(p_p4ml->last_ack);
188 | }
189 | 
190 | void inline p4ml_header_setACK(agghdr* p4ml_header)
191 | {
192 |     p4ml_header->flag |= 1;
193 | }
194 | 
195 | void inline p4ml_header_setOverflow(agghdr* p4ml_header)
196 | {
197 |     p4ml_header->flag |= 128;
198 | }
199 | 
200 | void inline p4ml_header_setOverflowRequest(agghdr* p4ml_header)
201 | {
202 |     p4ml_header->flag |= 128;
203 |     p4ml_header->flag &= ~(4);
204 | }
205 | 
206 | void inline p4ml_header_setCollisionBit(agghdr* p4ml_header)
207 | {
208 |     p4ml_header->flag |= 2;
209 | }
210 | 
211 | void inline p4ml_header_setLengthFieldToAgtr(agghdr* p4ml_header, uint16_t new_agtr)
212 | {
213 |     p4ml_header->len_tensor = new_agtr;
214 | }
215 | 
216 | void inline p4ml_header_resetIndex(agghdr* p4ml_header)
217 | {
218 |     p4ml_header->flag &= ~(16);
219 | }
220 | 
221 | void inline p4ml_header_resetCollisionBit(agghdr* p4ml_header)
222 | {
223 |     p4ml_header->flag &= ~(2);
224 | }
225 | 
226 | void inline p4ml_header_print(agghdr *p4ml_header, const char *caption)
227 | {
228 |     std::lock_guard<std::mutex> lock(_packet_print_mutex);
229 |     printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, "
230 |            "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, "
231 |            "agtr: %u, agtr2: %u, seq_num: %u, isACK: %d, dataIndex: %d,"
232 |            "isResend: %d, isOverflow: %d, data: ",
233 |         caption, p4ml_header->key, p4ml_header->len_tensor,
234 |         BYTE_TO_BINARY(p4ml_header->bitmap), p4ml_header->num_worker, p4ml_header->appID,
235 |         p4ml_header->agtr, p4ml_header->agtr2, p4ml_header->seq_num,
236 |         p4ml_header->flag & 1 ? 1 : 0, p4ml_header->flag & 16 ? 1 : 0, p4ml_header->flag & 4 ? 1 : 0,
237 |         p4ml_header->flag & 128 ? 1 : 0);
238 | 
239 |     // is Overflow?
240 |     if (p4ml_header->flag & 128)
241 |         // is ACK?  isn't Resend?
242 |         if (p4ml_header->flag & 1 && !(p4ml_header->flag & 4))
243 |             printf("REQUEST - CARELESS.");
244 |         else
245 |             for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
246 |                 printf("%.7f ", ntohf((p4ml_header->vector)[i]));
247 |     else
248 |         for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
249 |             printf("%d ", p4ml_header->vector[i]);
250 |     printf("\n");
251 | }
252 | 
253 | void inline p4ml_header_print_h(agghdr *p4ml_header, const char *caption)
254 | {
255 |     std::lock_guard<std::mutex> lock(_packet_print_mutex);
256 |     printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, "
257 |            "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, "
258 |            "agtr: %u, agtr2: %u, seq_num: %u, isACK: %d, dataIndex: %d,"
259 |            "isResend: %d, isOverflow: %d, data: ",
260 |         caption, p4ml_header->key, ntohl(p4ml_header->len_tensor),
261 |         BYTE_TO_BINARY(ntohl(p4ml_header->bitmap)), p4ml_header->num_worker, ntohs(p4ml_header->appID),
262 |         ntohs(p4ml_header->agtr), ntohs(p4ml_header->agtr2), ntohs(p4ml_header->seq_num),
263 |         p4ml_header->flag & 1 ? 1 : 0, p4ml_header->flag & 16 ? 1 : 0, p4ml_header->flag & 4 ? 1 : 0,
264 |         p4ml_header->flag & 128 ? 1 : 0);
265 | 
266 |     // is Overflow?
267 |     if (p4ml_header->flag & 128)
268 |         // is ACK?  isn't Resend?
269 |         if (p4ml_header->flag & 1 && !(p4ml_header->flag & 4))
270 |             printf("REQUEST - CARELESS.");
271 |         else 
272 |             for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
273 |                 printf("%.7f ", ((float *)(p4ml_header->vector))[i]);
274 |     else
275 |         for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
276 |             printf("%d ", ntohl(p4ml_header->vector[i]));
277 |     printf("\n");
278 | }
279 | 
280 | #endif
281 | 


--------------------------------------------------------------------------------
/common/quantize.h:
--------------------------------------------------------------------------------
  1 | #ifndef QUAN_P4ML_H
  2 | #define QUAN_P4ML_H
  3 | #include <immintrin.h>
  4 | #include <stdint.h>
  5 | 
  6 | // scale up float then translate it to int
  7 | // without any further optimization
  8 | inline static void quantizeNaive(char *data_ptr, uint32_t size)
  9 | {
 10 |     int factor = 1000000;
 11 |     int *int_data_ptr = (int *)data_ptr;
 12 |     float *float_data_ptr = (float *)data_ptr;
 13 |     for (uint32_t i = 0; i < size; i++)
 14 |     {
 15 |         int_data_ptr[i] = (int)(float_data_ptr[i] * factor);
 16 |     }
 17 | }
 18 | 
 19 | // translate back to float and scale down
 20 | // without any further optimization
 21 | inline static void dequantizeNaive(char *data_ptr, uint32_t size)
 22 | {
 23 |     float factor = 1000000.0;
 24 |     int *int_data_ptr = (int *)data_ptr;
 25 |     float *float_data_ptr = (float *)data_ptr;
 26 |     for (uint32_t i = 0; i < size; i++)
 27 |     {
 28 |         float_data_ptr[i] = (float)(int_data_ptr[i] / factor);
 29 |     }
 30 | }
 31 | 
 32 | // functioned the same as quantizeNaive
 33 | // boost with avx 256 instructions
 34 | inline static void quantizeAVX2(char *data_ptr, uint32_t size)
 35 | {
 36 |     // check alignment
 37 | 
 38 |     __m256 input;
 39 |     __m256i output;
 40 | 
 41 |     int unaligned_size = size % 8;
 42 |     int aligned_size = size / 8;
 43 | 
 44 |     const float factor = 1000000.0;
 45 |     float *float_data_ptr = (float *)data_ptr;
 46 |     int *int_data_ptr = (int *)data_ptr;
 47 | 
 48 |     // 0xF4240 is 1000000 in hex
 49 |     __m256 factor_in_avx = _mm256_broadcast_ss(&factor);
 50 | 
 51 |     for (uint32_t i = 0; i < aligned_size; i++)
 52 |     {
 53 |         float *current_pos = float_data_ptr + i * 8;
 54 |         input = _mm256_loadu_ps(current_pos);
 55 |         input = _mm256_mul_ps(input, factor_in_avx);
 56 |         output = _mm256_cvtps_epi32(input);
 57 |         _mm256_storeu_si256((__m256i *)current_pos, output);
 58 |     }
 59 | 
 60 |     for (uint32_t i = 0; i < unaligned_size; i++)
 61 |     {
 62 |         int_data_ptr[aligned_size * 8 + i] =
 63 |             (int)(float_data_ptr[aligned_size * 8 + i] * factor);
 64 |     }
 65 | }
 66 | 
 67 | // functioned the same as dequantizeNaive
 68 | // boost with avx 256 instructions
 69 | inline static void dequantizeAVX2(char *data_ptr, uint32_t size)
 70 | {
 71 |     __m256i input;
 72 |     __m256 output;
 73 | 
 74 |     int unaligned_size = size % 8;
 75 |     int aligned_size = size / 8;
 76 | 
 77 |     const float factor = 1000000.0;
 78 |     int *int_data_ptr = (int *)data_ptr;
 79 |     float *float_data_ptr = (float *)data_ptr;
 80 | 
 81 |     // __m256i* input_avx = (__m256i*) data_ptr;
 82 |     __m256 factor_in_avx = _mm256_broadcast_ss(&factor);
 83 | 
 84 |     for (uint32_t i = 0; i < aligned_size; i++)
 85 |     {
 86 |         float *current_pos = float_data_ptr + i * 8;
 87 |         input = _mm256_loadu_si256((__m256i *)current_pos);
 88 |         output = _mm256_cvtepi32_ps(input);
 89 |         output = _mm256_div_ps(output, factor_in_avx);
 90 |         _mm256_storeu_ps(current_pos, output);
 91 |     }
 92 | 
 93 |     for (uint32_t i = 0; i < unaligned_size; i++)
 94 |     {
 95 |         float_data_ptr[aligned_size * 8 + i] =
 96 |             (float)(int_data_ptr[aligned_size * 8 + i] / factor);
 97 |     }
 98 | }
 99 | 
100 | // functioned the same as quantizeNaive
101 | // boost with avx 256 instructions
102 | inline static void quantizeAVX2to(char *dst_ptr, char *src_ptr, uint32_t size)
103 | {
104 |     // check alignment
105 | 
106 |     __m256 input;
107 |     __m256i output;
108 | 
109 |     int unaligned_size = size % 8;
110 |     int aligned_size = size / 8;
111 | 
112 |     const float factor = 1000000.0;
113 |     float *float_data_ptr = (float *)src_ptr;
114 |     int *int_data_ptr = (int *)src_ptr;
115 | 
116 |     float *dst_float_data_ptr = (float *)dst_ptr;
117 |     int *dst_int_data_ptr = (int *)dst_ptr;
118 | 
119 |     // 0xF4240 is 1000000 in hex
120 |     __m256 factor_in_avx = _mm256_broadcast_ss(&factor);
121 | 
122 |     for (uint32_t i = 0; i < aligned_size; i++)
123 |     {
124 |         float *current_pos = float_data_ptr + i * 8;
125 |         float *current_dst_pos = dst_float_data_ptr + i * 8;
126 | 
127 |         input = _mm256_loadu_ps(current_pos);
128 |         input = _mm256_mul_ps(input, factor_in_avx);
129 |         output = _mm256_cvtps_epi32(input);
130 |         _mm256_storeu_si256((__m256i *)current_dst_pos, output);
131 |     }
132 | 
133 |     for (uint32_t i = 0; i < unaligned_size; i++)
134 |     {
135 |         dst_int_data_ptr[aligned_size * 8 + i] =
136 |             (int)(float_data_ptr[aligned_size * 8 + i] * factor);
137 |     }
138 | }
139 | 
140 | // functioned the same as dequantizeNaive
141 | // boost with avx 256 instructions
142 | inline static void dequantizeAVX2to(char *dst_ptr, char *src_ptr,
143 |                                     uint32_t size)
144 | {
145 |     __m256i input;
146 |     __m256 output;
147 | 
148 |     int unaligned_size = size % 8;
149 |     int aligned_size = size / 8;
150 | 
151 |     const float factor = 1000000.0;
152 |     int *int_data_ptr = (int *)src_ptr;
153 |     float *float_data_ptr = (float *)src_ptr;
154 | 
155 |     int *dst_int_data_ptr = (int *)dst_ptr;
156 |     float *dst_float_data_ptr = (float *)dst_ptr;
157 | 
158 |     // __m256i* input_avx = (__m256i*) src_ptr;
159 |     __m256 factor_in_avx = _mm256_broadcast_ss(&factor);
160 | 
161 |     for (uint32_t i = 0; i < aligned_size; i++)
162 |     {
163 |         float *current_pos = float_data_ptr + i * 8;
164 |         float *current_dst_pos = dst_float_data_ptr + i * 8;
165 | 
166 |         input = _mm256_loadu_si256((__m256i *)current_pos);
167 |         output = _mm256_cvtepi32_ps(input);
168 |         output = _mm256_div_ps(output, factor_in_avx);
169 |         _mm256_storeu_ps(current_dst_pos, output);
170 |     }
171 | 
172 |     for (uint32_t i = 0; i < unaligned_size; i++)
173 |     {
174 |         dst_float_data_ptr[aligned_size * 8 + i] =
175 |             (float)(int_data_ptr[aligned_size * 8 + i] / factor);
176 |     }
177 | }
178 | 
179 | #endif


--------------------------------------------------------------------------------
/common/utils.h:
--------------------------------------------------------------------------------
  1 | #ifndef UTILS_H
  2 | #define UTILS_H
  3 | 
  4 | #include <sched.h>
  5 | #include <sys/ioctl.h>
  6 | #include <net/if.h>
  7 | #include <stdint.h>
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <sys/mman.h>
 11 | #include <time.h>
 12 | #include <unistd.h>
 13 | #include <stdexcept>
 14 | 
 15 | // Because here we use 2 agtr for one packet, so /2
 16 | #define MAX_AGTR_COUNT 20000
 17 | #define AGTR_TO_USE_PER_APPLICATION 2800
 18 | 
 19 | #define EACH_HUGEPAGE_SIZE (2048*1024)
 20 | 
 21 | #define likely(x) __builtin_expect(!!(x), 1)
 22 | #define unlikely(x) __builtin_expect(!!(x), 0)
 23 | 
 24 | 
 25 | #define DIVUP(x, y) (((x)+(y)-1)/(y))
 26 | #define ROUNDUP(x, y) (DIVUP((x), (y))*(y))
 27 | 
 28 | template <typename T>
 29 | static inline T align_floor(T v, T align) {
 30 |   return v - (v % align);
 31 | }
 32 | 
 33 | template <typename T>
 34 | static inline T align_ceil(T v, T align) {
 35 |   return align_floor(v + align - 1, align);
 36 | }
 37 | 
 38 | static inline void ib_malloc(void** ptr, size_t size) {
 39 |   size_t page_size = sysconf(_SC_PAGESIZE);
 40 |   void* p;
 41 |   int size_aligned = ROUNDUP(size, page_size);
 42 |   int ret = posix_memalign(&p, page_size, size_aligned);
 43 |   if (ret != 0) {
 44 |     printf("posix_memalign error.\n");
 45 |     exit(1);
 46 |   }
 47 |   memset(p, 0, size);
 48 |   *ptr = p;
 49 | }
 50 | 
 51 | #define KB(x) (static_cast<size_t>(x) << 10)
 52 | #define KB_(x) (KB(x) - 1)
 53 | #define MB(x) (static_cast<size_t>(x) << 20)
 54 | #define MB_(x) (MB(x) - 1)
 55 | 
 56 | static void memory_barrier() { asm volatile("" ::: "memory"); }
 57 | static void lfence() { asm volatile("lfence" ::: "memory"); }
 58 | static void sfence() { asm volatile("sfence" ::: "memory"); }
 59 | static void mfence() { asm volatile("mfence" ::: "memory"); }
 60 | static void clflush(volatile void* p) { asm volatile("clflush (%0)" ::"r"(p)); }
 61 | static void cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx,
 62 |                   unsigned int* edx) {
 63 |   asm volatile("cpuid"
 64 |                : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
 65 |                : "0"(*eax), "2"(*ecx));
 66 | }
 67 | 
 68 | inline void bindingCPU(int num) {
 69 |   int result;
 70 |   cpu_set_t mask;
 71 |   CPU_ZERO(&mask);
 72 |   CPU_SET(num, &mask);
 73 |   result = sched_setaffinity(0, sizeof(mask), &mask);
 74 |   if (result < 0) {
 75 |     printf("binding CPU fails\n");
 76 |     exit(1);
 77 |   }
 78 | }
 79 | 
 80 | /// Check a condition at runtime. If the condition is false, throw exception.
 81 | static inline void rt_assert(bool condition) {
 82 |   if (unlikely(!condition)) throw std::runtime_error("");
 83 | }
 84 | 
 85 | 
 86 | /* allocate the huge pages. */
 87 | inline char *alloc_raw_pages(int cnt, int size) {
 88 |   /*
 89 |    *  Don't touch the page since then allocator would not allocate the page
 90 |    * right now.
 91 |    */
 92 |   int flag = MAP_SHARED | MAP_ANONYMOUS;
 93 |   if (size == EACH_HUGEPAGE_SIZE) flag |= MAP_HUGETLB;
 94 |   char *ptr =
 95 |       (char *)mmap(NULL, (int64_t)cnt * size, PROT_READ | PROT_WRITE, flag, -1, 0);
 96 |   if (ptr == (char *)-1) {
 97 |     perror("alloc_raw_pages");
 98 |     return NULL;
 99 |   }
100 |   return ptr;
101 | }
102 | 
103 | union {
104 |     float f;
105 |     uint32_t u;
106 | } if_value;
107 |     
108 | inline float ntohf(uint32_t net32)
109 | {
110 |     if_value.u = ntohl(net32);
111 |     return if_value.f;
112 | }
113 | 
114 | // /* Returns the MAC Address Params: int iNetType - 0: ethernet, 1: Wifi char chMAC[6] - MAC Address in binary format Returns: 0: success -1: Failure */
115 | // int getMACAddress(char chMAC[6])
116 | // {
117 | //     struct ifreq ifr;
118 | //     int sock;
119 | //     char* ifname = "enp178s0f0";
120 | //     sock = socket(AF_INET, SOCK_DGRAM, 0);
121 | //     strcpy(ifr.ifr_name, ifname);
122 | //     ifr.ifr_addr.sa_family = AF_INET;
123 | //     if (ioctl(sock, SIOCGIFHWADDR, &ifr) < 0) {
124 | //         return -1;
125 | //     }
126 | //     memcpy(chMAC, ifr.ifr_hwaddr.sa_data, 6);
127 | //     close(sock);
128 | //     return 0;
129 | // }
130 |  
131 | // /* Returns the interface IP Address Params: int iNetType - 0: ethernet, 1: Wifi char *chIP - IP Address string Return: 0: success / -1: Failure */
132 | // int getIpAddress(char chIP[16])
133 | // {
134 | //     struct ifreq ifr;
135 | //     int sock = 0;
136 | //     sock = socket(AF_INET, SOCK_DGRAM, 0);
137 | //     strcpy(ifr.ifr_name, "enp178s0f0");
138 | //     if (ioctl(sock, SIOCGIFADDR, &ifr) < 0) {
139 | //         strcpy(chIP, "0.0.0.0");
140 | //         return -1;
141 | //     }
142 | //     sprintf(chIP, "%s", inet_ntoa(((struct sockaddr_in*)&(ifr.ifr_addr))->sin_addr));
143 | //     close(sock);
144 | //     return 0;
145 | // }
146 | 
147 | #endif
148 | 


--------------------------------------------------------------------------------
/common/window_manager.h:
--------------------------------------------------------------------------------
 1 | #ifndef SLIDING_W_H
 2 | #define SLIDING_W_H
 3 | 
 4 | #include "packet.h"
 5 | #include "CC_manager.h"
 6 | #define RESEND_TRIGGER 1
 7 | 
 8 | class WindowManager {
 9 |     public:
10 |         bool* isACKed;
11 |         /* This three variable is completely useless, but
12 |         when deleting it, the performance will drop from 46Gbps to 40Gbps.. */
13 |         bool* isSent;
14 |         std::chrono::high_resolution_clock::time_point* send_time;
15 |         std::chrono::high_resolution_clock::time_point* receive_time;
16 |         /* */
17 |         int total_ACK;
18 |         int last_ACK;
19 | 
20 |         WindowManager() {
21 |             last_ACK = 0;
22 |         }
23 | 
24 |         bool inline UpdateWindow(uint16_t* seq_num)
25 |         {
26 |             bool isLastAckUpdated = false;
27 |             isACKed[*seq_num] = true;
28 |             while (isACKed[last_ACK + 1]) {
29 |                 last_ACK++;
30 |                 isLastAckUpdated = true;
31 |             }
32 |             return isLastAckUpdated;
33 |         }
34 | 
35 |         int inline Reset(int packet_total)
36 |         {
37 |             last_ACK = 0;
38 |             total_ACK = packet_total;
39 |             memset(isACKed, 0, sizeof(bool) * packet_total + 1);
40 |         }
41 | };
42 | 
43 | #endif


--------------------------------------------------------------------------------
/docs/benchmark.md:
--------------------------------------------------------------------------------
 1 | # Benchmark
 2 | 
 3 | ## Requirment - Run with Signal Switch
 4 | 
 5 | In this experiment, 2 physical workers, 1 physical PS (Parameter Server) and 1 switch is used.
 6 | 
 7 | 
 8 | ## Getting Started
 9 | ```
10 | $ git clone https://github.com/ATP-NSDI/ATP.git
11 | ```
12 | 
13 | ### Run Tofino Switch 
14 | 
15 | #### Compile P4 Program and Start the Tofino Model (Terminal1)
16 | If you are using physical switch, compile the switch program then jump to Terminal 2 directly.
17 | ```
18 | $ cd $SDE
19 | ```
20 | ```
21 | $ $TOOLS/p4_build.sh ~/git/p4ml/p4src/p4ml.p4
22 | ```
23 | ```
24 | # (Optional) for software Tofino behavior model
25 | $ ./run_tofino_model.sh -p p4ml
26 | ```
27 | #### Load Specified Switch Program (Terminal2)
28 | ```
29 | $ cd $SDE
30 | ```
31 | ```
32 | $ ./run_switchd.sh -p p4ml
33 | ```
34 | #### Enable Ports and Install Entries (Terminal3)
35 | ```
36 | $ $SDE/run_p4_tests.sh -t $ATP_REPO/ptf/ -p p4ml 
37 | ```
38 | ```
39 | $ $TOOLS/run_pd_rpc.py -p p4ml $ATP_REPO/run_pd_rpc/setup.py 
40 | ```
41 | 
42 | ### Run Parameter Server 
43 | #### Compile and Run Server (Terminal4)
44 | ```
45 | $ cd $ATP_REPO/server/
46 | ```
47 | ```
48 | $ make
49 | ```
50 | ```
51 | # Usage: ./app [AppID]
52 | sudo ./app 1
53 | ```
54 | 
55 | wait until all threads finish their QP creation.
56 | 
57 | ### Compile and Run Workers
58 | ```
59 | $ cd $ATP_REPO/client/
60 | ```
61 | ```
62 | $ make
63 | ```
64 | #### Run Worker1 (Terminal5)
65 | ```
66 | # Usage: ./app [MyID] [Num of Worker] [AppID] [Num of PS]
67 | $ sudo ./app 0 2 1 1
68 | ```
69 | #### Run Worker2 (Terminal6)
70 | ```
71 | # Usage: ./app [MyID] [Num of Worker] [AppID] [Num of PS]
72 | $ sudo ./app 1 2 1 1
73 | ```
74 | 
75 | Then you can switch to Terminal 5/6 to the see bandwidth report.
76 | 


--------------------------------------------------------------------------------
/p4src/includes/actions.p4:
--------------------------------------------------------------------------------
  1 | action processentry1() {
  2 |     write_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
  3 | }
  4 | 
  5 | action noequ0_processentry1() {
  6 |     noequ0_write_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
  7 | }
  8 | 
  9 | action processentry1andWriteToPacket() {
 10 |     write_read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
 11 | }
 12 | 
 13 | action noequ0_processentry1andWriteToPacket() {
 14 |     noequ0_write_read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
 15 | }
 16 | 
 17 | action do_cleanEntry1() {
 18 |     clean_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
 19 | }
 20 | 
 21 | action entry1WriteToPacket() {
 22 |     read_data_entry1.execute_stateful_alu(p4ml_agtr_index.agtr);
 23 | }
 24 | 
 25 | action processentry2() {
 26 |     write_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 27 | }
 28 | 
 29 | action noequ0_processentry2() {
 30 |     noequ0_write_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 31 | }
 32 | 
 33 | action processentry2andWriteToPacket() {
 34 |     write_read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 35 | }
 36 | 
 37 | action noequ0_processentry2andWriteToPacket() {
 38 |     noequ0_write_read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 39 | }
 40 | 
 41 | action do_cleanEntry2() {
 42 |     clean_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 43 | }
 44 | 
 45 | action entry2WriteToPacket() {
 46 |     read_data_entry2.execute_stateful_alu(p4ml_agtr_index.agtr);
 47 | }
 48 | 
 49 | action processentry3() {
 50 |     write_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 51 | }
 52 | 
 53 | action noequ0_processentry3() {
 54 |     noequ0_write_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 55 | }
 56 | 
 57 | action processentry3andWriteToPacket() {
 58 |     write_read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 59 | }
 60 | 
 61 | action noequ0_processentry3andWriteToPacket() {
 62 |     noequ0_write_read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 63 | }
 64 | 
 65 | action do_cleanEntry3() {
 66 |     clean_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 67 | }
 68 | 
 69 | action entry3WriteToPacket() {
 70 |     read_data_entry3.execute_stateful_alu(p4ml_agtr_index.agtr);
 71 | }
 72 | 
 73 | action processentry4() {
 74 |     write_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 75 | }
 76 | 
 77 | action noequ0_processentry4() {
 78 |     noequ0_write_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 79 | }
 80 | 
 81 | action processentry4andWriteToPacket() {
 82 |     write_read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 83 | }
 84 | 
 85 | action noequ0_processentry4andWriteToPacket() {
 86 |     noequ0_write_read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 87 | }
 88 | 
 89 | action do_cleanEntry4() {
 90 |     clean_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 91 | }
 92 | 
 93 | action entry4WriteToPacket() {
 94 |     read_data_entry4.execute_stateful_alu(p4ml_agtr_index.agtr);
 95 | }
 96 | 
 97 | action processentry5() {
 98 |     write_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
 99 | }
100 | 
101 | action noequ0_processentry5() {
102 |     noequ0_write_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
103 | }
104 | 
105 | action processentry5andWriteToPacket() {
106 |     write_read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
107 | }
108 | 
109 | action noequ0_processentry5andWriteToPacket() {
110 |     noequ0_write_read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
111 | }
112 | 
113 | action do_cleanEntry5() {
114 |     clean_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
115 | }
116 | 
117 | action entry5WriteToPacket() {
118 |     read_data_entry5.execute_stateful_alu(p4ml_agtr_index.agtr);
119 | }
120 | 
121 | action processentry6() {
122 |     write_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
123 | }
124 | 
125 | action noequ0_processentry6() {
126 |     noequ0_write_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
127 | }
128 | 
129 | action processentry6andWriteToPacket() {
130 |     write_read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
131 | }
132 | 
133 | action noequ0_processentry6andWriteToPacket() {
134 |     noequ0_write_read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
135 | }
136 | 
137 | action do_cleanEntry6() {
138 |     clean_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
139 | }
140 | 
141 | action entry6WriteToPacket() {
142 |     read_data_entry6.execute_stateful_alu(p4ml_agtr_index.agtr);
143 | }
144 | 
145 | action processentry7() {
146 |     write_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
147 | }
148 | 
149 | action noequ0_processentry7() {
150 |     noequ0_write_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
151 | }
152 | 
153 | action processentry7andWriteToPacket() {
154 |     write_read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
155 | }
156 | 
157 | action noequ0_processentry7andWriteToPacket() {
158 |     noequ0_write_read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
159 | }
160 | 
161 | action do_cleanEntry7() {
162 |     clean_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
163 | }
164 | 
165 | action entry7WriteToPacket() {
166 |     read_data_entry7.execute_stateful_alu(p4ml_agtr_index.agtr);
167 | }
168 | 
169 | action processentry8() {
170 |     write_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
171 | }
172 | 
173 | action noequ0_processentry8() {
174 |     noequ0_write_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
175 | }
176 | 
177 | action processentry8andWriteToPacket() {
178 |     write_read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
179 | }
180 | 
181 | action noequ0_processentry8andWriteToPacket() {
182 |     noequ0_write_read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
183 | }
184 | 
185 | action do_cleanEntry8() {
186 |     clean_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
187 | }
188 | 
189 | action entry8WriteToPacket() {
190 |     read_data_entry8.execute_stateful_alu(p4ml_agtr_index.agtr);
191 | }
192 | 
193 | action processentry9() {
194 |     write_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
195 | }
196 | 
197 | action noequ0_processentry9() {
198 |     noequ0_write_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
199 | }
200 | 
201 | action processentry9andWriteToPacket() {
202 |     write_read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
203 | }
204 | 
205 | action noequ0_processentry9andWriteToPacket() {
206 |     noequ0_write_read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
207 | }
208 | 
209 | action do_cleanEntry9() {
210 |     clean_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
211 | }
212 | 
213 | action entry9WriteToPacket() {
214 |     read_data_entry9.execute_stateful_alu(p4ml_agtr_index.agtr);
215 | }
216 | 
217 | action processentry10() {
218 |     write_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
219 | }
220 | 
221 | action noequ0_processentry10() {
222 |     noequ0_write_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
223 | }
224 | 
225 | action processentry10andWriteToPacket() {
226 |     write_read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
227 | }
228 | 
229 | action noequ0_processentry10andWriteToPacket() {
230 |     noequ0_write_read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
231 | }
232 | 
233 | action do_cleanEntry10() {
234 |     clean_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
235 | }
236 | 
237 | action entry10WriteToPacket() {
238 |     read_data_entry10.execute_stateful_alu(p4ml_agtr_index.agtr);
239 | }
240 | 
241 | action processentry11() {
242 |     write_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
243 | }
244 | 
245 | action noequ0_processentry11() {
246 |     noequ0_write_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
247 | }
248 | 
249 | action processentry11andWriteToPacket() {
250 |     write_read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
251 | }
252 | 
253 | action noequ0_processentry11andWriteToPacket() {
254 |     noequ0_write_read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
255 | }
256 | 
257 | action do_cleanEntry11() {
258 |     clean_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
259 | }
260 | 
261 | action entry11WriteToPacket() {
262 |     read_data_entry11.execute_stateful_alu(p4ml_agtr_index.agtr);
263 | }
264 | 
265 | action processentry12() {
266 |     write_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
267 | }
268 | 
269 | action noequ0_processentry12() {
270 |     noequ0_write_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
271 | }
272 | 
273 | action processentry12andWriteToPacket() {
274 |     write_read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
275 | }
276 | 
277 | action noequ0_processentry12andWriteToPacket() {
278 |     noequ0_write_read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
279 | }
280 | 
281 | action do_cleanEntry12() {
282 |     clean_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
283 | }
284 | 
285 | action entry12WriteToPacket() {
286 |     read_data_entry12.execute_stateful_alu(p4ml_agtr_index.agtr);
287 | }
288 | 
289 | action processentry13() {
290 |     write_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
291 | }
292 | 
293 | action noequ0_processentry13() {
294 |     noequ0_write_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
295 | }
296 | 
297 | action processentry13andWriteToPacket() {
298 |     write_read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
299 | }
300 | 
301 | action noequ0_processentry13andWriteToPacket() {
302 |     noequ0_write_read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
303 | }
304 | 
305 | action do_cleanEntry13() {
306 |     clean_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
307 | }
308 | 
309 | action entry13WriteToPacket() {
310 |     read_data_entry13.execute_stateful_alu(p4ml_agtr_index.agtr);
311 | }
312 | 
313 | action processentry14() {
314 |     write_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
315 | }
316 | 
317 | action noequ0_processentry14() {
318 |     noequ0_write_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
319 | }
320 | 
321 | action processentry14andWriteToPacket() {
322 |     write_read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
323 | }
324 | 
325 | action noequ0_processentry14andWriteToPacket() {
326 |     noequ0_write_read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
327 | }
328 | 
329 | action do_cleanEntry14() {
330 |     clean_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
331 | }
332 | 
333 | action entry14WriteToPacket() {
334 |     read_data_entry14.execute_stateful_alu(p4ml_agtr_index.agtr);
335 | }
336 | 
337 | action processentry15() {
338 |     write_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
339 | }
340 | 
341 | action noequ0_processentry15() {
342 |     noequ0_write_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
343 | }
344 | 
345 | action processentry15andWriteToPacket() {
346 |     write_read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
347 | }
348 | 
349 | action noequ0_processentry15andWriteToPacket() {
350 |     noequ0_write_read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
351 | }
352 | 
353 | action do_cleanEntry15() {
354 |     clean_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
355 | }
356 | 
357 | action entry15WriteToPacket() {
358 |     read_data_entry15.execute_stateful_alu(p4ml_agtr_index.agtr);
359 | }
360 | 
361 | action processentry16() {
362 |     write_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
363 | }
364 | 
365 | action noequ0_processentry16() {
366 |     noequ0_write_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
367 | }
368 | 
369 | action processentry16andWriteToPacket() {
370 |     write_read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
371 | }
372 | 
373 | action noequ0_processentry16andWriteToPacket() {
374 |     noequ0_write_read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
375 | }
376 | 
377 | action do_cleanEntry16() {
378 |     clean_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
379 | }
380 | 
381 | action entry16WriteToPacket() {
382 |     read_data_entry16.execute_stateful_alu(p4ml_agtr_index.agtr);
383 | }
384 | 
385 | action processentry17() {
386 |     write_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
387 | }
388 | 
389 | action noequ0_processentry17() {
390 |     noequ0_write_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
391 | }
392 | 
393 | action processentry17andWriteToPacket() {
394 |     write_read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
395 | }
396 | 
397 | action noequ0_processentry17andWriteToPacket() {
398 |     noequ0_write_read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
399 | }
400 | 
401 | action do_cleanEntry17() {
402 |     clean_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
403 | }
404 | 
405 | action entry17WriteToPacket() {
406 |     read_data_entry17.execute_stateful_alu(p4ml_agtr_index.agtr);
407 | }
408 | 
409 | action processentry18() {
410 |     write_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
411 | }
412 | 
413 | action noequ0_processentry18() {
414 |     noequ0_write_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
415 | }
416 | 
417 | action processentry18andWriteToPacket() {
418 |     write_read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
419 | }
420 | 
421 | action noequ0_processentry18andWriteToPacket() {
422 |     noequ0_write_read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
423 | }
424 | 
425 | action do_cleanEntry18() {
426 |     clean_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
427 | }
428 | 
429 | action entry18WriteToPacket() {
430 |     read_data_entry18.execute_stateful_alu(p4ml_agtr_index.agtr);
431 | }
432 | 
433 | action processentry19() {
434 |     write_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
435 | }
436 | 
437 | action noequ0_processentry19() {
438 |     noequ0_write_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
439 | }
440 | 
441 | action processentry19andWriteToPacket() {
442 |     write_read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
443 | }
444 | 
445 | action noequ0_processentry19andWriteToPacket() {
446 |     noequ0_write_read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
447 | }
448 | 
449 | action do_cleanEntry19() {
450 |     clean_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
451 | }
452 | 
453 | action entry19WriteToPacket() {
454 |     read_data_entry19.execute_stateful_alu(p4ml_agtr_index.agtr);
455 | }
456 | 
457 | action processentry20() {
458 |     write_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
459 | }
460 | 
461 | action noequ0_processentry20() {
462 |     noequ0_write_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
463 | }
464 | 
465 | action processentry20andWriteToPacket() {
466 |     write_read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
467 | }
468 | 
469 | action noequ0_processentry20andWriteToPacket() {
470 |     noequ0_write_read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
471 | }
472 | 
473 | action do_cleanEntry20() {
474 |     clean_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
475 | }
476 | 
477 | action entry20WriteToPacket() {
478 |     read_data_entry20.execute_stateful_alu(p4ml_agtr_index.agtr);
479 | }
480 | 
481 | action processentry21() {
482 |     write_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
483 | }
484 | 
485 | action noequ0_processentry21() {
486 |     noequ0_write_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
487 | }
488 | 
489 | action processentry21andWriteToPacket() {
490 |     write_read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
491 | }
492 | 
493 | action noequ0_processentry21andWriteToPacket() {
494 |     noequ0_write_read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
495 | }
496 | 
497 | action do_cleanEntry21() {
498 |     clean_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
499 | }
500 | 
501 | action entry21WriteToPacket() {
502 |     read_data_entry21.execute_stateful_alu(p4ml_agtr_index.agtr);
503 | }
504 | 
505 | action processentry22() {
506 |     write_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
507 | }
508 | 
509 | action noequ0_processentry22() {
510 |     noequ0_write_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
511 | }
512 | 
513 | action processentry22andWriteToPacket() {
514 |     write_read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
515 | }
516 | 
517 | action noequ0_processentry22andWriteToPacket() {
518 |     noequ0_write_read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
519 | }
520 | 
521 | action do_cleanEntry22() {
522 |     clean_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
523 | }
524 | 
525 | action entry22WriteToPacket() {
526 |     read_data_entry22.execute_stateful_alu(p4ml_agtr_index.agtr);
527 | }
528 | 
529 | action processentry23() {
530 |     write_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
531 | }
532 | 
533 | action noequ0_processentry23() {
534 |     noequ0_write_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
535 | }
536 | 
537 | action processentry23andWriteToPacket() {
538 |     write_read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
539 | }
540 | 
541 | action noequ0_processentry23andWriteToPacket() {
542 |     noequ0_write_read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
543 | }
544 | 
545 | action do_cleanEntry23() {
546 |     clean_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
547 | }
548 | 
549 | action entry23WriteToPacket() {
550 |     read_data_entry23.execute_stateful_alu(p4ml_agtr_index.agtr);
551 | }
552 | 
553 | action processentry24() {
554 |     write_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
555 | }
556 | 
557 | action noequ0_processentry24() {
558 |     noequ0_write_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
559 | }
560 | 
561 | action processentry24andWriteToPacket() {
562 |     write_read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
563 | }
564 | 
565 | action noequ0_processentry24andWriteToPacket() {
566 |     noequ0_write_read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
567 | }
568 | 
569 | action do_cleanEntry24() {
570 |     clean_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
571 | }
572 | 
573 | action entry24WriteToPacket() {
574 |     read_data_entry24.execute_stateful_alu(p4ml_agtr_index.agtr);
575 | }
576 | 
577 | action processentry25() {
578 |     write_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
579 | }
580 | 
581 | action noequ0_processentry25() {
582 |     noequ0_write_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
583 | }
584 | 
585 | action processentry25andWriteToPacket() {
586 |     write_read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
587 | }
588 | 
589 | action noequ0_processentry25andWriteToPacket() {
590 |     noequ0_write_read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
591 | }
592 | 
593 | action do_cleanEntry25() {
594 |     clean_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
595 | }
596 | 
597 | action entry25WriteToPacket() {
598 |     read_data_entry25.execute_stateful_alu(p4ml_agtr_index.agtr);
599 | }
600 | 
601 | action processentry26() {
602 |     write_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
603 | }
604 | 
605 | action noequ0_processentry26() {
606 |     noequ0_write_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
607 | }
608 | 
609 | action processentry26andWriteToPacket() {
610 |     write_read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
611 | }
612 | 
613 | action noequ0_processentry26andWriteToPacket() {
614 |     noequ0_write_read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
615 | }
616 | 
617 | action do_cleanEntry26() {
618 |     clean_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
619 | }
620 | 
621 | action entry26WriteToPacket() {
622 |     read_data_entry26.execute_stateful_alu(p4ml_agtr_index.agtr);
623 | }
624 | 
625 | action processentry27() {
626 |     write_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
627 | }
628 | 
629 | action noequ0_processentry27() {
630 |     noequ0_write_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
631 | }
632 | 
633 | action processentry27andWriteToPacket() {
634 |     write_read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
635 | }
636 | 
637 | action noequ0_processentry27andWriteToPacket() {
638 |     noequ0_write_read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
639 | }
640 | 
641 | action do_cleanEntry27() {
642 |     clean_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
643 | }
644 | 
645 | action entry27WriteToPacket() {
646 |     read_data_entry27.execute_stateful_alu(p4ml_agtr_index.agtr);
647 | }
648 | 
649 | action processentry28() {
650 |     write_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
651 | }
652 | 
653 | action noequ0_processentry28() {
654 |     noequ0_write_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
655 | }
656 | 
657 | action processentry28andWriteToPacket() {
658 |     write_read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
659 | }
660 | 
661 | action noequ0_processentry28andWriteToPacket() {
662 |     noequ0_write_read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
663 | }
664 | 
665 | action do_cleanEntry28() {
666 |     clean_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
667 | }
668 | 
669 | action entry28WriteToPacket() {
670 |     read_data_entry28.execute_stateful_alu(p4ml_agtr_index.agtr);
671 | }
672 | 
673 | action processentry29() {
674 |     write_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
675 | }
676 | 
677 | action noequ0_processentry29() {
678 |     noequ0_write_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
679 | }
680 | 
681 | action processentry29andWriteToPacket() {
682 |     write_read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
683 | }
684 | 
685 | action noequ0_processentry29andWriteToPacket() {
686 |     noequ0_write_read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
687 | }
688 | 
689 | action do_cleanEntry29() {
690 |     clean_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
691 | }
692 | 
693 | action entry29WriteToPacket() {
694 |     read_data_entry29.execute_stateful_alu(p4ml_agtr_index.agtr);
695 | }
696 | 
697 | action processentry30() {
698 |     write_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
699 | }
700 | 
701 | action noequ0_processentry30() {
702 |     noequ0_write_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
703 | }
704 | 
705 | action processentry30andWriteToPacket() {
706 |     write_read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
707 | }
708 | 
709 | action noequ0_processentry30andWriteToPacket() {
710 |     noequ0_write_read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
711 | }
712 | 
713 | action do_cleanEntry30() {
714 |     clean_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
715 | }
716 | 
717 | action entry30WriteToPacket() {
718 |     read_data_entry30.execute_stateful_alu(p4ml_agtr_index.agtr);
719 | }
720 | 
721 | action processentry31() {
722 |     write_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
723 | }
724 | 
725 | action noequ0_processentry31() {
726 |     noequ0_write_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
727 | }
728 | 
729 | action processentry31andWriteToPacket() {
730 |     write_read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
731 | }
732 | 
733 | action noequ0_processentry31andWriteToPacket() {
734 |     noequ0_write_read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
735 | }
736 | 
737 | action do_cleanEntry31() {
738 |     clean_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
739 | }
740 | 
741 | action entry31WriteToPacket() {
742 |     read_data_entry31.execute_stateful_alu(p4ml_agtr_index.agtr);
743 | }
744 | 
745 | //action processentry32() {
746 | //    write_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
747 | //}
748 | 
749 | //actionoequ0_n processentry32() {
750 | //  noequ0_  write_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
751 | //}
752 | //
753 | //action processentry32andWriteToPacket() {
754 | //    write_read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
755 | //}
756 | 
757 | //actionoequ0_n processentry32andWriteToPacket() {
758 | //  noequ0_  write_read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
759 | //}
760 | 
761 | //action do_cleanEntryry32() {
762 | //    clean_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
763 | 
764 | 
765 | //action entry32WriteToPacket() {
766 | //    read_data_entry32.execute_stateful_alu(p4ml_agtr_index.agtr);
767 | //}
768 | //
769 | 


--------------------------------------------------------------------------------
/p4src/includes/common.p4:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * P4PS
  3 |  * /
  4 | 
  5 | /*************************************************************************
  6 |  ***********************  R E G I S T E R  *******************************
  7 |  *************************************************************************/
  8 | 
  9 | blackbox stateful_alu cleaning_agtr_time {
 10 |     reg: agtr_time;
 11 | 
 12 |     update_lo_1_value   :  0;
 13 | }
 14 | 
 15 | blackbox stateful_alu cleaning_ecn {
 16 |     reg: ecn_register;
 17 | 
 18 |     update_lo_1_value   :  0;
 19 | }
 20 | 
 21 | blackbox stateful_alu cleaning_bitmap {
 22 |     reg: bitmap;
 23 | 
 24 |     update_lo_1_value   :  0;
 25 | }
 26 | 
 27 | blackbox stateful_alu read_write_bitmap {
 28 |     reg: bitmap;
 29 | 
 30 |     output_dst            :  mdata.bitmap;
 31 | 
 32 |     output_value          :  register_lo;
 33 | 
 34 |     update_lo_1_value     :  register_lo | p4ml.bitmap;
 35 | }
 36 | 
 37 | blackbox stateful_alu read_write_bitmap_resend {
 38 |     reg: bitmap;
 39 | 
 40 |     output_dst            :  mdata.bitmap;
 41 | 
 42 |     output_value          :  register_lo;
 43 | 
 44 |     update_lo_1_value     :  0;
 45 | }
 46 | 
 47 | // if same application, output appID, if not, not output (zero)
 48 | blackbox stateful_alu check_app_id_and_seq {
 49 |     reg: appID_and_Seq;
 50 | 
 51 |     condition_lo           :  p4ml.appIDandSeqNum == register_lo;
 52 |     // The agtr is empty
 53 |     condition_hi           :  register_lo == 0;
 54 | 
 55 |     update_lo_1_predicate  :  condition_lo or condition_hi;
 56 |     update_lo_1_value      :  p4ml.appIDandSeqNum;
 57 | 
 58 |     output_predicate       :  condition_lo or condition_hi;
 59 |     output_dst             :  mdata.isMyAppIDandMyCurrentSeq;
 60 |     output_value           :  p4ml.appIDandSeqNum;
 61 | }
 62 | 
 63 | blackbox stateful_alu check_app_id_and_seq_resend {
 64 |     reg: appID_and_Seq;
 65 | 
 66 |     condition_lo           :  p4ml.appIDandSeqNum == register_lo;
 67 | 
 68 |     update_lo_1_predicate  :  condition_lo;
 69 |     update_lo_1_value      :  0;
 70 | 
 71 |     output_predicate       :  condition_lo;
 72 |     output_dst             :  mdata.isMyAppIDandMyCurrentSeq;
 73 |     output_value           :  register_lo;
 74 | }
 75 | 
 76 | blackbox stateful_alu clean_app_id_and_seq {
 77 |     reg: appID_and_Seq;
 78 | 
 79 |     condition_lo           :  p4ml.appIDandSeqNum == register_lo;
 80 | 
 81 |     update_lo_1_predicate  :  condition_lo;
 82 |     update_lo_1_value      :  0;
 83 | 
 84 |     output_predicate       :  condition_lo;
 85 |     output_dst             :  mdata.isMyAppIDandMyCurrentSeq;
 86 |     output_value           :  p4ml.appIDandSeqNum;
 87 | }
 88 | 
 89 | blackbox stateful_alu check_agtrTime {
 90 |     reg: agtr_time;
 91 | 
 92 |     condition_lo           :  mdata.isAggregate != 0;
 93 |     output_dst             :  mdata.current_agtr_time; 
 94 | 
 95 |     update_lo_1_predicate  :  condition_lo;
 96 |     update_lo_1_value      :  register_lo + 1;
 97 | 
 98 |     update_lo_2_predicate  :  not condition_lo;
 99 |     update_lo_2_value      :  register_lo;
100 | 
101 |     output_value           :  alu_lo;
102 | }
103 | 
104 | blackbox stateful_alu check_resend_agtrTime {
105 |     reg: agtr_time;
106 | 
107 |     condition_lo           :  mdata.isAggregate != 0;
108 |     // fake, force forward
109 |     output_dst             :  mdata.current_agtr_time; 
110 | 
111 |     update_lo_1_predicate  :  condition_lo;
112 |     update_lo_1_value      :  0;
113 | 
114 |     update_lo_2_predicate  :  not condition_lo;
115 |     update_lo_2_value      :  0;
116 | 
117 |     output_value           :  p4ml.agtr_time;
118 | }
119 | 
120 | blackbox stateful_alu do_comp_qdepth {
121 |     reg: dqueue_alert_threshold;
122 | 
123 |     condition_lo           :  eg_intr_md.deq_qdepth >= register_lo;
124 |     // fake, force forward
125 |     output_predicate       :  condition_lo;
126 |     output_dst             :  mdata.qdepth; 
127 |     output_value           :  eg_intr_md.deq_qdepth;
128 |     initial_register_lo_value : 1000;
129 | }
130 | 
131 | blackbox stateful_alu do_check_ecn {
132 |     reg: ecn_register;
133 | 
134 |  	condition_lo		   :  register_lo == 1; 
135 |   
136 |     update_lo_1_value      :  register_lo | mdata.is_ecn;
137 | 
138 | 	output_predicate 	   :  condition_lo;
139 |     output_value           :  mdata.value_one;
140 |     output_dst             :  p4ml.ECN;
141 | }
142 | 
143 | /*************************************************************************
144 |  **************  I N G R E S S   P R O C E S S I N G   *******************
145 |  *************************************************************************/
146 | 
147 | /*
148 |  * Actions 
149 |  */
150 | 
151 | action process_bitmap() {
152 |     read_write_bitmap.execute_stateful_alu(p4ml_agtr_index.agtr);
153 | }
154 | 
155 | action process_bitmap_resend() {
156 |     read_write_bitmap_resend.execute_stateful_alu(p4ml_agtr_index.agtr);
157 | }
158 | 
159 | 
160 | action check_aggregate_and_forward() {
161 |     // this is is for aggregation needed checking
162 |     bit_andcb(mdata.isAggregate, p4ml.bitmap, mdata.bitmap);
163 |     bit_or(mdata.integrated_bitmap, p4ml.bitmap, mdata.bitmap);
164 | }
165 | 
166 | action clean_agtr_time() {
167 |     cleaning_agtr_time.execute_stateful_alu(p4ml_agtr_index.agtr);
168 | }
169 | 
170 | action clean_ecn() {
171 |     cleaning_ecn.execute_stateful_alu(p4ml_agtr_index.agtr);
172 | }
173 | 
174 | action clean_bitmap() {
175 |     cleaning_bitmap.execute_stateful_alu(p4ml_agtr_index.agtr);
176 | }
177 | 
178 | action multicast(group) {
179 |     modify_field(ig_intr_md_for_tm.mcast_grp_a, group);
180 | }
181 | 
182 | action check_appID_and_seq() {
183 |     check_app_id_and_seq.execute_stateful_alu(p4ml_agtr_index.agtr);
184 |     //modify_field(mdata.qdepth, 0);   
185 | }
186 | 
187 | action check_appID_and_seq_resend() {
188 |     check_app_id_and_seq_resend.execute_stateful_alu(p4ml_agtr_index.agtr);
189 |  //   modify_field(mdata.qdepth, 0);   
190 | }
191 | 
192 | action clean_appID_and_seq() {
193 |     clean_app_id_and_seq.execute_stateful_alu(p4ml_agtr_index.agtr);
194 | }
195 | 
196 | action check_agtr_time() {
197 |     check_agtrTime.execute_stateful_alu(p4ml_agtr_index.agtr);
198 | }
199 | 
200 | action check_resend_agtr_time() {
201 |     check_resend_agtrTime.execute_stateful_alu(p4ml_agtr_index.agtr);
202 | }
203 | 
204 | action modify_packet_bitmap() {
205 |     modify_field(p4ml.bitmap, mdata.integrated_bitmap);
206 | }
207 | 
208 | action do_qdepth() {
209 |     do_comp_qdepth.execute_stateful_alu(0);
210 | }
211 | 
212 | action modify_ecn() {
213 |     modify_field(p4ml.ECN, 1);
214 | }
215 | 
216 | action mark_ecn() {
217 |     bit_or(mdata.is_ecn, mdata.qdepth, mdata.is_ecn);
218 | }
219 | 
220 | action modify_ipv4_ecn() {
221 |     modify_field(ipv4.ecn, 3);
222 | }
223 | 
224 | action check_ecn() {
225 |     do_check_ecn.execute_stateful_alu(p4ml_agtr_index.agtr);
226 | }
227 | 
228 | action setup_ecn() {
229 |     modify_field(mdata.is_ecn, 1);    
230 | }
231 | 
232 | action tag_collision_incoming() {
233 |     modify_field(p4ml.isSWCollision, 1);
234 |     // modify_field(p4ml.bitmap, mdata.isMyAppIDandMyCurrentSeq);
235 | }
236 | 
237 | action set_egr(egress_spec) {
238 |     modify_field(ig_intr_md_for_tm.ucast_egress_port, egress_spec);
239 |     // increase_p4ml_counter.execute_stateful_alu(ig_intr_md.ingress_port);
240 | }
241 | 
242 | action set_egr_and_set_index(egress_spec) {
243 |     modify_field(ig_intr_md_for_tm.ucast_egress_port, egress_spec);
244 |     modify_field(p4ml.dataIndex, 1);
245 |     // increase_p4ml_counter.execute_stateful_alu(ig_intr_md.ingress_port);
246 | }
247 | 
248 | action nop()
249 | {
250 | }
251 | 
252 | action drop_pkt() {
253 |     drop();
254 | }
255 | 
256 | action increase_counter() {
257 |     increase_p4ml_counter.execute_stateful_alu(0);
258 | }
259 | 
260 | table bitmap_table {
261 |     actions {
262 |         process_bitmap;
263 |     }
264 |     default_action: process_bitmap();
265 |     size : 1;
266 | }
267 | 
268 | table bitmap_resend_table {
269 |     actions {
270 |         process_bitmap_resend;
271 |     }
272 |     default_action: process_bitmap_resend();
273 |     size : 1;
274 | }
275 | 
276 | table bitmap_aggregate_table {
277 |     actions {
278 |         check_aggregate_and_forward;
279 |     }
280 |     default_action: check_aggregate_and_forward();
281 |     size : 1;
282 | }
283 | 
284 | table agtr_time_table {
285 |     actions {
286 |         check_agtr_time;
287 |     }
288 |     default_action: check_agtr_time();
289 |     size : 1;
290 | }
291 | 
292 | table agtr_time_resend_table {
293 |     actions {
294 |         check_resend_agtr_time;
295 |     }
296 |     default_action: check_resend_agtr_time();
297 |     size : 1;
298 | }
299 | 
300 | table immd_outPort_table {
301 |     reads {
302 |         p4ml.appIDandSeqNum mask 0xFFFF0000: exact;
303 |     }
304 |     actions {
305 |         set_egr;
306 |     }
307 | }
308 | 
309 | table outPort_table {
310 |     reads {
311 |         p4ml.appIDandSeqNum mask 0xFFFF0000: exact;
312 |         ig_intr_md.ingress_port: exact;
313 |         p4ml.dataIndex: exact;
314 |         p4ml.PSIndex: exact;
315 |     }
316 |     actions {
317 | 		nop;
318 |         set_egr;
319 |         set_egr_and_set_index;
320 |         drop_pkt;
321 |     }
322 |     default_action: drop_pkt();
323 | }
324 | 
325 | table bg_outPort_table {
326 |     reads {
327 |         // useless here, just can't use default action for variable
328 |         p4ml_bg.isACK : exact;
329 |     }
330 |     actions {
331 |         set_egr;
332 | 		nop;
333 |     }
334 | }
335 | 
336 | table multicast_table {
337 |     reads {
338 |         p4ml.isACK: exact;
339 |         p4ml.appIDandSeqNum mask 0xFFFF0000: exact;
340 |         ig_intr_md.ingress_port: exact;
341 |         p4ml.dataIndex: exact;
342 |     }
343 |     actions {
344 |         multicast; drop_pkt; set_egr_and_set_index;
345 |     }
346 |     default_action: drop_pkt();
347 | }
348 | 
349 | @pragma stage 3
350 | table clean_agtr_time_table {
351 |     actions {
352 |         clean_agtr_time;
353 |     }
354 |     default_action: clean_agtr_time();
355 |     size : 1;
356 | }
357 | 
358 | table clean_ecn_table {
359 |     actions {
360 |         clean_ecn;
361 |     }
362 |     default_action: clean_ecn();
363 |     size : 1;
364 | }
365 | 
366 | 
367 | table clean_bitmap_table {
368 |     actions {
369 |         clean_bitmap;
370 |     }
371 |     default_action: clean_bitmap();
372 |     size : 1;
373 | }
374 | 
375 | /* Counter */
376 | register p4ml_counter {
377 |     width : 32;
378 |     instance_count :1;
379 | }
380 | 
381 | blackbox stateful_alu increase_p4ml_counter {
382 |     reg: p4ml_counter;
383 | 
384 |     update_lo_1_value   :  register_lo + 1 ;
385 | }
386 | 
387 | table forward_counter_table {
388 |         actions {
389 |         increase_counter;
390 |     }
391 |     default_action: increase_counter();
392 | }
393 | 
394 | table appID_and_seq_table {
395 |         actions {
396 |         check_appID_and_seq;
397 |     }
398 |     default_action: check_appID_and_seq();
399 | }
400 | 
401 | table appID_and_seq_resend_table {
402 |         actions {
403 |         check_appID_and_seq_resend;
404 |     }
405 |     default_action: check_appID_and_seq_resend();
406 | }
407 | 
408 | table clean_appID_and_seq_table {
409 |         actions {
410 |         clean_appID_and_seq;
411 |     }
412 |     default_action: clean_appID_and_seq();
413 | }
414 | 
415 | table modify_packet_bitmap_table {
416 |     reads {
417 |         p4ml.dataIndex: exact;
418 |     }
419 |         actions {
420 |         modify_packet_bitmap; nop;
421 |     }
422 |     default_action: nop();
423 | }
424 | 
425 | table qdepth_table {
426 |     actions {
427 |         do_qdepth;
428 |     }
429 |     default_action: do_qdepth();
430 | }
431 | 
432 | table modify_ecn_table {
433 |     actions {
434 |         modify_ecn;
435 |     }
436 |     default_action: modify_ecn();
437 | }
438 | 
439 | table mark_ecn_ipv4_table {
440 |     actions {
441 |         modify_ipv4_ecn;
442 |     }
443 |     default_action: modify_ipv4_ecn();
444 | }
445 | 
446 | table ecn_mark_table {
447 |     actions {
448 |         mark_ecn;
449 |     }
450 |     default_action: mark_ecn();
451 | }
452 | 
453 | table ecn_register_table {
454 |     actions {
455 |         check_ecn;
456 |     }
457 |     default_action: check_ecn();
458 | }
459 | 
460 | table setup_ecn_table {
461 |     actions {
462 |         setup_ecn;
463 |     }
464 |     default_action: setup_ecn();
465 | }
466 | 
467 | table forward {
468 |     reads {
469 |         ethernet.dstAddr : exact;
470 |     }
471 |     actions {
472 |         set_egr; nop; drop_pkt;
473 |     }
474 |     default_action: drop_pkt();
475 | }
476 | 
477 | table drop_table {
478 |     reads {
479 |         ig_intr_md.ingress_port: exact;
480 |         p4ml.dataIndex : exact;
481 |     }
482 |     actions {
483 |         drop_pkt; set_egr; set_egr_and_set_index;
484 |     }
485 |     default_action: drop_pkt();
486 | }
487 | 
488 | table tag_collision_incoming_table {
489 |     actions {
490 |         tag_collision_incoming;
491 |     }
492 |     default_action: tag_collision_incoming();
493 | }
494 | 


--------------------------------------------------------------------------------
/p4src/includes/headers.p4:
--------------------------------------------------------------------------------
  1 | #define MAX_ENTRIES_PER_PACKET 32
  2 | /*************************************************************************
  3 |  ***********************  H E A D E R S  *********************************
  4 |  *************************************************************************/
  5 | 
  6 | // 14Byte
  7 | header_type ethernet_t {
  8 |     fields {
  9 |         dstAddr   : 48;
 10 |         srcAddr   : 48;
 11 |         etherType : 16;
 12 |     }
 13 | }
 14 | 
 15 | // 20Byte
 16 | header_type ipv4_t {
 17 |     fields {
 18 |         version        : 4;
 19 |         ihl            : 4;
 20 |         dscp           : 6;
 21 |         ecn            : 2;
 22 |         totalLen       : 16;
 23 |         identification : 16;
 24 |         flags          : 3;
 25 |         fragOffset     : 13;
 26 |         ttl            : 8;
 27 |         protocol       : 8;
 28 |         hdrChecksum    : 16;
 29 |         srcAddr        : 32;
 30 |         dstAddr        : 32;
 31 |     }
 32 | }
 33 | 
 34 | header_type udp_t {
 35 |     fields {
 36 |         srcPort : 16;
 37 |         dstPort : 16;
 38 |         length_ : 16;
 39 |         checksum : 16;
 40 |     }
 41 | }
 42 | 
 43 | // 12Byte * 2
 44 | header_type p4ml_t {
 45 |     fields {
 46 |         bitmap         :  32;
 47 |         agtr_time      :  8;
 48 |         overflow       :  1;
 49 |         /* For multiple PS */
 50 |         PSIndex        :  2;
 51 |         /* For signle PS */
 52 |         // reserved       :  2;
 53 |         // isForceFoward  :  1;
 54 |         dataIndex      :  1; 
 55 |         ECN            :  1;
 56 |         isResend       :  1;
 57 |         isSWCollision  :  1;
 58 |         isACK          :  1;
 59 |         appIDandSeqNum :  32;  //in switchml.p4: this is used to find the bit location 
 60 |     }
 61 | }
 62 | 
 63 | header_type p4ml_agtr_index_t {
 64 | 	fields{
 65 | 	    agtr	:16;
 66 |     }
 67 | }
 68 | 
 69 | header_type bg_p4ml_t {
 70 |     fields {
 71 |         key            :  64;
 72 |         len_tensor     :  32;
 73 |         bitmap         :  32;
 74 |         agtr_time      :  8;
 75 |         reserved       :  4;
 76 |         ECN            :  1;
 77 |         isResend       :  1;
 78 |         isSWCollision  :  1;
 79 |         isACK          :  1;
 80 |         agtr           :  16;
 81 |         appIDandSeqNum :  32;  //in switchml.p4: this is used to find the bit location
 82 |     }
 83 | }
 84 | 
 85 | // 108Byte * 2
 86 | header_type entry_t {
 87 |     fields {
 88 |         data0         : 32 (signed);
 89 |         data1         : 32 (signed);
 90 |         data2         : 32 (signed);
 91 |         data3         : 32 (signed);
 92 |         data4         : 32 (signed);
 93 |         data5         : 32 (signed);
 94 |         data6         : 32 (signed);
 95 |         data7         : 32 (signed);
 96 |         data8         : 32 (signed);
 97 |         data9         : 32 (signed);
 98 |         data10        : 32 (signed);
 99 |         data11        : 32 (signed);
100 |         data12        : 32 (signed);
101 |         data13        : 32 (signed);
102 |         data14        : 32 (signed);
103 |         data15        : 32 (signed);
104 |         data16        : 32 (signed);
105 |         data17        : 32 (signed);
106 |         data18        : 32 (signed);
107 |         data19        : 32 (signed);
108 |         data20        : 32 (signed);
109 |         data21        : 32 (signed);
110 |         data22        : 32 (signed);
111 |         data23        : 32 (signed);
112 |         data24        : 32 (signed);
113 |         data25        : 32 (signed);
114 |         data26        : 32 (signed);
115 |         data27        : 32 (signed);
116 |         data28        : 32 (signed);
117 |         data29        : 32 (signed);
118 |         data30        : 32 (signed);
119 | //        data31        : 32 (signed);
120 |     }
121 | }
122 | 
123 | //12Byte * 2
124 | // header_type entry2_t {
125 | //     fields {
126 | //         data27        : 32 (signed);
127 | //         data28        : 32 (signed);
128 | //         data29        : 32 (signed);
129 | //         data30        : 32 (signed);
130 | //         data31        : 32 (signed);
131 | //     }
132 | // }
133 | 
134 | /*************************************************************************
135 |  ***********************  M E T A D A T A  *******************************
136 |  *************************************************************************/
137 | 
138 | header_type p4ml_meta_t {
139 |     fields {
140 |         // P4ML
141 |         isMyAppIDandMyCurrentSeq : 16;
142 |         bitmap                   : 32;
143 |         isAggregate              : 32;
144 |         agtr_time                : 8;
145 |         integrated_bitmap        : 32;
146 |         current_agtr_time        : 8;
147 |         agtr_index 	          	 : 32;
148 | 	    isDrop                   : 32; 
149 |         inside_appID_and_Seq     : 1;
150 |         value_one                : 1;
151 |         qdepth                   : 16;
152 |  	    seen_bitmap0		     : 8;
153 | 	    seen_isAggregate 	     : 8;
154 |         is_ecn                   : 32;
155 | 	}
156 | }
157 | 
158 | header_type p4ml_constant_t {
159 | 	fields{
160 |         bitmap		:32;
161 |         agtr_time	:8;
162 | 	}
163 | }
164 | 


--------------------------------------------------------------------------------
/p4src/includes/parser.p4:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | metadata p4ml_meta_t mdata;
  4 | metadata p4ml_constant_t p4ml_constant;
  5 | 
  6 | header ethernet_t ethernet;
  7 | header ipv4_t     ipv4;
  8 | header udp_t      udp;
  9 | header p4ml_agtr_index_t p4ml_agtr_index;
 10 | header p4ml_agtr_index_t p4ml_agtr_index_useless;
 11 | header p4ml_agtr_index_t p4ml_agtr_index_useless2;
 12 | 
 13 | header p4ml_t p4ml;
 14 | header entry_t p4ml_entries;
 15 | header entry_t p4ml_entries_useless;
 16 | 
 17 | header bg_p4ml_t     p4ml_bg;
 18 | // header blank3_t blank3;
 19 | /*************************************************************************
 20 |  ***********************  P A R S E R  ***********************************
 21 |  *************************************************************************/
 22 | 
 23 | parser start {
 24 |     extract(ethernet);
 25 |     set_metadata(mdata.value_one, 1);   
 26 |     return select(ethernet.etherType) {
 27 |         0x0700   : parse_ipv4;
 28 |         0x0800   : parse_rdma;
 29 |         0x0900   : parse_bg;
 30 |         default  : ingress;
 31 |     }
 32 |     // return parse_ipv4;
 33 | 
 34 | }
 35 | 
 36 | parser parse_ipv4 {
 37 |     extract(ipv4);
 38 |     return parse_p4ml;
 39 | }
 40 | 
 41 | parser parse_p4ml {
 42 |     extract(p4ml);
 43 |     return  select(p4ml.dataIndex) {
 44 |         0x0     : check_if_resubmit;
 45 |         0x1     : use_second_p4ml_agtr_index_recirculate;
 46 |         default : ingress;
 47 |     }
 48 | }
 49 | 
 50 | parser check_if_resubmit {
 51 |     return  select(ig_intr_md.resubmit_flag) {
 52 |         // 0x0     : parse_p4ml_agtr_index;
 53 |         0x0     : use_first_p4ml_agtr_index_recirculate;
 54 |         // 0x1     : skip_first_p4ml_agtr_index;
 55 |         0x1     : use_second_p4ml_agtr_index_recirculate;
 56 |         default : ingress;
 57 |     }
 58 | }
 59 | 
 60 | /// resubmit 0x0
 61 | 
 62 | parser parse_p4ml_agtr_index {
 63 | 	extract(p4ml_agtr_index);
 64 | 	return skip_second_p4ml_agtr_index; 
 65 | }
 66 | 
 67 | @pragma force_shift ingress 16 /* 2 bytes */
 68 | parser skip_second_p4ml_agtr_index {
 69 | 	return parse_entry;
 70 | }
 71 | 
 72 | parser parse_entry {
 73 | 	extract(p4ml_entries);
 74 | 	return ingress;
 75 | }
 76 | 
 77 | /// resubmit 0x1
 78 | 
 79 | parser parse_p4ml_agtr_index2 {
 80 | 	extract(p4ml_agtr_index);
 81 | 	return skip_header_c_0_31;
 82 | }
 83 | 
 84 | @pragma force_shift ingress 16 /* 2 bytes */
 85 | parser skip_first_p4ml_agtr_index {
 86 | 	return parse_p4ml_agtr_index2;
 87 | }
 88 | 
 89 | /// recirculate 2
 90 | 
 91 | parser use_second_p4ml_agtr_index_recirculate {
 92 | 	extract(p4ml_agtr_index_useless2);
 93 | 	return parse_p4ml_agtr_index_recirculate;
 94 | }
 95 | 
 96 | parser parse_p4ml_agtr_index_recirculate {
 97 | 	extract(p4ml_agtr_index);
 98 | 	return parse_entry2;
 99 | }
100 | 
101 | parser parse_entry2 {
102 | 	extract(p4ml_entries_useless);
103 | 	return parse_entry;
104 | }
105 | 
106 | /// recirculate 1
107 | 
108 | parser use_first_p4ml_agtr_index_recirculate {
109 | 	extract(p4ml_agtr_index);
110 | 	return useless_second_p4ml_agtr_index_recirculate;
111 | }
112 | 
113 | parser useless_second_p4ml_agtr_index_recirculate {
114 | 	extract(p4ml_agtr_index_useless);
115 | 	return parse_entry;
116 | }
117 | ///
118 |  
119 | @pragma force_shift ingress 256 /* 32 bytes */
120 | parser skip_header_c_0_31 {
121 |     return skip_header_c_32_63;
122 | }
123 | 
124 | @pragma force_shift ingress 256 /* 32 bytes */
125 | parser skip_header_c_32_63 {
126 |     return skip_header_c_64_95;
127 | }
128 | 
129 | @pragma force_shift ingress 256 /* 32 bytes */
130 | parser skip_header_c_64_95 {
131 |     return skip_header_c_96_127;
132 | }
133 | 
134 | @pragma force_shift ingress 256 /* 32 bytes */
135 | parser skip_header_c_96_127 {
136 |     return parse_entry;
137 | }
138 | 
139 | 
140 | // /* RDMA */
141 | parser parse_rdma {
142 |     extract(ipv4);
143 |     return ingress;
144 | }
145 | 
146 | // /* BG */
147 | parser parse_bg {
148 |     extract(ipv4);
149 |     return parse_udp_bg;
150 | }
151 | 
152 | parser parse_udp_bg {
153 |     extract(udp);
154 |     return parse_p4ml_bg;
155 | }
156 | 
157 | parser parse_p4ml_bg {
158 |     extract(p4ml_bg);
159 | //set_metadata(mdata.qdepth, 0);    
160 | //    return ingress; 
161 |    return ingress;
162 | }
163 | 


--------------------------------------------------------------------------------
/p4src/includes/tables.p4:
--------------------------------------------------------------------------------
   1 | @pragma stage 4
   2 | table processEntry1 {
   3 |     reads {
   4 |         mdata.bitmap : ternary;
   5 |     }
   6 |     actions {
   7 |         processentry1;
   8 |         noequ0_processentry1;
   9 |     }
  10 |     // default_action : noequ0_processentry1;
  11 |     size : 2;
  12 | }
  13 | 
  14 | @pragma stage 4
  15 | table noequ0_processEntry1 {
  16 |     actions {
  17 |         noequ0_processentry1;
  18 |     }
  19 |     default_action : noequ0_processentry1();
  20 |     size : 1;
  21 | }
  22 | 
  23 | @pragma stage 4
  24 | table Entry1WriteToPacket {
  25 |     actions {
  26 |         entry1WriteToPacket;
  27 |     }
  28 |     default_action : entry1WriteToPacket();
  29 |     size : 1;
  30 | }
  31 | 
  32 | @pragma stage 4
  33 | table processEntry1andWriteToPacket {
  34 |     actions {
  35 |         processentry1andWriteToPacket;
  36 |     }
  37 |     size : 1;
  38 | }
  39 | 
  40 | @pragma stage 4
  41 | table noequ0_processEntry1andWriteToPacket {
  42 |     actions {
  43 |         noequ0_processentry1andWriteToPacket;
  44 |     }
  45 |     size : 1;
  46 | }
  47 | 
  48 | @pragma stage 4
  49 | table cleanEntry1 {
  50 |     actions {
  51 |         do_cleanEntry1;
  52 |     }
  53 |     default_action : do_cleanEntry1();
  54 |     size : 1;
  55 | }
  56 | 
  57 | table processEntry2 {
  58 |     reads {
  59 |         mdata.bitmap : ternary;
  60 |     }
  61 |     actions {
  62 |         processentry2;
  63 |         noequ0_processentry2;
  64 |     }
  65 |     // default_action : noequ0_processentry2;
  66 |     size : 2;
  67 | }
  68 | 
  69 | table noequ0_processEntry2 {
  70 |     actions {
  71 |         noequ0_processentry2;
  72 |     }
  73 |     default_action : noequ0_processentry2();
  74 |     size : 1;
  75 | }
  76 | 
  77 | table Entry2WriteToPacket {
  78 |     actions {
  79 |         entry2WriteToPacket;
  80 |     }
  81 |     default_action : entry2WriteToPacket();
  82 |     size : 1;
  83 | }
  84 | 
  85 | table processEntry2andWriteToPacket {
  86 |     actions {
  87 |         processentry2andWriteToPacket;
  88 |     }
  89 |     size : 1;
  90 | }
  91 | 
  92 | table noequ0_processEntry2andWriteToPacket {
  93 |     actions {
  94 |         noequ0_processentry2andWriteToPacket;
  95 |     }
  96 |     size : 1;
  97 | }
  98 | 
  99 | table cleanEntry2 {
 100 |     actions {
 101 |         do_cleanEntry2;
 102 |     }
 103 |     default_action : do_cleanEntry2();
 104 |     size : 1;
 105 | }
 106 | 
 107 | table processEntry3 {
 108 |     reads {
 109 |         mdata.bitmap : ternary;
 110 |     }
 111 |     actions {
 112 |         processentry3;
 113 |         noequ0_processentry3;
 114 |     }
 115 |     // default_action : noequ0_processentry3;
 116 |     size : 2;
 117 | }
 118 | 
 119 | table noequ0_processEntry3 {
 120 |     actions {
 121 |         noequ0_processentry3;
 122 |     }
 123 |     default_action : noequ0_processentry3();
 124 |     size : 1;
 125 | }
 126 | 
 127 | table Entry3WriteToPacket {
 128 |     actions {
 129 |         entry3WriteToPacket;
 130 |     }
 131 |     default_action : entry3WriteToPacket();
 132 |     size : 1;
 133 | }
 134 | 
 135 | table processEntry3andWriteToPacket {
 136 |     actions {
 137 |         processentry3andWriteToPacket;
 138 |     }
 139 |     size : 1;
 140 | }
 141 | 
 142 | table noequ0_processEntry3andWriteToPacket {
 143 |     actions {
 144 |         noequ0_processentry3andWriteToPacket;
 145 |     }
 146 |     size : 1;
 147 | }
 148 | 
 149 | table cleanEntry3 {
 150 |     actions {
 151 |         do_cleanEntry3;
 152 |     }
 153 |     default_action : do_cleanEntry3();
 154 |     size : 1;
 155 | }
 156 | 
 157 | table processEntry4 {
 158 |     reads {
 159 |         mdata.bitmap : ternary;
 160 |     }
 161 |     actions {
 162 |         processentry4;
 163 |         noequ0_processentry4;
 164 |     }
 165 |     // default_action : noequ0_processentry4;
 166 |     size : 2;
 167 | }
 168 | 
 169 | table noequ0_processEntry4 {
 170 |     actions {
 171 |         noequ0_processentry4;
 172 |     }
 173 |     default_action : noequ0_processentry4();
 174 |     size : 1;
 175 | }
 176 | 
 177 | table Entry4WriteToPacket {
 178 |     actions {
 179 |         entry4WriteToPacket;
 180 |     }
 181 |     default_action : entry4WriteToPacket();
 182 |     size : 1;
 183 | }
 184 | 
 185 | table processEntry4andWriteToPacket {
 186 |     actions {
 187 |         processentry4andWriteToPacket;
 188 |     }
 189 |     size : 1;
 190 | }
 191 | 
 192 | table noequ0_processEntry4andWriteToPacket {
 193 |     actions {
 194 |         noequ0_processentry4andWriteToPacket;
 195 |     }
 196 |     size : 1;
 197 | }
 198 | 
 199 | table cleanEntry4 {
 200 |     actions {
 201 |         do_cleanEntry4;
 202 |     }
 203 |     default_action : do_cleanEntry4();
 204 |     size : 1;
 205 | }
 206 | 
 207 | table processEntry5 {
 208 |     reads {
 209 |         mdata.bitmap : ternary;
 210 |     }
 211 |     actions {
 212 |         processentry5;
 213 |         noequ0_processentry5;
 214 |     }
 215 |     // default_action : noequ0_processentry5;
 216 |     size : 2;
 217 | }
 218 | 
 219 | table noequ0_processEntry5 {
 220 |     actions {
 221 |         noequ0_processentry5;
 222 |     }
 223 |     default_action : noequ0_processentry5();
 224 |     size : 1;
 225 | }
 226 | 
 227 | table Entry5WriteToPacket {
 228 |     actions {
 229 |         entry5WriteToPacket;
 230 |     }
 231 |     default_action : entry5WriteToPacket();
 232 |     size : 1;
 233 | }
 234 | 
 235 | table processEntry5andWriteToPacket {
 236 |     actions {
 237 |         processentry5andWriteToPacket;
 238 |     }
 239 |     size : 1;
 240 | }
 241 | 
 242 | table noequ0_processEntry5andWriteToPacket {
 243 |     actions {
 244 |         noequ0_processentry5andWriteToPacket;
 245 |     }
 246 |     size : 1;
 247 | }
 248 | 
 249 | table cleanEntry5 {
 250 |     actions {
 251 |         do_cleanEntry5;
 252 |     }
 253 |     default_action : do_cleanEntry5();
 254 |     size : 1;
 255 | }
 256 | 
 257 | table processEntry6 {
 258 |     reads {
 259 |         mdata.bitmap : ternary;
 260 |     }
 261 |     actions {
 262 |         processentry6;
 263 |         noequ0_processentry6;
 264 |     }
 265 |     // default_action : noequ0_processentry6;
 266 |     size : 2;
 267 | }
 268 | 
 269 | table noequ0_processEntry6 {
 270 |     actions {
 271 |         noequ0_processentry6;
 272 |     }
 273 |     default_action : noequ0_processentry6();
 274 |     size : 1;
 275 | }
 276 | 
 277 | table Entry6WriteToPacket {
 278 |     actions {
 279 |         entry6WriteToPacket;
 280 |     }
 281 |     default_action : entry6WriteToPacket();
 282 |     size : 1;
 283 | }
 284 | 
 285 | table processEntry6andWriteToPacket {
 286 |     actions {
 287 |         processentry6andWriteToPacket;
 288 |     }
 289 |     size : 1;
 290 | }
 291 | 
 292 | table noequ0_processEntry6andWriteToPacket {
 293 |     actions {
 294 |         noequ0_processentry6andWriteToPacket;
 295 |     }
 296 |     size : 1;
 297 | }
 298 | 
 299 | table cleanEntry6 {
 300 |     actions {
 301 |         do_cleanEntry6;
 302 |     }
 303 |     default_action : do_cleanEntry6();
 304 |     size : 1;
 305 | }
 306 | 
 307 | table processEntry7 {
 308 |     reads {
 309 |         mdata.bitmap : ternary;
 310 |     }
 311 |     actions {
 312 |         processentry7;
 313 |         noequ0_processentry7;
 314 |     }
 315 |     // default_action : noequ0_processentry7;
 316 |     size : 2;
 317 | }
 318 | 
 319 | table noequ0_processEntry7 {
 320 |     actions {
 321 |         noequ0_processentry7;
 322 |     }
 323 |     default_action : noequ0_processentry7();
 324 |     size : 1;
 325 | }
 326 | 
 327 | table Entry7WriteToPacket {
 328 |     actions {
 329 |         entry7WriteToPacket;
 330 |     }
 331 |     default_action : entry7WriteToPacket();
 332 |     size : 1;
 333 | }
 334 | 
 335 | table processEntry7andWriteToPacket {
 336 |     actions {
 337 |         processentry7andWriteToPacket;
 338 |     }
 339 |     size : 1;
 340 | }
 341 | 
 342 | table noequ0_processEntry7andWriteToPacket {
 343 |     actions {
 344 |         noequ0_processentry7andWriteToPacket;
 345 |     }
 346 |     size : 1;
 347 | }
 348 | 
 349 | table cleanEntry7 {
 350 |     actions {
 351 |         do_cleanEntry7;
 352 |     }
 353 |     default_action : do_cleanEntry7();
 354 |     size : 1;
 355 | }
 356 | 
 357 | table processEntry8 {
 358 |     reads {
 359 |         mdata.bitmap : ternary;
 360 |     }
 361 |     actions {
 362 |         processentry8;
 363 |         noequ0_processentry8;
 364 |     }
 365 |     // default_action : noequ0_processentry8;
 366 |     size : 2;
 367 | }
 368 | 
 369 | table noequ0_processEntry8 {
 370 |     actions {
 371 |         noequ0_processentry8;
 372 |     }
 373 |     default_action : noequ0_processentry8();
 374 |     size : 1;
 375 | }
 376 | 
 377 | table Entry8WriteToPacket {
 378 |     actions {
 379 |         entry8WriteToPacket;
 380 |     }
 381 |     default_action : entry8WriteToPacket();
 382 |     size : 1;
 383 | }
 384 | 
 385 | table processEntry8andWriteToPacket {
 386 |     actions {
 387 |         processentry8andWriteToPacket;
 388 |     }
 389 |     size : 1;
 390 | }
 391 | 
 392 | table noequ0_processEntry8andWriteToPacket {
 393 |     actions {
 394 |         noequ0_processentry8andWriteToPacket;
 395 |     }
 396 |     size : 1;
 397 | }
 398 | 
 399 | table cleanEntry8 {
 400 |     actions {
 401 |         do_cleanEntry8;
 402 |     }
 403 |     default_action : do_cleanEntry8();
 404 |     size : 1;
 405 | }
 406 | 
 407 | table processEntry9 {
 408 |     reads {
 409 |         mdata.bitmap : ternary;
 410 |     }
 411 |     actions {
 412 |         processentry9;
 413 |         noequ0_processentry9;
 414 |     }
 415 |     // default_action : noequ0_processentry9;
 416 |     size : 2;
 417 | }
 418 | 
 419 | table noequ0_processEntry9 {
 420 |     actions {
 421 |         noequ0_processentry9;
 422 |     }
 423 |     default_action : noequ0_processentry9();
 424 |     size : 1;
 425 | }
 426 | 
 427 | table Entry9WriteToPacket {
 428 |     actions {
 429 |         entry9WriteToPacket;
 430 |     }
 431 |     default_action : entry9WriteToPacket();
 432 |     size : 1;
 433 | }
 434 | 
 435 | table processEntry9andWriteToPacket {
 436 |     actions {
 437 |         processentry9andWriteToPacket;
 438 |     }
 439 |     size : 1;
 440 | }
 441 | 
 442 | table noequ0_processEntry9andWriteToPacket {
 443 |     actions {
 444 |         noequ0_processentry9andWriteToPacket;
 445 |     }
 446 |     size : 1;
 447 | }
 448 | 
 449 | table cleanEntry9 {
 450 |     actions {
 451 |         do_cleanEntry9;
 452 |     }
 453 |     default_action : do_cleanEntry9();
 454 |     size : 1;
 455 | }
 456 | 
 457 | table processEntry10 {
 458 |     reads {
 459 |         mdata.bitmap : ternary;
 460 |     }
 461 |     actions {
 462 |         processentry10;
 463 |         noequ0_processentry10;
 464 |     }
 465 |     // default_action : noequ0_processentry10;
 466 |     size : 2;
 467 | }
 468 | 
 469 | table noequ0_processEntry10 {
 470 |     actions {
 471 |         noequ0_processentry10;
 472 |     }
 473 |     default_action : noequ0_processentry10();
 474 |     size : 1;
 475 | }
 476 | 
 477 | table Entry10WriteToPacket {
 478 |     actions {
 479 |         entry10WriteToPacket;
 480 |     }
 481 |     default_action : entry10WriteToPacket();
 482 |     size : 1;
 483 | }
 484 | 
 485 | table processEntry10andWriteToPacket {
 486 |     actions {
 487 |         processentry10andWriteToPacket;
 488 |     }
 489 |     size : 1;
 490 | }
 491 | 
 492 | table noequ0_processEntry10andWriteToPacket {
 493 |     actions {
 494 |         noequ0_processentry10andWriteToPacket;
 495 |     }
 496 |     size : 1;
 497 | }
 498 | 
 499 | table cleanEntry10 {
 500 |     actions {
 501 |         do_cleanEntry10;
 502 |     }
 503 |     default_action : do_cleanEntry10();
 504 |     size : 1;
 505 | }
 506 | 
 507 | table processEntry11 {
 508 |     reads {
 509 |         mdata.bitmap : ternary;
 510 |     }
 511 |     actions {
 512 |         processentry11;
 513 |         noequ0_processentry11;
 514 |     }
 515 |     // default_action : noequ0_processentry11;
 516 |     size : 2;
 517 | }
 518 | 
 519 | table noequ0_processEntry11 {
 520 |     actions {
 521 |         noequ0_processentry11;
 522 |     }
 523 |     default_action : noequ0_processentry11();
 524 |     size : 1;
 525 | }
 526 | 
 527 | table Entry11WriteToPacket {
 528 |     actions {
 529 |         entry11WriteToPacket;
 530 |     }
 531 |     default_action : entry11WriteToPacket();
 532 |     size : 1;
 533 | }
 534 | 
 535 | table processEntry11andWriteToPacket {
 536 |     actions {
 537 |         processentry11andWriteToPacket;
 538 |     }
 539 |     size : 1;
 540 | }
 541 | 
 542 | table noequ0_processEntry11andWriteToPacket {
 543 |     actions {
 544 |         noequ0_processentry11andWriteToPacket;
 545 |     }
 546 |     size : 1;
 547 | }
 548 | 
 549 | table cleanEntry11 {
 550 |     actions {
 551 |         do_cleanEntry11;
 552 |     }
 553 |     default_action : do_cleanEntry11();
 554 |     size : 1;
 555 | }
 556 | 
 557 | table processEntry12 {
 558 |     reads {
 559 |         mdata.bitmap : ternary;
 560 |     }
 561 |     actions {
 562 |         processentry12;
 563 |         noequ0_processentry12;
 564 |     }
 565 |     // default_action : noequ0_processentry12;
 566 |     size : 2;
 567 | }
 568 | 
 569 | table noequ0_processEntry12 {
 570 |     actions {
 571 |         noequ0_processentry12;
 572 |     }
 573 |     default_action : noequ0_processentry12();
 574 |     size : 1;
 575 | }
 576 | 
 577 | table Entry12WriteToPacket {
 578 |     actions {
 579 |         entry12WriteToPacket;
 580 |     }
 581 |     default_action : entry12WriteToPacket();
 582 |     size : 1;
 583 | }
 584 | 
 585 | table processEntry12andWriteToPacket {
 586 |     actions {
 587 |         processentry12andWriteToPacket;
 588 |     }
 589 |     size : 1;
 590 | }
 591 | 
 592 | table noequ0_processEntry12andWriteToPacket {
 593 |     actions {
 594 |         noequ0_processentry12andWriteToPacket;
 595 |     }
 596 |     size : 1;
 597 | }
 598 | 
 599 | table cleanEntry12 {
 600 |     actions {
 601 |         do_cleanEntry12;
 602 |     }
 603 |     default_action : do_cleanEntry12();
 604 |     size : 1;
 605 | }
 606 | 
 607 | table processEntry13 {
 608 |     reads {
 609 |         mdata.bitmap : ternary;
 610 |     }
 611 |     actions {
 612 |         processentry13;
 613 |         noequ0_processentry13;
 614 |     }
 615 |     // default_action : noequ0_processentry13;
 616 |     size : 2;
 617 | }
 618 | 
 619 | table noequ0_processEntry13 {
 620 |     actions {
 621 |         noequ0_processentry13;
 622 |     }
 623 |     default_action : noequ0_processentry13();
 624 |     size : 1;
 625 | }
 626 | 
 627 | table Entry13WriteToPacket {
 628 |     actions {
 629 |         entry13WriteToPacket;
 630 |     }
 631 |     default_action : entry13WriteToPacket();
 632 |     size : 1;
 633 | }
 634 | 
 635 | table processEntry13andWriteToPacket {
 636 |     actions {
 637 |         processentry13andWriteToPacket;
 638 |     }
 639 |     size : 1;
 640 | }
 641 | 
 642 | table noequ0_processEntry13andWriteToPacket {
 643 |     actions {
 644 |         noequ0_processentry13andWriteToPacket;
 645 |     }
 646 |     size : 1;
 647 | }
 648 | 
 649 | table cleanEntry13 {
 650 |     actions {
 651 |         do_cleanEntry13;
 652 |     }
 653 |     default_action : do_cleanEntry13();
 654 |     size : 1;
 655 | }
 656 | 
 657 | table processEntry14 {
 658 |     reads {
 659 |         mdata.bitmap : ternary;
 660 |     }
 661 |     actions {
 662 |         processentry14;
 663 |         noequ0_processentry14;
 664 |     }
 665 |     // default_action : noequ0_processentry14;
 666 |     size : 2;
 667 | }
 668 | 
 669 | table noequ0_processEntry14 {
 670 |     actions {
 671 |         noequ0_processentry14;
 672 |     }
 673 |     default_action : noequ0_processentry14();
 674 |     size : 1;
 675 | }
 676 | 
 677 | table Entry14WriteToPacket {
 678 |     actions {
 679 |         entry14WriteToPacket;
 680 |     }
 681 |     default_action : entry14WriteToPacket();
 682 |     size : 1;
 683 | }
 684 | 
 685 | table processEntry14andWriteToPacket {
 686 |     actions {
 687 |         processentry14andWriteToPacket;
 688 |     }
 689 |     size : 1;
 690 | }
 691 | 
 692 | table noequ0_processEntry14andWriteToPacket {
 693 |     actions {
 694 |         noequ0_processentry14andWriteToPacket;
 695 |     }
 696 |     size : 1;
 697 | }
 698 | 
 699 | table cleanEntry14 {
 700 |     actions {
 701 |         do_cleanEntry14;
 702 |     }
 703 |     default_action : do_cleanEntry14();
 704 |     size : 1;
 705 | }
 706 | 
 707 | table processEntry15 {
 708 |     reads {
 709 |         mdata.bitmap : ternary;
 710 |     }
 711 |     actions {
 712 |         processentry15;
 713 |         noequ0_processentry15;
 714 |     }
 715 |     // default_action : noequ0_processentry15;
 716 |     size : 2;
 717 | }
 718 | 
 719 | table noequ0_processEntry15 {
 720 |     actions {
 721 |         noequ0_processentry15;
 722 |     }
 723 |     default_action : noequ0_processentry15();
 724 |     size : 1;
 725 | }
 726 | 
 727 | table Entry15WriteToPacket {
 728 |     actions {
 729 |         entry15WriteToPacket;
 730 |     }
 731 |     default_action : entry15WriteToPacket();
 732 |     size : 1;
 733 | }
 734 | 
 735 | table processEntry15andWriteToPacket {
 736 |     actions {
 737 |         processentry15andWriteToPacket;
 738 |     }
 739 |     size : 1;
 740 | }
 741 | 
 742 | table noequ0_processEntry15andWriteToPacket {
 743 |     actions {
 744 |         noequ0_processentry15andWriteToPacket;
 745 |     }
 746 |     size : 1;
 747 | }
 748 | 
 749 | table cleanEntry15 {
 750 |     actions {
 751 |         do_cleanEntry15;
 752 |     }
 753 |     default_action : do_cleanEntry15();
 754 |     size : 1;
 755 | }
 756 | 
 757 | table processEntry16 {
 758 |     reads {
 759 |         mdata.bitmap : ternary;
 760 |     }
 761 |     actions {
 762 |         processentry16;
 763 |         noequ0_processentry16;
 764 |     }
 765 |     // default_action : noequ0_processentry16;
 766 |     size : 2;
 767 | }
 768 | 
 769 | table noequ0_processEntry16 {
 770 |     actions {
 771 |         noequ0_processentry16;
 772 |     }
 773 |     default_action : noequ0_processentry16();
 774 |     size : 1;
 775 | }
 776 | 
 777 | table Entry16WriteToPacket {
 778 |     actions {
 779 |         entry16WriteToPacket;
 780 |     }
 781 |     default_action : entry16WriteToPacket();
 782 |     size : 1;
 783 | }
 784 | 
 785 | table processEntry16andWriteToPacket {
 786 |     actions {
 787 |         processentry16andWriteToPacket;
 788 |     }
 789 |     size : 1;
 790 | }
 791 | 
 792 | table noequ0_processEntry16andWriteToPacket {
 793 |     actions {
 794 |         noequ0_processentry16andWriteToPacket;
 795 |     }
 796 |     size : 1;
 797 | }
 798 | 
 799 | table cleanEntry16 {
 800 |     actions {
 801 |         do_cleanEntry16;
 802 |     }
 803 |     default_action : do_cleanEntry16();
 804 |     size : 1;
 805 | }
 806 | 
 807 | table processEntry17 {
 808 |     reads {
 809 |         mdata.bitmap : ternary;
 810 |     }
 811 |     actions {
 812 |         processentry17;
 813 |         noequ0_processentry17;
 814 |     }
 815 |     // default_action : noequ0_processentry17;
 816 |     size : 2;
 817 | }
 818 | 
 819 | table noequ0_processEntry17 {
 820 |     actions {
 821 |         noequ0_processentry17;
 822 |     }
 823 |     default_action : noequ0_processentry17();
 824 |     size : 1;
 825 | }
 826 | 
 827 | table Entry17WriteToPacket {
 828 |     actions {
 829 |         entry17WriteToPacket;
 830 |     }
 831 |     default_action : entry17WriteToPacket();
 832 |     size : 1;
 833 | }
 834 | 
 835 | table processEntry17andWriteToPacket {
 836 |     actions {
 837 |         processentry17andWriteToPacket;
 838 |     }
 839 |     size : 1;
 840 | }
 841 | 
 842 | table noequ0_processEntry17andWriteToPacket {
 843 |     actions {
 844 |         noequ0_processentry17andWriteToPacket;
 845 |     }
 846 |     size : 1;
 847 | }
 848 | 
 849 | table cleanEntry17 {
 850 |     actions {
 851 |         do_cleanEntry17;
 852 |     }
 853 |     default_action : do_cleanEntry17();
 854 |     size : 1;
 855 | }
 856 | 
 857 | table processEntry18 {
 858 |     reads {
 859 |         mdata.bitmap : ternary;
 860 |     }
 861 |     actions {
 862 |         processentry18;
 863 |         noequ0_processentry18;
 864 |     }
 865 |     // default_action : noequ0_processentry18;
 866 |     size : 2;
 867 | }
 868 | 
 869 | table noequ0_processEntry18 {
 870 |     actions {
 871 |         noequ0_processentry18;
 872 |     }
 873 |     default_action : noequ0_processentry18();
 874 |     size : 1;
 875 | }
 876 | 
 877 | table Entry18WriteToPacket {
 878 |     actions {
 879 |         entry18WriteToPacket;
 880 |     }
 881 |     default_action : entry18WriteToPacket();
 882 |     size : 1;
 883 | }
 884 | 
 885 | table processEntry18andWriteToPacket {
 886 |     actions {
 887 |         processentry18andWriteToPacket;
 888 |     }
 889 |     size : 1;
 890 | }
 891 | 
 892 | table noequ0_processEntry18andWriteToPacket {
 893 |     actions {
 894 |         noequ0_processentry18andWriteToPacket;
 895 |     }
 896 |     size : 1;
 897 | }
 898 | 
 899 | table cleanEntry18 {
 900 |     actions {
 901 |         do_cleanEntry18;
 902 |     }
 903 |     default_action : do_cleanEntry18();
 904 |     size : 1;
 905 | }
 906 | 
 907 | table processEntry19 {
 908 |     reads {
 909 |         mdata.bitmap : ternary;
 910 |     }
 911 |     actions {
 912 |         processentry19;
 913 |         noequ0_processentry19;
 914 |     }
 915 |     // default_action : noequ0_processentry19;
 916 |     size : 2;
 917 | }
 918 | 
 919 | table noequ0_processEntry19 {
 920 |     actions {
 921 |         noequ0_processentry19;
 922 |     }
 923 |     default_action : noequ0_processentry19();
 924 |     size : 1;
 925 | }
 926 | 
 927 | table Entry19WriteToPacket {
 928 |     actions {
 929 |         entry19WriteToPacket;
 930 |     }
 931 |     default_action : entry19WriteToPacket();
 932 |     size : 1;
 933 | }
 934 | 
 935 | table processEntry19andWriteToPacket {
 936 |     actions {
 937 |         processentry19andWriteToPacket;
 938 |     }
 939 |     size : 1;
 940 | }
 941 | 
 942 | table noequ0_processEntry19andWriteToPacket {
 943 |     actions {
 944 |         noequ0_processentry19andWriteToPacket;
 945 |     }
 946 |     size : 1;
 947 | }
 948 | 
 949 | table cleanEntry19 {
 950 |     actions {
 951 |         do_cleanEntry19;
 952 |     }
 953 |     default_action : do_cleanEntry19();
 954 |     size : 1;
 955 | }
 956 | 
 957 | table processEntry20 {
 958 |     reads {
 959 |         mdata.bitmap : ternary;
 960 |     }
 961 |     actions {
 962 |         processentry20;
 963 |         noequ0_processentry20;
 964 |     }
 965 |     // default_action : noequ0_processentry20;
 966 |     size : 2;
 967 | }
 968 | 
 969 | table noequ0_processEntry20 {
 970 |     actions {
 971 |         noequ0_processentry20;
 972 |     }
 973 |     default_action : noequ0_processentry20();
 974 |     size : 1;
 975 | }
 976 | 
 977 | table Entry20WriteToPacket {
 978 |     actions {
 979 |         entry20WriteToPacket;
 980 |     }
 981 |     default_action : entry20WriteToPacket();
 982 |     size : 1;
 983 | }
 984 | 
 985 | table processEntry20andWriteToPacket {
 986 |     actions {
 987 |         processentry20andWriteToPacket;
 988 |     }
 989 |     size : 1;
 990 | }
 991 | 
 992 | table noequ0_processEntry20andWriteToPacket {
 993 |     actions {
 994 |         noequ0_processentry20andWriteToPacket;
 995 |     }
 996 |     size : 1;
 997 | }
 998 | 
 999 | table cleanEntry20 {
1000 |     actions {
1001 |         do_cleanEntry20;
1002 |     }
1003 |     default_action : do_cleanEntry20();
1004 |     size : 1;
1005 | }
1006 | 
1007 | table processEntry21 {
1008 |     reads {
1009 |         mdata.bitmap : ternary;
1010 |     }
1011 |     actions {
1012 |         processentry21;
1013 |         noequ0_processentry21;
1014 |     }
1015 |     // default_action : noequ0_processentry21;
1016 |     size : 2;
1017 | }
1018 | 
1019 | table noequ0_processEntry21 {
1020 |     actions {
1021 |         noequ0_processentry21;
1022 |     }
1023 |     default_action : noequ0_processentry21();
1024 |     size : 1;
1025 | }
1026 | 
1027 | table Entry21WriteToPacket {
1028 |     actions {
1029 |         entry21WriteToPacket;
1030 |     }
1031 |     default_action : entry21WriteToPacket();
1032 |     size : 1;
1033 | }
1034 | 
1035 | table processEntry21andWriteToPacket {
1036 |     actions {
1037 |         processentry21andWriteToPacket;
1038 |     }
1039 |     size : 1;
1040 | }
1041 | 
1042 | table noequ0_processEntry21andWriteToPacket {
1043 |     actions {
1044 |         noequ0_processentry21andWriteToPacket;
1045 |     }
1046 |     size : 1;
1047 | }
1048 | 
1049 | table cleanEntry21 {
1050 |     actions {
1051 |         do_cleanEntry21;
1052 |     }
1053 |     default_action : do_cleanEntry21();
1054 |     size : 1;
1055 | }
1056 | 
1057 | table processEntry22 {
1058 |     reads {
1059 |         mdata.bitmap : ternary;
1060 |     }
1061 |     actions {
1062 |         processentry22;
1063 |         noequ0_processentry22;
1064 |     }
1065 |     // default_action : noequ0_processentry22;
1066 |     size : 2;
1067 | }
1068 | 
1069 | table noequ0_processEntry22 {
1070 |     actions {
1071 |         noequ0_processentry22;
1072 |     }
1073 |     default_action : noequ0_processentry22();
1074 |     size : 1;
1075 | }
1076 | 
1077 | table Entry22WriteToPacket {
1078 |     actions {
1079 |         entry22WriteToPacket;
1080 |     }
1081 |     default_action : entry22WriteToPacket();
1082 |     size : 1;
1083 | }
1084 | 
1085 | table processEntry22andWriteToPacket {
1086 |     actions {
1087 |         processentry22andWriteToPacket;
1088 |     }
1089 |     size : 1;
1090 | }
1091 | 
1092 | table noequ0_processEntry22andWriteToPacket {
1093 |     actions {
1094 |         noequ0_processentry22andWriteToPacket;
1095 |     }
1096 |     size : 1;
1097 | }
1098 | 
1099 | table cleanEntry22 {
1100 |     actions {
1101 |         do_cleanEntry22;
1102 |     }
1103 |     default_action : do_cleanEntry22();
1104 |     size : 1;
1105 | }
1106 | 
1107 | table processEntry23 {
1108 |     reads {
1109 |         mdata.bitmap : ternary;
1110 |     }
1111 |     actions {
1112 |         processentry23;
1113 |         noequ0_processentry23;
1114 |     }
1115 |     // default_action : noequ0_processentry23;
1116 |     size : 2;
1117 | }
1118 | 
1119 | table noequ0_processEntry23 {
1120 |     actions {
1121 |         noequ0_processentry23;
1122 |     }
1123 |     default_action : noequ0_processentry23();
1124 |     size : 1;
1125 | }
1126 | 
1127 | table Entry23WriteToPacket {
1128 |     actions {
1129 |         entry23WriteToPacket;
1130 |     }
1131 |     default_action : entry23WriteToPacket();
1132 |     size : 1;
1133 | }
1134 | 
1135 | table processEntry23andWriteToPacket {
1136 |     actions {
1137 |         processentry23andWriteToPacket;
1138 |     }
1139 |     size : 1;
1140 | }
1141 | 
1142 | table noequ0_processEntry23andWriteToPacket {
1143 |     actions {
1144 |         noequ0_processentry23andWriteToPacket;
1145 |     }
1146 |     size : 1;
1147 | }
1148 | 
1149 | table cleanEntry23 {
1150 |     actions {
1151 |         do_cleanEntry23;
1152 |     }
1153 |     default_action : do_cleanEntry23();
1154 |     size : 1;
1155 | }
1156 | 
1157 | table processEntry24 {
1158 |     reads {
1159 |         mdata.bitmap : ternary;
1160 |     }
1161 |     actions {
1162 |         processentry24;
1163 |         noequ0_processentry24;
1164 |     }
1165 |     // default_action : noequ0_processentry24;
1166 |     size : 2;
1167 | }
1168 | 
1169 | table noequ0_processEntry24 {
1170 |     actions {
1171 |         noequ0_processentry24;
1172 |     }
1173 |     default_action : noequ0_processentry24();
1174 |     size : 1;
1175 | }
1176 | 
1177 | table Entry24WriteToPacket {
1178 |     actions {
1179 |         entry24WriteToPacket;
1180 |     }
1181 |     default_action : entry24WriteToPacket();
1182 |     size : 1;
1183 | }
1184 | 
1185 | table processEntry24andWriteToPacket {
1186 |     actions {
1187 |         processentry24andWriteToPacket;
1188 |     }
1189 |     size : 1;
1190 | }
1191 | 
1192 | table noequ0_processEntry24andWriteToPacket {
1193 |     actions {
1194 |         noequ0_processentry24andWriteToPacket;
1195 |     }
1196 |     size : 1;
1197 | }
1198 | 
1199 | table cleanEntry24 {
1200 |     actions {
1201 |         do_cleanEntry24;
1202 |     }
1203 |     default_action : do_cleanEntry24();
1204 |     size : 1;
1205 | }
1206 | 
1207 | table processEntry25 {
1208 |     reads {
1209 |         mdata.bitmap : ternary;
1210 |     }
1211 |     actions {
1212 |         processentry25;
1213 |         noequ0_processentry25;
1214 |     }
1215 |     // default_action : noequ0_processentry25;
1216 |     size : 2;
1217 | }
1218 | 
1219 | table noequ0_processEntry25 {
1220 |     actions {
1221 |         noequ0_processentry25;
1222 |     }
1223 |     default_action : noequ0_processentry25();
1224 |     size : 1;
1225 | }
1226 | 
1227 | table Entry25WriteToPacket {
1228 |     actions {
1229 |         entry25WriteToPacket;
1230 |     }
1231 |     default_action : entry25WriteToPacket();
1232 |     size : 1;
1233 | }
1234 | 
1235 | table processEntry25andWriteToPacket {
1236 |     actions {
1237 |         processentry25andWriteToPacket;
1238 |     }
1239 |     size : 1;
1240 | }
1241 | 
1242 | table noequ0_processEntry25andWriteToPacket {
1243 |     actions {
1244 |         noequ0_processentry25andWriteToPacket;
1245 |     }
1246 |     size : 1;
1247 | }
1248 | 
1249 | table cleanEntry25 {
1250 |     actions {
1251 |         do_cleanEntry25;
1252 |     }
1253 |     default_action : do_cleanEntry25();
1254 |     size : 1;
1255 | }
1256 | 
1257 | table processEntry26 {
1258 |     reads {
1259 |         mdata.bitmap : ternary;
1260 |     }
1261 |     actions {
1262 |         processentry26;
1263 |         noequ0_processentry26;
1264 |     }
1265 |     // default_action : noequ0_processentry26;
1266 |     size : 2;
1267 | }
1268 | 
1269 | table noequ0_processEntry26 {
1270 |     actions {
1271 |         noequ0_processentry26;
1272 |     }
1273 |     default_action : noequ0_processentry26();
1274 |     size : 1;
1275 | }
1276 | 
1277 | table Entry26WriteToPacket {
1278 |     actions {
1279 |         entry26WriteToPacket;
1280 |     }
1281 |     default_action : entry26WriteToPacket();
1282 |     size : 1;
1283 | }
1284 | 
1285 | table processEntry26andWriteToPacket {
1286 |     actions {
1287 |         processentry26andWriteToPacket;
1288 |     }
1289 |     size : 1;
1290 | }
1291 | 
1292 | table noequ0_processEntry26andWriteToPacket {
1293 |     actions {
1294 |         noequ0_processentry26andWriteToPacket;
1295 |     }
1296 |     size : 1;
1297 | }
1298 | 
1299 | table cleanEntry26 {
1300 |     actions {
1301 |         do_cleanEntry26;
1302 |     }
1303 |     default_action : do_cleanEntry26();
1304 |     size : 1;
1305 | }
1306 | 
1307 | table processEntry27 {
1308 |     reads {
1309 |         mdata.bitmap : ternary;
1310 |     }
1311 |     actions {
1312 |         processentry27;
1313 |         noequ0_processentry27;
1314 |     }
1315 |     // default_action : noequ0_processentry27;
1316 |     size : 2;
1317 | }
1318 | 
1319 | table noequ0_processEntry27 {
1320 |     actions {
1321 |         noequ0_processentry27;
1322 |     }
1323 |     default_action : noequ0_processentry27();
1324 |     size : 1;
1325 | }
1326 | 
1327 | table Entry27WriteToPacket {
1328 |     actions {
1329 |         entry27WriteToPacket;
1330 |     }
1331 |     default_action : entry27WriteToPacket();
1332 |     size : 1;
1333 | }
1334 | 
1335 | table processEntry27andWriteToPacket {
1336 |     actions {
1337 |         processentry27andWriteToPacket;
1338 |     }
1339 |     size : 1;
1340 | }
1341 | 
1342 | table noequ0_processEntry27andWriteToPacket {
1343 |     actions {
1344 |         noequ0_processentry27andWriteToPacket;
1345 |     }
1346 |     size : 1;
1347 | }
1348 | 
1349 | table cleanEntry27 {
1350 |     actions {
1351 |         do_cleanEntry27;
1352 |     }
1353 |     default_action : do_cleanEntry27();
1354 |     size : 1;
1355 | }
1356 | 
1357 | table processEntry28 {
1358 |     reads {
1359 |         mdata.bitmap : ternary;
1360 |     }
1361 |     actions {
1362 |         processentry28;
1363 |         noequ0_processentry28;
1364 |     }
1365 |     // default_action : noequ0_processentry28;
1366 |     size : 2;
1367 | }
1368 | 
1369 | table noequ0_processEntry28 {
1370 |     actions {
1371 |         noequ0_processentry28;
1372 |     }
1373 |     default_action : noequ0_processentry28();
1374 |     size : 1;
1375 | }
1376 | 
1377 | table Entry28WriteToPacket {
1378 |     actions {
1379 |         entry28WriteToPacket;
1380 |     }
1381 |     default_action : entry28WriteToPacket();
1382 |     size : 1;
1383 | }
1384 | 
1385 | table processEntry28andWriteToPacket {
1386 |     actions {
1387 |         processentry28andWriteToPacket;
1388 |     }
1389 |     size : 1;
1390 | }
1391 | 
1392 | table noequ0_processEntry28andWriteToPacket {
1393 |     actions {
1394 |         noequ0_processentry28andWriteToPacket;
1395 |     }
1396 |     size : 1;
1397 | }
1398 | 
1399 | table cleanEntry28 {
1400 |     actions {
1401 |         do_cleanEntry28;
1402 |     }
1403 |     default_action : do_cleanEntry28();
1404 |     size : 1;
1405 | }
1406 | 
1407 | table processEntry29 {
1408 |     reads {
1409 |         mdata.bitmap : ternary;
1410 |     }
1411 |     actions {
1412 |         processentry29;
1413 |         noequ0_processentry29;
1414 |     }
1415 |     // default_action : noequ0_processentry29;
1416 |     size : 2;
1417 | }
1418 | 
1419 | table noequ0_processEntry29 {
1420 |     actions {
1421 |         noequ0_processentry29;
1422 |     }
1423 |     default_action : noequ0_processentry29();
1424 |     size : 1;
1425 | }
1426 | 
1427 | table Entry29WriteToPacket {
1428 |     actions {
1429 |         entry29WriteToPacket;
1430 |     }
1431 |     default_action : entry29WriteToPacket();
1432 |     size : 1;
1433 | }
1434 | 
1435 | table processEntry29andWriteToPacket {
1436 |     actions {
1437 |         processentry29andWriteToPacket;
1438 |     }
1439 |     size : 1;
1440 | }
1441 | 
1442 | table noequ0_processEntry29andWriteToPacket {
1443 |     actions {
1444 |         noequ0_processentry29andWriteToPacket;
1445 |     }
1446 |     size : 1;
1447 | }
1448 | 
1449 | table cleanEntry29 {
1450 |     actions {
1451 |         do_cleanEntry29;
1452 |     }
1453 |     default_action : do_cleanEntry29();
1454 |     size : 1;
1455 | }
1456 | 
1457 | table processEntry30 {
1458 |     reads {
1459 |         mdata.bitmap : ternary;
1460 |     }
1461 |     actions {
1462 |         processentry30;
1463 |         noequ0_processentry30;
1464 |     }
1465 |     // default_action : noequ0_processentry30;
1466 |     size : 2;
1467 | }
1468 | 
1469 | table noequ0_processEntry30 {
1470 |     actions {
1471 |         noequ0_processentry30;
1472 |     }
1473 |     default_action : noequ0_processentry30();
1474 |     size : 1;
1475 | }
1476 | 
1477 | table Entry30WriteToPacket {
1478 |     actions {
1479 |         entry30WriteToPacket;
1480 |     }
1481 |     default_action : entry30WriteToPacket();
1482 |     size : 1;
1483 | }
1484 | 
1485 | table processEntry30andWriteToPacket {
1486 |     actions {
1487 |         processentry30andWriteToPacket;
1488 |     }
1489 |     size : 1;
1490 | }
1491 | 
1492 | table noequ0_processEntry30andWriteToPacket {
1493 |     actions {
1494 |         noequ0_processentry30andWriteToPacket;
1495 |     }
1496 |     size : 1;
1497 | }
1498 | 
1499 | table cleanEntry30 {
1500 |     actions {
1501 |         do_cleanEntry30;
1502 |     }
1503 |     default_action : do_cleanEntry30();
1504 |     size : 1;
1505 | }
1506 | 
1507 | table processEntry31 {
1508 |     reads {
1509 |         mdata.bitmap : ternary;
1510 |     }
1511 |     actions {
1512 |         processentry31;
1513 |         noequ0_processentry31;
1514 |     }
1515 |     // default_action : noequ0_processentry31;
1516 |     size : 2;
1517 | }
1518 | 
1519 | table noequ0_processEntry31 {
1520 |     actions {
1521 |         noequ0_processentry31;
1522 |     }
1523 |     default_action : noequ0_processentry31();
1524 |     size : 1;
1525 | }
1526 | 
1527 | table Entry31WriteToPacket {
1528 |     actions {
1529 |         entry31WriteToPacket;
1530 |     }
1531 |     default_action : entry31WriteToPacket();
1532 |     size : 1;
1533 | }
1534 | 
1535 | table processEntry31andWriteToPacket {
1536 |     actions {
1537 |         processentry31andWriteToPacket;
1538 |     }
1539 |     size : 1;
1540 | }
1541 | 
1542 | table noequ0_processEntry31andWriteToPacket {
1543 |     actions {
1544 |         noequ0_processentry31andWriteToPacket;
1545 |     }
1546 |     size : 1;
1547 | }
1548 | 
1549 | table cleanEntry31 {
1550 |     actions {
1551 |         do_cleanEntry31;
1552 |     }
1553 |     default_action : do_cleanEntry31();
1554 |     size : 1;
1555 | }
1556 | 
1557 | //table processEntry32 {
1558 | //    actions {
1559 | //        processentry32;
1560 | //    }
1561 | //    default_action : processentry32();
1562 | //    size : 1;
1563 | // /
1564 | 
1565 | //tablnoequ0_e processEntry32 {
1566 | //    actions {
1567 | //      noequ0_  processentry32;
1568 | //    }
1569 | //    default_action noequ0_: processentry32();
1570 | //    size : 1;
1571 | //}
1572 | //
1573 | //table Entry32WriteToPacket {
1574 | //    actions {
1575 | //        entry32WriteToPacket;
1576 | //    }
1577 | //    default_action : entry32WriteToPacket();
1578 | //    size : 1;
1579 | //}
1580 | //
1581 | //table processEntry32andWriteToPacket {
1582 | //    default_action : processentry32andWriteToPacket();
1583 | //    size : 1;
1584 | 
1585 | //tablnoequ0_e processEntry32andWriteToPacket {
1586 | //    default_action noequ0_: processentry32andWriteToPacket();
1587 | //    size : 1;
1588 | 
1589 | //table cleanry3Entry2 {
1590 | // //    actions {
1591 | // /
1592 | // //table processEntry32andWriteToPacket {
1593 | // //    default_action : processentry32andWriteToPacket();
1594 | // //    size : 1;
1595 | 
1596 | // //noequ0_tablnoequ0_e processEntry32andWriteToPacket {
1597 | // //    default_action noequ0_: processentry32andWriteToPacket();
1598 | // noequ0_//    size : 1;
1599 | 
1600 | // //table cleanry3Entry2 {
1601 | // /
1602 | // /        do_cleanEntry32;
1603 | //    }
1604 | //    default_action : do_cleanEntry32();
1605 | //    size : 1;
1606 | //}
1607 | 


--------------------------------------------------------------------------------
/p4src/p4ml.p4:
--------------------------------------------------------------------------------
  1 | #include <tofino/intrinsic_metadata.p4>
  2 | #include <tofino/stateful_alu_blackbox.p4>
  3 | #include <tofino/constants.p4>
  4 | #include "includes/headers.p4"
  5 | #include "includes/parser.p4"
  6 | 
  7 | #include "includes/registers.p4"
  8 | #include "includes/tables.p4"
  9 | #include "includes/actions.p4"
 10 | #include "includes/common.p4"
 11 | 
 12 | field_list p4ml_resubmit_list{
 13 | 	mdata.agtr_time;	
 14 | }
 15 | 
 16 | action do_resubmit(){
 17 | 	resubmit(p4ml_resubmit_list);
 18 | }
 19 | 
 20 | table p4ml_resubmit{
 21 | 	actions{
 22 | 		do_resubmit;
 23 | 	}
 24 | 	default_action: do_resubmit();
 25 | 	size: 1;
 26 | 
 27 | }
 28 | control ingress 
 29 | {
 30 | 
 31 |     if (valid(p4ml_entries)) {
 32 | 
 33 |             if (ipv4.ecn == 3 or p4ml.ECN == 1) {
 34 |                 apply(setup_ecn_table);
 35 |             }
 36 |             // ack packet
 37 |             if (p4ml.isACK == 1) {
 38 |                 
 39 |                 if (p4ml.overflow == 1 and p4ml.isResend == 0) {
 40 | 
 41 |                 } else {
 42 |                     apply(clean_appID_and_seq_table);
 43 |                     
 44 |                     if (mdata.isMyAppIDandMyCurrentSeq != 0) {
 45 |                         /* Clean */
 46 |                         apply(clean_bitmap_table);
 47 |                         apply(clean_ecn_table);
 48 |                         apply(clean_agtr_time_table);
 49 |                         // apply(cleanEntry1);
 50 |                     }
 51 |                 }
 52 | 
 53 |                 /* Multicast Back */
 54 |                 if(ig_intr_md.resubmit_flag == 1) {
 55 |                     apply(multicast_table);
 56 |                 } else {
 57 |                     apply(p4ml_resubmit);
 58 |                 }
 59 |                 
 60 |             } else {
 61 | 
 62 |                 if (p4ml.overflow == 1) {
 63 |                     apply(outPort_table);
 64 |                 } else {
 65 |                     if (p4ml.isResend == 1) {
 66 |                         apply(appID_and_seq_resend_table);
 67 |                     } else {
 68 |                         apply(appID_and_seq_table);
 69 |                     }
 70 |                     // Correct ID and Seq
 71 |                     if (mdata.isMyAppIDandMyCurrentSeq != 0) {
 72 |                         
 73 |                         if (p4ml.isResend == 1) {
 74 |                             // Clean the bitmap also
 75 |                             apply(bitmap_resend_table);
 76 |                         } else {
 77 |                             apply(bitmap_table);
 78 |                         }
 79 | 
 80 |                         apply(ecn_register_table);
 81 |                         
 82 |                         apply(bitmap_aggregate_table);
 83 | 
 84 |                         if (p4ml.isResend == 1) {
 85 |                             // Force forward and clean
 86 |                             apply(agtr_time_resend_table);
 87 |                         } else {
 88 |                             apply(agtr_time_table);
 89 |                         }
 90 | 
 91 |                         // bitmap correct
 92 |                         if (mdata.isAggregate != 0) {
 93 |                             if (mdata.current_agtr_time == p4ml.agtr_time) {
 94 |                                 apply(noequ0_processEntry1andWriteToPacket);
 95 |                                 apply(noequ0_processEntry2andWriteToPacket);
 96 |                                 apply(noequ0_processEntry3andWriteToPacket);
 97 |                                 apply(noequ0_processEntry4andWriteToPacket);
 98 |                                 apply(noequ0_processEntry5andWriteToPacket);
 99 |                                 apply(noequ0_processEntry6andWriteToPacket);
100 |                                 apply(noequ0_processEntry7andWriteToPacket);
101 |                                 apply(noequ0_processEntry8andWriteToPacket);
102 |                                 apply(noequ0_processEntry9andWriteToPacket);
103 |                                 apply(noequ0_processEntry10andWriteToPacket);
104 |                                 apply(noequ0_processEntry11andWriteToPacket);
105 |                                 apply(noequ0_processEntry12andWriteToPacket);
106 |                                 apply(noequ0_processEntry13andWriteToPacket);
107 |                                 apply(noequ0_processEntry14andWriteToPacket);
108 |                                 apply(noequ0_processEntry15andWriteToPacket);
109 |                                 apply(noequ0_processEntry16andWriteToPacket);
110 |                                 apply(noequ0_processEntry17andWriteToPacket);
111 |                                 apply(noequ0_processEntry18andWriteToPacket);
112 |                                 apply(noequ0_processEntry19andWriteToPacket);
113 |                                 apply(noequ0_processEntry20andWriteToPacket);
114 |                                 apply(noequ0_processEntry21andWriteToPacket);
115 |                                 apply(noequ0_processEntry22andWriteToPacket);
116 |                                 apply(noequ0_processEntry23andWriteToPacket);
117 |                                 apply(noequ0_processEntry24andWriteToPacket);
118 |                                 apply(noequ0_processEntry25andWriteToPacket);
119 |                                 apply(noequ0_processEntry26andWriteToPacket);
120 |                                 apply(noequ0_processEntry27andWriteToPacket);
121 |                                 apply(noequ0_processEntry28andWriteToPacket);
122 |                                 apply(noequ0_processEntry29andWriteToPacket);
123 |                                 apply(noequ0_processEntry30andWriteToPacket);
124 |                                 apply(noequ0_processEntry31andWriteToPacket);
125 |                                 //apply(noequ0_processEntry32andWriteToPacket);
126 |                                 // set output port
127 |                                 // if(ig_intr_md.resubmit_flag == 1) {
128 |                                 apply(modify_packet_bitmap_table);
129 |                                 apply(outPort_table);
130 |                                 // } else {
131 |                                     // apply(p4ml_resubmit);
132 |                                 // }
133 |                             } else {
134 |                                 apply(processEntry1);
135 |                                 apply(processEntry2);
136 |                                 apply(processEntry3);
137 |                                 apply(processEntry4);
138 |                                 apply(processEntry5);
139 |                                 apply(processEntry6);
140 |                                 apply(processEntry7);
141 |                                 apply(processEntry8);
142 |                                 apply(processEntry9);
143 |                                 apply(processEntry10);
144 |                                 apply(processEntry11);
145 |                                 apply(processEntry12);
146 |                                 apply(processEntry13);
147 |                                 apply(processEntry14);
148 |                                 apply(processEntry15);
149 |                                 apply(processEntry16);
150 |                                 apply(processEntry17);
151 |                                 apply(processEntry18);
152 |                                 apply(processEntry19);
153 |                                 apply(processEntry20);
154 |                                 apply(processEntry21);
155 |                                 apply(processEntry22);
156 |                                 apply(processEntry23);
157 |                                 apply(processEntry24);
158 |                                 apply(processEntry25);
159 |                                 apply(processEntry26);
160 |                                 apply(processEntry27);
161 |                                 apply(processEntry28);
162 |                                 apply(processEntry29);
163 |                                 apply(processEntry30);
164 |                                 apply(processEntry31);
165 |                                 //apply(processEntry32);
166 |                                 
167 |                                 if (ig_intr_md.resubmit_flag == 1) {
168 |                                     apply(drop_table);
169 |                                 } else {
170 |                                     apply(p4ml_resubmit);
171 |                                 }
172 | 
173 |                             }
174 |                         } else {
175 |                             if (mdata.current_agtr_time == p4ml.agtr_time) {
176 |                                 apply(Entry1WriteToPacket);
177 |                                 apply(Entry2WriteToPacket);
178 |                                 apply(Entry3WriteToPacket);
179 |                                 apply(Entry4WriteToPacket);
180 |                                 apply(Entry5WriteToPacket);
181 |                                 apply(Entry6WriteToPacket);
182 |                                 apply(Entry7WriteToPacket);
183 |                                 apply(Entry8WriteToPacket);
184 |                                 apply(Entry9WriteToPacket);
185 |                                 apply(Entry10WriteToPacket);
186 |                                 apply(Entry11WriteToPacket);
187 |                                 apply(Entry12WriteToPacket);
188 |                                 apply(Entry13WriteToPacket);
189 |                                 apply(Entry14WriteToPacket);
190 |                                 apply(Entry15WriteToPacket);
191 |                                 apply(Entry16WriteToPacket);
192 |                                 apply(Entry17WriteToPacket);
193 |                                 apply(Entry18WriteToPacket);
194 |                                 apply(Entry19WriteToPacket);
195 |                                 apply(Entry20WriteToPacket);
196 |                                 apply(Entry21WriteToPacket);
197 |                                 apply(Entry22WriteToPacket);
198 |                                 apply(Entry23WriteToPacket);
199 |                                 apply(Entry24WriteToPacket);
200 |                                 apply(Entry25WriteToPacket);
201 |                                 apply(Entry26WriteToPacket);
202 |                                 apply(Entry27WriteToPacket);
203 |                                 apply(Entry28WriteToPacket);
204 |                                 apply(Entry29WriteToPacket);
205 |                                 apply(Entry30WriteToPacket);
206 |                                 apply(Entry31WriteToPacket);
207 |                                 //apply(Entry32WriteToPacket);
208 |                                 // set output port
209 |                                 // if(ig_intr_md.resubmit_flag == 1) {
210 |                                 apply(modify_packet_bitmap_table);
211 |                                 apply(outPort_table);
212 |                                 // } else {
213 |                                     // apply(p4ml_resubmit);
214 |                                 // }	
215 |                             }
216 |                         }
217 |                     } else {
218 |                         /* tag collision bit in incoming one */
219 |                         // if not empty
220 |                         if (p4ml.isResend == 0) {
221 |                             apply(tag_collision_incoming_table);
222 |                         }
223 |                         apply(outPort_table);
224 |                     }
225 |                 }
226 |             }
227 |     } else {
228 |         // // BG traffic doesn't have data layer
229 |         //   if (valid(p4ml_bg)){
230 |         //      apply(bg_outPort_table);
231 |         //   } else {
232 |         apply(forward);
233 |         //   }
234 |     }
235 | }
236 | 
237 | control egress 
238 | {
239 |       apply(qdepth_table);
240 |       if (valid(ipv4)) {
241 |           if (mdata.qdepth != 0) {
242 |             apply(mark_ecn_ipv4_table);
243 |           }
244 |       }
245 |       if (valid(p4ml_entries)) {
246 |         if (mdata.qdepth != 0) {
247 |             apply(modify_ecn_table);
248 |         }
249 |       }
250 | }
251 | 
252 | 


--------------------------------------------------------------------------------
/ptf/ptfTest.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import pd_base_tests
  4 | import pltfm_pm_rpc
  5 | import pal_rpc
  6 | import random
  7 | import sys
  8 | import time
  9 | import unittest
 10 | 
 11 | from pltfm_pm_rpc.ttypes import *
 12 | from pal_rpc.ttypes import *
 13 | from ptf import config
 14 | from ptf.testutils import *
 15 | from ptf.thriftutils import *
 16 | from res_pd_rpc.ttypes import *
 17 | from ptf import config
 18 | from ptf.thriftutils import *
 19 | 
 20 | from res_pd_rpc.ttypes import *
 21 | from port_mapping import *
 22 | 
 23 | from tm_api_rpc.ttypes import *
 24 | 
 25 | this_dir = os.path.dirname(os.path.abspath(__file__))
 26 | 
 27 | fp_ports = ["9/0","10/0","11/0","12/0","13/0","14/0","15/0","16/0","17/0","18/0","19/0"]
 28 | # fp_ports = ["13/0","14/0", "11/0"]
 29 | loopback_ports = ["20/0"]
 30 | # loopback_ports = ["1/0", "2/0", "3/0", "4/0", "5/0", "6/0", "7/0", "8/0", "25/0"]
 31 | def toInt8(n):
 32 |   n = n & 0xff
 33 |   return (n ^ 0x80) - 0x80
 34 | 
 35 | class L2Test(pd_base_tests.ThriftInterfaceDataPlane):
 36 |     def __init__(self):
 37 |         pd_base_tests.ThriftInterfaceDataPlane.__init__(self,
 38 |                                                         ["basic_switching"])
 39 | 
 40 |     # The setUp() method is used to prepare the test fixture. Typically
 41 |     # you would use it to establich connection to the Thrift server.
 42 |     #
 43 |     # You can also put the initial device configuration there. However,
 44 |     # if during this process an error is encountered, it will be considered
 45 |     # as a test error (meaning the test is incorrect),
 46 |     # rather than a test failure
 47 |     def setUp(self):
 48 |         # initialize the connection
 49 |         pd_base_tests.ThriftInterfaceDataPlane.setUp(self)
 50 |         self.sess_hdl = self.conn_mgr.client_init()
 51 |         self.dev_tgt = DevTarget_t(0, hex_to_i16(0xFFFF))
 52 |         self.devPorts = []
 53 |         self.LPPorts = []
 54 |         self.dev = 0
 55 |         self.platform_type = "mavericks"
 56 |         board_type = self.pltfm_pm.pltfm_pm_board_type_get()
 57 |         if re.search("0x0234|0x1234|0x4234|0x5234", hex(board_type)):
 58 |             self.platform_type = "mavericks"
 59 |         elif re.search("0x2234|0x3234", hex(board_type)):
 60 |             self.platform_type = "montara"
 61 | 
 62 |         # get the device ports from front panel ports
 63 |         try:
 64 |             for fpPort in fp_ports:
 65 |                 port, chnl = fpPort.split("/")
 66 |                 devPort = \
 67 |                     self.pal.pal_port_front_panel_port_to_dev_port_get(0,
 68 |                                                                     int(port),
 69 |                                                                     int(chnl))
 70 |                 self.devPorts.append(devPort)
 71 | 
 72 |             if test_param_get('setup') == True or (test_param_get('setup') != True
 73 |                 and test_param_get('cleanup') != True):
 74 | 
 75 |                 # add and enable the platform ports
 76 |                 for i in self.devPorts:
 77 |                     self.pal.pal_port_add(0, i,
 78 |                                         pal_port_speed_t.BF_SPEED_100G,
 79 |                                         pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON)
 80 |                     self.pal.pal_port_an_set(0, i, 2);
 81 |                     self.pal.pal_port_enable(0, i)
 82 | 
 83 | ####################### LOOPBACK ###########################
 84 |             for lbPort in loopback_ports:
 85 |                 port, chnl = lbPort.split("/")
 86 |                 devPort = \
 87 |                     self.pal.pal_port_front_panel_port_to_dev_port_get(0,
 88 |                                                                     int(port),
 89 |                                                                     int(chnl))
 90 |                 self.LPPorts.append(devPort)
 91 | 
 92 |                 # add and enable the platform ports
 93 |             for i in self.LPPorts:
 94 |                 self.pal.pal_port_add(0, i,
 95 |                                     pal_port_speed_t.BF_SPEED_100G,
 96 |                                     pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON)
 97 | 
 98 |                 self.pal.pal_port_loopback_mode_set(0, i,
 99 |                                     pal_loopback_mod_t.BF_LPBK_MAC_NEAR)
100 |                 self.pal.pal_port_an_set(0, i, 2);
101 |                 self.pal.pal_port_enable(0, i)
102 |                 
103 |             self.conn_mgr.complete_operations(self.sess_hdl)
104 | 
105 |         except Exception as e:
106 | 		print "Some Error in port init"
107 |         
108 |         # # flow control setting, follow "Barefoot Network Tofino Fixed Function API Guide"
109 |         # for i in range(len(self.devPorts)):
110 |         #     # step 1: Map loessless traffice to a PPG handle with a buffer limit
111 |         #     ppg_cells = 2000
112 |         #     self.ppg_handler = self.tm.tm_allocate_ppg(self.dev, self.devPorts[i])
113 |         #     self.tm.tm_set_ppg_guaranteed_min_limit(self.dev, self.ppg_handler, ppg_cells)
114 | 
115 |         #     # step 2: Map traffic to an iCos
116 |         #     icos_bmap = toInt8(0x01)
117 |         #     self.tm.tm_set_ppg_icos_mapping(self.dev, self.ppg_handler, icos_bmap)
118 | 
119 |         #     # step 3: Provision skid buffer set up pasue PFC generation
120 |         #     skid_cells = 4000
121 |         #     self.tm.tm_set_ppg_skid_limit(self.dev, self.ppg_handler, skid_cells)
122 |         #     self.tm.tm_enable_lossless_treatment(self.dev, self.ppg_handler)
123 |         #     # link-level flow control
124 |         #     fctype = 1 # BF_TM_PAUSE_PORT
125 |         #     self.tm.tm_set_port_flowcontrol_mode(self.dev, self.devPorts[i], fctype)
126 |         #     # iCos to Cos
127 |         #     icos_cos_map = tm_pfc_cos_map_t(CoS0_to_iCos=0)
128 |         #     self.tm.tm_set_port_pfc_cos_mapping(self.dev, self.devPorts[i], icos_cos_map)
129 | 
130 |         # ##########################################
131 |         # for i in range(len(self.devPorts)):
132 |         #     #step 4: Apply buffering
133 |         #     queue_id = 0
134 |         #     queue_cells = 25000
135 |         #     self.tm.tm_set_q_guaranteed_min_limit(self.dev, self.devPorts[i], queue_id, queue_cells)
136 | 
137 |         #     # step 5: Allocate queues
138 |         #     q_count = 8
139 |         #     q_map = tm_q_map_t(0,1,2,3,4,5,6,7)
140 |         #     self.tm.tm_set_port_q_mapping(self.dev, self.devPorts[i], q_count, q_map)
141 |         #     # step 6: Apply weighting if needed (skip, no use)
142 | 
143 |         #     # step 7: Honor pause/PFC event
144 |         #     cos = 0
145 |         #     self.tm.tm_set_q_pfc_cos_mapping(self.dev, self.devPorts[i], queue_id, cos)
146 | 
147 |         # # Can not find below API
148 |         # # self.tm.tm_set_port_flowcontrol_rx(self.dev, self.devPorts, fctype)
149 |         # self.tm.tm_complete_operations(self.dev)
150 | 
151 |         # for i in range(len(self.devPorts)):
152 |         #     # For MAC
153 |         #     self.pal.pal_port_flow_control_pfc_set(self.dev, self.devPorts[i], 1, 1)
154 |         # print("Done with PFC")
155 | 
156 |         return 
157 | 
158 |     def runTest(self):
159 |         print "runTest"
160 |    	    # self.conn_mgr.complete_operations(self.sess_hdl)
161 |     
162 |     def tearDown(self):
163 |         return 
164 |         # try:
165 |         #     print("Clearing table entries")
166 |         #     for table in self.entries.keys():
167 |         #         delete_func = "self.client." + table + "_table_delete"
168 |         #     for entry in self.entries[table]:
169 |         #         exec delete_func + "(self.sess_hdl, self.dev, entry)"
170 |         # except:
171 |         #     print("Error while cleaning up. ")
172 |         #     print("You might need to restart the driver")
173 |         # finally:
174 |         #     self.conn_mgr.complete_operations(self.sess_hdl)
175 |         #     self.conn_mgr.client_cleanup(self.sess_hdl)
176 |         #     print("Closed Session %d" % self.sess_hdl)
177 |         #     self.tm.tm_free_ppg(self.dev, self.ppg_handler)
178 |         #     print("Free ppg handler %d" % self.ppg_handler)
179 |         #     pd_base_tests.ThriftInterfaceDataPlane.tearDown(self)
180 | 


--------------------------------------------------------------------------------
/run_pd_rpc/setup.py:
--------------------------------------------------------------------------------
  1 | clear_all()
  2 | 
  3 | p4_pd.register_reset_all_agtr_time()
  4 | p4_pd.register_reset_all_appID_and_Seq()
  5 | p4_pd.register_reset_all_bitmap()
  6 | p4_pd.register_reset_all_register1()
  7 | p4_pd.register_reset_all_register2()
  8 | p4_pd.register_reset_all_register3()
  9 | p4_pd.register_reset_all_register4()
 10 | p4_pd.register_reset_all_register5()
 11 | p4_pd.register_reset_all_register6()
 12 | p4_pd.register_reset_all_register7()
 13 | p4_pd.register_reset_all_register8()
 14 | p4_pd.register_reset_all_register9()
 15 | p4_pd.register_reset_all_register10()
 16 | p4_pd.register_reset_all_register11()
 17 | p4_pd.register_reset_all_register12()
 18 | p4_pd.register_reset_all_register13()
 19 | p4_pd.register_reset_all_register14()
 20 | p4_pd.register_reset_all_register15()
 21 | p4_pd.register_reset_all_register16()
 22 | p4_pd.register_reset_all_register17()
 23 | p4_pd.register_reset_all_register18()
 24 | p4_pd.register_reset_all_register19()
 25 | p4_pd.register_reset_all_register20()
 26 | p4_pd.register_reset_all_register21()
 27 | p4_pd.register_reset_all_register22()
 28 | p4_pd.register_reset_all_register23()
 29 | p4_pd.register_reset_all_register24()
 30 | p4_pd.register_reset_all_register25()
 31 | p4_pd.register_reset_all_register26()
 32 | p4_pd.register_reset_all_register27()
 33 | p4_pd.register_reset_all_register28()
 34 | p4_pd.register_reset_all_register29()
 35 | p4_pd.register_reset_all_register30()
 36 | p4_pd.register_reset_all_register31()
 37 | # p4_pd.register_reset_all_register32()
 38 | 
 39 | 
 40 | # These are background traffic
 41 | # p4_pd.bg_outPort_table_table_add_with_set_egr(
 42 | #     p4_pd.bg_outPort_table_match_spec_t(0), 
 43 | #     p4_pd.set_egr_action_spec_t(4)
 44 | # )
 45 | 
 46 | # p4_pd.bg_outPort_table_table_add_with_set_egr(
 47 | #     p4_pd.bg_outPort_table_match_spec_t(1), 
 48 | #     p4_pd.set_egr_action_spec_t(0)
 49 | # )
 50 | 
 51 | # first Zero for pending
 52 | port_of_worker = [0, 56, 48, 40, 32, 24, 16, 8, 0, 4]
 53 | single_loopback_port = 20
 54 | 
 55 | MAC_address_of_worker = [ "0", 
 56 |                           "b8:59:9f:1d:04:f2"
 57 |                         , "b8:59:9f:0b:30:72"
 58 |                         , "98:03:9b:03:46:50"
 59 |                         , "b8:59:9f:02:0d:14"
 60 |                         , "b8:59:9f:b0:2d:50"
 61 |                         , "b8:59:9f:b0:2b:b0"
 62 |                         , "b8:59:9f:b0:2b:b8"
 63 |                         , "b8:59:9f:b0:2d:18"
 64 |                         , "b8:59:9f:b0:2d:58" ]
 65 | 
 66 | # first Zero for pending
 67 | # PSs = [0, 9, 8]
 68 | PSs = [0, 9]
 69 | 
 70 | len_workers = len(port_of_worker)
 71 | len_PS = len(PSs)
 72 | 
 73 | # Normal Switch traffic
 74 | for i in range(1, len_workers):
 75 |     p4_pd.forward_table_add_with_set_egr(
 76 |         p4_pd.forward_match_spec_t(macAddr_to_string(MAC_address_of_worker[i])),
 77 |         p4_pd.set_egr_action_spec_t(port_of_worker[i])
 78 |     )
 79 | 
 80 | 
 81 | # P4ML Traffic
 82 | 
 83 | # No Pending packet, First time enter switch
 84 | for i in range(1, len_workers - 1):
 85 |     for j in range(1, len_PS):
 86 |         p4_pd.outPort_table_table_add_with_set_egr_and_set_index(
 87 |         p4_pd.outPort_table_match_spec_t(
 88 |             1 << 16,
 89 |             port_of_worker[i],
 90 |             0,
 91 |             j-1), 
 92 |         # app1 -> worker3
 93 |         p4_pd.set_egr_and_set_index_action_spec_t(single_loopback_port))
 94 | 
 95 | # Not Pending packet, Second time enter switch
 96 | for j in range(1, len_PS):
 97 |     print(j, PSs[j])
 98 |     p4_pd.outPort_table_table_add_with_set_egr(
 99 |     p4_pd.outPort_table_match_spec_t(
100 |         1 << 16,
101 |         single_loopback_port,
102 |         1,
103 |         j-1), 
104 |     # app1 -> worker3
105 |     p4_pd.set_egr_action_spec_t(port_of_worker[PSs[j]]))
106 | 
107 | # INGRESSPORT, Index
108 | # Worker1 to Worker8
109 | for i in range(1, len_workers - 1):
110 |     p4_pd.drop_table_table_add_with_drop_pkt(
111 |         p4_pd.drop_table_match_spec_t(
112 |             port_of_worker[i],
113 |             1)
114 |     )
115 | 
116 | ####### Server ########
117 | for j in range(1, len_PS):
118 |     p4_pd.multicast_table_table_add_with_multicast(
119 |         p4_pd.multicast_table_match_spec_t(
120 |             1,
121 |             1 << 16,
122 |             port_of_worker[PSs[j]],
123 |             0),
124 |         # multicast app1 -> worker1, 2
125 |         p4_pd.multicast_action_spec_t(999)
126 |     )
127 | 
128 | 
129 | p4_pd.modify_packet_bitmap_table_table_add_with_modify_packet_bitmap(
130 |     p4_pd.modify_packet_bitmap_table_match_spec_t(1)
131 | )
132 | 
133 | p4_pd.modify_packet_bitmap_table_table_add_with_nop(
134 |     p4_pd.modify_packet_bitmap_table_match_spec_t(0)
135 | )
136 | 
137 | p4_pd.processEntry1_table_add_with_processentry1(
138 |     p4_pd.processEntry1_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
139 | )
140 | p4_pd.processEntry1_table_add_with_noequ0_processentry1(
141 |     p4_pd.processEntry1_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1,
142 | )
143 | p4_pd.processEntry2_table_add_with_processentry2(
144 |     p4_pd.processEntry2_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
145 | )
146 | p4_pd.processEntry2_table_add_with_noequ0_processentry2(
147 |     p4_pd.processEntry2_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
148 | )
149 | p4_pd.processEntry3_table_add_with_processentry3(
150 |     p4_pd.processEntry3_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
151 | )
152 | p4_pd.processEntry3_table_add_with_noequ0_processentry3(
153 |     p4_pd.processEntry3_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
154 | )
155 | p4_pd.processEntry4_table_add_with_processentry4(
156 |     p4_pd.processEntry4_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
157 | )
158 | p4_pd.processEntry4_table_add_with_noequ0_processentry4(
159 |     p4_pd.processEntry4_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
160 | )
161 | p4_pd.processEntry5_table_add_with_processentry5(
162 |     p4_pd.processEntry5_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
163 | )
164 | p4_pd.processEntry5_table_add_with_noequ0_processentry5(
165 |     p4_pd.processEntry5_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
166 | )
167 | p4_pd.processEntry6_table_add_with_processentry6(
168 |     p4_pd.processEntry6_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
169 | )
170 | p4_pd.processEntry6_table_add_with_noequ0_processentry6(
171 |     p4_pd.processEntry6_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
172 | )
173 | p4_pd.processEntry7_table_add_with_processentry7(
174 |     p4_pd.processEntry7_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
175 | )
176 | p4_pd.processEntry7_table_add_with_noequ0_processentry7(
177 |     p4_pd.processEntry7_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
178 | )
179 | p4_pd.processEntry8_table_add_with_processentry8(
180 |     p4_pd.processEntry8_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
181 | )
182 | p4_pd.processEntry8_table_add_with_noequ0_processentry8(
183 |     p4_pd.processEntry8_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
184 | )
185 | p4_pd.processEntry9_table_add_with_processentry9(
186 |     p4_pd.processEntry9_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
187 | )
188 | p4_pd.processEntry9_table_add_with_noequ0_processentry9(
189 |     p4_pd.processEntry9_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
190 | )
191 | p4_pd.processEntry10_table_add_with_processentry10(
192 |     p4_pd.processEntry10_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
193 | )
194 | p4_pd.processEntry10_table_add_with_noequ0_processentry10(
195 |     p4_pd.processEntry10_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
196 | )
197 | p4_pd.processEntry11_table_add_with_processentry11(
198 |     p4_pd.processEntry11_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
199 | )
200 | p4_pd.processEntry11_table_add_with_noequ0_processentry11(
201 |     p4_pd.processEntry11_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
202 | )
203 | p4_pd.processEntry12_table_add_with_processentry12(
204 |     p4_pd.processEntry12_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
205 | )
206 | p4_pd.processEntry12_table_add_with_noequ0_processentry12(
207 |     p4_pd.processEntry12_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
208 | )
209 | p4_pd.processEntry13_table_add_with_processentry13(
210 |     p4_pd.processEntry13_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
211 | )
212 | p4_pd.processEntry13_table_add_with_noequ0_processentry13(
213 |     p4_pd.processEntry13_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
214 | )
215 | p4_pd.processEntry14_table_add_with_processentry14(
216 |     p4_pd.processEntry14_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
217 | )
218 | p4_pd.processEntry14_table_add_with_noequ0_processentry14(
219 |     p4_pd.processEntry14_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
220 | )
221 | p4_pd.processEntry15_table_add_with_processentry15(
222 |     p4_pd.processEntry15_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
223 | )
224 | p4_pd.processEntry15_table_add_with_noequ0_processentry15(
225 |     p4_pd.processEntry15_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
226 | )
227 | p4_pd.processEntry16_table_add_with_processentry16(
228 |     p4_pd.processEntry16_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
229 | )
230 | p4_pd.processEntry16_table_add_with_noequ0_processentry16(
231 |     p4_pd.processEntry16_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
232 | )
233 | p4_pd.processEntry17_table_add_with_processentry17(
234 |     p4_pd.processEntry17_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
235 | )
236 | p4_pd.processEntry17_table_add_with_noequ0_processentry17(
237 |     p4_pd.processEntry17_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
238 | )
239 | p4_pd.processEntry18_table_add_with_processentry18(
240 |     p4_pd.processEntry18_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
241 | )
242 | p4_pd.processEntry18_table_add_with_noequ0_processentry18(
243 |     p4_pd.processEntry18_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
244 | )
245 | p4_pd.processEntry19_table_add_with_processentry19(
246 |     p4_pd.processEntry19_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
247 | )
248 | p4_pd.processEntry19_table_add_with_noequ0_processentry19(
249 |     p4_pd.processEntry19_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
250 | )
251 | p4_pd.processEntry20_table_add_with_processentry20(
252 |     p4_pd.processEntry20_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
253 | )
254 | p4_pd.processEntry20_table_add_with_noequ0_processentry20(
255 |     p4_pd.processEntry20_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
256 | )
257 | p4_pd.processEntry21_table_add_with_processentry21(
258 |     p4_pd.processEntry21_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
259 | )
260 | p4_pd.processEntry21_table_add_with_noequ0_processentry21(
261 |     p4_pd.processEntry21_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
262 | )
263 | p4_pd.processEntry22_table_add_with_processentry22(
264 |     p4_pd.processEntry22_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
265 | )
266 | p4_pd.processEntry22_table_add_with_noequ0_processentry22(
267 |     p4_pd.processEntry22_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
268 | )
269 | p4_pd.processEntry23_table_add_with_processentry23(
270 |     p4_pd.processEntry23_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
271 | )
272 | p4_pd.processEntry23_table_add_with_noequ0_processentry23(
273 |     p4_pd.processEntry23_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
274 | )
275 | p4_pd.processEntry24_table_add_with_processentry24(
276 |     p4_pd.processEntry24_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
277 | )
278 | p4_pd.processEntry24_table_add_with_noequ0_processentry24(
279 |     p4_pd.processEntry24_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
280 | )
281 | p4_pd.processEntry25_table_add_with_processentry25(
282 |     p4_pd.processEntry25_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
283 | )
284 | p4_pd.processEntry25_table_add_with_noequ0_processentry25(
285 |     p4_pd.processEntry25_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
286 | )
287 | p4_pd.processEntry26_table_add_with_processentry26(
288 |     p4_pd.processEntry26_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
289 | )
290 | p4_pd.processEntry26_table_add_with_noequ0_processentry26(
291 |     p4_pd.processEntry26_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
292 | )
293 | p4_pd.processEntry27_table_add_with_processentry27(
294 |     p4_pd.processEntry27_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
295 | )
296 | p4_pd.processEntry27_table_add_with_noequ0_processentry27(
297 |     p4_pd.processEntry27_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
298 | )
299 | p4_pd.processEntry28_table_add_with_processentry28(
300 |     p4_pd.processEntry28_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
301 | )
302 | p4_pd.processEntry28_table_add_with_noequ0_processentry28(
303 |     p4_pd.processEntry28_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
304 | )
305 | p4_pd.processEntry29_table_add_with_processentry29(
306 |     p4_pd.processEntry29_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
307 | )
308 | p4_pd.processEntry29_table_add_with_noequ0_processentry29(
309 |     p4_pd.processEntry29_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
310 | )
311 | p4_pd.processEntry30_table_add_with_processentry30(
312 |     p4_pd.processEntry30_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
313 | )
314 | p4_pd.processEntry30_table_add_with_noequ0_processentry30(
315 |     p4_pd.processEntry30_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
316 | )
317 | p4_pd.processEntry31_table_add_with_processentry31(
318 |     p4_pd.processEntry31_match_spec_t(hex_to_i32(0), hex_to_i32(0xFFFFFFFF)), 1,
319 | )
320 | p4_pd.processEntry31_table_add_with_noequ0_processentry31(
321 |     p4_pd.processEntry31_match_spec_t(hex_to_i32(0), hex_to_i32(0x00000000)), 1
322 | )
323 | try:
324 |     # TODO: understand it
325 |     # dont know why, but if group = input port,
326 |     # then the packet followed by that packet will execute multicast
327 |     # therefore make it 20, no 20th port is used.
328 |     mcg_all  = mc.mgrp_create(999)
329 |     mcg1  = mc.mgrp_create(998)
330 |     mcg2  = mc.mgrp_create(997)
331 |     # mcg3  = mc.mgrp_create(996)
332 | except:
333 |     print """
334 | clean_all() does not yet support cleaning the PRE programming.
335 | You need to restart the driver before running this script for the second time
336 | """
337 |     quit()
338 | 
339 | node_all = mc.node_create(
340 |     rid=999,
341 |     port_map=devports_to_mcbitmap([56,48,40,32,24,16,8,0]),
342 |     # port_map=devports_to_mcbitmap([port_of_worker[2], port_of_worker[3], port_of_worker[4],]),
343 |     lag_map=lags_to_mcbitmap(([]))
344 | )
345 | mc.associate_node(mcg_all, node_all, xid=0, xid_valid=False)
346 | 
347 | node1 = mc.node_create(
348 |     rid=998,
349 |     # Not multicast to "0" ( 0 as bg traffic )
350 |     port_map=devports_to_mcbitmap([56,48,40,32,24,16,8]),
351 |     # port_map=devports_to_mcbitmap([56,48,40]),
352 |     lag_map=lags_to_mcbitmap(([]))
353 | )
354 | mc.associate_node(mcg1, node1, xid=0, xid_valid=False)
355 | 
356 | node2 = mc.node_create(
357 |     rid=997,
358 |     # Not multicast to "0" ( 0 as bg traffic )
359 |     # port_map=devports_to_mcbitmap([56,48,40,32,24,16,8]),
360 |     port_map=devports_to_mcbitmap([24,16,8]),
361 |     lag_map=lags_to_mcbitmap(([]))
362 | )
363 | mc.associate_node(mcg2, node2, xid=0, xid_valid=False)
364 | 
365 | 
366 | conn_mgr.complete_operations()
367 | 
368 | def hex_to_i32(h):
369 |     x = int(h, 0)
370 |     if (x > 0xFFFFFFFF):
371 |         raise UIn_Error("Integer cannot fit within 32 bits")
372 |     if (x > 0x7FFFFFFF): x-= 0x100000000
373 |     return x


--------------------------------------------------------------------------------
/server/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | # All Target
 3 | all:
 4 | 	g++ -std=c++11 -O3 -g -c -o  ParameterServer.o  ParameterServer.cc
 5 | 	g++ -std=c++11 -O3 -g -c -o ../common/dma_common.o ../common/dma_common.cc
 6 | 	g++ -std=c++11 -O3 -g -c -o ../common/HashTable.o ../common/HashTable.cc
 7 | 	g++ -std=c++11 -O3 -g -o app  ParameterServer.o ../common/HashTable.o ../common/dma_common.o -lpthread -libverbs
 8 | 
 9 | 
10 | # Clean Target
11 | clean:
12 | 	rm *.o
13 | 	rm app
14 | 


--------------------------------------------------------------------------------
/server/ParameterServer.cc:
--------------------------------------------------------------------------------
  1 | #include "ParameterServer.h"
  2 | 
  3 | tensor_context *tensors;
  4 | 
  5 | int max_agtr_size_per_thread;
  6 | int UsedSwitchAGTRcount = MAX_AGTR_COUNT;
  7 | std::mutex _dma_mutex;
  8 | struct ibv_device **dev_list;
  9 | struct ibv_device *ib_dev;
 10 | ThreadPool* workQueue;
 11 | std::mutex __print_mutex;
 12 | std::mutex _init_mutex;
 13 | int num_thread;
 14 | int print_count = 0;
 15 | int appID;
 16 | 
 17 | long long int receive_in_sec[20] = {0};
 18 | bool receive_byte_reset_flag[20] = {0};
 19 | 
 20 | bool is_completed_p4ml_key[1024000] = {0};
 21 | 
 22 | int next_agtr[MAX_AGTR_COUNT] = {-1};
 23 | HashTable* hash_table;
 24 | 
 25 | int packet_full_count = 0;
 26 | int packet_partial_count = 0;
 27 | int packet_all_forward_count = 0;
 28 | int packet_partial_total_count = 0;
 29 | 
 30 | #define MAX_MEASUREMENT_KEY 12000
 31 | int full_packet_count[MAX_MEASUREMENT_KEY][16518] = { 0 };
 32 | int resend_packet_count[MAX_MEASUREMENT_KEY][16518] = { 0 };
 33 | 
 34 | 
 35 | DMAcontext** global_dma_contexts;
 36 | 
 37 | void main_receive_packet_loop(DMAcontext* dma_context, int thread_id) {
 38 |     int msgs_completed = 0;
 39 |     int this_pos_to_send = 0;
 40 |     int total_last_tensor_packet = 0;
 41 |     int imm_pos_to_send = dma_context->my_send_queue_length / 2;
 42 |     bool app_init[MAX_APP_PER_THREAD] = {0};
 43 |     
 44 |     /* Loss */
 45 |     int loss = 0;
 46 | 
 47 |     int rand_index = 0;
 48 |     int total_loss = 0;
 49 | 
 50 |     // app start from 1
 51 |     int* tensors_pos_of_app = new int[MAX_APP_PER_THREAD + 1];
 52 |     for (int i = 1; i <= MAX_APP_PER_THREAD; i++) {
 53 |         tensors_pos_of_app[i] = thread_id * MAX_STORAGE_PER_APP_PER_THREAD * MAX_APP_PER_THREAD + (i - 1) * MAX_STORAGE_PER_APP_PER_THREAD;
 54 |     }
 55 | 
 56 | 
 57 |     while (1) {
 58 | 
 59 |         cqe_snapshot_t cur_snapshot;
 60 |         msgs_completed = 0;
 61 |         
 62 |         std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
 63 |         while(1) {
 64 | 
 65 |             // if (receive_byte_reset_flag[thread_id]) {
 66 |             //     receive_in_sec[thread_id] = 0;
 67 |             //     receive_byte_reset_flag[thread_id] = false;
 68 |             // }
 69 |             
 70 |             std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
 71 |             std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
 72 |             
 73 |             msgs_completed = receive_packet(dma_context, &cur_snapshot);
 74 |             if (msgs_completed) {    
 75 |                 break;
 76 |             }
 77 |             if (time_span.count() > 20.0 && msgs_completed == 0 && dma_context->total_received > 0) {
 78 |                 std::lock_guard<std::mutex> lock(_dma_mutex);
 79 |                 fprintf(stderr, "Timeout happened this thread_id=%d, total_received=%d, total_sent=%d, last_ACK=%d, total_last_tensor_packet_recv=%d\n",
 80 |                     thread_id, global_dma_contexts[thread_id]->total_received, global_dma_contexts[thread_id]->total_sent, tensors[tensors_pos_of_app[1]].window_manager[0].last_ACK, total_last_tensor_packet);
 81 |                 for (int i = 0; i < num_thread; i++)
 82 |                     fprintf(stderr, "Timeout happened at thread_id=%d, total_received=%d, total_sent=%d\n", i, global_dma_contexts[i]->total_received, global_dma_contexts[i]->total_sent);
 83 | 
 84 |                 for (uint64_t i = 0; i < MAX_MEASUREMENT_KEY; i++) {
 85 |                     for (uint16_t j = 1; j <= ceil((float)MAX_TENSOR_SIZE/MAX_ENTRIES_PER_PACKET); j++) {
 86 |                         if (full_packet_count[i][j]) {
 87 |                             packet_full_count++;
 88 |                         } else if (resend_packet_count[i][j]) {
 89 |                             packet_partial_count++;
 90 |                             packet_partial_total_count += resend_packet_count[i][j];
 91 |                         } else {
 92 |                             packet_all_forward_count++;
 93 |                             // printf("i:%d, j:%d\n", i, j);
 94 |                         }
 95 |                     }
 96 |                 }
 97 |                 printf("%d, %d, %d, %d\n", packet_full_count, packet_partial_count, packet_all_forward_count, packet_partial_total_count);
 98 | 
 99 |                 int seen_agtrs = 0;
100 |                 for (int i = 0; i < MAX_AGTR_COUNT; i++)
101 |                     if (hash_table->isAlreadyDeclare[i])
102 |                         seen_agtrs++;
103 |                 printf("Seen agtrs: %d\n", seen_agtrs);
104 | 
105 |                 exit(-1);
106 |             }
107 |         }
108 |         
109 |         int to_be_sent = 0;
110 |         if (this_pos_to_send + max_agtr_size_per_thread + max_agtr_size_per_thread > dma_context->my_send_queue_length / 2)
111 |             this_pos_to_send = 0;
112 | 
113 |         // printf("%d packets received.\n", msgs_completed);
114 |         for(int msg=0; msg < msgs_completed; msg++) {
115 |             // std::chrono::high_resolution_clock::time_point packet_start = std::chrono::high_resolution_clock::now();
116 |             uint8_t* buf = &dma_context->mp_recv_ring[dma_context->ring_head * kAppRingMbufSize];
117 | 
118 |             agghdr* p4ml_header = reinterpret_cast<agghdr*>(buf + IP_ETH_UDP_HEADER_SIZE);
119 | 
120 |             //check ecn mark
121 |             // bool is_ecn_mark_packet = p4ml_header->flag & 0x08;
122 |             // if (is_ecn_mark_packet)
123 |             //     printf("ECN mark found.\n");
124 |             if (DEBUG_PRINT_ALL_RECEIVING_PACKET)
125 |                 p4ml_header_print_h(p4ml_header, "Receive");
126 | 
127 |             bool isTerminated_packet = p4ml_header->flag & 0x02;
128 |             bool isResend_packet = p4ml_header->flag & 0x04;
129 |             bool isOverflow_packet = p4ml_header->flag & 0x80;
130 |             
131 |             // exit(1);
132 |             p4ml_header_ntoh(p4ml_header);
133 |             /* Move AppID index */
134 |             int appID = p4ml_header->appID;
135 |             if (!app_init[appID]) {
136 |                 app_init[appID] = true;
137 |             } else {
138 |                 if (p4ml_header->key != tensors[tensors_pos_of_app[appID]].key && tensors[tensors_pos_of_app[appID]].isCompleted) {
139 |                     // p4ml_header_print(p4ml_header, "ERROR PACKET");
140 |                     // printf("tensors_pos_of_app[appID] from %d to %d\n", tensors_pos_of_app[appID], tensors_pos_of_app[appID]+1);
141 |                     tensors_pos_of_app[appID]++;
142 |                     if (tensors_pos_of_app[appID] == thread_id * MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD + MAX_STORAGE_PER_APP_PER_THREAD * (appID))
143 |                         tensors_pos_of_app[appID] = tensors_pos_of_app[appID] - MAX_STORAGE_PER_APP_PER_THREAD;
144 |                 }
145 |             }
146 | 
147 |             if (!hash_table->isAlreadyDeclare[p4ml_header->agtr]) 
148 |                 hash_table->isAlreadyDeclare[p4ml_header->agtr] = true;
149 | 
150 |             /* Check if Collision packet */
151 |             bool is_collision_packet = p4ml_header->flag & 0x02;
152 | 
153 |             if (is_collision_packet) {
154 |                 tensors[tensors_pos_of_app[appID]].isCollision[p4ml_header->seq_num] = true;
155 |                 // p4ml_header_print(p4ml_header, "COLLISION PACKET");
156 |                 // exit(1);
157 |             }
158 | 
159 |             int my_tensors_pos = tensors_pos_of_app[appID];
160 | 
161 |             check_tensor_available(&tensors[my_tensors_pos], p4ml_header, thread_id);
162 | 
163 |             // char * eth_ip_header = (char*) dma_context->send_region + wc_recv_id * ENTRY_SIZE;
164 |             // uint8_t swap[6];
165 |             // for (int i = 0; i < 6; i++) {
166 |             //     swap[i] = eth_ip_header[i];
167 |             //     eth_ip_header[i] = eth_ip_header[i+6];
168 |             //     eth_ip_header[i+6] = swap[i];
169 |             // }
170 | 
171 |             if (OVERFLOW_HANDLE) {
172 |                 // Check Switch Overflow but not Host Overflow
173 |                 if (!isOverflow_packet)
174 |                     for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
175 |                         if (p4ml_header->vector[i] == INT32_MAX || p4ml_header->vector[i] == INT32_MIN)
176 |                         {
177 |                             if (p4ml_header->vector[i] == INT32_MIN)
178 |                                 p4ml_header_print(p4ml_header, "Switch Overflow");
179 |                             isOverflow_packet = true;
180 |                         }
181 |                         
182 |             // p4ml_header_print(p4ml_header, "Receive");
183 |                 if (isOverflow_packet) {
184 |                     /* Clean Integer Data */
185 |                     if (!tensors[my_tensors_pos].isFloat[p4ml_header->seq_num]) {
186 |                         // printf("ReadyForFloat\n");
187 |                         makeTensorReadyforFloat(p4ml_header, &tensors[my_tensors_pos]);
188 |                         tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] = true;
189 |                     }
190 |                 }
191 | 
192 |                 /* Floating point request packet */
193 |                 bool sendFloatRequest = false;
194 |                 if (isOverflow_packet && !isResend_packet)
195 |                     sendFloatRequest = true;
196 |                 if (!isOverflow_packet && isResend_packet && tensors[my_tensors_pos].isFloat[p4ml_header->seq_num])
197 |                     sendFloatRequest = true;
198 | 
199 |                 if (sendFloatRequest) {
200 |                     /* Do floating point request */
201 |                     /* Send back request to everyone immediately */
202 |                     p4ml_header_hton_without_data(p4ml_header);
203 |                     memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE), (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_LAYER_SIZE);
204 |                     /* then send ACK */
205 |                     p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)));
206 |                     p4ml_header_setOverflowRequest((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)));
207 |                     p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)));
208 | 
209 |                     // p4ml_header_print_h((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)), "Overflow Sendback PACKET");
210 |                     send_packet(dma_context, P4ML_LAYER_SIZE, imm_pos_to_send);
211 |                     imm_pos_to_send++;
212 |                     if (imm_pos_to_send == dma_context->my_send_queue_length - 1)
213 |                         imm_pos_to_send = dma_context->my_send_queue_length / 2 + 1;
214 | 
215 |                     /* Push Back */
216 |                     dma_postback(dma_context);
217 |                     continue;
218 |                 }
219 |             }
220 | 
221 |             /* Check Full Packet */
222 |             bool isFullPacket = (1 << p4ml_header->num_worker) - 1 == p4ml_header->bitmap? 1:0;
223 | 
224 |             
225 |             if (receive_byte_reset_flag[thread_id]) {
226 |                 receive_in_sec[thread_id] = 0;
227 |                 receive_byte_reset_flag[thread_id] = false;
228 |             }
229 | 
230 |             /* if full packet, update directly. */
231 |             if (isFullPacket) {
232 |                 // printf("%d: full packet - seq %d update model.\n", p4ml_header->key, p4ml_header->seq_num);
233 |                 updateModel_force(p4ml_header, &tensors[my_tensors_pos]);
234 |                 for (int i = 0; i < p4ml_header->num_worker; i++) 
235 |                     tensors[my_tensors_pos].window_manager[i].UpdateWindow(&p4ml_header->seq_num);
236 | 
237 |                 if (p4ml_header->key < MAX_MEASUREMENT_KEY) {
238 |                     if (isResend_packet) {
239 |                         resend_packet_count[p4ml_header->key][p4ml_header->seq_num]++;
240 |                     } else {
241 |                         full_packet_count[p4ml_header->key][p4ml_header->seq_num]++;
242 |                     }
243 |                 }
244 |             } else {
245 | 
246 | 
247 |                 bool type_consistent = false;
248 |                 if (tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] && isOverflow_packet)
249 |                     type_consistent = true;
250 |                 if (!tensors[my_tensors_pos].isFloat[p4ml_header->seq_num] && !isOverflow_packet)
251 |                     type_consistent = true;
252 |                 
253 |                 if (type_consistent)  {
254 | 
255 |                     if (p4ml_header->key < MAX_MEASUREMENT_KEY) {
256 |                         if (isResend_packet)
257 |                             resend_packet_count[p4ml_header->key][p4ml_header->seq_num]++;
258 |                     }
259 |                     // printf("seq %d Partial packet receive.\n", p4ml_header->seq_num);
260 |                     // p4ml_header_print(p4ml_header, "Partial PACKET");
261 |                     int valid_bit = 1;
262 |                     bool need_to_update = true;
263 |                     // check if update is needed
264 |                     for (int i = 0; i < p4ml_header->num_worker; i++) {
265 |                         if (valid_bit & p4ml_header->bitmap) {
266 |                             if (tensors[my_tensors_pos].window_manager[i].isACKed[p4ml_header->seq_num]) {
267 |                                 // p4ml_header_print(p4ml_header, "ERROR PACKET");
268 |                                 // printf("[thread %d][worker %d]'s gredient is already integrated in PS, %d.\n", thread_id, i, p4ml_header->seq_num);
269 |                                 need_to_update = false;
270 |                                 break;
271 |                             }
272 |                         }   
273 |                         valid_bit <<= 1;    
274 |                     }
275 | 
276 |                     if (need_to_update) {
277 |                         // printf("need to update\n");
278 |                         int valid_bit = 1;
279 |                         for (int i = 0; i < p4ml_header->num_worker; i++) {
280 |                             if (valid_bit & p4ml_header->bitmap) { 
281 |                                 // TODO: Update Window will cause BUG, to be fix (floating point need reset ACK)
282 |                                 tensors[my_tensors_pos].window_manager[i].UpdateWindow(&p4ml_header->seq_num);
283 |                             }
284 |                             valid_bit <<= 1;
285 |                         }
286 |                         updateModel(p4ml_header, &tensors[my_tensors_pos], isOverflow_packet);
287 |                     }
288 |                 
289 |                 }
290 |             }
291 |             // if any of the worker doesn't complete slot
292 |             bool is_slot_completed = true;
293 |             for (int i = 0; i < p4ml_header->num_worker; i++) 
294 |                 if (!tensors[my_tensors_pos].window_manager[i].isACKed[p4ml_header->seq_num]) 
295 |                     is_slot_completed = false;
296 |             // printf("packet receive %d\n", p4ml_header->seq_num);
297 |             if (is_slot_completed) {
298 |                 p4ml_header->bitmap = 1;
299 |                 
300 |                 uint16_t new_agtr;
301 | 
302 |                 // if collsiion is happened.
303 |                 if (tensors[my_tensors_pos].isCollision[p4ml_header->seq_num] == true) {
304 |                     // Check if new agtr is already hashed
305 |                     if (next_agtr[p4ml_header->agtr] == -1) {
306 |                         int new_hash_agtr = hash_table->HashNew_predefine();
307 |                         // if get any of AGTR from hash
308 |                         if (new_hash_agtr != -1) {
309 |                             new_agtr = new_hash_agtr;
310 |                             next_agtr[p4ml_header->agtr] = new_agtr;
311 |                             hash_table->hash_map[p4ml_header->agtr] = new_agtr;
312 |                             // printf("old: %d -> new: %d\n", p4ml_header->agtr, new_agtr);
313 |                         } else {
314 |                             // if all of the AGTR is used, full
315 |                             // keep original AGTR
316 |                             // printf("Change Agtr fail, full.\n");
317 |                             new_agtr = p4ml_header->agtr;
318 |                         }
319 |                     } else {
320 |                         //TODO: Separate APP
321 |                         new_agtr = next_agtr[p4ml_header->agtr];
322 |                         // printf("New hash - already: %d\n", new_agtr);
323 |                         // printf("[hashed] old: %d -> new: %d\n", p4ml_header->agtr, new_agtr);
324 |                     }
325 | 
326 |                     p4ml_header_setLengthFieldToAgtr(p4ml_header, new_agtr);
327 |                     p4ml_header_setCollisionBit(p4ml_header);
328 |                 } else {
329 |                     p4ml_header_resetCollisionBit(p4ml_header);
330 |                 }
331 | 
332 |                 int offset = (p4ml_header->seq_num - 1) * MAX_ENTRIES_PER_PACKET;
333 |                 
334 |                 p4ml_header_hton_without_data(p4ml_header);
335 | 
336 |                     if (!isOverflow_packet)
337 |                         for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
338 |                             tensors[my_tensors_pos].data.data_int[offset + i] = htonl(tensors[my_tensors_pos].data.data_int[offset + i]);
339 | 
340 |                 // /* Give higher priority to Resend packet */
341 |                 if (isResend_packet) {
342 |                     // TODO: PACKET LOSS HANDLING FOR DOUBLE PACKET 
343 |                     // printf("Immediately send back Resend packet %d\n", ntohl(p4ml_header->seq_num));
344 |                     memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE), (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_HEADER_SIZE - 12);
345 |                     memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE) + P4ML_HEADER_SIZE - 12, tensors[my_tensors_pos].data.data_int + offset, P4ML_DATA_SIZE);
346 |                     memcpy((char*) dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE) + 14 + P4ML_DATA_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE + P4ML_DATA_SIZE + 14, 12);
347 |                     /* then send ACK */
348 |                     p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)));
349 |                     p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (imm_pos_to_send * P4ML_LAYER_SIZE)));
350 | 
351 |                     send_packet(dma_context, P4ML_LAYER_SIZE, imm_pos_to_send);
352 |                     imm_pos_to_send++;
353 |                     if (imm_pos_to_send == dma_context->my_send_queue_length - 1)
354 |                         imm_pos_to_send = dma_context->my_send_queue_length / 2 + 1;
355 | 
356 |                 } else {
357 |                     memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE, P4ML_HEADER_SIZE - 12);
358 |                     memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE + P4ML_HEADER_SIZE - 12, tensors[my_tensors_pos].data.data_int + offset, P4ML_DATA_SIZE);
359 |                     memcpy((char*) dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE + 14 + P4ML_DATA_SIZE, (char*) buf + IP_ETH_UDP_HEADER_SIZE + P4ML_DATA_SIZE + 14, 12);
360 |                     /* then send ACK */
361 |                     p4ml_header_setACK((agghdr*)((char*)dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE));
362 |                     p4ml_header_resetIndex((agghdr*)((char*)dma_context->send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE));
363 | 
364 |                     to_be_sent++;
365 |                 }
366 |                 // printf("to_be_sent: %d\n", to_be_sent);
367 | 
368 |                 if (tensors[tensors_pos_of_app[appID]].num_worker > 0) {
369 |                     bool this_tensor_finished = true;
370 |                     for (int i = 0; i < tensors[tensors_pos_of_app[appID]].num_worker; i++)
371 |                         if (tensors[tensors_pos_of_app[appID]].window_manager[i].last_ACK < tensors[tensors_pos_of_app[appID]].window_manager[i].total_ACK)
372 |                             this_tensor_finished = false;
373 | 
374 |                     if (this_tensor_finished && !tensors[tensors_pos_of_app[appID]].isCompleted) {
375 |                         // printf("[Thread %d] Tensor %d at %d Completed.\n", thread_id, tensors[tensors_pos_of_app[appID]].key, tensors_pos_of_app[appID]);
376 |                         tensors[tensors_pos_of_app[appID]].isCompleted = true;
377 |                         rand_index = 0;
378 |                         // dma_context->total_received = 0;
379 |                         // dma_context->total_sent = 0;
380 |                     }
381 |                 }
382 |             }
383 | 
384 |             /* Push Back */
385 |             dma_postback(dma_context);
386 |         }
387 |         
388 |         dma_update_snapshot(dma_context, cur_snapshot);
389 | 
390 |         if (msgs_completed < 0) {
391 |             printf("Polling error\n");
392 |             exit(1);
393 |         }
394 | 
395 |         if (msgs_completed > 0) {
396 |             dma_context->total_received += msgs_completed;
397 |             if (receive_byte_reset_flag[thread_id]) {
398 |                 receive_in_sec[thread_id] = msgs_completed;
399 |                 receive_byte_reset_flag[thread_id] = false;
400 |             }
401 |             else
402 |                 receive_in_sec[thread_id] += msgs_completed;
403 |             if (to_be_sent > 0) {
404 |                 send_packet(dma_context, P4ML_LAYER_SIZE * to_be_sent, this_pos_to_send);
405 |             }
406 |             this_pos_to_send += to_be_sent;
407 |             // Let assume the last packet will not loss        
408 |         }
409 |         
410 |     }
411 | }
412 | 
413 | 
414 | void Start(int thread_id) {
415 |     bindingCPU(thread_id + 16);
416 |     DMAcontext* dma_context;
417 |     {
418 |         std::lock_guard<std::mutex> lock(_dma_mutex);
419 | 
420 |         dma_context = DMA_create(ib_dev, thread_id + ((appID - 1) * MAX_THREAD_PER_APP), true);
421 |         // dma_context->isSent = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1];
422 |         // dma_context->send_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1];
423 |         // dma_context->receive_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1];
424 |         global_dma_contexts[thread_id] = dma_context;
425 |     }
426 | 
427 |     main_receive_packet_loop(dma_context, thread_id); 
428 |     
429 |     sleep(1000);
430 | }
431 | 
432 | int main(int argc, char *argv[]) {
433 |     bindingCPU(15);
434 |     srand(time(NULL));
435 |     // num_thread = atoi(argv[1]);
436 | 
437 |     appID = atoi(argv[1]);
438 |     // Lam: this one is for experiment, disable temporary
439 |     // if (argv[1])
440 |     //     UsedSwitchAGTRcount = atoi(argv[1]);
441 |     // else
442 |     //     UsedSwitchAGTRcount = MAX_AGTR_COUNT;
443 |     num_thread = 12;
444 | 
445 |     dev_list = ibv_get_device_list(NULL);
446 |     if (!dev_list) {
447 |         perror("Failed to get devices list");
448 |         exit(1);
449 |     }
450 | 
451 |     ib_dev = dev_list[1];
452 |     if (!ib_dev) {
453 |         fprintf(stderr, "IB device not found\n");
454 |         exit(1);
455 |     }
456 | 
457 |     /* Init Thread */
458 |     workQueue = new ThreadPool(num_thread, [](){});
459 |     max_agtr_size_per_thread = 250;
460 |     global_dma_contexts = new DMAcontext*[num_thread];
461 |     printf("\nUsedSwitchAGTRcount: %d\n\n", UsedSwitchAGTRcount);
462 |     printf("max_agtr_size_per_thread: %d\n\n", max_agtr_size_per_thread);
463 | 
464 |     printf("Overflow Handled: %s\n\n", OVERFLOW_HANDLE? "TRUE":"FALSE");
465 |     /* Init tensors capacity */
466 |     tensors = new tensor_context[MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD * num_thread];
467 |     printf("\nTensors memory pre-allocate...\n");
468 |     for (int i = 0; i < MAX_APP_PER_THREAD * MAX_STORAGE_PER_APP_PER_THREAD * num_thread; i++)
469 |         init_tensor(&tensors[i], MAX_TENSOR_SIZE);
470 | 
471 |     hash_table = new HashTable(UsedSwitchAGTRcount);
472 |     printf("\nHash table creating...\n\n");
473 |     memset(next_agtr, -1, sizeof(int) * MAX_AGTR_COUNT);
474 |     
475 |     for (int i = 0; i < num_thread; i++)
476 |         workQueue->enqueue(Start, i);
477 | 
478 |     std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
479 |     std::chrono::time_point<std::chrono::system_clock> timer = std::chrono::high_resolution_clock::now();
480 |     while (1) {
481 |         std::chrono::time_point<std::chrono::system_clock> current_time = std::chrono::high_resolution_clock::now();
482 |         std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(current_time - timer);
483 |         std::chrono::duration<double> total_time = std::chrono::duration_cast<std::chrono::duration<double>>(current_time - t1);
484 |         if (time_span.count() >= 1) {
485 |             // printf("############################################\n");
486 |             double total_bandwidth = 0.0;
487 |             for (int i = 0; i < num_thread; i++) {
488 |                 // printf("[thread %d] %lf Gbps.\n", i, receive_in_sec[i] * 194.0 / 1024.0 / 1024.0 / 1024.0 * 8.0);
489 |                 total_bandwidth += receive_in_sec[i] * 194.0 / 1024.0 / 1024.0 / 1024.0 * 8.0;
490 |                 receive_byte_reset_flag[i] = true;
491 |                 // receive_in_sec[i] = 0;
492 |             }
493 | 
494 |             
495 |             // total_sent = 0;
496 |             timer = current_time;
497 |         }
498 |     }
499 | 
500 |     sleep(10000000);
501 | 
502 | }
503 | 


--------------------------------------------------------------------------------
/server/ParameterServer.h:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <ctime>
  3 | #include <cmath>
  4 | #include <random>
  5 | #include <arpa/inet.h>
  6 | #include <chrono>
  7 | #include <map>
  8 | #include <unistd.h>
  9 | #include <inttypes.h>
 10 | #include <assert.h>
 11 | #include <cmath>
 12 | #include <algorithm>
 13 | #include <netinet/ip.h>
 14 | #include <set>
 15 | #include "../common/packet.h"
 16 | #include "../common/dma_common.h"
 17 | #include "../common/ThreadPool.h"
 18 | #include "../common/utils.h"
 19 | #include "../common/window_manager.h"
 20 | #include "../common/HashTable.h"
 21 | 
 22 | #define MAX_TENSOR_SIZE 1024000
 23 | // Lam: this one is useless since a PS can only handle 1app, to be mod.
 24 | #define MAX_APP_PER_THREAD 5
 25 | #define MAX_STORAGE_PER_APP_PER_THREAD 10
 26 | #define MAX_WORKER 16
 27 | 
 28 | #define MAX_THREAD_PER_APP 20
 29 | 
 30 | #define OVERFLOW_HANDLE false
 31 | 
 32 | 
 33 | union data_t {
 34 |     int32_t *data_int;
 35 |     float *data_float;
 36 | };
 37 | 
 38 | struct tensor_context {
 39 |     bool* isOccupy;
 40 |     bool* isCollision;
 41 |     bool* isFloat;
 42 |     bool isCompleted;
 43 |     data_t data;
 44 |     uint32_t len;
 45 |     uint64_t key;
 46 |     uint8_t num_worker;
 47 |     WindowManager* window_manager;
 48 |     std::chrono::time_point<std::chrono::system_clock> start_time;
 49 | };
 50 | 
 51 | void inline init_tensor(tensor_context* tensor, uint32_t len) {
 52 |     tensor->data.data_int = new int32_t[len]();
 53 |     tensor->isCompleted = true;
 54 |     tensor->isOccupy = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]();
 55 |     tensor->isCollision = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]();
 56 |     tensor->isFloat = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]();
 57 |     tensor->len = 0;
 58 |     tensor->num_worker = 0;
 59 |     tensor->key = 0xffffffffffffffff;
 60 |     tensor->window_manager = new WindowManager[MAX_WORKER];
 61 |     for (int i = 0; i < MAX_WORKER; i++) {
 62 |         tensor->window_manager[i].isACKed = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]();
 63 |         tensor->window_manager[i].total_ACK = MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1;
 64 |     }
 65 | }
 66 | 
 67 | int inline check_tensor_available(tensor_context* tensor, agghdr* p4ml_header, int thread_id) {
 68 |     // printf("*skey: %d, seq: %d\n", *skey, p4ml_header->seq_num);
 69 | 
 70 |     // Already have completed model and not retrieve
 71 |     if (tensor->isCompleted && p4ml_header->key != tensor->key) {
 72 |         int total_ACK = ceil((float)p4ml_header->len_tensor / MAX_ENTRIES_PER_PACKET);
 73 |         for (int i = 0; i < p4ml_header->num_worker; i++) 
 74 |             tensor->window_manager[i].Reset(total_ACK);
 75 |         // if (thread_id == 0)
 76 |         // printf("Reset tensors[%d] LAST_ACK: %d\n", *skey, tensor->window_manager[0].last_ACK);
 77 |         memset(tensor->data.data_int, 0, sizeof(int32_t) * p4ml_header->len_tensor);
 78 |         memset(tensor->isOccupy, 0, sizeof(bool) * (total_ACK + 1));
 79 |         memset(tensor->isCollision, 0, sizeof(bool) * (total_ACK + 1));
 80 |         memset(tensor->isFloat, 0, sizeof(bool) * (total_ACK + 1));
 81 |         tensor->num_worker = p4ml_header->num_worker; 
 82 |         tensor->len = p4ml_header->len_tensor;
 83 |         tensor->isCompleted = false;
 84 |         tensor->key = p4ml_header->key;
 85 |         // printf("Place %d available, real key = %d\n", *skey, tensors[*skey].key);
 86 |         return 1;
 87 |     } 
 88 |     return 0;
 89 | }
 90 | 
 91 | void inline makeTensorReadyforFloat(agghdr *p4ml_header, tensor_context *tensor_cnt) {
 92 |     int32_t* data = tensor_cnt->data.data_int;
 93 |     uint16_t *p_seq = &p4ml_header->seq_num;
 94 |     int32_t *p_model = p4ml_header->vector;
 95 |     uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET;
 96 |     
 97 |     /* Reset Data */
 98 |     memset(data + offset, 0, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET);
 99 |     tensor_cnt->isOccupy[*p_seq] = false;
100 |     
101 |     /* Reset Bitmap */
102 |     for (int i = 0; i < p4ml_header->num_worker; i++) {
103 |         tensor_cnt->window_manager[i].isACKed[p4ml_header->seq_num] = 0;
104 |     }
105 | }
106 | 
107 | void inline updateModel(agghdr *p4ml_header, tensor_context *dst_place, bool isFloat) {
108 |     int32_t* data = dst_place->data.data_int;
109 |     uint16_t *p_seq = &p4ml_header->seq_num;
110 |     uint32_t *tensor_len = &p4ml_header->len_tensor;
111 |     int32_t *p_model = p4ml_header->vector;
112 |     uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET;
113 |     // printf("dst_place->isOccupy[%d]: %d\n", *p_seq - 1, dst_place->isOccupy[*p_seq - 1]);
114 |     if (!dst_place->isOccupy[*p_seq]) {
115 |         // printf("replace\n");
116 |         if (offset < *tensor_len) {
117 |             if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len)
118 |                 memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET));
119 |             else
120 |                 memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET);
121 |         } else {
122 |             printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len);
123 |         }
124 |         dst_place->isOccupy[*p_seq] = true;
125 |     } else {
126 |         // printf("addition\n");
127 |         if (isFloat) {
128 |             float* data = dst_place->data.data_float;
129 |             float* p_model = (float*) p4ml_header->vector;
130 | 
131 |             if (offset < *tensor_len) {
132 |                 for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
133 |                     data[offset + i] += p_model[i];
134 |             } else {
135 |                 printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len);
136 |             }
137 |         } else {
138 |             if (offset < *tensor_len) {
139 |                 for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++)
140 |                     data[offset + i] +=  p_model[i];
141 |             } else {
142 |                 printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len);
143 |             }
144 |         }
145 |     }
146 | }
147 | 
148 | void inline updateModel_force(agghdr *p4ml_header, tensor_context *dst_place) {
149 |     int32_t* data = dst_place->data.data_int;
150 |     uint16_t *p_seq = &p4ml_header->seq_num;
151 |     uint32_t *tensor_len = &p4ml_header->len_tensor;
152 |     int32_t *p_model = p4ml_header->vector;
153 |     uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET;
154 |     
155 |     if (offset < *tensor_len) {
156 |         if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len)
157 |             memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET));
158 |         else
159 |             memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET);
160 |     } else {
161 |         printf("Update with offset %d > tensor length %d, something wrong.\n", offset, *tensor_len);
162 |     }
163 |     dst_place->isOccupy[*p_seq] = true;
164 | }


--------------------------------------------------------------------------------