├── .gitignore
├── README.md
├── evaluation
│   ├── README.md
│   └── figures
│       ├── example_topology.jpg
│       ├── plot.png
│       ├── plot_100000_05_netcache.png
│       ├── plot_100000_05_nocache.png
│       └── plot_netcache.png
├── report
│   ├── Makefile
│   ├── README.md
│   ├── figures
│   │   ├── cache_coherency_update.png
│   │   ├── eth-nsg-header.pdf
│   │   └── query_statistics.jpg
│   ├── refs.bib
│   ├── report.pdf
│   └── report.tex
└── src
    ├── README.md
    ├── control_plane
    │   ├── controller.py
    │   └── crc.py
    ├── kv_store
    │   ├── client_api.py
    │   ├── exec_queries.py
    │   ├── gen_plots.py
    │   ├── gen_zipf_samples.py
    │   ├── metrics.py
    │   ├── produce_keyvals.sh
    │   ├── results
    │   │   ├── zipf_sample_100000_05_8_netcache.txt
    │   │   ├── zipf_sample_100000_05_8_nocache.txt
    │   │   ├── zipf_sample_100000_095_8_netcache.txt
    │   │   └── zipf_sample_100000_09_8_netcache.txt
    │   ├── server.py
    │   └── test.py
    └── p4
        ├── core
        │   ├── egress.p4
        │   ├── ingress.p4
        │   ├── netcache.p4
        │   └── query_statistics.p4
        ├── include
        │   ├── headers.p4
        │   └── parsers.p4
        ├── init_servers.sh
        ├── p4app.json
        ├── p4app_4_1.json
        ├── p4app_8_1.json
        └── p4app_gen.sh

/.gitignore:
--------------------------------------------------------------------------------
1 | # don't track logs
2 | src/p4/log/
3 | *.log
4 | 
5 | # ignore pycache
6 | src/kv_store/__pycache__/
7 | src/kv_store/data
8 | 
9 | # ignore compiled files
10 | *.p4i
11 | *.pyc
12 | 
13 | # ignore db generated files
14 | *.db
15 | 
16 | # ignore vim swap files
17 | *.swp
18 | *.swo
19 | 
20 | # ignore json files except p4app.json
21 | *.json
22 | !src/p4/p4app*.json
23 | 
24 | # ignore latex related files
25 | report/*.aux
26 | report/*.bbl
27 | report/*.blg
28 | report/*.fdb_latexmk
29 | report/*.fls
30 | report/*.out
31 | report/*.synctex.gz
32 | report/*.toc
33 | 
34 | # ignore results files
35 | src/kv_store/results/
36 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NetCache: Balancing Key-Value Stores with Fast In-Network Caching
2 | 
3 | This is an open source implementation of the [NetCache paper](https://www.cs.jhu.edu/~xinjin/files/SOSP17_NetCache.pdf).
4 | Unlike the paper, which targets the Tofino chip, we implement NetCache targeting the
5 | BMv2 simple\_switch architecture. Since the main principles of the implementation stay the same,
6 | this repository can also be used as a reference for any target architecture.
7 | 
8 | 
9 | ## Introduction
10 | Recent advancements in the field of programmable switches, along with the introduction of
11 | the PISA architecture, have enabled us to rethink and redesign various systems by allowing
12 | us to program the switch packet-processing pipeline while operating at line rate.
13 | Many key-value stores have shifted away from flash- and disk-based storage to keeping their
14 | data in memory, which calls for a caching layer that can serve queries significantly faster
15 | than the stores themselves. NetCache exploits the capabilities of modern switches
16 | to provide such a layer and offers in-network caching for in-memory key-value stores.
17 | 
18 | NetCache is a novel approach to implementing a blazingly fast cache residing inside the switch.
19 | Due to the current memory limitations of programmable switches, this cache is
20 | not meant to be an ordinary fully-fledged cache; rather, it is expected to achieve a medium
21 | cache hit ratio (< 50 %).
This ratio proves to be sufficient for load balancing,
22 | because highly skewed workloads, with disproportionately more accesses to a few hot keys,
23 | make up most of the real-world access patterns in typical key-value stores.
24 | 
25 | Our NetCache implementation contains the following components:
26 | * Data plane design of the P4 switch
27 | * Controller using the Thrift API to dynamically modify P4 switch behavior (control plane)
28 | 
29 | Apart from the NetCache implementation, we also implemented the following:
30 | * Simple distributed in-memory key-value store (without data replication)
31 | * Python client API for our in-memory key-value store
32 | 
33 | ## System Architecture
34 | The architecture of NetCache is divided into the data-plane logic, which is implemented
35 | in P4 and runs directly inside the switch (by compiling and loading the executable),
36 | and the control-plane logic, which is implemented in Python and utilizes the Thrift
37 | API to dynamically modify various components of the switch (e.g. match-action tables
38 | and registers).
39 | 
40 | The data plane design is optimized to use as little memory as possible, since it
41 | is restricted by the memory limitations of the P4 switch. On the other hand, the
42 | controller runs on a typical machine (e.g. a server) and its communication with the
43 | switch does not require heavy optimization, since this communication does not
44 | happen on the critical data path.
45 | 
46 | 
47 | ### Data-Plane Design
48 | The data plane architecture of NetCache comprises the following important modules:
49 | * Value Processing Module
50 | * Query Statistics Module
51 | * L2/L3 Forwarding Module
52 | 
53 | 
54 | #### Value Processing module
55 | This module is responsible for producing the actual values that will be returned to the
56 | client performing the query. One major constraint of P4 switches is that they
57 | provide limited memory at each stage, which does not allow us to keep the full cache as
58 | a single register array inside the switch. To circumvent this constraint, multiple
59 | register arrays are used (along with multiple stages) and their values are combined
60 | to produce the actual value.
61 | 
62 | To efficiently combine the values of each register array, the controller assigns to each key
63 | inserted in the cache an index along with a bitmap (through a match-action table),
64 | which are later used by the packet processing pipeline to recreate the value corresponding
65 | to this key. The index is the position accessed in the register array
66 | at each stage, while the bitmap indicates whether the register array at a specific stage
67 | contributes its value to the final value. This approach minimizes memory consumption
68 | while also overcoming the limitation of being able to store only very small values
69 | inside the switch.
70 | 
71 | 
72 | #### Query Statistics module
73 | This module is responsible for deciding which key-value pairs should actually be inserted
74 | into the cache. Its architecture is also designed around the memory
75 | constraints of the switch. The typical approach of keeping a counter for each key accessed
76 | and then inserting the most popular ones is not immediately applicable, because there is not
77 | enough memory to deploy such a solution. NetCache takes a probabilistic approach to this
78 | problem by trading accuracy for memory space.
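To make this trade-off concrete, the following minimal Python model shows the idea behind a count-min sketch, the structure used to track uncached keys (the data structures actually kept on the switch are listed right after it). The class and its parameters are purely illustrative; the P4 implementation realizes each row as a register array indexed by a differently-parameterized CRC32 hash.

```python
import hashlib

class CountMinSketch:
    """Approximate per-key frequency counter (illustrative model only)."""

    def __init__(self, rows=4, cols=1024):
        self.rows = rows    # independent hash functions (register arrays in P4)
        self.cols = cols    # counters per row; total memory = rows * cols counters
        self.table = [[0] * cols for _ in range(rows)]

    def _index(self, key, row):
        # One index per row, derived from a salted hash; the switch uses a
        # different CRC32 polynomial per register array instead.
        digest = hashlib.sha1(("%d:%s" % (row, key)).encode()).hexdigest()
        return int(digest, 16) % self.cols

    def update(self, key):
        # On every read query for an uncached key, bump one counter per row.
        for r in range(self.rows):
            self.table[r][self._index(key, r)] += 1

    def query(self, key):
        # Collisions can only inflate counters, so the minimum over all rows
        # is an upper bound on the true frequency -- and usually a tight one.
        return min(self.table[r][self._index(key, r)] for r in range(self.rows))
```

A key whose estimate crosses a configurable threshold is reported to the controller as hot; the memory cost stays fixed at `rows * cols` small counters no matter how many distinct keys are queried.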
79 | 
80 | Specifically, NetCache maintains the following probabilistic data structures:
81 | * Count-Min Sketch to keep the approximate frequency of queries on uncached keys
82 | * Bloom Filter to avoid reporting hot keys multiple times to the controller
83 | * Register array of packet counters to count accesses to cached keys
84 | 
85 | Below, we provide the figure describing the query statistics module as presented in the paper:
86 | 
87 | 
88 | ![Query Statistics Module](https://github.com/dlekkas/netcache/blob/master/report/figures/query_statistics.jpg)
89 | 
90 | Additionally, to restrict the number of bits used by each array index of the count-min sketch,
91 | the bloom filter and the counters of cached keys, NetCache employs a scheme of resetting
92 | the registers after a configurable time interval. In contrast with the paper, we have not
93 | placed a sampling component in front of the query statistics module; we prefer
94 | to examine all packets to extract our statistics.
95 | 
96 | 
97 | #### L2/L3 Forwarding module
98 | This module provides the typical networking functionality and is responsible for forwarding or
99 | routing packets using standard L2/L3 protocols. In our case, since the routing/forwarding
100 | behavior was not our primary goal, we have currently implemented a simple static L2 forwarding
101 | scheme.
102 | 
103 | On startup, the controller reads the topology and all the interconnected interfaces, and populates
104 | an L2 forwarding match-action table inside the P4 switch which contains static assignments,
105 | matching on the L2 destination address and providing the egress port that the corresponding
106 | packet should be forwarded to.
107 | 
108 | 
109 | ### Controller
110 | The controller has the responsibility of receiving hot-key reports from the switch and updating
111 | the cache accordingly, by modifying the lookup table inside the P4 switch and by allocating the
112 | memory required to store the value. As mentioned before (value processing module), the memory
113 | where the value resides is represented by a tuple of an index and a bitmap.
114 | 
115 | #### Memory Management Unit
116 | To decide where to place each key-value pair in the cache, the controller implements
117 | First-Fit, a classic heuristic for bin-packing problems, which we use
118 | to allocate memory slots to a given key. To evict a key, we simply deallocate
119 | its memory by marking its memory slots as empty and adding them back to the memory
120 | pool of the controller.
121 | 
122 | #### Cache Eviction Policy
123 | As proposed by the NetCache paper, we use an eviction policy similar to the one employed by
124 | Redis 3.0 (described in the [Redis blog](https://redis.io/topics/lru-cache)). In contrast to the
125 | paper, we do not take into account the count-min sketch counters of the uncached keys;
126 | we only use the counters of cached keys.
127 | 
128 | In particular, we employ an approximated LFU (Least Frequently Used) algorithm: we periodically
129 | check whether we have exceeded a specific memory usage (e.g. 80%), and if we are above this
130 | limit, we sample a configurable fraction of the cached keys and evict from the cache the K
131 | keys with the smallest counters. This operation can be implemented efficiently by using the
132 | quickselect algorithm to find the K elements with the smallest values, as sketched below.
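The fragment below sketches this eviction step. It mirrors the `cache_lfu_eviction` routine in `src/control_plane/controller.py` (which sorts the sampled counters), but with illustrative names and with Python's `heapq.nsmallest` standing in for quickselect; both select the K smallest entries without fully sorting the sample.

```python
import heapq
import random

def lfu_evict(cached_keys, read_counter, evict, sampling=0.2, to_remove=0.5):
    """Evict the least-frequently-used keys among a random sample.

    cached_keys  -- keys currently installed in the switch cache
    read_counter -- callable: key -> hit count since the last counter reset
    evict        -- callable that removes one key from the cache
    """
    n_samples = int(sampling * len(cached_keys))
    samples = random.sample(cached_keys, n_samples)

    # Select the K sampled keys with the smallest hit counters ...
    k = int(to_remove * n_samples)
    for key in heapq.nsmallest(k, samples, key=read_counter):
        # ... and free their lookup-table entries and value slots.
        evict(key)
```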
133 | 
134 | 
135 | #### Cache Coherency Unit
136 | Since the controller inserts items into and deletes items from the cache, we must ensure that cache
137 | coherency is also achieved; to do so, the controller needs to be able to
138 | communicate with the key-value store servers.
139 | 
140 | The controller maintains an out-of-band channel to communicate with the servers through
141 | Unix sockets, due to the peculiarities of enabling ordinary TCP/IP communication between
142 | them (e.g. they reside in different network namespaces).
143 | 
144 | When a delete/insert is completed, the controller informs the server so that it
145 | stops blocking further updates on the given key.
146 | 
147 | 
148 | ## Key Value Store
149 | In order to evaluate NetCache, we implemented our own in-memory distributed key-value store
150 | based on simple primitives; we also wanted this key-value store to be able to adequately
151 | serve the multiple clients and multiple servers present in our experiment scenarios.
152 | 
153 | ### Partitioning Scheme
154 | For our key-value store we decided to shard our data based on a simple range-based partitioning
155 | scheme. Essentially, we decide where to store each key by taking into account only its first letter,
156 | using the integer corresponding to its ASCII representation.
157 | 
158 | We also want to experiment with consistent-hashing partitioning, which offers much more robust
159 | partitioning and is also widely used by several distributed database products. However, by
160 | prepopulating the servers with values we could load them with data equally and avoid the skewness
161 | that would appear for such a partitioning scheme in a real-world scenario.
162 | 
163 | 
--------------------------------------------------------------------------------
/evaluation/README.md:
--------------------------------------------------------------------------------
1 | # Evaluation
2 | 
3 | Below we present evaluation results on NetCache. Our results demonstrate that NetCache
4 | achieves strong load balancing even in highly skewed workloads. Additionally, we show
5 | that, in theory, we also achieve significant performance improvements in throughput
6 | and latency. However, those metrics cannot be accurately evaluated in the virtualized
7 | network environment which is created through Mininet.
8 | 
9 | ## Getting Started
10 | 
11 | To facilitate our evaluation process, we have created several Python files and bash
12 | scripts which automate various procedures used throughout the process
13 | of evaluating NetCache. Below we briefly describe the files which are of high
14 | importance and are extensively used afterwards:
15 | 
16 | * `p4app_gen.sh`: creates well-formed p4app.json files with a variable number of
17 | clients and servers, where both of those numbers can be supplied through the command
18 | line options `-s <num-servers>` and `-c <num-clients>` respectively. For further
19 | reference, feel free to inspect the file and explore its usage.
20 | 
21 | * `produce_keyvals.sh`: generates data files containing key-value pairs which will
22 | subsequently be used to populate the key-value store of each server. Those key-value
23 | pairs take into account the range partitioning scheme used to assign the right keys to
24 | each server. This script takes as argument the number of servers, through the
25 | `-s <num-servers>` command line option.
27 | 
28 | * `init_servers.sh`: initiates a number of servers given as a command line argument
29 | and prepopulates them based on the values created by the script `produce_keyvals.sh`.
30 | Additionally, through command line arguments one can specify any server flags to supply.
31 | For reference on such flags, feel free to inspect the usage of `server.py` by running the
32 | command `server.py -h`. Such server flags can disable NetCache and suppress server output.
33 | 
34 | * `gen_zipf_samples.py`: generates a sequence of keys to query and outputs them
35 | to a file whose name is self-describing, based on the command line parameters. To invoke
36 | this file, one should specify the number of queries to generate through the
37 | `--n-queries <num-queries>` option, the number of servers through the
38 | `--n-servers <num-servers>` option and the skewness of the workload through the
39 | `--skew <skew>` option, which represents a parameter between 0 and 1. The workload
40 | generated by this script is based on the Zipf distribution, which is a typical kind
41 | of workload for testing key-value stores and is also used for evaluation in the
42 | original NetCache paper.
43 | 
44 | * `exec_queries.py`: executes read queries for all the keys residing in the files
45 | given as arguments; the number of servers is also specified as an argument. After the
46 | queries execute, important statistics are reported, including throughput, latency and load distribution.
47 | 
48 | ## Requirements
49 | 
50 | To reproduce our results and execute our scripts successfully, the following dependencies are required:
51 | * `sudo apt install python3-matplotlib`
52 | * `sudo apt install python3-numpy`
53 | 
54 | ### Simple Example
55 | First, we present a simple example where we showcase the functionality of NetCache
56 | by realising a simple topology and by executing a script, `test.py`, which performs
57 | a sequence of queries on the key-value store.
58 | 
59 | Our simple topology consists of 1 client and 4 servers and is visualised
60 | in the figure below:
61 | ![example-topo](figures/example_topology.jpg)
62 | 
63 | 
64 | Below are step-by-step instructions to run this simple example:
65 | 
66 | 1. Navigate to `src/kv_store` and execute the `produce_keyvals.sh` script to generate 500
67 | key-value pairs for each server:
68 | ```bash
69 | ./produce_keyvals.sh -s 4 -n 500
70 | ```
71 | 2. Navigate to the `src/p4` directory and start the topology with the appropriate p4app file:
72 | ```bash
73 | sudo p4run --config p4app_4_1.json
74 | ```
75 | 3. In another terminal window, navigate to `src/control_plane` and start the controller:
76 | ```bash
77 | sudo python controller.py
78 | ```
79 | 4. In another terminal window, navigate to the `src/p4` directory and initialize the servers while
80 | also prepopulating them with the values generated at step 1:
81 | ```bash
82 | ./init_servers.sh 4
83 | ```
84 | 5. In another terminal window, navigate to `src/kv_store` and start a client to execute the
85 | program `test.py`, which uses the NetCache client API to perform a number of queries on the key-value store:
86 | ```bash
87 | mx client1 python3 test.py
88 | ```
89 | 6. Verify that NetCache works as intended. The comments in `test.py` explain the desired behavior;
90 | notice that two keys are intentionally not found and trigger a corresponding error. The behavior
91 | is shown even more clearly by also inspecting the output of the servers and the controller. A minimal sketch of such a client program is given below.
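For reference, a client program in the spirit of `test.py` looks roughly like the sketch below. The `NetCacheClient` class comes from `src/kv_store/client_api.py`; the query helpers shown (`put`, `read`) are placeholder names for this sketch -- check `client_api.py` for the exact method names and signatures.

```python
from client_api import NetCacheClient

# one client talking to the 4 servers of the example topology
client = NetCacheClient(n_servers=4)

# hypothetical helper calls -- see client_api.py for the real API:
client.put("fruit", "apple")   # write is routed to the responsible server
client.read("fruit")           # first read is answered by the server ...
client.read("fruit")           # ... repeated hot reads are answered by the switch
client.read("missing-key")     # expected to report a key-not-found error
```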
92 | 
93 | 
94 | 
95 | ## Experiments
96 | In this section, we conduct experiments by generating a query workload that more closely
97 | resembles that of a real-world scenario. We achieve this by generating the keys to query based
98 | on the Zipf distribution. Below, we present experiments which also serve as examples,
99 | enabling readers to conduct their own independent experiments by tweaking some
100 | script and system parameters.
101 | 
102 | 
103 | ### Experiment 1
104 | Our first experimental topology consists of 1 client and 8 servers. We generate key-value
105 | pairs and populate each server with 10000 distinct keys. Based on this data, we use the
106 | Zipf distribution to generate a workload of 100000 queries. The skewness of the workload is
107 | determined by the skewness parameter provided as a command line option of
108 | `gen_zipf_samples.py`.
109 | 
110 | To evaluate NetCache under the conditions described above, follow the instructions below:
111 | 
112 | 1. Navigate to `src/kv_store` and execute the `produce_keyvals.sh` script to generate
113 | 10000 key-value pairs for each server:
114 | ```bash
115 | ./produce_keyvals.sh -s 8 -n 10000
116 | ```
117 | 
118 | 2. Inside the `src/kv_store` directory, use `gen_zipf_samples.py` to generate a Zipf-based
119 | query workload of 100000 queries with a skewness parameter of 0.5:
120 | ```bash
121 | python3 gen_zipf_samples.py --n-servers 8 --n-queries 100000 --skew 0.5
122 | ```
123 | 
124 | 3. Navigate to the `src/p4` directory and execute `p4app_gen.sh` to create a p4app JSON
125 | file that matches our desired topology:
126 | ```bash
127 | ./p4app_gen.sh -s 8 -c 1
128 | ```
129 | 
130 | 4. Inside `src/p4`, use the previously generated p4app file to start the topology:
131 | ```bash
132 | sudo p4run --config p4app_8_1.json
133 | ```
134 | 
135 | 5. In another terminal window, navigate to `src/control_plane` and start the controller:
136 | ```bash
137 | sudo python controller.py
138 | ```
139 | 
140 | 6. In another terminal window, navigate to `src/p4` and initialize the servers while
141 | also prepopulating them with the key-value pairs generated at step 1:
142 | ```bash
143 | ./init_servers.sh 8
144 | ```
145 | 
146 | 7. In another terminal window, navigate to `src/kv_store` and spawn a client to execute
147 | the workload that we generated at step 2:
148 | ```bash
149 | mkdir -p results
150 | mx client1 python3 exec_queries.py --n-servers 8 --suppress --input data/zipf_sample_100000_05.txt
151 | ```
152 | 
153 | 8. Inside `src/kv_store`, use the `gen_plots.py` script and the results file generated
154 | by the previous step to display a plot of the load (number of requests) handled by each server:
155 | ```bash
156 | python3 gen_plots.py --input results/zipf_sample_100000_05_8_netcache.txt
157 | ```
158 | 
159 | Below, we present the plot generated by the previous command, which demonstrates remarkable
160 | results with very satisfying load balancing given the skewness of the workload:
161 | 
162 | ![netcachexxx](figures/plot_100000_05_netcache.png)
163 | 
164 | 
165 | To appreciate the contribution of our NetCache implementation, we also present below
166 | how to execute the same workload without in-network caching, and we produce the
167 | same plot to showcase the difference:
168 | 
169 | 1. Navigate to the `src/p4` directory and execute the `init_servers.sh` script again to initialize
170 | 8 servers, but this time with the cache disabled:
171 | ```bash
172 | ./init_servers.sh 8 --disable-cache
173 | ```
174 | 
175 | 2.
In another terminal window, navigate to `src/kv_store` and spawn a client to execute
176 | again the workload that we generated at step 2, but with caching disabled:
177 | ```bash
178 | mkdir -p results
179 | mx client1 python3 exec_queries.py --n-servers 8 --disable-cache --suppress --input data/zipf_sample_100000_05.txt
180 | ```
181 | 
182 | 3. Inside `src/kv_store`, use the `gen_plots.py` script and the results file generated
183 | by the previous step to display a plot of the load (number of requests) handled by each server:
184 | ```bash
185 | python3 gen_plots.py --input results/zipf_sample_100000_05_8_nocache.txt
186 | ```
187 | 
188 | Below, we present the load distribution among servers without NetCache:
189 | 
190 | ![nocachexxx](figures/plot_100000_05_nocache.png)
191 | 
192 | 
193 | Hence, we confirm the correctness of our implementation, as well as the impact of its
194 | deployment on the equal distribution of load among the storage nodes.
195 | 
196 | 
--------------------------------------------------------------------------------
/evaluation/figures/example_topology.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/evaluation/figures/example_topology.jpg
--------------------------------------------------------------------------------
/evaluation/figures/plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/evaluation/figures/plot.png
--------------------------------------------------------------------------------
/evaluation/figures/plot_100000_05_netcache.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/evaluation/figures/plot_100000_05_netcache.png
--------------------------------------------------------------------------------
/evaluation/figures/plot_100000_05_nocache.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/evaluation/figures/plot_100000_05_nocache.png
--------------------------------------------------------------------------------
/evaluation/figures/plot_netcache.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/evaluation/figures/plot_netcache.png
--------------------------------------------------------------------------------
/report/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | NOTE = !! change the next line to fit your filename; no spaces at file name end !!
3 | FILE = report
4 | 
5 | all:
6 | 	pdflatex $(FILE)
7 | 	bibtex $(FILE)
8 | 	pdflatex $(FILE)
9 | 	pdflatex $(FILE)
10 | 
11 | clean:
12 | 	rm -f *.dvi *.log *.aux *.bbl *.blg *.toc *.lof *.lot *.cb *.~ *.out *.fdb_latexmk *.fls
--------------------------------------------------------------------------------
/report/README.md:
--------------------------------------------------------------------------------
1 | # Report
2 | 
3 | Put all report-related files and documents here. You can erase this README if
4 | nothing needs to be added to the README.
5 | -------------------------------------------------------------------------------- /report/figures/cache_coherency_update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/report/figures/cache_coherency_update.png -------------------------------------------------------------------------------- /report/figures/eth-nsg-header.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/report/figures/eth-nsg-header.pdf -------------------------------------------------------------------------------- /report/figures/query_statistics.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/report/figures/query_statistics.jpg -------------------------------------------------------------------------------- /report/refs.bib: -------------------------------------------------------------------------------- 1 | @article{bosshart2014p4, 2 | title={P4: Programming protocol-independent packet processors}, 3 | author={Bosshart, Pat and Daly, Dan and Gibb, Glen and Izzard, Martin and McKeown, Nick and Rexford, Jennifer and Schlesinger, Cole and Talayco, Dan and Vahdat, Amin and Varghese, George and others}, 4 | journal={ACM SIGCOMM Computer Communication Review}, 5 | volume={44}, 6 | number={3}, 7 | pages={87--95}, 8 | year={2014}, 9 | publisher={ACM} 10 | } 11 | 12 | @inproceedings{jin2017netcache, 13 | title={Netcache: Balancing key-value stores with fast in-network caching}, 14 | author={Jin, Xin and Li, Xiaozhou and Zhang, Haoyu and Soul{\'e}, Robert and Lee, Jeongkeun and Foster, Nate and Kim, Changhoon and Stoica, Ion}, 15 | booktitle={Proceedings of the 26th Symposium on Operating Systems Principles}, 16 | pages={121--136}, 17 | year={2017}, 18 | organization={ACM} 19 | } 20 | 21 | @inproceedings{fan2011small, 22 | title={Small cache, big effect: Provable load balancing for randomly partitioned cluster services}, 23 | author={Fan, Bin and Lim, Hyeontaek and Andersen, David G and Kaminsky, Michael}, 24 | booktitle={Proceedings of the 2nd ACM Symposium on Cloud Computing}, 25 | pages={23}, 26 | year={2011}, 27 | organization={ACM} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /report/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlekkas/netcache/7e37194319b43ce1be5f4eb54416fbbeb76c6a24/report/report.pdf -------------------------------------------------------------------------------- /report/report.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt,oneside,a4paper]{article} 2 | \usepackage{graphicx} 3 | \usepackage{booktabs} 4 | \usepackage{caption} 5 | \usepackage{subcaption} 6 | \usepackage{amsmath} 7 | \usepackage{amsfonts} 8 | \usepackage{amssymb} 9 | \usepackage{lscape} 10 | \usepackage{psfrag} 11 | \usepackage[usenames]{color} 12 | \usepackage{bbm} 13 | \usepackage[update]{epstopdf} 14 | \usepackage[bookmarks,pdfstartview=FitH,a4paper,pdfborder={0 0 0}]{hyperref} 15 | \usepackage{verbatim} 16 | \usepackage{listings} 17 | \usepackage{textcomp} 18 | \usepackage{fancyhdr} 19 | \usepackage{multirow} 20 | \usepackage{tikz} 21 | \usepackage{lipsum} 22 
| \usepackage{xcolor}
23 | \usepackage[margin=1in]{geometry}
24 | \newcommand{\hint}[1]{{\color{blue} \em #1}}
25 | 
26 | \makeatletter
27 | \def\cleardoublepage{\clearpage\if@twoside \ifodd\c@page\else%
28 | \hbox{}%
29 | \thispagestyle{empty}%
30 | \clearpage%
31 | \if@twocolumn\hbox{}\clearpage\fi\fi\fi}
32 | \makeatother
33 | 
34 | \sloppy
35 | % \widowpenalty=10000
36 | % \clubpenalty=10000
37 | 
38 | \title{
39 | \vspace*{0.0mm}
40 | \LARGE\bf\sf Advanced Topics in \\Communication Networks (Fall 2018)
41 | \vspace*{10.0mm} \\
42 | \Large\bf\sf Group Project Report \vspace*{30.0mm}\\
43 | %
44 | \Huge\bf\sf NetCache: Balancing Key-Value Stores with Fast In-Network Caching
45 | %
46 | \vspace*{30.0mm} \\
47 | \normalsize
48 | %
49 | \sf Authors:\\[5pt]
50 | \sf Malte Brodmann \\ [5pt]
51 | \sf Dimitris Lekkas \vspace*{5mm}\\
52 | %
53 | \sf Advisor: Thomas Holterbach \vspace*{5mm}\\
54 | %
55 | \sf Supervisor: Prof. Dr. Laurent Vanbever \vspace*{20.0mm}\\
56 | %
57 | \sf Submitted: Dec 16, 2019\\ [5pt]
58 | \sf \pageref{lastpage} Pages
59 | }
60 | \date{}
61 | 
62 | \begin{document}
63 | 
64 | \begin{figure}
65 | \includegraphics[width=\textwidth]{figures/eth-nsg-header}
66 | \end{figure}
67 | 
68 | \maketitle
69 | \thispagestyle{empty}
70 | \raggedbottom
71 | \clearpage
72 | 
73 | \pagenumbering{roman}
74 | 
75 | \begin{abstract}
76 | The main result of this project is a P4 implementation of the NetCache key-value store architecture.
77 | Following its major ideas, we implemented most of the aspects and algorithms that are described in the paper.
78 | Additionally, we implemented simple key-value stores, a client API and a controller application, as well as an environment to test our implementation.
79 | Some of the challenges included (...)
80 | Our P4 code does not target a specific programmable switch architecture but was developed and tested for the BMv2 software switch.
81 | Thus we were not able to directly compare our implementation with the results of the paper, which were obtained using a Tofino switch.
82 | However, we ran an experiment that shows that our implementation achieves good load balancing for key-value stores connected to a switch running our P4 application.
83 | (results ...)
84 | 
85 | \end{abstract}
86 | 
87 | \clearpage
88 | \setcounter{tocdepth}{2}
89 | \tableofcontents
90 | \clearpage
91 | \pagenumbering{arabic}
92 | 
93 | \section{Introduction}
94 | \hint{Introduction to the problem that was solved in this project.} \\
95 | The main goal of this project is to give an implementation of NetCache \cite{jin2017netcache}.
96 | NetCache is a new approach that presents one possible answer to the problem of load balancing key-value stores, especially in the context of high-throughput internet services.
97 | The main contribution of the paper is a new key-value store architecture where data is cached on programmable switches to allow for dynamic load balancing.
98 | As part of our project we implemented a P4 application that enables programmable switches placed on network paths to key-value stores to cache data.
99 | Our implementation follows the major concepts and techniques presented in the paper with only minor adaptations.
100 | As the scope of the project was limited, we only provide a P4 implementation for a software switch, namely the BMv2 simple switch architecture, instead of targeting an actual programmable switch (e.g. Barefoot Tofino).
101 | In addition, we also implemented a simple NetCache-compatible in-memory key-value store and a client API to be able to test our NetCache implementation for correctness.
102 | Finally, we conducted a small experiment that is inspired by the nature of actual key-value store workloads.
103 | These workloads are usually highly skewed in practice.
104 | We examined whether our implementation achieves the dynamic load balancing of the connected key-value stores presented in the paper.
105 | 
106 | 
107 | 
108 | \section{Background and related work}
109 | \hint{Briefly describe background information and related papers (if any). You do not need to describe topics that were covered in the lecture, only other topics that are relevant for your project.} \\
110 | Many modern internet or web applications highly depend on high-throughput key-value stores.
111 | Recent studies showed that the workload for such services is oftentimes skewed, resulting in a rather small set of keys which is queried very frequently, whereas the majority of keys are only queried occasionally.
112 | This can lead to situations where some key-value stores cannot handle the large amount of queries they receive while others are barely utilized.
113 | An important theoretical result proves that caching can be used to improve load balancing for key-value stores \cite{fan2011small}.
114 | However, there are two important requirements for this approach.
115 | First, for $N$ key-value store nodes the cache must be able to store $O(N \log N)$ items.
116 | Second, the throughput of the cache must be at least the aggregate throughput of all key-value store nodes.
117 | With the recent shift from flash- and disk-based racks to in-memory key-value stores, it becomes increasingly hard to satisfy this requirement using in-memory caches.
118 | By moving the caching layer into the network, the throughput of the cache can be increased to provide the aggregate throughput of all in-memory key-value stores.
119 | NetCache gives such an implementation of a cache placed in the network.
120 | (perhaps also address some of the challenges that are involved)
121 | 
122 | \section{Implementation}
123 | \hint{Describe how you solved the problem and how your implementation works. Do not paste source code here.} \\
124 | We implemented all major aspects described in the NetCache paper.
125 | As the paper extensively covers all employed techniques and algorithms, we mostly focus on the challenges we faced and on aspects that were not specified precisely by the NetCache authors.
126 | In addition, we briefly describe our client API, key-value store and controller implementations, as well as our test setup.
127 | 
128 | \subsection{Setup and network topology}
129 | 
130 | For our P4 application, we assumed and tested our implementation using the following network topology.
131 | The topology consists of a set of servers running our key-value store application.
132 | They are connected to a programmable switch which runs our P4 NetCache application and also implements L2 and L3 layer forwarding.
133 | Additionally, a client is directly connected to this switch as well.
134 | The client can send queries to every key-value store instance using our client API.
135 | 
136 | \subsection{NetCache Protocol}
137 | 
138 | \subsubsection{Packet format}
139 | 
140 | The authors of NetCache proposed an application-layer protocol that is used to send and answer key-value store queries in the context of NetCache.
141 | The major header fields of the protocol are OP, SEQ, KEY and VALUE.
142 | OP denotes the corresponding operation of the query (e.g. Get, Put or Delete), SEQ represents a sequence number, KEY is the key of a key-value pair stored in a key-value store, and VALUE is a variable-length field storing the respective value.
143 | All of our application components implement this protocol, using UDP for Get/Read queries and TCP for Put/Write and Delete queries, with some minor adaptations.
144 | First, as the authors did not explain how they implemented the variable-length value field, we fixed the length of the value field to 128 bytes.
145 | Thus we allow for values with a length of at most 128 bytes.
146 | We achieve variable-length values by adding zero bytes to all values that do not reach this limit.
147 | Another option would be to introduce a new protocol header field that states the length of the value.
148 | However, we decided against this approach so as not to change the NetCache protocol on a large scale.
149 | Second, we introduce a few new values for the operation field of the NetCache header.
150 | These represent specific states that are important for cache coherence and will be explained later in section ...
151 | 
152 | 
153 | \subsubsection{Network Routing}
154 | 
155 | On top of NetCache, the P4 application we implemented also supports L2 and L3 layer forwarding.
156 | All corresponding forwarding tables are populated assuming the topology described above.
157 | 
158 | \subsection{Query Handling}
159 | 
160 | After receiving a packet, the switch parses it using the corresponding L2, L3 and L4 layer parsers.
161 | A packet is parsed as a NetCache packet when the respective UDP or TCP packet contains the special port number we reserved for the NetCache protocol as either its source or destination port.
162 | Additionally, the payload of this packet must contain the NetCache packet header values.
163 | We reserve one byte for OP, four bytes for SEQ, 16 bytes for KEY and 128 bytes for the VALUE field.
164 | Therefore a packet must have a payload length of 149 bytes in order to be correctly parsed as a NetCache packet.
165 | 
166 | Packets that were not parsed as NetCache packets will simply be forwarded using the corresponding routing tables.
167 | The following sections describe how NetCache packets are handled by our switch application.
168 | The switch application performs different actions depending on the operation of the NetCache query.
169 | By simply looking at the OP header field, the switch is able to distinguish between the different types of queries.
170 | 
171 | \subsubsection{Get / Read queries}
172 | 
173 | When receiving a Read query, the switch first has to determine whether it has cached the queried key.
174 | If so, the switch can directly reply to this query; otherwise it has to forward the query to the key-value store.
175 | To do so, we follow the approach described in the paper.
176 | A match-action table matching on the NetCache KEY header field is used to determine whether the respective item was cached by the switch or not.
177 | The corresponding table is maintained by the controller, i.e. the controller inserts or removes entries whenever a new item is inserted into or evicted from the cache.
178 | This is done using the SimpleSwitchAPI.
179 | If there is no matching key in the table, the switch will simply continue to forward the packet.
180 | If the switch cached the queried item, we additionally have to check whether the respective cache entry is valid.
181 | This is important to ensure cache coherence (see section ...).
182 | Following the descriptions of the paper, we use a register array that contains a bit for every possible slot where a cache item can be stored.
183 | The bit represents the validity of the corresponding cache item.
184 | If the read of the corresponding register returns the information that the cache item is invalid, the switch will only be able to forward the query.
185 | Otherwise, we know that the switch cached the queried item and should directly respond to the client.
186 | Therefore we swap all L2, L3 and L4 source and destination header fields and route the packet to the respective egress port.
187 | While all of the above happens in the ingress part, it is the egress part's responsibility to insert the value of the cached item into the VALUE header field.
188 | If an item is cached by the switch, applying the match-action table in the ingress part also sets some metadata. This metadata describes where the corresponding entry is located in the switch and how the value can be retrieved.
189 | Using this metadata, we append the missing information to the VALUE header field when the queried item is cached and valid.
190 | We will cover the details of how this is done in the section about memory management.
191 | This completes all actions to be performed for handling Read queries.
192 | 
193 | 
194 | 
195 | \subsubsection{Put / Write queries}
196 | 
197 | \subsubsection{Delete queries}
198 | 
199 | \subsection{Client API}
200 | 
201 | We implemented a simple client API in Python that can be used to send NetCache queries to a set of key-value stores.
202 | 
203 | \subsection{Key-Value Store}
204 | 
205 | \section{Evaluation}
206 | \hint{Describe how you tested your implementation and summarize the results.} \\
207 | As we are only able to test our implementation using software switches in the scope of this project, we are not able to directly reproduce the results of the paper.
208 | However, a correct implementation should still be able to achieve good load balancing for all key-value stores with high probability.
209 | Thus we generated Zipf-distributed workloads for our implementation using our client API.
210 | (...)
211 | 
212 | \section{Conclusion}
213 | \hint{A brief conclusion about how you solved the project.} \\
214 | \lipsum[1]
215 | 
216 | \label{lastpage} % this must stay here
217 | \clearpage
218 | \addcontentsline{toc}{section}{References}
219 | \bibliographystyle{acm}
220 | \bibliography{refs}
221 | 
222 | \clearpage
223 | \appendix
224 | \pagenumbering{Roman}
225 | 
226 | \section{Group organization}
227 | \hint{Briefly describe what each team member was contributing to the project}
228 | 
229 | \paragraph{Malte Brodmann}
230 | \lipsum[2]
231 | 
232 | \paragraph{Dimitris Lekkas}
233 | \lipsum[3]
234 | 
235 | \end{document}
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
1 | # Source Code
2 | 
3 | Brief description of each source code block:
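* `control_plane/`: the NetCache controller (`controller.py`), which populates the switch tables, manages the cache memory and handles hot-key reports, plus the CRC helper (`crc.py`) that lets the controller compute the same CRC32 hashes as the switch.
* `kv_store/`: the in-memory key-value store server (`server.py`), the Python client API (`client_api.py`), a small test program (`test.py`) and the evaluation helpers (`exec_queries.py`, `gen_zipf_samples.py`, `gen_plots.py`, `metrics.py`, `produce_keyvals.sh`).
* `p4/`: the data-plane implementation (`core/`, `include/`) together with the p4app topology files and the helper scripts `init_servers.sh` and `p4app_gen.sh`.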
4 | 
--------------------------------------------------------------------------------
/src/control_plane/controller.py:
--------------------------------------------------------------------------------
1 | from p4utils.utils.topology import Topology
2 | from p4utils.utils.sswitch_API import SimpleSwitchAPI
3 | from scapy.all import sniff, Packet, Ether, IP, UDP, TCP, BitField, Raw
4 | from crc import Crc
5 | import socket
6 | import threading
7 | import struct
8 | import random
9 | 
10 | # P4 SWITCH ACTION TABLE NAMES DEFINITIONS
11 | NETCACHE_LOOKUP_TABLE = "lookup_table"
12 | 
13 | 
14 | # P4 SWITCH REGISTER NAMES DEFINITIONS
15 | SKETCH_REG_PREFIX = "sketch"
16 | BLOOMF_REG_PREFIX = "bloom"
17 | CACHED_KEYS_COUNTER = "query_freq_cnt"
18 | 
19 | 
20 | BLOOMF_REGISTERS_NUM = 3
21 | SKETCH_REGISTERS_NUM = 4
22 | 
23 | 
24 | STATISTICS_REFRESH_INTERVAL = 30.0  # measured in seconds
25 | 
26 | VTABLE_NAME_PREFIX = 'vt'
27 | VTABLE_SLOT_SIZE = 8  # in bytes
28 | VTABLE_ENTRIES = 65536
29 | 
30 | CONTROLLER_MIRROR_SESSION = 100
31 | 
32 | NETCACHE_READ_QUERY = 0
33 | NETCACHE_HOT_READ_QUERY = 3
34 | NETCACHE_KEY_NOT_FOUND = 20  # ???
35 | NETCACHE_UPDATE_COMPLETE = 4
36 | NETCACHE_DELETE_COMPLETE = 5
37 | 
38 | UNIX_CHANNEL = '/tmp/server_cont.s'
39 | CACHE_INSERT_COMPLETE = 'INSERT_OK'
40 | 
41 | 
42 | crc32_polynomials = [0x04C11DB7, 0xEDB88320, 0xDB710641, 0x82608EDB,
43 |                      0x741B8CD7, 0xEB31D82E, 0x0D663B05, 0xBA0DC66B,
44 |                      0x32583499, 0x992C1A4C, 0x32583499, 0x992C1A4C]
45 | 
46 | 
47 | class NetcacheHeader(Packet):
48 |     name = 'NcachePacket'
49 |     fields_desc = [BitField('op', 0, 8), BitField('seq', 0, 32),
50 |                    BitField('key', 0, 128), BitField('value', 0, 512)]
51 | 
52 | 
53 | class NCacheController(object):
54 | 
55 |     def __init__(self, sw_name, vtables_num=8):
56 |         self.topo = Topology(db="../p4/topology.db")
57 |         self.sw_name = sw_name
58 |         self.thrift_port = self.topo.get_thrift_port(self.sw_name)
59 |         self.cpu_port = self.topo.get_cpu_port_index(self.sw_name)
60 |         self.controller = SimpleSwitchAPI(self.thrift_port)
61 | 
62 |         self.custom_calcs = self.controller.get_custom_crc_calcs()
63 |         self.sketch_register_num = len(self.custom_calcs)
64 | 
65 |         self.vtables = []
66 |         self.vtables_num = vtables_num
67 | 
68 | 
69 |         # create a pool of ids (one for each possible cached key); this pool
70 |         # will be used to assign an index to each key, which in turn indexes
71 |         # the cached-key counter and the validity register
72 |         self.ids_pool = range(0, VTABLE_ENTRIES * VTABLE_SLOT_SIZE)
73 | 
74 |         # array of bitmaps, which marks available slots per cache line
75 |         # as 0 bits and occupied slots as 1 bits
76 |         self.mem_pool = [0] * VTABLE_ENTRIES
77 | 
78 |         # number of memory slots used (useful for lfu eviction policy)
79 |         self.used_mem_slots = 0
80 | 
81 |         # dictionary storing the value table index, bitmap and counter/validity
82 |         # register index in the P4 switch that corresponds to each key
83 |         self.key_map = {}
84 | 
85 |         self.setup()
86 | 
87 |         #self.out_of_band_test()
88 | 
89 | 
90 |     def inform_server(self):
91 |         sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
92 |         try:
93 |             sock.connect(UNIX_CHANNEL)
94 |         except socket.error:
95 |             #print('Error: Unable to contact server for cache operation completion')
96 |             return
97 | 
98 |         sock.sendall(CACHE_INSERT_COMPLETE)
99 | 
100 | 
101 |     # reports the value of counters for each cached key
102 |     # (used only for debugging purposes)
103 |     def report_counters(self):
104 |         for key, val in self.key_map.items():
105 |             vt_idx, bitmap, key_idx = val
106 | 
107 |             res =
self.controller.counter_read(CACHED_KEYS_COUNTER, key_idx)
108 |             if res.packets != 0:
109 |                 print("[COUNTER] key = " + key + " [ " + str(res.packets) + " ]")
110 | 
111 | 
112 | 
113 |     # periodically reset registers pertaining to the query statistics module of
114 |     # the P4 switch (count-min sketch registers, bloom filters and counters)
115 |     def periodic_registers_reset(self):
116 |         t = threading.Timer(STATISTICS_REFRESH_INTERVAL, self.periodic_registers_reset)
117 |         t.daemon = True
118 |         t.start()
119 | 
120 |         # before resetting registers, check if the cache is utilized above a
121 |         # threshold (e.g 80%) and evict keys using the lfu policy if needed
122 |         self.cache_lfu_eviction(threshold=0.8, sampling=0.2, to_remove=0.5)
123 | 
124 |         # reset bloom filter related registers
125 |         for i in range(BLOOMF_REGISTERS_NUM):
126 |             self.controller.register_reset(BLOOMF_REG_PREFIX + str(i+1))
127 | 
128 |         # reset count min sketch related registers
129 |         for i in range(SKETCH_REGISTERS_NUM):
130 |             self.controller.register_reset(SKETCH_REG_PREFIX + str(i+1))
131 | 
132 |         # reset counter register storing the query frequency of each cached item
133 |         self.controller.counter_reset(CACHED_KEYS_COUNTER)
134 | 
135 |         print("[INFO]: Reset query statistics registers.")
136 | 
137 | 
138 | 
139 |     # the controller periodically checks if the memory used has exceeded a given
140 |     # threshold (e.g 80 %) and if so it evicts keys according to an approximated
141 |     # LFU policy inspired by Redis (https://redis.io/topics/lru-cache)
142 |     def cache_lfu_eviction(self, threshold=0.8, sampling=0.2, to_remove=0.5):
143 | 
144 |         # if the threshold has not been surpassed then there is nothing to do
145 |         if self.used_mem_slots <= (threshold * len(self.mem_pool) * VTABLE_SLOT_SIZE):
146 |             return
147 | 
148 |         n_samples = int(sampling * len(self.key_map))
149 | 
150 |         samples = random.sample(self.key_map.items(), n_samples)
151 | 
152 |         # read the counter for each sampled key and store them in an array
153 |         evict_list = []
154 |         for key, val in samples:
155 |             x, y, cnt_idx = self.key_map[key]
156 |             counter = self.controller.counter_read(CACHED_KEYS_COUNTER, cnt_idx).packets
157 |             evict_list.append((key, counter))
158 | 
159 |         # sort the array, pick the K keys with the smallest counters and evict them
160 |         # (this could be achieved more optimally by using quickselect)
161 |         import operator
162 |         evict_list.sort(key=operator.itemgetter(1))
163 | 
164 |         for i in range(int(to_remove * n_samples)):
165 |             curr = evict_list[i]
166 |             self.evict(curr[0])
167 | 
168 | 
169 | 
170 | 
171 |     def setup(self):
172 |         if self.cpu_port:
173 |             self.controller.mirroring_add(CONTROLLER_MIRROR_SESSION, self.cpu_port)
174 | 
175 |         # create custom hash functions for count min sketch and bloom filters
176 |         self.set_crc_custom_hashes()
177 |         self.create_hashes()
178 | 
179 |         # set a daemon to periodically reset registers
180 |         self.periodic_registers_reset()
181 | 
182 |         # spawn new thread to serve incoming udp connections
183 |         # (i.e hot reports from the switch)
184 |         #udp_t = threading.Thread(target=self.hot_reports_loop)
185 |         #udp_t.start()
186 | 
187 |     def set_crc_custom_hashes(self):
188 |         i = 0
189 |         for custom_crc32, width in sorted(self.custom_calcs.items()):
190 |             self.controller.set_crc32_parameters(custom_crc32,
191 |                     crc32_polynomials[i], 0xffffffff, 0xffffffff, True, True)
192 |             i += 1
193 | 
194 |     def create_hashes(self):
195 |         self.hashes = []
196 |         for i in range(self.sketch_register_num):
197 |             self.hashes.append(Crc(32, crc32_polynomials[i], True,
0xffffffff, True, 0xffffffff))
198 | 
199 | 
200 |     # set a static allocation scheme for l2 forwarding where the mac address of
201 |     # each host is associated with the port connecting this host to the switch
202 |     def set_forwarding_table(self):
203 |         for host in self.topo.get_hosts_connected_to(self.sw_name):
204 |             port = self.topo.node_to_node_port_num(self.sw_name, host)
205 |             host_mac = self.topo.get_host_mac(host)
206 |             self.controller.table_add("l2_forward", "set_egress_port", [str(host_mac)], [str(port)])
207 | 
208 | 
209 |     def set_value_tables(self):
210 |         for i in range(self.vtables_num):
211 |             self.controller.table_add("vtable_" + str(i), "process_array_" + str(i), ['1'], [])
212 | 
213 | 
214 |     # this function manages the mapping between slots in register arrays
215 |     # and the cached items by implementing the First Fit algorithm described in
216 |     # the Memory Management section of 4.4.2 (netcache paper)
217 |     def first_fit(self, key, value_size):
218 | 
219 |         n_slots = (value_size + VTABLE_SLOT_SIZE - 1) / VTABLE_SLOT_SIZE  # ceiling division
220 |         if value_size <= 0:
221 |             return None
222 |         if key in self.key_map:
223 |             return None
224 | 
225 | 
226 |         for idx in range(len(self.mem_pool)):
227 |             old_bitmap = self.mem_pool[idx]
228 |             n_zeros = 8 - bin(old_bitmap).count("1")
229 | 
230 |             if n_zeros >= n_slots:
231 |                 cnt = 0
232 |                 bitmap = 0
233 |                 for i in reversed(range(8)):
234 |                     if cnt >= n_slots:
235 |                         break
236 | 
237 |                     if not self.bit_is_set(old_bitmap, i):
238 |                         bitmap = bitmap | (1 << i)
239 |                         cnt += 1
240 | 
241 |                 # mark the n_slots chosen 0 bits as 1 bits because we assigned
242 |                 # them to the new key and they are now allocated
243 |                 self.mem_pool[idx] = old_bitmap | bitmap
244 | 
245 |                 self.used_mem_slots += bin(bitmap).count("1")
246 | 
247 |                 return (idx, bitmap)
248 | 
249 |         return None
250 | 
251 | 
252 |     # takes a list of 1s and 0s represented as strings and converts it
253 |     # to a bitmap using bitwise operations (this intermediate representation
254 |     # of a list of 1s and 0s is used to avoid low level bitwise logic inside
255 |     # the core implementation logic)
256 |     def convert_to_bitmap(self, strlist, bitmap_len):
257 |         bitmap = 0
258 |         # supports only bitmaps with a size that is a multiple of 8 bits
259 |         if bitmap_len % 8 != 0:
260 |             return bitmap
261 |         for i in strlist:
262 |             bitmap = bitmap << 1
263 |             bitmap = bitmap | int(i)
264 | 
265 |         return bitmap
266 | 
267 | 
268 |     # this function checks whether the k-th bit of a given number is set
269 |     def bit_is_set(self, n, k):
270 |         if n & (1 << k):
271 |             return True
272 |         else:
273 |             return False
274 | 
275 | 
276 |     # given a key and its associated value, we update the lookup table on
277 |     # the switch and we also update the value registers with the value
278 |     # given as argument (stored in multiple slots)
279 |     def insert(self, key, value, cont=True):
280 |         # find where to put the value for the given key
281 |         mem_info = self.first_fit(key, len(value))
282 | 
283 |         # if the key already exists or no space is available then stop
284 |         if mem_info is None:
285 |             return
286 | 
287 |         vt_index, bitmap = mem_info
288 | 
289 |         # keep track of the number of bytes of the value written so far
290 |         cnt = 0
291 | 
292 |         # store the value of the key in the vtables of the switch while
293 |         # incrementally storing a part of the value at each value table
294 |         # if the corresponding bit of the bitmap is set
295 |         for i in range(self.vtables_num):
296 | 
297 |             if self.bit_is_set(bitmap, self.vtables_num - i - 1):
298 |                 partial_val = value[cnt:cnt+VTABLE_SLOT_SIZE]
299 | 
self.controller.register_write(VTABLE_NAME_PREFIX + str(i), 300 | vt_index, self.str_to_int(partial_val)) 301 | 302 | cnt += VTABLE_SLOT_SIZE 303 | 304 | # allocate an id from the pool to index the counter and validity register 305 | # (we take the last element of list because in python list is implemented 306 | # to optimize for inserting and removing elements from the end of the list) 307 | key_index = self.ids_pool.pop() 308 | 309 | # add the new key to the cache lookup table of the p4 switch 310 | self.controller.table_add(NETCACHE_LOOKUP_TABLE, "set_lookup_metadata", 311 | [str(self.str_to_int(key))], [str(bitmap), str(vt_index), str(key_index)]) 312 | 313 | # mark cache entry for this key as valid 314 | self.controller.register_write("cache_status", key_index, 1) 315 | 316 | self.key_map[key] = vt_index, bitmap, key_index 317 | 318 | # inform the server about the successful cache insertion 319 | if cont: 320 | self.inform_server() 321 | 322 | print("Inserted key-value pair to cache: ("+key+","+value+")") 323 | 324 | 325 | # converts a string to a bytes representation and afterwards returns 326 | # its integer representation of width specified by argument int_width 327 | # (seems hacky due to restriction to use python2.7) 328 | def str_to_int(self, x, int_width=VTABLE_SLOT_SIZE): 329 | if len(x) > int_width: 330 | print "Error: Overflow while converting string to int" 331 | 332 | # add padding with 0x00 if input string size less than int_width 333 | bytearr = bytearray(int_width - len(x)) 334 | bytearr.extend(x.encode('utf-8')) 335 | return struct.unpack(">Q", bytearr)[0] 336 | 337 | 338 | # given an arbitrary sized integer, the max width (in bits) of the integer 339 | # it returns the string representation of the number (also stripping it of 340 | # any '0x00' characters) (network byte order is assumed) 341 | def int_to_packed(self, int_val, max_width=128, word_size=32): 342 | num_words = max_width / word_size 343 | words = self.int_to_words(int_val, num_words, word_size) 344 | 345 | fmt = '>%dI' % (num_words) 346 | return struct.pack(fmt, *words).strip('\x00') 347 | 348 | # split up an arbitrary sized integer to words (needed to hack 349 | # around struct.pack limitation to convert to byte any integer 350 | # greater than 8 bytes) 351 | def int_to_words(self, int_val, num_words, word_size): 352 | max_int = 2 ** (word_size*num_words) - 1 353 | max_word_size = 2 ** word_size - 1 354 | words = [] 355 | for _ in range(num_words): 356 | word = int_val & max_word_size 357 | words.append(int(word)) 358 | int_val >>= word_size 359 | words.reverse() 360 | return words 361 | 362 | 363 | # update the value of the given key with the new value given as argument 364 | # (by allowing updates also to be done by the controller, the client is 365 | # also able to update keys with values bigger than the previous one) 366 | # in netcache paper this restriction is not resolved 367 | def update(self, key, value): 368 | # if key is not in cache then nothing to do 369 | if key not in self.key_map: 370 | return 371 | 372 | # update key-value pair by removing old pair and inserting new one 373 | self.evict(key) 374 | self.insert(key, value) 375 | 376 | 377 | # evict given key from the cache by deleting its associated entries in 378 | # action tables of the switch, by deallocating its memory space and by 379 | # marking the cache entry as valid once the deletion is completed 380 | def evict(self, key): 381 | 382 | if key not in self.key_map: 383 | return 384 | 385 | # delete entry from the lookup_table 386 | 
entry_handle = self.controller.get_handle_from_match( 387 | NETCACHE_LOOKUP_TABLE, [str(self.str_to_int(key)), ]) 388 | 389 | if entry_handle is not None: 390 | self.controller.table_delete(NETCACHE_LOOKUP_TABLE, entry_handle) 391 | 392 | # delete mapping of key from controller's dictionary 393 | vt_idx, bitmap, key_idx = self.key_map[key] 394 | del self.key_map[key] 395 | 396 | # deallocate space from memory pool 397 | self.mem_pool[vt_idx] = self.mem_pool[vt_idx] ^ bitmap 398 | self.used_mem_slots = self.used_mem_slots - bin(bitmap).count("1") 399 | 400 | # free the id used to index the validity/counter register and append 401 | # it back to the id pool of the controller 402 | self.ids_pool.append(key_idx) 403 | 404 | # mark cache entry as valid again (should be the last thing to do) 405 | self.controller.register_write("cache_status", key_idx, 1) 406 | 407 | 408 | 409 | # used for testing purposes and static population of cache 410 | def dummy_populate_vtables(self): 411 | test_values_l = ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", 412 | "hita", "theta", "yiota", "kappa", "lambda", "meta"] 413 | test_keys_l = ["one", "two", "three", "four", "five", "six", "seven", 414 | "eight", "nine", "ten", "eleven", "twelve"] 415 | cnt = 0 416 | for i in range(11): 417 | self.insert(test_keys_l[i], test_values_l[i], False) 418 | 419 | 420 | 421 | # handling reports from the switch corresponding to hot keys, updates to 422 | # key-value pairs or deletions - this function receives a packet, extracts 423 | # its netcache header and manipulates cache based on the operation field 424 | # of the netcache header (callback function) 425 | def recv_switch_updates(self, pkt): 426 | print("Received message from switch") 427 | 428 | # extract netcache header information 429 | if pkt.haslayer(UDP): 430 | ncache_header = NetcacheHeader(pkt[UDP].payload) 431 | elif pkt.haslayer(TCP): 432 | ncache_header = NetcacheHeader(pkt[TCP].payload) 433 | 434 | key = self.int_to_packed(ncache_header.key, max_width=128) 435 | value = self.int_to_packed(ncache_header.value, max_width=1024) 436 | 437 | op = ncache_header.op 438 | 439 | if op == NETCACHE_HOT_READ_QUERY: 440 | print("Received hot report for key = " + key) 441 | # if the netcache header has null value or if the "hot key" 442 | # reported doesn't exist then do not update cache 443 | if ncache_header.op == NETCACHE_KEY_NOT_FOUND: 444 | return 445 | 446 | self.insert(key, value) 447 | 448 | elif op == NETCACHE_DELETE_COMPLETE: 449 | print("Received query to delete key = " + key) 450 | self.evict(key) 451 | 452 | elif op == NETCACHE_UPDATE_COMPLETE: 453 | print("Received query to update key = " + key) 454 | self.update(key, value) 455 | 456 | else: 457 | print("Error: unrecognized operation field of netcache header") 458 | 459 | 460 | # sniff infinitely the interface connected to the P4 switch and when a valid netcache 461 | # packet is captured, handle the packet via a callback to recv_switch_updates function 462 | def hot_reports_loop(self): 463 | cpu_port_intf = str(self.topo.get_cpu_port_intf(self.sw_name)) 464 | sniff(iface=cpu_port_intf, prn=self.recv_switch_updates, filter="port 50000") 465 | 466 | 467 | def main(self): 468 | self.set_forwarding_table() 469 | self.set_value_tables() 470 | self.dummy_populate_vtables() 471 | self.hot_reports_loop() 472 | 473 | 474 | if __name__ == "__main__": 475 | controller = NCacheController('s1').main() 476 | -------------------------------------------------------------------------------- /src/control_plane/crc.py: 
--------------------------------------------------------------------------------
/src/control_plane/crc.py:
--------------------------------------------------------------------------------
1 | import struct
2 | 
3 | class Crc(object):
4 |     """
5 |     A base class for CRC routines.
6 |     """
7 |     # pylint: disable=too-many-instance-attributes
8 | 
9 |     def __init__(self, width, poly, reflect_in, xor_in, reflect_out, xor_out, table_idx_width=None, slice_by=1):
10 |         """The Crc constructor.
11 | 
12 |         The parameters are as follows:
13 |             width
14 |             poly
15 |             reflect_in
16 |             xor_in
17 |             reflect_out
18 |             xor_out
19 |         """
20 |         # pylint: disable=too-many-arguments
21 | 
22 |         self.width = width
23 |         self.poly = poly
24 |         self.reflect_in = reflect_in
25 |         self.xor_in = xor_in
26 |         self.reflect_out = reflect_out
27 |         self.xor_out = xor_out
28 |         self.tbl_idx_width = table_idx_width
29 |         self.slice_by = slice_by
30 | 
31 |         self.msb_mask = 0x1 << (self.width - 1)
32 |         self.mask = ((self.msb_mask - 1) << 1) | 1
33 |         if self.tbl_idx_width is not None:
34 |             self.tbl_width = 1 << self.tbl_idx_width
35 |         else:
36 |             self.tbl_idx_width = 8
37 |             self.tbl_width = 1 << self.tbl_idx_width
38 | 
39 |         self.direct_init = self.xor_in
40 |         self.nondirect_init = self.__get_nondirect_init(self.xor_in)
41 |         if self.width < 8:
42 |             self.crc_shift = 8 - self.width
43 |         else:
44 |             self.crc_shift = 0
45 | 
46 | 
47 |     def __get_nondirect_init(self, init):
48 |         """
49 |         return the non-direct init if the direct algorithm has been selected.
50 |         """
51 |         crc = init
52 |         for dummy_i in range(self.width):
53 |             bit = crc & 0x01
54 |             if bit:
55 |                 crc ^= self.poly
56 |             crc >>= 1
57 |             if bit:
58 |                 crc |= self.msb_mask
59 |         return crc & self.mask
60 | 
61 | 
62 |     def reflect(self, data, width):
63 |         """
64 |         reflect a data word, i.e. reverse the bit order.
65 |         """
66 |         # pylint: disable=no-self-use
67 | 
68 |         res = data & 0x01
69 |         for dummy_i in range(width - 1):
70 |             data >>= 1
71 |             res = (res << 1) | (data & 0x01)
72 |         return res
73 | 
74 | 
75 |     def bit_by_bit(self, in_data):
76 |         """
77 |         Classic simple and slow CRC implementation. This function iterates bit
78 |         by bit over the augmented input message and returns the calculated CRC
79 |         value at the end.
80 |         """
81 | 
82 |         reg = self.nondirect_init
83 |         for octet in in_data:
84 |             octet = struct.unpack("B", octet)[0]
85 |             if self.reflect_in:
86 |                 octet = self.reflect(octet, 8)
87 |             for i in range(8):
88 |                 topbit = reg & self.msb_mask
89 |                 reg = ((reg << 1) & self.mask) | ((octet >> (7 - i)) & 0x01)
90 |                 if topbit:
91 |                     reg ^= self.poly
92 | 
93 |         for i in range(self.width):
94 |             topbit = reg & self.msb_mask
95 |             reg = ((reg << 1) & self.mask)
96 |             if topbit:
97 |                 reg ^= self.poly
98 | 
99 |         if self.reflect_out:
100 |             reg = self.reflect(reg, self.width)
101 |         return (reg ^ self.xor_out) & self.mask
102 | 
103 | 
104 |     def bit_by_bit_fast(self, in_data):
105 |         """
106 |         This is a slightly modified version of the bit-by-bit algorithm: it
107 |         does not need to loop over the augmented bits, i.e. the Width 0-bits
108 |         which are appended to the input message in the bit-by-bit algorithm.
109 |         """
110 | 
111 |         reg = self.direct_init
112 |         for octet in in_data:
113 |             octet = struct.unpack("B", octet)[0]
114 |             if self.reflect_in:
115 |                 octet = self.reflect(octet, 8)
116 |             for i in range(8):
117 |                 topbit = reg & self.msb_mask
118 |                 if octet & (0x80 >> i):
119 |                     topbit ^= self.msb_mask
120 |                 reg <<= 1
121 |                 if topbit:
122 |                     reg ^= self.poly
123 |             reg &= self.mask
124 |         if self.reflect_out:
125 |             reg = self.reflect(reg, self.width)
126 |         return reg ^ self.xor_out
--------------------------------------------------------------------------------
/src/kv_store/client_api.py:
--------------------------------------------------------------------------------
1 | import socket
2 | import time
3 | import sys
4 | 
5 | NETCACHE_PORT = 50000
6 | NOCACHE_PORT = 50001
7 | 
8 | MAX_SUPPORTED_SERVERS = 254
9 | 
10 | NETCACHE_READ_QUERY = 0
11 | NETCACHE_WRITE_QUERY = 1
12 | NETCACHE_DELETE_QUERY = 2
13 | 
14 | NETCACHE_KEY_NOT_FOUND = 20
15 | NETCACHE_METRICS_REPORT = 30
16 | 
17 | 
18 | def convert(val):
19 |     return int.from_bytes(bytes(val, "utf-8"), "big")
20 | 
21 | def build_message(op, key, seq=0, value=""):
22 | 
23 |     msg = bytearray()
24 |     msg += op.to_bytes(1, 'big')
25 |     msg += seq.to_bytes(4, 'big')
26 | 
27 |     if len(key) <= 8:
28 |         msg += convert(key).to_bytes(16, 'big')
29 |     else:
30 |         print("Error: Key should be up to 8 bytes")
31 |         return None
32 | 
33 |     if len(value) <= 64:
34 |         msg += convert(value).to_bytes(64, 'big')
35 |     else:
36 |         print("Error: Value should be up to 64 bytes")
37 |         return None
38 | 
39 |     return msg
40 | 
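# --- editor's illustrative example (not part of the original client) ---
# build_message() above produces a fixed-size 85-byte payload:
# 1 byte (op) + 4 bytes (seq) + 16 bytes (key) + 64 bytes (value),
# with the key and value left-padded with zero bytes:

def _wire_format_example():
    msg = build_message(NETCACHE_READ_QUERY, "one", seq=7)
    assert len(msg) == 85
    assert msg[0] == NETCACHE_READ_QUERY
    assert int.from_bytes(msg[1:5], 'big') == 7
    assert bytes(msg[5:21]).lstrip(b'\x00') == b'one'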
41 | 
42 | class NetCacheClient:
43 | 
44 |     def __init__(self, n_servers=1, no_cache=False):
45 |         self.n_servers = n_servers
46 |         self.servers = []
47 | 
48 |         if no_cache:
49 |             self.port = NOCACHE_PORT
50 |         else:
51 |             self.port = NETCACHE_PORT
52 | 
53 |         self.udps = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
54 |         self.get_servers_ips()
55 | 
56 |         # store all latencies of the requests sent (used for evaluation)
57 |         self.latencies = []
58 | 
59 | 
60 |     # the IP addresses assigned to servers are based on the assignment
61 |     # strategy defined at the p4app.json file; the basic l2 strategy
62 |     # that we are using assigns IP addresses starting from 10.0.0.1
63 |     # and assigns incrementing addresses to the defined hosts
64 |     def get_servers_ips(self):
65 |         if self.n_servers > MAX_SUPPORTED_SERVERS:
66 |             print("Error: Exceeded maximum supported servers")
67 |             sys.exit()
68 | 
69 |         for i in range(self.n_servers):
70 |             self.servers.append("10.0.0." + str(i+1))
71 | 
72 |     # return the right node that contains the given key - our implementation
73 |     # is based on client-side partitioning, i.e. the client directly sends
74 |     # the message to the correct node
75 |     # TODO:1(dimlek): implement consistent hashing partitioning (see the
76 |     # illustrative sketch below)
77 |     # TODO:2(dimlek): explore option of proxy-assisted partitioning
78 |     def get_node(self, key, partition_scheme='range'):
79 | 
80 |         if partition_scheme == 'range':
81 |             # find the right node through range partitioning based on 1st key character
82 |             first_letter = ord(key[0])
83 |             return self.servers[first_letter % self.n_servers]
84 | 
85 |         elif partition_scheme == 'hash':
86 |             return self.servers[hash(key) % self.n_servers]
87 | 
88 |         elif partition_scheme == 'consistent-hash':
89 |             # TODO(dimlek): implement consistent hashing partitioning
90 |             pass
91 | 
92 |         else:
93 |             print("Error: Invalid partitioning scheme")
94 |             sys.exit()
95 | 
96 |         return -1
97 | 
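    # --- editor's illustrative sketch (not part of the original client) ---
    # One way TODO:1 above could be filled in: hash servers and keys onto the
    # same ring and pick the first server clockwise of the key's hash. A real
    # implementation would also place several virtual nodes per server to
    # smooth the load. The method name is hypothetical.
    def get_node_consistent_sketch(self, key):
        import hashlib
        ring = sorted((int(hashlib.md5(s.encode()).hexdigest(), 16), s)
                      for s in self.servers)
        h = int(hashlib.md5(key.encode()).hexdigest(), 16)
        for point, server in ring:
            if h <= point:
                return server
        return ring[0][1]  # wrap around to the first server on the ring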
98 |     def read(self, key, seq=0, suppress=False):
99 |         msg = build_message(NETCACHE_READ_QUERY, key, seq)
100 |         if msg is None:
101 |             return
102 | 
103 |         start_time = time.time()
104 | 
105 |         self.udps.connect((self.get_node(key), self.port))
106 |         self.udps.send(msg)
107 | 
108 |         data = self.udps.recv(1024)
109 |         op = data[0]
110 | 
111 |         latency = time.time() - start_time
112 |         self.latencies.append(latency)
113 | 
114 |         if suppress:
115 |             return
116 | 
117 |         if op == NETCACHE_KEY_NOT_FOUND:
118 |             print('Error: Key not found (key = ' + key + ')')
119 |         else:
120 |             val = data[21:].decode("utf-8")
121 |             print(val)
122 | 
123 | 
124 |     def put(self, key, value, seq=0, proto='udp'):
125 |         msg = build_message(NETCACHE_WRITE_QUERY, key, seq, value)
126 |         if msg is None:
127 |             return
128 | 
129 |         if proto == 'udp':
130 |             start_time = time.time()
131 |             self.udps.connect((self.get_node(key), self.port))
132 |             self.udps.send(msg)
133 | 
134 |             status = self.udps.recv(1024)
135 |             latency = time.time() - start_time
136 | 
137 |             if status[0] == NETCACHE_KEY_NOT_FOUND:
138 |                 print('Error: Key not found (key = ' + key + ')')
139 | 
140 |             self.latencies.append(latency)
141 | 
142 |         elif proto == 'tcp':
143 |             tcps = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
144 |             tcps.connect((self.get_node(key), self.port))
145 | 
146 |             start_time = time.time()
147 | 
148 |             tcps.send(msg)
149 |             status = tcps.recv(1024)
150 | 
151 |             latency = time.time() - start_time
152 |             self.latencies.append(latency)
153 | 
154 |             if status[0] == NETCACHE_KEY_NOT_FOUND:
155 |                 print('Error: Key not found (key = ' + key + ')')
156 | 
157 |             tcps.close()
158 | 
159 |         else:
160 |             print('Protocol for write (' + proto + ') unsupported')
161 | 
162 | 
163 |     def delete(self, key, seq=0):
164 |         msg = build_message(NETCACHE_DELETE_QUERY, key, seq)
165 |         if msg is None:
166 |             return
167 | 
168 |         tcps = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
169 |         tcps.connect((self.get_node(key), self.port))
170 | 
171 |         start_time = time.time()
172 | 
173 |         tcps.send(msg)
174 |         status = tcps.recv(1024)
175 | 
176 |         latency = time.time() - start_time
177 |         self.latencies.append(latency)
178 | 
179 |         if status[0] == NETCACHE_KEY_NOT_FOUND:
180 |             print('Error: Key not found (key = ' + key + ')')
181 | 
182 |         tcps.close()
183 | 
184 | 
185 |     def request_metrics_report(self, output=sys.stdout):
186 | 
187 |         for server in self.servers:
188 |             msg = build_message(NETCACHE_METRICS_REPORT, "")
189 | 
190 |             self.udps.connect((server, self.port))
191 |             self.udps.send(msg)
192 | 
193 |             reply = self.udps.recv(1024)
194 |             output.write(reply.decode("utf-8"))
195 | 
196 |         # guard against division by zero when no requests were sent
197 |         if len(self.latencies) == 0:
198 |             return
199 | 
200 |         cnt = 0
201 |         for latency in self.latencies:
202 |             cnt += latency
203 | 
204 |         # calculate average latency in milliseconds
205 |         avg_latency = (cnt / len(self.latencies)) * 1000
206 | 
207 |         output.write('avg_latency = ' + '{:.3f}'.format(avg_latency) + '\n')
--------------------------------------------------------------------------------
/src/kv_store/exec_queries.py:
--------------------------------------------------------------------------------
1 | from client_api import NetCacheClient
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def main(n_servers, disable_cache, suppress, input_files):
7 |     client = NetCacheClient(n_servers=n_servers, no_cache=disable_cache)
8 | 
9 |     for filepath in input_files:
10 |         sample = []
11 | 
12 |         with open(filepath) as fp:
13 |             line = fp.readline()
14 |             while line:
15 |                 sample.append(line.strip())
16 |                 line = fp.readline()
17 | 
18 |         for query in sample:
19 |             client.read(query, suppress=suppress)
20 | 
21 |         #print("\n########## SERVER METRICS REPORT ##########")
22 |         #print("########## [{}] ##########\n".format(filepath))
23 | 
24 |         if disable_cache:
25 |             x = 'nocache'
26 |         else:
27 |             x = 'netcache'
28 | 
29 |         input_file = filepath.split('/')[1].split('.')[0]
30 | 
31 |         out_file = 'results/{}_{}_{}.txt'.format(input_file, n_servers, x)
32 |         out_fd = open(out_file, 'w')
33 | 
34 |         client.request_metrics_report(output=out_fd)
35 | 
36 | 
37 | if __name__=="__main__":
38 | 
39 |     import argparse
40 |     parser = argparse.ArgumentParser()
41 | 
42 |     parser.add_argument('--n-servers', help='number of servers', type=int, required=False, default=1)
43 |     parser.add_argument('--disable-cache', help='disable in-network caching', action='store_true')
44 |     parser.add_argument('--suppress', help='suppress output', action='store_true')
45 |     parser.add_argument('--input', help='input files to execute queries', required=True, nargs="+")
46 |     args = parser.parse_args()
47 | 
48 |     main(args.n_servers, args.disable_cache, args.suppress, args.input)
--------------------------------------------------------------------------------
/src/kv_store/gen_plots.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import sys
4 | 
5 | def main(input_file):
6 | 
7 |     """
8 |     if len(sys.argv) != 4:
9 |         print("Usage: python3 plotqueries.py ")
10 |         return
11 |     """
12 |     """
13 |     for i in range(1,NO_SERVERS+1):
14 |         with open("../p4/server" + str(i) + ".log") as fp:
15 |             last = fp.readlines()[-1]
16 |             answered_queries = last.split(" ")[-1]
17 |             no_queries.append(int(answered_queries.strip()))
18 |     """
19 | 
20 |     # the code below assumes the format of the output printed by the
21 |     # script exec_queries.py
22 | 
23 |     yvalues = []
24 |     xlabels = []
25 | 
26 |     with open(input_file, 'r') as fp:
27 |         lines = fp.readlines()
28 | 
29 |     for i in range(0, len(lines) - 1, 2):
30 |         server_name = lines[i].split(']')[0][1:]
31 |         n_requests = lines[i].split('=')[1].strip()
32 |         throughput = lines[i+1].split('=')[1].strip()
33 | 
34 |         yvalues.append(int(n_requests))
35 |         xlabels.append(server_name)
36 | 
37 | 
38 |     xvalues = range(1, len(xlabels) + 1)
39 | 
40 |     plt.bar(xvalues, yvalues)
41 |     plt.xticks(xvalues, xlabels)
42 | 
43 |     axes = plt.gca()
44 |     axes.set_ylim([0, 1.3 * max(yvalues)])
45 | 
46 |     plt.savefig("plot.png")
47 | 
48 | 
49 | if __name__=="__main__":
50 | 
51 |     import argparse
52 |     parser = argparse.ArgumentParser()
53 | 
54 |     parser.add_argument('--input', help='input file of results to
generate plot', required=True) 55 | args = parser.parse_args() 56 | 57 | main(args.input) 58 | -------------------------------------------------------------------------------- /src/kv_store/gen_zipf_samples.py: -------------------------------------------------------------------------------- 1 | from random import shuffle 2 | 3 | import numpy as np 4 | import argparse 5 | 6 | DATA_DIR='data/' 7 | 8 | def main(n_servers, n_queries, skew): 9 | 10 | alpha = 1.0 / (1.0 - skew) 11 | 12 | keys = [] 13 | sample = [] 14 | 15 | # adds all generated keys to the set of keys to sample from 16 | for i in range(1, 1+int(n_servers)): 17 | with open(DATA_DIR + 'server' + str(i) + '.txt') as f: 18 | content = f.readlines() 19 | content = [x.strip().split('=')[0] for x in content] 20 | keys.extend(content) 21 | 22 | # shuffle keys 23 | shuffle(keys) 24 | 25 | # draw random query items 26 | while len(sample) < int(n_queries): 27 | 28 | # zipf distribution will return any natural number (>= 1) 29 | # the probability decreases for larger values 30 | # when the index is larger than the number of keys 31 | # we sample, we simply try again 32 | query_index = np.random.zipf(alpha, 1)[0] 33 | if query_index <= len(keys): 34 | sample.append(keys[query_index-1]) 35 | 36 | 37 | sample_file = '{}zipf_sample_{}_{}.txt'.format(DATA_DIR, n_queries, 38 | str(skew).replace('.','')) 39 | 40 | with open(sample_file, 'w') as f: 41 | for query_item in sample: 42 | f.write("%s\n" % query_item) 43 | 44 | 45 | 46 | def check_valid_skew(value): 47 | ivalue = float(value) 48 | if ivalue >= 1 or ivalue <= 0: 49 | raise argparse.ArgumentTypeError("value should be (0 < skew < 1)") 50 | return ivalue 51 | 52 | if __name__=="__main__": 53 | 54 | parser = argparse.ArgumentParser() 55 | 56 | parser.add_argument('--n-servers', help='number of servers', type=int, required=True) 57 | parser.add_argument('--n-queries', help='number of queries to generate', type=int, required=True) 58 | parser.add_argument('--skew', help='skewness of the workload (0 < skew < 1)', type=check_valid_skew, 59 | required=False, default=0.9) 60 | args = parser.parse_args() 61 | 62 | main(args.n_servers, args.n_queries, args.skew) 63 | -------------------------------------------------------------------------------- /src/kv_store/metrics.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Metrics: 5 | 6 | def __init__(self, total_messages_sent=0, total_messages_received=0, start_time=0.0, end_time=0.0): 7 | self.total_messages_sent = total_messages_sent 8 | self.total_messages_received = total_messages_received 9 | self.start_time = start_time 10 | self.end_time = end_time 11 | # dictionary with key = and value = list [, > $file_name 65 | done 66 | 67 | done 68 | -------------------------------------------------------------------------------- /src/kv_store/results/zipf_sample_100000_05_8_netcache.txt: -------------------------------------------------------------------------------- 1 | [server1] requests_received = 169 2 | [server1] throughput = 0.5432959015041506 3 | [server2] requests_received = 162 4 | [server2] throughput = 0.5207828281782161 5 | [server3] requests_received = 152 6 | [server3] throughput = 0.4886578095723195 7 | [server4] requests_received = 180 8 | [server4] throughput = 0.5786813140763083 9 | [server5] requests_received = 209 10 | [server5] throughput = 0.6718622019513142 11 | [server6] requests_received = 136 12 | [server6] throughput = 0.43717805257734726 13 | [server7] 
requests_received = 114 14 | [server7] throughput = 0.36644233807425586 15 | [server8] requests_received = 125 16 | [server8] throughput = 0.4017975837533586 17 | avg_latency = 1.845 -------------------------------------------------------------------------------- /src/kv_store/results/zipf_sample_100000_05_8_nocache.txt: -------------------------------------------------------------------------------- 1 | [server1] requests_received = 65120 2 | [server1] throughput = 333.6462487092303 3 | [server2] requests_received = 1629 4 | [server2] throughput = 8.347862651222437 5 | [server3] requests_received = 850 6 | [server3] throughput = 4.355763252903676 7 | [server4] requests_received = 9512 8 | [server4] throughput = 48.741520440232804 9 | [server5] requests_received = 17785 10 | [server5] throughput = 91.1298871796878 11 | [server6] requests_received = 3227 12 | [server6] throughput = 16.533067423513234 13 | [server7] requests_received = 1564 14 | [server7] throughput = 8.014601114845307 15 | [server8] requests_received = 313 16 | [server8] throughput = 1.6033189264731968 17 | avg_latency = 1.382 -------------------------------------------------------------------------------- /src/kv_store/results/zipf_sample_100000_095_8_netcache.txt: -------------------------------------------------------------------------------- 1 | [server1] requests_received = 0 2 | [server1] throughput = 0.0 3 | [server2] requests_received = 19 4 | [server2] throughput = 0.03867527573774815 5 | [server3] requests_received = 7 6 | [server3] throughput = 0.0142480104237291 7 | [server4] requests_received = 9 8 | [server4] throughput = 0.018319361767238973 9 | [server5] requests_received = 6 10 | [server5] throughput = 0.012212405869570217 11 | [server6] requests_received = 0 12 | [server6] throughput = 0.0 13 | [server7] requests_received = 8 14 | [server7] throughput = 0.016283039162363497 15 | [server8] requests_received = 6 16 | [server8] throughput = 0.012213100430360814 17 | avg_latency = 1.618 -------------------------------------------------------------------------------- /src/kv_store/results/zipf_sample_100000_09_8_netcache.txt: -------------------------------------------------------------------------------- 1 | [server1] requests_received = 0 2 | [server1] throughput = 0.0 3 | [server2] requests_received = 19 4 | [server2] throughput = 0.05829253754921669 5 | [server3] requests_received = 7 6 | [server3] throughput = 0.021474433972044824 7 | [server4] requests_received = 9 8 | [server4] throughput = 0.02761111578939127 9 | [server5] requests_received = 6 10 | [server5] throughput = 0.018406270895786364 11 | [server6] requests_received = 0 12 | [server6] throughput = 0.0 13 | [server7] requests_received = 4 14 | [server7] throughput = 0.012270660615879003 15 | [server8] requests_received = 6 16 | [server8] throughput = 0.01840785056736825 17 | avg_latency = 1.605 -------------------------------------------------------------------------------- /src/kv_store/server.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | 3 | import socket 4 | import logging 5 | import threading 6 | import time 7 | import sys 8 | import os 9 | 10 | STATISTICS_REFRESH_INTERVAL = 30.0 11 | 12 | NETCACHE_PORT = 50000 13 | NOCACHE_PORT = 50001 14 | 15 | NETCACHE_READ_QUERY = 0 16 | NETCACHE_WRITE_QUERY = 1 17 | NETCACHE_DELETE_QUERY = 2 18 | NETCACHE_HOT_READ_QUERY = 3 19 | NETCACHE_UPDATE_COMPLETE = 4 20 | NETCACHE_DELETE_COMPLETE = 5 21 | NETCACHED_UPDATE = 6 22 | 
NETCACHE_UPDATE_COMPLETE_OK = 7
23 | 
24 | NETCACHE_REQUEST_SUCCESS = 10
25 | NETCACHE_KEY_NOT_FOUND = 20
26 | NETCACHE_METRICS_REPORT = 30
27 | 
28 | 
29 | 
30 | def convert(val):
31 |     return int.from_bytes(bytes(val, "utf-8"), "big")
32 | 
33 | def build_message(op, key, seq=0, value=""):
34 | 
35 |     msg = bytearray()
36 |     msg += op.to_bytes(1, 'big')
37 |     msg += seq.to_bytes(4, 'big')
38 |     msg += key.to_bytes(16, 'big')
39 | 
40 |     msg += convert(value).to_bytes(64, 'big')
41 | 
42 |     return msg
43 | 
44 | 
45 | class KVServer:
46 | 
47 |     def __init__(self, host, nocache=False, suppress=False, max_listen=10):
48 |         # simple in-memory key value store, represented by a dictionary
49 |         self.kv_store = {}
50 | 
51 |         # server ip address
52 |         self.host = host
53 |         # server name
54 |         self.name = 'server' + self.host.split('.')[-1]
55 | 
56 |         # port server is listening to
57 |         if nocache:
58 |             self.port = NOCACHE_PORT
59 |         else:
60 |             self.port = NETCACHE_PORT
61 | 
62 |         # suppress printing messages
63 |         self.suppress = suppress
64 |         # udp server socket
65 |         self.udpss = None
66 |         # tcp server socket
67 |         self.tcpss = None
68 |         # max clients to listen to
69 |         self.max_listen = max_listen
70 |         # specifies whether the server is blocking for cache updates
71 |         self.blocking = False
72 |         # queue to store incoming requests while blocking
73 |         self.incoming_requests = deque()
74 | 
75 |         # keep number of requests dispatched to use for evaluation
76 |         self.requests_cnt = 0
77 | 
78 |         # unix socket for out-of-band communication with the controller
79 |         # (used for cache coherency purposes)
80 |         self.unixss = None
81 | 
82 | 
83 |     def activate(self):
84 | 
85 |         # enable logging for debugging purposes
86 |         logging.basicConfig(
87 |                 filename='log/{}.log'.format(self.name),
88 |                 format='%(asctime)s %(levelname)-8s %(message)s',
89 |                 level=logging.DEBUG,
90 |                 datefmt='%d-%m-%Y %H:%M:%S')
91 | 
92 |         # create udp socket server
93 |         self.udpss = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
94 |         self.udpss.bind((self.host, self.port))
95 | 
96 |         # create tcp socket server
97 |         self.tcpss = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
98 |         self.tcpss.bind((self.host, self.port))
99 |         self.tcpss.listen(1)
100 | 
101 |         # spawn new thread that serves incoming udp (read) queries
102 |         server_udp_t = threading.Thread(target=self.handle_client_udp_request)
103 |         server_udp_t.start()
104 | 
105 |         # spawn new thread that serves incoming tcp (put/delete) queries
106 |         server_tcp_t = threading.Thread(target=self.handle_client_tcp_request)
107 |         server_tcp_t.start()
108 | 
109 |         # self.periodic_request_report()
110 | 
111 |         # starting time of serving requests (used for throughput calculation)
112 |         self.start_time = time.time()
113 | 
114 | 
115 | 
116 |     def create_controller_channel(self):
117 |         try:
118 |             os.unlink(UNIX_CHANNEL)
119 |         except OSError:
120 |             if os.path.exists(UNIX_CHANNEL):
121 |                 print('Error: unlinking unix socket')
122 |                 sys.exit(1)
123 | 
124 |         self.unixss = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
125 |         self.unixss.bind(UNIX_CHANNEL)
126 |         self.unixss.listen(1)
127 | 
128 |         # spawn new thread that serves requests from the controller
129 |         # (out-of-band communication)
130 |         server_cont_t = threading.Thread(target=self.handle_controller_request)
131 |         server_cont_t.start()
132 | 
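    # --- editor's summary (added note, not original code) of the write/update
    # coherency protocol implemented below and in src/p4/core/ingress.p4:
    #   1. a client WRITE for a cached key makes the switch invalidate the
    #      entry (cache_status <- 0) and rewrite the op to NETCACHED_UPDATE
    #   2. this server applies the write, replies NETCACHE_REQUEST_SUCCESS to
    #      the client, sends NETCACHE_UPDATE_COMPLETE back towards the switch
    #      and sets self.blocking = True, queueing further writes/updates
    #   3. the switch writes the new value into its value tables, revalidates
    #      the entry and rewrites the op to NETCACHE_UPDATE_COMPLETE_OK
    #   4. on receiving NETCACHE_UPDATE_COMPLETE_OK the server unblocks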
133 |     # periodically print the number of requests received (used for testing
134 |     # purposes, to evaluate the quality of the load balancing)
135 |     def periodic_request_report(self):
136 |         t = threading.Timer(STATISTICS_REFRESH_INTERVAL, self.periodic_request_report)
137 |         t.daemon = True
138 |         t.start()
139 | 
140 |         # TODO(dimlek): add whatever statistics here
141 |         if not self.suppress:
142 |             print('[{}] Number of requests received = {}'.format(self.name, self.requests_cnt))
143 | 
144 | 
145 |     # handles incoming udp queries
146 |     def handle_client_udp_request(self):
147 | 
148 |         while True:
149 | 
150 |             # if the server is not currently blocking updates/writes and there
151 |             # are requests waiting in the queue, then serve those requests;
152 |             # otherwise serve the next incoming packet
153 |             if not self.blocking and len(self.incoming_requests) > 0:
154 |                 netcache_pkt, addr = self.incoming_requests.popleft()
155 |             else:
156 |                 netcache_pkt, addr = self.udpss.recvfrom(1024)
157 | 
158 |             # netcache_pkt is an array of bytes belonging to incoming packet's data
159 |             # the data portion of the packet represents the netcache header, so we
160 |             # can extract all the fields defined in the netcache custom protocol
161 | 
162 |             op = netcache_pkt[0]
163 |             seq = netcache_pkt[1:5]
164 |             key = netcache_pkt[5:21]
165 |             value = netcache_pkt[21:]
166 | 
167 |             # transform key to int
168 |             key_s = int.from_bytes(key, 'big')
169 |             key = key.decode('utf-8').lstrip('\x00')
170 |             seq = int.from_bytes(seq, 'big')
171 | 
172 |             # transform val to string
173 |             value = value.decode("utf-8")
174 | 
175 | 
176 |             # if the server is blocking to wait for the cache to finish updating,
177 |             # then check if the update is finished, or otherwise put the received
178 |             # packet into a queue to serialize writes/updates
179 | 
180 |             if self.blocking:
181 | 
182 |                 if op == NETCACHE_UPDATE_COMPLETE_OK:
183 |                     logging.info('Successfully completed UPDATE(' + key + ') from client '
184 |                             + addr[0] + ':' + str(addr[1]))
185 | 
186 |                     if not self.suppress:
187 |                         print('[{}] Successfully completed UPDATE({})'.format(self.name, key))
188 | 
189 |                     # start accepting writes/updates again
190 |                     self.blocking = False
191 |                     continue
192 | 
193 |                 elif op != NETCACHE_READ_QUERY:
194 |                     self.incoming_requests.append((netcache_pkt, addr))
195 |                     continue
196 | 
197 | 
198 | 
199 |             if op == NETCACHE_READ_QUERY:
200 |                 logging.info('Received READ(' + key + ') from client ' + addr[0])
201 | 
202 |                 if not self.suppress:
203 |                     print('[{}] Received READ({}) from client {}'.format(self.name, key, addr[0]))
204 | 
205 |                 if key in self.kv_store:
206 |                     val = self.kv_store[key]
207 |                     msg = build_message(NETCACHE_READ_QUERY, key_s, seq, val)
208 |                     self.udpss.sendto(msg, addr)
209 |                 else:
210 |                     msg = build_message(NETCACHE_KEY_NOT_FOUND, key_s, seq)
211 |                     self.udpss.sendto(msg, addr)
212 | 
213 |                 self.requests_cnt += 1
214 | 
215 | 
216 |             elif op == NETCACHE_HOT_READ_QUERY:
217 | 
218 |                 if not self.suppress:
219 |                     print('[{}] Received HOTREAD({}) from client {}'.format(self.name, key, addr[0]))
220 | 
221 |                 if key in self.kv_store:
222 |                     val = self.kv_store[key]
223 |                     msg = build_message(NETCACHE_HOT_READ_QUERY, key_s, seq, val)
224 |                     self.udpss.sendto(msg, addr)
225 |                 else:
226 |                     msg = build_message(NETCACHE_KEY_NOT_FOUND, key_s, seq)
227 |                     self.udpss.sendto(msg, addr)
228 | 
229 |                 self.requests_cnt += 1
230 | 
231 | 
232 |             elif op == NETCACHED_UPDATE:
233 |                 logging.info('Received UPDATE(' + key + ') from client '
234 |                         + addr[0] + ":" + str(addr[1]))
235 | 
236 |                 if not self.suppress:
237 |                     print('[{}] Received UPDATE({}) from client {}'.format(self.name, key, addr[0]))
238 | 
239 |                 # if key already exists in server then it's a valid update query
240 |                 if key in self.kv_store:
241 |                     # update the value of the requested key
242 |                     self.kv_store[key] = value
243 | 
244 |                     # reply to client immediately that the request is dispatched
245 |                     msg = build_message(NETCACHE_REQUEST_SUCCESS, key_s, seq)
246 |                     self.udpss.sendto(msg, addr)
247 | 
248 |                     # inform the switch with appropriate operation field of netcache
249 |                     # header to update its cache and to validate the key again
250 |                     msg = build_message(NETCACHE_UPDATE_COMPLETE, key_s, seq, value)
251 |                     self.udpss.sendto(msg, addr)
252 | 
253 |                     # server now should block until cache is updated before serving
254 |                     # further writes/updates
255 |                     self.blocking = True
256 | 
257 | 
258 |                 else:
259 |                     logging.error('Key exists in cache but not in server (key = ' + key + ')')
260 |                     print('Error: Key exists in cache but not in server (key = ' + key + ')')
261 | 
262 |                 self.requests_cnt += 1
263 | 
264 | 
265 |             elif op == NETCACHE_WRITE_QUERY:
266 |                 logging.info('Received WRITE(' + key + ') from client '
267 |                         + addr[0] + ":" + str(addr[1]))
268 | 
269 |                 if not self.suppress:
270 |                     print('[{}] Received WRITE({}) from client {}'.format(self.name, key, addr[0]))
271 | 
272 |                 # write the value of the requested key
273 |                 self.kv_store[key] = value
274 | 
275 |                 # reply back to client that write was successful
276 |                 msg = build_message(NETCACHE_REQUEST_SUCCESS, key_s, seq, value)
277 |                 self.udpss.sendto(msg, addr)
278 | 
279 |                 self.requests_cnt += 1
280 | 
281 | 
282 |             elif op == NETCACHE_METRICS_REPORT:
283 | 
284 |                 if not self.suppress:
285 |                     print('[{}] Received METRICS_REPORT_REQUEST() from client {}'
286 |                             .format(self.name, addr[0]))
287 | 
288 |                 total_elapsed_time = time.time() - self.start_time
289 |                 if total_elapsed_time != 0:
290 |                     throughput = self.requests_cnt / total_elapsed_time
291 |                 else:
292 |                     throughput = 0
293 | 
294 |                 data = '\n'.join((
295 |                     "[{}] requests_received = {}".format(self.name, self.requests_cnt),
296 |                     "[{}] throughput = {}\n".format(self.name, throughput)))
297 | 
298 |                 self.udpss.sendto(bytes(data, "utf-8"), addr)
299 | 
300 | 
301 |             else:
302 |                 logging.info('Unsupported/Invalid query type received from client ' + addr[0])
303 |                 print('Unsupported query type (received op = ' + str(op) + ')')
304 | 
305 |     # serves incoming tcp queries (i.e.
put/delete) 306 | def handle_client_tcp_request(self): 307 | 308 | while True: 309 | 310 | conn, addr = self.tcpss.accept() 311 | 312 | netcache_pkt = conn.recv(1024) 313 | 314 | op = netcache_pkt[0] 315 | seq = netcache_pkt[1:5] 316 | key = netcache_pkt[5:21] 317 | value = netcache_pkt[21:] 318 | 319 | #transform key to int 320 | key_s = int.from_bytes(key,'big') 321 | seq = int.from_bytes(seq, 'big') 322 | 323 | #transform val to string 324 | value = value.decode("utf-8") 325 | #transform key to string 326 | key = key.decode("utf-8").lstrip('\x00') 327 | 328 | 329 | if op == NETCACHE_WRITE_QUERY or op == NETCACHED_UPDATE: 330 | logging.info('Received WRITE(' + key + ') from client ' 331 | + addr[0] + ":" + str(addr[1])) 332 | 333 | if not self.suppress: 334 | print('[{}] Received WRITE({}) from client {}'.format(self.name, key, addr[0])) 335 | 336 | # update the value of the requested key 337 | self.kv_store[key] = value 338 | 339 | # inform the switch with appropriate operation field of netcache header 340 | # to update its cache and to validate the key again 341 | msg = build_message(NETCACHE_UPDATE_COMPLETE, key_s, seq, value) 342 | conn.sendall(msg) 343 | conn.close() 344 | 345 | self.requests_cnt += 1 346 | 347 | 348 | 349 | elif op == NETCACHE_DELETE_QUERY: 350 | logging.info('Received DELETE(' + key + ') from client ' 351 | + addr[0] + ":" + str(addr[1])) 352 | 353 | if not self.suppress: 354 | print('[{}] Received DELETE({}) from client {}'.format(self.name, key, addr[0])) 355 | 356 | if key in self.kv_store: 357 | # delete the key from the key-value store 358 | del self.kv_store[key] 359 | 360 | # inform the switch with appropriate operation field of netcache header 361 | # to evict this key from cache 362 | msg = build_message(NETCACHE_DELETE_COMPLETE, key_s, seq) 363 | conn.sendall(msg) 364 | else: 365 | msg = build_message(NETCACHE_KEY_NOT_FOUND, key_s, seq) 366 | conn.sendall(msg) 367 | 368 | conn.close() 369 | self.requests_cnt += 1 370 | 371 | else: 372 | logging.info('Unsupported query type received from client ' 373 | + addr[0] + ":" + str(addr[1])) 374 | 375 | 376 | 377 | # populate the running server with key-value pairs from a data file 378 | def populate_from_file(self, file_name): 379 | if os.path.exists(file_name): 380 | with open(file_name, 'r') as fp: 381 | try: 382 | # parse each line and insert the key-value pair into 383 | # the key-value store 384 | for line in fp: 385 | key = line.rstrip('\n').split('=')[0] 386 | val = line.rstrip('\n').split('=')[1] 387 | self.kv_store[key] = val 388 | except: 389 | # if a parsing error occurs then we stop parsing the file, 390 | # though pairs added up to the error are not reverted 391 | print("Error: while parsing " + str(file_name)) 392 | else: 393 | print("Error: file " + str(file_name) + " doesn't exist.") 394 | 395 | 396 | 397 | 398 | def main(disable_cache, suppress_output, input_files): 399 | 400 | from subprocess import check_output 401 | 402 | # dynamically get the IP address of the server 403 | server_ip = check_output(['hostname', '--all-ip-addresses']).decode('utf-8').rstrip() 404 | server = KVServer(server_ip, nocache=disable_cache, suppress=suppress_output) 405 | 406 | # populate the server with all the files given as command line 407 | # arguments (Usage: python3 server.py [file1 file2 ...]) 408 | for data_file in input_files: 409 | server.populate_from_file(data_file) 410 | 411 | server.activate() 412 | 413 | 414 | if __name__ == "__main__": 415 | 416 | import argparse 417 | parser = 
argparse.ArgumentParser()
418 | 
419 |     parser.add_argument('--disable-cache', help='do not use netcache', action='store_true')
420 |     parser.add_argument('--suppress-output', help='suppress output printing messages', action='store_true')
421 |     parser.add_argument('--input', help='input files to prepopulate server', required=False, nargs="*", default=[])
422 |     args = parser.parse_args()
423 | 
424 |     main(args.disable_cache, args.suppress_output, args.input)
425 | 
--------------------------------------------------------------------------------
/src/kv_store/test.py:
--------------------------------------------------------------------------------
1 | from client_api import NetCacheClient
2 | 
3 | 
4 | def main(n_servers, no_cache):
5 |     client = NetCacheClient(n_servers=n_servers, no_cache=no_cache)
6 | 
7 |     # read should be forwarded to KV-Store and return error (not inserted)
8 |     client.read("test")
9 | 
10 |     # put query should be forwarded to KV-Store
11 |     client.put("ctest", "test_okay")
12 | 
13 |     # read should be forwarded to KV-Store
14 |     client.read("ctest")
15 |     client.read("ctest")
16 | 
17 |     # delete query should be forwarded to KV-Store
18 |     client.delete("ctest")
19 | 
20 |     # read should fail for hot key report threshold set to 3 (testing purposes)
21 |     client.read("ctest")
22 | 
23 |     client.put("ctest_2", "tOmZmAvVujaXBP8nFm2TX10w")
24 |     client.put("ctest_2", "abcdeaaaujaXBP8nFm2TX10w")
25 |     client.put("ctest_2", "abcdefghijklmnopkalutera")
26 | 
27 |     # those queries should be replied by the server
28 |     client.read("ctest_2")
29 |     client.read("ctest_2")
30 |     client.read("ctest_2")
31 |     client.read("ctest_2")
32 | 
33 |     # queries should be replied from the cache (threshold > 3)
34 |     client.read("ctest_2")
35 |     client.read("ctest_2")
36 | 
37 |     client.put("ctest_2", "another")
38 |     client.read("ctest_2")
39 |     client.read("ctest_2")
40 |     client.read("ctest_2")
41 | 
42 |     client.put("ctest_2", "123456789alelajdsflkjads")
43 | 
44 |     client.read("ctest_2")
45 |     client.read("ctest_2")
46 | 
47 |     #client.request_metrics_report()
48 | 
49 |     """
50 |     # delete query forwarded to KV-store
51 |     client.delete("ctest_2")
52 | 
53 |     # key should be invalidated in the cache and hence it will be replied by the server
54 |     client.read("ctest_2")
55 | 
56 |     # test prepopulated value
57 |     client.read("c_s4_key44")
58 |     """
59 | 
60 | 
61 | if __name__=="__main__":
62 | 
63 |     import argparse
64 |     parser = argparse.ArgumentParser()
65 | 
66 |     parser.add_argument('--n-servers', help='number of servers', type=int, required=False, default=1)
67 |     parser.add_argument('--disable-cache', help='do not use netcache', action='store_true')
68 |     args = parser.parse_args()
69 | 
70 |     main(args.n_servers, args.disable_cache)
--------------------------------------------------------------------------------
/src/p4/core/egress.p4:
--------------------------------------------------------------------------------
1 | #include <core.p4>
2 | #include <v1model.p4>
3 | 
4 | #include "../include/headers.p4"
5 | 
6 | #define CONTROLLER_MIRROR_SESSION 100
7 | #define HOT_KEY_THRESHOLD 3
8 | 
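// editor's note (added, not in the original source): HOT_KEY_THRESHOLD is
// the count-min sketch estimate at which a read is reported as "hot" to the
// controller; it is set to 3 here only so the behaviour is easy to trigger
// from src/kv_store/test.py, and would presumably be much larger in practice.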
9 | #define PKT_INSTANCE_TYPE_NORMAL 0
10 | #define PKT_INSTANCE_TYPE_INGRESS_CLONE 1
11 | #define PKT_INSTANCE_TYPE_EGRESS_CLONE 2
12 | #define PKT_INSTANCE_TYPE_COALESCED 3
13 | #define PKT_INSTANCE_TYPE_INGRESS_RECIRC 4
14 | #define PKT_INSTANCE_TYPE_REPLICATION 5
15 | #define PKT_INSTANCE_TYPE_RESUBMIT 6
16 | 
17 | #define pkt_is_mirrored \
18 |     ((standard_metadata.instance_type != PKT_INSTANCE_TYPE_NORMAL) && \
19 |      (standard_metadata.instance_type != PKT_INSTANCE_TYPE_REPLICATION))
20 | 
21 | #define pkt_is_not_mirrored \
22 |     ((standard_metadata.instance_type == PKT_INSTANCE_TYPE_NORMAL) || \
23 |      (standard_metadata.instance_type == PKT_INSTANCE_TYPE_REPLICATION))
24 | 
25 | 
26 | control MyEgress(inout headers hdr,
27 |                  inout metadata meta,
28 |                  inout standard_metadata_t standard_metadata) {
29 | 
30 |     #include "query_statistics.p4"
31 | 
32 |     // per-key counter to keep query frequency of each cached item
33 |     counter((bit<32>) NETCACHE_ENTRIES * NETCACHE_VTABLE_NUM, CounterType.packets) query_freq_cnt;
34 | 
35 | 
36 |     apply {
37 | 
38 |         if (hdr.netcache.isValid()) {
39 | 
40 |             // if the bitmap is not full of zeros then we had a cache hit
41 |             bool cache_hit = (meta.vt_bitmap != 0);
42 | 
43 |             if (hdr.netcache.op == READ_QUERY) {
44 | 
45 | 
46 |                 if (!cache_hit) {
47 | 
48 |                     // waiting for the answer of the KV store allows us to
49 |                     // retrieve the actual key-value pair from the reply
50 |                     if (pkt_is_not_mirrored && hdr.udp.srcPort != NETCACHE_PORT) {
51 | 
52 |                         update_count_min_sketch();
53 |                         if (meta.key_cnt >= HOT_KEY_THRESHOLD) {
54 | 
55 |                             inspect_bloom_filter();
56 |                             if (meta.hot_query == 1) {
57 |                                 update_bloom_filter();
58 | 
59 |                                 // inform the server that it will receive a read
60 |                                 // query for a hot key (this is needed so that it
61 |                                 // will block until the insertion in the cache is
62 |                                 // completed) - cache coherence
63 |                                 hdr.netcache.op = HOT_READ_QUERY;
64 | 
65 |                                 //clone(CloneType.E2E, CONTROLLER_MIRROR_SESSION);
66 |                             }
67 |                         }
68 |                     }
69 | 
70 |                 } else {
71 |                     // update query frequency counter for cached item
72 |                     query_freq_cnt.count((bit<32>) meta.key_idx);
73 |                 }
74 | 
75 | 
76 | 
77 |             // if the server informs us that the delete operation on the key completed
78 |             // successfully then we forward this packet to the controller to update the
79 |             // cache and validate the key again
80 |             } else if (hdr.netcache.op == DELETE_COMPLETE && cache_hit) {
81 | 
82 |                 if (pkt_is_not_mirrored && hdr.tcp.srcPort == NETCACHE_PORT) {
83 |                     clone(CloneType.E2E, CONTROLLER_MIRROR_SESSION);
84 |                 }
85 | 
86 |             }
87 | 
88 |         }
89 | 
90 |     }
91 | 
92 | }
--------------------------------------------------------------------------------
/src/p4/core/ingress.p4:
--------------------------------------------------------------------------------
1 | #include <core.p4>
2 | #include <v1model.p4>
3 | 
4 | #include "../include/headers.p4"
5 | 
6 | #define CONTROLLER_MIRROR_SESSION 100
7 | 
8 | #define pkt_instance_type_normal 0
9 | #define pkt_instance_type_ingress_clone 1
10 | #define pkt_instance_type_egress_clone 2
11 | #define pkt_instance_type_coalesced 3
12 | #define pkt_instance_type_ingress_recirc 4
13 | #define pkt_instance_type_replication 5
14 | #define pkt_instance_type_resubmit 6
15 | 
16 | #define pkt_is_mirrored \
17 |     ((standard_metadata.instance_type != pkt_instance_type_normal) && \
18 |      (standard_metadata.instance_type != pkt_instance_type_replication))
19 | 
20 | #define pkt_is_not_mirrored \
21 |     ((standard_metadata.instance_type == pkt_instance_type_normal) || \
22 |      (standard_metadata.instance_type == pkt_instance_type_replication))
23 | 
24 | 
25 | control MyIngress(inout headers hdr,
26 |                   inout metadata meta,
27 |                   inout standard_metadata_t standard_metadata) {
28 | 
29 | 
30 |     action drop() {
31 |         mark_to_drop(standard_metadata);
32 |     }
33 | 
34 |     action set_egress_port(egressSpec_t port) {
35 |         standard_metadata.egress_spec = port;
36 |     }
37 | 
38 | 
39 |     /* Simple l2 forwarding logic */
40 |     table l2_forward {
41 | 
42 |         key = {
43 |             hdr.ethernet.dstAddr: exact;
44 |         }
45 | 
46 |         actions = {
47 |             set_egress_port;
48 |             drop;
49 |         }
50 | 
51 |         size = 1024;
52 |         default_action = drop();
53 | 
54 |     }
55 | 
56 |     /* update the packet header by swapping the source and destination addresses
57 |      * and ports in L2-L4 header fields in order to make the packet ready to
58 |      * return to the sender (tcp is more subtle than just swapping addresses) */
59 |     action ret_pkt_to_sender() {
60 | 
61 |         macAddr_t macTmp;
62 |         macTmp = hdr.ethernet.srcAddr;
63 |         hdr.ethernet.srcAddr = hdr.ethernet.dstAddr;
64 |         hdr.ethernet.dstAddr = macTmp;
65 | 
66 |         ip4Addr_t ipTmp;
67 |         ipTmp = hdr.ipv4.srcAddr;
68 |         hdr.ipv4.srcAddr = hdr.ipv4.dstAddr;
69 |         hdr.ipv4.dstAddr = ipTmp;
70 | 
71 |         bit<16> portTmp;
72 |         if (hdr.udp.isValid()) {
73 |             portTmp = hdr.udp.srcPort;
74 |             hdr.udp.srcPort = hdr.udp.dstPort;
75 |             hdr.udp.dstPort = portTmp;
76 |         } else if (hdr.tcp.isValid()) {
77 |             portTmp = hdr.tcp.srcPort;
78 |             hdr.tcp.srcPort = hdr.tcp.dstPort;
79 |             hdr.tcp.dstPort = portTmp;
80 |         }
81 | 
82 |     }
83 | 
84 | 
85 |     /* store metadata for a given key to find its values and index it properly */
86 |     action set_lookup_metadata(vtableBitmap_t vt_bitmap, vtableIdx_t vt_idx, keyIdx_t key_idx) {
87 | 
88 |         meta.vt_bitmap = vt_bitmap;
89 |         meta.vt_idx = vt_idx;
90 |         meta.key_idx = key_idx;
91 | 
92 |     }
93 | 
94 |     /* define cache lookup table */
95 |     table lookup_table {
96 | 
97 |         key = {
98 |             hdr.netcache.key : exact;
99 |         }
100 | 
101 |         actions = {
102 |             set_lookup_metadata;
103 |             NoAction;
104 |         }
105 | 
106 |         size = NETCACHE_ENTRIES * NETCACHE_VTABLE_NUM;
107 |         default_action = NoAction;
108 | 
109 |     }
110 | 
111 | 
112 |     // register storing a bit to indicate whether an element in the cache
113 |     // is valid or invalid
114 |     register<bit<1>>(NETCACHE_ENTRIES * NETCACHE_VTABLE_NUM) cache_status;
115 | 
116 |     // maintain 8 value tables since we need to spread them across stages
117 |     // where part of the value is created from each stage (section 4.4.2)
118 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt0;
119 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt1;
120 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt2;
121 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt3;
122 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt4;
123 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt5;
124 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt6;
125 |     register<bit<NETCACHE_VTABLE_SLOT_WIDTH>>(NETCACHE_ENTRIES) vt7;
126 | 
127 |     // count how many stages actually got triggered (1s on bitmap)
128 |     // this variable is needed for the shifting logic
129 |     bit<8> valid_stages_num = 0;
130 | 
131 |     // build the value incrementally by concatenating the value
132 |     // attained by each register array (stage) based on whether the
133 |     // corresponding bit of the bitmap stored in metadata is set
134 | 
135 |     // the way of implementing the 'append' of each value from each stage is based
136 |     // on a few constraints of the simple_switch architecture. The constraints are:
137 |     // 1) Concatenation of bit strings is only allowed for strings of same bit width
138 |     // 2) Bitwise operations are only allowed for types of same bit width
139 |     // 3) Multiplication is not supported (only shifting by power of 2)
140 | 
141 |     // Our approach to appending is to OR the value built so far in the
142 |     // header with the value read from the register array of every valid
143 |     // stage (bitmap bit set to 1). Before each OR, the value built so far
144 |     // is shifted left by the slot width (NETCACHE_VTABLE_SLOT_WIDTH = 64
145 |     // bits), so that the value contributed by each valid stage ends up in
146 |     // the correct position of the final value. Shifting by the constant
147 |     // slot width keeps us within the shift-only arithmetic that the
148 |     // simple_switch target supports.
149 | 
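    // worked example (editor's addition): suppose an entry has bitmap
    // 0b11000000 (stages 0 and 1 valid) and index i. process_array_0 loads
    // vt0[i] into the low 64 bits of hdr.netcache.value; process_array_1 then
    // shifts the running value left by 64 and ORs in vt1[i], so the final
    // result is vt0[i] ++ vt1[i] sitting in the low 128 bits of the header's
    // 512-bit value field.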
150 |     action process_array_0() {
151 |         // store value of the array at this stage
152 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
153 |         vt0.read(curr_stage_val, (bit<32>) meta.vt_idx);
154 | 
155 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
156 |         valid_stages_num = valid_stages_num + 1;
157 |     }
158 | 
159 | 
160 |     action process_array_1() {
161 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
162 |         vt1.read(curr_stage_val, (bit<32>) meta.vt_idx);
163 | 
164 |         bit<8> shift_pos = 0;
165 |         if (valid_stages_num != 0) {
166 |             shift_pos = 64 << (valid_stages_num - 1);
167 |         }
168 | 
169 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
170 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
171 | 
172 |         valid_stages_num = valid_stages_num + 1;
173 |     }
174 | 
175 |     action process_array_2() {
176 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
177 |         vt2.read(curr_stage_val, (bit<32>) meta.vt_idx);
178 | 
179 |         bit<8> shift_pos = 0;
180 |         if (valid_stages_num != 0) {
181 |             shift_pos = 64 << (valid_stages_num - 1);
182 |         }
183 | 
184 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
185 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
186 | 
187 |         valid_stages_num = valid_stages_num + 1;
188 |     }
189 | 
190 |     action process_array_3() {
191 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
192 |         vt3.read(curr_stage_val, (bit<32>) meta.vt_idx);
193 | 
194 |         bit<8> shift_pos = 0;
195 |         if (valid_stages_num != 0) {
196 |             shift_pos = 64 << (valid_stages_num - 1);
197 |         }
198 | 
199 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
200 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
201 | 
202 |         valid_stages_num = valid_stages_num + 1;
203 |     }
204 | 
205 |     action process_array_4() {
206 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
207 |         vt4.read(curr_stage_val, (bit<32>) meta.vt_idx);
208 | 
209 |         bit<8> shift_pos = 0;
210 |         if (valid_stages_num != 0) {
211 |             shift_pos = 64 << (valid_stages_num - 1);
212 |         }
213 | 
214 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
215 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
216 | 
217 |         valid_stages_num = valid_stages_num + 1;
218 |     }
219 | 
220 |     action process_array_5() {
221 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
222 |         vt5.read(curr_stage_val, (bit<32>) meta.vt_idx);
223 | 
224 |         bit<8> shift_pos = 0;
225 |         if (valid_stages_num != 0) {
226 |             shift_pos = 64 << (valid_stages_num - 1);
227 |         }
228 | 
229 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
230 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
231 | 
232 |         valid_stages_num = valid_stages_num + 1;
233 |     }
234 | 
235 |     action process_array_6() {
236 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
237 |         vt6.read(curr_stage_val, (bit<32>) meta.vt_idx);
238 | 
239 |         bit<8> shift_pos = 0;
240 |         if (valid_stages_num != 0) {
241 |             shift_pos = 64 << (valid_stages_num - 1);
242 |         }
243 | 
244 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
245 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
246 | 
247 |         valid_stages_num = valid_stages_num + 1;
248 |     }
249 | 
250 |     action process_array_7() {
251 |         bit<NETCACHE_VTABLE_SLOT_WIDTH> curr_stage_val;
252 |         vt7.read(curr_stage_val, (bit<32>) meta.vt_idx);
253 | 
254 |         bit<8> shift_pos = 0;
255 |         if (valid_stages_num != 0) {
256 |             shift_pos = 64 << (valid_stages_num - 1);
257 |         }
258 | 
259 |         hdr.netcache.value = (bit<NETCACHE_VALUE_WIDTH_MAX>) hdr.netcache.value << 64;
260 |         hdr.netcache.value = hdr.netcache.value | (bit<NETCACHE_VALUE_WIDTH_MAX>) curr_stage_val;
261 | 262 | valid_stages_num = valid_stages_num + 1; 263 | } 264 | 265 | 266 | table vtable_0 { 267 | key = { 268 | meta.vt_bitmap[7:7]: exact; 269 | } 270 | actions = { 271 | process_array_0; 272 | NoAction; 273 | } 274 | size = NETCACHE_ENTRIES; 275 | default_action = NoAction; 276 | } 277 | 278 | table vtable_1 { 279 | key = { 280 | meta.vt_bitmap[6:6]: exact; 281 | } 282 | actions = { 283 | process_array_1; 284 | NoAction; 285 | } 286 | size = NETCACHE_ENTRIES; 287 | default_action = NoAction; 288 | } 289 | 290 | table vtable_2 { 291 | key = { 292 | meta.vt_bitmap[5:5]: exact; 293 | } 294 | actions = { 295 | process_array_2; 296 | NoAction; 297 | } 298 | size = NETCACHE_ENTRIES; 299 | default_action = NoAction; 300 | } 301 | 302 | table vtable_3 { 303 | key = { 304 | meta.vt_bitmap[4:4]: exact; 305 | } 306 | actions = { 307 | process_array_3; 308 | NoAction; 309 | } 310 | size = NETCACHE_ENTRIES; 311 | default_action = NoAction; 312 | } 313 | 314 | table vtable_4 { 315 | key = { 316 | meta.vt_bitmap[3:3]: exact; 317 | } 318 | actions = { 319 | process_array_4; 320 | NoAction; 321 | } 322 | size = NETCACHE_ENTRIES; 323 | default_action = NoAction; 324 | } 325 | 326 | table vtable_5 { 327 | key = { 328 | meta.vt_bitmap[2:2]: exact; 329 | } 330 | actions = { 331 | process_array_5; 332 | NoAction; 333 | } 334 | size = NETCACHE_ENTRIES; 335 | default_action = NoAction; 336 | } 337 | 338 | table vtable_6 { 339 | key = { 340 | meta.vt_bitmap[1:1]: exact; 341 | } 342 | actions = { 343 | process_array_6; 344 | NoAction; 345 | } 346 | size = NETCACHE_ENTRIES; 347 | default_action = NoAction; 348 | } 349 | 350 | table vtable_7 { 351 | key = { 352 | meta.vt_bitmap[0:0]: exact; 353 | } 354 | actions = { 355 | process_array_7; 356 | NoAction; 357 | } 358 | size = NETCACHE_ENTRIES; 359 | default_action = NoAction; 360 | } 361 | 362 | 363 | 364 | apply { 365 | 366 | if (hdr.netcache.isValid()) { 367 | 368 | 369 | switch(lookup_table.apply().action_run) { 370 | 371 | set_lookup_metadata: { 372 | 373 | if (hdr.netcache.op == READ_QUERY){ 374 | 375 | bit<1> cache_valid_bit; 376 | cache_status.read(cache_valid_bit, (bit<32>) meta.key_idx); 377 | 378 | // read query should be answered by switch if the key 379 | // resides in cache and its entry is valid 380 | meta.cache_valid = (cache_valid_bit == 1); 381 | 382 | /* 383 | if(meta.cache_valid && hdr.udp.srcPort != NETCACHE_PORT) { 384 | ret_pkt_to_sender(); 385 | } 386 | */ 387 | 388 | 389 | if (meta.cache_valid && hdr.udp.srcPort != NETCACHE_PORT) { 390 | vtable_0.apply(); vtable_1.apply(); vtable_2.apply(); vtable_3.apply(); 391 | vtable_4.apply(); vtable_5.apply(); vtable_6.apply(); vtable_7.apply(); 392 | 393 | ret_pkt_to_sender(); 394 | } 395 | 396 | } 397 | 398 | 399 | // if the key of the write query exists in the cache then we should inform 400 | // the controller to initiate the 3-way cache coherency handshake 401 | else if (hdr.netcache.op == WRITE_QUERY) { 402 | 403 | cache_status.write((bit<32>) meta.key_idx, (bit<1>) 0); 404 | 405 | hdr.netcache.op = CACHED_UPDATE; 406 | } 407 | // the server will block subsequent writes and update the entry 408 | // in the cache. 
to notify the server that the entry is cached
409 |                     // we set a special header
410 | 
411 |                     // delete query is forwarded to key-value server and if the
412 |                     // key resides in cache then its entry is invalidated
413 |                     // the paper does not specify what we should do additionally
414 |                     // probably the kv-store should delete the entry and notify the
415 |                     // controller as well -> perhaps use the mirroring CPU port approach as well
416 |                     else if (hdr.netcache.op == DELETE_QUERY) {
417 | 
418 |                         cache_status.write((bit<32>) meta.key_idx, (bit<1>) 0);
419 | 
420 |                     }
421 | 
422 |                     else if (hdr.netcache.op == UPDATE_COMPLETE) {
423 | 
424 |                         // if it's an update query then ensure that the switch will
425 |                         // forward the packet back to the server to complete the
426 |                         // cache coherency handshake
427 |                         ret_pkt_to_sender();
428 | 
429 |                         bit<8> stages_cnt = 0;
430 |                         // bit<16>: the shift offset can reach 7 * 64 = 448,
431 |                         // which does not fit in 8 bits
432 |                         bit<16> shift_pos = 0;
433 | 
434 |                         if (meta.vt_bitmap[0:0] == 1) {
435 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
436 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
437 | 
438 |                             vt7.write((bit<32>) meta.vt_idx, new_val);
439 | 
440 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
441 |                         }
442 | 
443 |                         if (meta.vt_bitmap[1:1] == 1) {
444 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
445 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
446 | 
447 |                             vt6.write((bit<32>) meta.vt_idx, new_val);
448 | 
449 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
450 |                         }
451 | 
452 |                         if (meta.vt_bitmap[2:2] == 1) {
453 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
454 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
455 | 
456 |                             vt5.write((bit<32>) meta.vt_idx, new_val);
457 | 
458 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
459 |                         }
460 | 
461 |                         if (meta.vt_bitmap[3:3] == 1) {
462 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
463 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
464 | 
465 |                             vt4.write((bit<32>) meta.vt_idx, new_val);
466 | 
467 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
468 |                         }
469 | 
470 |                         if (meta.vt_bitmap[4:4] == 1) {
471 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
472 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
473 | 
474 |                             vt3.write((bit<32>) meta.vt_idx, new_val);
475 | 
476 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
477 |                         }
478 | 
479 |                         if (meta.vt_bitmap[5:5] == 1) {
480 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
481 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
482 | 
483 |                             vt2.write((bit<32>) meta.vt_idx, new_val);
484 | 
485 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
486 |                         }
487 | 
488 |                         if (meta.vt_bitmap[6:6] == 1) {
489 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
490 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
491 | 
492 |                             vt1.write((bit<32>) meta.vt_idx, new_val);
493 | 
494 |                             shift_pos = shift_pos + NETCACHE_VTABLE_SLOT_WIDTH;
495 |                         }
496 | 
497 |                         if (meta.vt_bitmap[7:7] == 1) {
498 |                             bit<NETCACHE_VTABLE_SLOT_WIDTH> new_val;
499 |                             new_val = (bit<NETCACHE_VTABLE_SLOT_WIDTH>) (hdr.netcache.value >> shift_pos);
500 | 
501 |                             vt0.write((bit<32>) meta.vt_idx, new_val);
502 |                         }
503 | 
504 | 
505 |                         cache_status.write((bit<32>) meta.key_idx, (bit<1>) 1);
506 | 
507 |                         hdr.netcache.op = UPDATE_COMPLETE_OK;
508 | 
509 |                     }
510 | 
511 |                 }
512 | 
513 |                 NoAction: {
514 | 
515 |                     if (hdr.netcache.op == HOT_READ_QUERY) {
516 | 
517 |                         // inform the controller for the hot key to insert into the cache
518 |                         if (pkt_is_not_mirrored) {
519 |                             clone(CloneType.I2E, CONTROLLER_MIRROR_SESSION);
520 |                         }
521 | 
522 |                     }
523 |                 }
524 | 
525 | 
526 |             }
527 | 
528 |         }
529 | 
530 |         l2_forward.apply();
531 |     }
532 | 
533 | }
--------------------------------------------------------------------------------
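Editor's aside (illustrative, not part of the repo): the UPDATE_COMPLETE branch above scatters the 512-bit header value back into the per-stage register arrays, 64 bits at a time, with the lowest set bitmap bit landing in vt7. A small Python model of that slot mapping (the helper name is hypothetical):

def split_value_into_slots(value_int, bitmap):
    # mirrors the UPDATE_COMPLETE branch of ingress.p4: bitmap bit 0 selects
    # vt7 and takes the least-significant 64-bit chunk, bit 1 selects vt6
    # with the next chunk, and so on up to bit 7 / vt0
    slots, shift = {}, 0
    for bit in range(8):
        if (bitmap >> bit) & 1:
            slots["vt{}".format(7 - bit)] = (value_int >> shift) & ((1 << 64) - 1)
            shift += 64
    return slots

# a 128-bit value with bitmap 0b00000011 occupies vt7 (low) and vt6 (high)
parts = split_value_into_slots((0xAAAA << 64) | 0xBBBB, 0b00000011)
assert parts == {"vt7": 0xBBBB, "vt6": 0xAAAA}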
/src/p4/core/netcache.p4:
--------------------------------------------------------------------------------
1 | /* -*- P4_16 -*- */
2 | #include <core.p4>
3 | #include <v1model.p4>
4 | 
5 | #include "../include/parsers.p4"
6 | #include "egress.p4"
7 | #include "ingress.p4"
8 | 
9 | /*************************************************************************
10 | ************   C H E C K S U M    V E R I F I C A T I O N   *************
11 | *************************************************************************/
12 | 
13 | control MyVerifyChecksum(inout headers hdr, inout metadata meta) {
14 |     apply { }
15 | }
16 | 
17 | 
18 | /*************************************************************************
19 | *************   C H E C K S U M    C O M P U T A T I O N   **************
20 | *************************************************************************/
21 | 
22 | control MyComputeChecksum(inout headers hdr, inout metadata meta) {
23 |     apply {
24 |         update_checksum(
25 |             hdr.ipv4.isValid(),
26 |             { hdr.ipv4.version,
27 |               hdr.ipv4.ihl,
28 |               hdr.ipv4.dscp,
29 |               hdr.ipv4.ecn,
30 |               hdr.ipv4.totalLen,
31 |               hdr.ipv4.identification,
32 |               hdr.ipv4.flags,
33 |               hdr.ipv4.fragOffset,
34 |               hdr.ipv4.ttl,
35 |               hdr.ipv4.protocol,
36 |               hdr.ipv4.srcAddr,
37 |               hdr.ipv4.dstAddr },
38 |             hdr.ipv4.hdrChecksum,
39 |             HashAlgorithm.csum16);
40 | 
41 |         update_checksum(
42 |             // only update checksum of udp-netcache packets
43 |             // that were created on the switch
44 |             hdr.udp.isValid() && hdr.netcache.isValid(),
45 |             { hdr.ipv4.srcAddr,
46 |               hdr.ipv4.dstAddr,
47 |               8w0,
48 |               hdr.ipv4.protocol,
49 |               hdr.udp.len,
50 |               hdr.udp.srcPort,
51 |               hdr.udp.dstPort,
52 |               hdr.udp.len,
53 |               hdr.netcache.op,
54 |               hdr.netcache.seq,
55 |               hdr.netcache.key,
56 |               hdr.netcache.value },
57 |             hdr.udp.checksum,
58 |             HashAlgorithm.csum16);
59 | 
60 | 
61 |         update_checksum(
62 |             hdr.tcp.isValid() && hdr.netcache.isValid(),
63 |             {
64 |               hdr.ipv4.srcAddr,
65 |               hdr.ipv4.dstAddr,
66 |               8w0,
67 |               hdr.ipv4.protocol,
68 |               meta.tcpLength,
69 |               hdr.tcp.srcPort,
70 |               hdr.tcp.dstPort,
71 |               hdr.tcp.seqNo,
72 |               hdr.tcp.ackNo,
73 |               hdr.tcp.dataOffset,
74 |               hdr.tcp.res,
75 |               hdr.tcp.cwr,
76 |               hdr.tcp.ece,
77 |               hdr.tcp.urg,
78 |               hdr.tcp.ack,
79 |               hdr.tcp.psh,
80 |               hdr.tcp.rst,
81 |               hdr.tcp.syn,
82 |               hdr.tcp.fin,
83 |               hdr.tcp.window,
84 |               hdr.tcp.urgentPtr,
85 |               hdr.tcp_options.options,
86 |               hdr.netcache.op,
87 |               hdr.netcache.seq,
88 |               hdr.netcache.key,
89 |               hdr.netcache.value
90 |             },
91 |             hdr.tcp.checksum,
92 |             HashAlgorithm.csum16);
93 | 
94 | 
95 | 
96 |     }
97 | }
98 | 
99 | /*************************************************************************
100 | ***********************  S W I T C H  ***********************************
101 | *************************************************************************/
102 | 
103 | //switch architecture
104 | V1Switch(
105 | MyParser(),
106 | MyVerifyChecksum(),
107 | MyIngress(),
108 | MyEgress(),
109 | MyComputeChecksum(),
110 | MyDeparser()
111 | ) main;
--------------------------------------------------------------------------------
/src/p4/core/query_statistics.p4:
--------------------------------------------------------------------------------
1 | #include <core.p4>
2 | #include <v1model.p4>
3 | 
4 | 
5 | // BLOOM FILTER REGISTERS
6 | register<bit<1>>(BLOOM_FILTER_ENTRIES) bloom1;
7 | register<bit<1>>(BLOOM_FILTER_ENTRIES) bloom2;
8 | register<bit<1>>(BLOOM_FILTER_ENTRIES) bloom3;
9 | 
10 | 
11 | #define SKETCH_BUCKET_LENGTH 65535
12 | #define SKETCH_CELL_BIT_WIDTH 16
13 | #define SKETCH_IDX_WIDTH 16
14 | 
15 | // COUNT MIN SKETCH REGISTERS
16 | register<bit<SKETCH_CELL_BIT_WIDTH>>(SKETCH_BUCKET_LENGTH) sketch1;
17 | register<bit<SKETCH_CELL_BIT_WIDTH>>(SKETCH_BUCKET_LENGTH) sketch2;
18 | register<bit<SKETCH_CELL_BIT_WIDTH>>(SKETCH_BUCKET_LENGTH) sketch3;
19 | register<bit<SKETCH_CELL_BIT_WIDTH>>(SKETCH_BUCKET_LENGTH) sketch4;
20 | 
21 | 
22 | action inspect_bloom_filter() {
23 | 
24 |     hash(meta.bloom_idx1, HashAlgorithm.crc32_custom, (bit<1>) 0,
25 |         { hdr.netcache.key }, (bit<16>) BLOOM_FILTER_ENTRIES);
26 | 
27 |     hash(meta.bloom_idx2, HashAlgorithm.crc32_custom, (bit<1>) 0,
28 |         { hdr.netcache.key }, (bit<16>) BLOOM_FILTER_ENTRIES);
29 | 
30 |     hash(meta.bloom_idx3, HashAlgorithm.crc32_custom, (bit<1>) 0,
31 |         { hdr.netcache.key }, (bit<16>) BLOOM_FILTER_ENTRIES);
32 | 
33 | 
34 |     bit<1> val_1;
35 |     bloom1.read(val_1, (bit<32>) meta.bloom_idx1);
36 |     bit<1> val_2;
37 |     bloom2.read(val_2, (bit<32>) meta.bloom_idx2);
38 |     bit<1> val_3;
39 |     bloom3.read(val_3, (bit<32>) meta.bloom_idx3);
40 | 
41 |     // if all three bits are already set then the key has (with high
42 |     // probability) been reported to the controller before, so we flag
43 |     // the query as hot only when at least one bit is still unset
44 |     if (!(val_1 == 1 && val_2 == 1 && val_3 == 1)) {
45 |         meta.hot_query = 1;
46 |     }
47 | 
48 | 
49 | }
50 | 
51 | 
52 | action update_bloom_filter() {
53 | 
54 |     bloom1.write((bit<32>) meta.bloom_idx1, (bit<1>) 1);
55 |     bloom2.write((bit<32>) meta.bloom_idx2, (bit<1>) 1);
56 |     bloom3.write((bit<32>) meta.bloom_idx3, (bit<1>) 1);
57 | 
58 | }
59 | 
60 | 
61 | action update_count_min_sketch() {
62 | 
63 |     bit<SKETCH_IDX_WIDTH> sketch_idx1;
64 |     bit<SKETCH_CELL_BIT_WIDTH> sketch_val1;
65 |     hash(sketch_idx1, HashAlgorithm.crc32_custom, (bit<1>) 0,
66 |         {hdr.netcache.key}, (bit<16>) SKETCH_BUCKET_LENGTH);
67 |     sketch1.read(sketch_val1, (bit<32>) sketch_idx1);
68 |     sketch1.write((bit<32>) sketch_idx1, sketch_val1+1);
69 | 
70 | 
71 |     bit<SKETCH_IDX_WIDTH> sketch_idx2;
72 |     bit<SKETCH_CELL_BIT_WIDTH> sketch_val2;
73 |     hash(sketch_idx2, HashAlgorithm.crc32_custom, (bit<1>) 0,
74 |         {hdr.netcache.key}, (bit<16>) SKETCH_BUCKET_LENGTH);
75 |     sketch2.read(sketch_val2, (bit<32>) sketch_idx2);
76 |     sketch2.write((bit<32>) sketch_idx2, sketch_val2+1);
77 | 
78 | 
79 |     bit<SKETCH_IDX_WIDTH> sketch_idx3;
80 |     bit<SKETCH_CELL_BIT_WIDTH> sketch_val3;
81 |     hash(sketch_idx3, HashAlgorithm.crc32_custom, (bit<1>) 0,
82 |         {hdr.netcache.key}, (bit<16>) SKETCH_BUCKET_LENGTH);
83 |     sketch3.read(sketch_val3, (bit<32>) sketch_idx3);
84 |     sketch3.write((bit<32>) sketch_idx3, sketch_val3+1);
85 | 
86 |     bit<SKETCH_IDX_WIDTH> sketch_idx4;
87 |     bit<SKETCH_CELL_BIT_WIDTH> sketch_val4;
88 |     hash(sketch_idx4, HashAlgorithm.crc32_custom, (bit<1>) 0,
89 |         {hdr.netcache.key}, (bit<16>) SKETCH_BUCKET_LENGTH);
90 |     sketch4.read(sketch_val4, (bit<32>) sketch_idx4);
91 |     sketch4.write((bit<32>) sketch_idx4, sketch_val4+1);
92 | 
93 | 
94 |     // take the minimum out of all the sketch values
95 | 
96 |     if (sketch_val1 <= sketch_val2 && sketch_val1 <= sketch_val3 &&
97 |         sketch_val1 <= sketch_val4) {
98 |         meta.key_cnt = sketch_val1;
99 |     }
100 | 
101 |     if (sketch_val2 <= sketch_val1 && sketch_val2 <= sketch_val3 &&
102 |         sketch_val2 <= sketch_val4) {
103 |         meta.key_cnt = sketch_val2;
104 |     }
105 | 
106 |     if (sketch_val3 <= sketch_val1 && sketch_val3 <= sketch_val2 &&
107 |         sketch_val3 <= sketch_val4) {
108 |         meta.key_cnt = sketch_val3;
109 |     }
110 | 
111 |     if (sketch_val4 <= sketch_val1 && sketch_val4 <= sketch_val2 &&
112 |         sketch_val4 <= sketch_val3) {
113 |         meta.key_cnt = sketch_val4;
114 |     }
115 | 
116 | }
117 | 
--------------------------------------------------------------------------------
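Editor's aside (illustrative, not part of the repo): the two structures above compose into the hot-key detector — a count-min sketch estimates per-key read counts, and a bloom filter ensures each key is reported at most once. A minimal Python model of that composition (hash choices and names here are hypothetical; the switch uses crc32_custom instances):

import zlib

WIDTH, THRESHOLD = 65535, 3
sketch = [[0] * WIDTH for _ in range(4)]   # sketch1..sketch4
bloom = [set(), set(), set()]              # bloom1..bloom3 as sets of indexes

def on_read(key: bytes) -> bool:
    """Return True if this read should be reported as a newly hot key."""
    # count-min sketch: bump one counter per row; the estimate is the row
    # minimum, so hash collisions can only inflate (never deflate) a count
    vals = []
    for i, row in enumerate(sketch):
        idx = zlib.crc32(bytes([i]) + key) % WIDTH
        row[idx] += 1
        vals.append(row[idx])
    if min(vals) < THRESHOLD:
        return False
    # bloom filter: suppress duplicate reports; after the first report all
    # three positions are set, so every later check sees the key as known
    idxs = [zlib.crc32(bytes([16 + i]) + key) % 4096 for i in range(3)]
    already = all(idx in bloom[i] for i, idx in enumerate(idxs))
    for i, idx in enumerate(idxs):
        bloom[i].add(idx)
    return not already

# crossing the threshold triggers exactly one report for a given key
assert [on_read(b"hot") for _ in range(5)].count(True) == 1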
17 | /* netcache value table constant definitions */
18 | #define NETCACHE_VTABLE_NUM 8
19 | #define NETCACHE_VTABLE_SIZE_WIDTH 16
20 | #define NETCACHE_VTABLE_SLOT_WIDTH 64 // in bits
21 |
22 |
23 | /* minpow2(NETCACHE_ENTRIES * NETCACHE_VTABLE_NUM) */
24 | #define KEY_IDX_WIDTH 20
25 | #define MAX_KEYS (NETCACHE_ENTRIES * NETCACHE_VTABLE_NUM)
26 |
27 | /* maximum number of bits of netcache fields */
28 | #define NETCACHE_VALUE_WIDTH_MAX 512
29 | #define NETCACHE_KEY_WIDTH 128
30 |
31 | /* special reserved port for NetCache */
32 | const bit<16> NETCACHE_PORT = 50000;
33 | const bit<16> TYPE_IPV4 = 0x800;
34 | const bit<8> TYPE_TCP = 0x06;
35 | const bit<8> TYPE_UDP = 0x11;
36 |
37 | /* currently supported query types */
38 | const bit<8> READ_QUERY = 0x00;
39 | const bit<8> WRITE_QUERY = 0x01;
40 | const bit<8> DELETE_QUERY = 0x02;
41 | const bit<8> HOT_READ_QUERY = 0x03;
42 | const bit<8> UPDATE_COMPLETE = 0x04;
43 | const bit<8> DELETE_COMPLETE = 0x05;
44 | const bit<8> CACHED_UPDATE = 0x06;
45 | const bit<8> UPDATE_COMPLETE_OK = 0x07;
46 |
47 | /* netcache header field types */
48 | typedef bit<NETCACHE_KEY_WIDTH> key_t;
49 | typedef bit<NETCACHE_VALUE_WIDTH_MAX> value_t;
50 | typedef bit<NETCACHE_VTABLE_SIZE_WIDTH> vtableIdx_t;
51 | typedef bit<NETCACHE_VTABLE_NUM> vtableBitmap_t;
52 | typedef bit<KEY_IDX_WIDTH> keyIdx_t;
53 |
54 | typedef bit<9> egressSpec_t;
55 | typedef bit<48> macAddr_t;
56 | typedef bit<32> ip4Addr_t;
57 |
58 |
59 | header ethernet_t {
60 | macAddr_t dstAddr;
61 | macAddr_t srcAddr;
62 | bit<16> etherType;
63 | }
64 |
65 | header ipv4_t {
66 | bit<4> version;
67 | bit<4> ihl;
68 | bit<6> dscp;
69 | bit<2> ecn;
70 | bit<16> totalLen;
71 | bit<16> identification;
72 | bit<3> flags;
73 | bit<13> fragOffset;
74 | bit<8> ttl;
75 | bit<8> protocol;
76 | bit<16> hdrChecksum;
77 | ip4Addr_t srcAddr;
78 | ip4Addr_t dstAddr;
79 | }
80 |
81 | header tcp_t {
82 | bit<16> srcPort;
83 | bit<16> dstPort;
84 | bit<32> seqNo;
85 | bit<32> ackNo;
86 | bit<4> dataOffset;
87 | bit<4> res;
88 | bit<1> cwr;
89 | bit<1> ece;
90 | bit<1> urg;
91 | bit<1> ack;
92 | bit<1> psh;
93 | bit<1> rst;
94 | bit<1> syn;
95 | bit<1> fin;
96 | bit<16> window;
97 | bit<16> checksum;
98 | bit<16> urgentPtr;
99 | }
100 |
101 | header tcp_options_t {
102 | varbit<320> options;
103 | }
104 |
105 | header Tcp_option_end_h {
106 | bit<8> kind;
107 | }
108 | header Tcp_option_nop_h {
109 | bit<8> kind;
110 | }
111 | header Tcp_option_ss_h {
112 | bit<32> maxSegmentSize;
113 | }
114 | header Tcp_option_s_h {
115 | bit<8> kind;
116 | bit<8> len;
117 | bit<8> shift;
118 | }
119 | header Tcp_option_sack_p_h {
120 | bit<8> kind;
121 | bit<8> length;
122 | }
123 | header Tcp_option_sack_h {
124 | bit<8> kind;
125 | bit<8> length;
126 | varbit<256> sack;
127 | }
128 |
129 | header Tcp_option_timestamp_h {
130 | bit<80> timestamp;
131 | }
132 |
133 | header_union Tcp_option_h {
134 | Tcp_option_end_h end;
135 | Tcp_option_nop_h nop;
136 | Tcp_option_ss_h ss;
137 | Tcp_option_s_h s;
138 | Tcp_option_sack_p_h sack_p;
139 | Tcp_option_sack_h sack;
140 | Tcp_option_timestamp_h ts;
141 | }
142 |
143 | // Defines a stack of 10 tcp options
144 | typedef Tcp_option_h[10] Tcp_option_stack;
145 |
146 | header Tcp_option_padding_h {
147 | varbit<256> padding;
148 | }
149 |
150 | header udp_t {
151 | bit<16> srcPort;
152 | bit<16> dstPort;
153 | bit<16> len;
154 | bit<16> checksum;
155 | }
156 |
157 |
158 | header netcache_t {
159 | bit<8> op;
160 | bit<32> seq;
161 | key_t key;
162 | value_t value;
163 | }
164 |
165 | struct fwd_metadata_t {
166 | bit<32> l2ptr;
167 | bit<24> out_bd;
168 | }
169 |
170 | struct metadata {
171 | vtableBitmap_t vt_bitmap;
172 | vtableIdx_t vt_idx;
173 |
174 | bit<BLOOM_IDX_WIDTH> bloom_idx1;
175 | bit<BLOOM_IDX_WIDTH> bloom_idx2;
176 | bit<BLOOM_IDX_WIDTH> bloom_idx3;
177 |
178 | bit<SKETCH_CELL_BIT_WIDTH> key_cnt;
179 |
180 | keyIdx_t key_idx;
181 |
182 |
183 | bit<1> hot_query;
184 |
185 | fwd_metadata_t fwd_metadata;
186 |
187 | bool cache_valid;
188 |
189 | bit<16> tcpLength;
190 |
191 | }
192 |
193 | struct headers {
194 | ethernet_t ethernet;
195 | ipv4_t ipv4;
196 | tcp_t tcp;
197 | tcp_options_t tcp_options;
198 | //Tcp_option_stack tcp_options_vec;
199 | //Tcp_option_padding_h tcp_options_padding;
200 | udp_t udp;
201 | netcache_t netcache;
202 | }
203 |
204 | error {
205 | TcpDataOffsetTooSmall,
206 | TcpOptionTooLongForHeader,
207 | TcpBadSackOptionLength
208 | }
209 |
210 | struct Tcp_option_sack_top
211 | {
212 | bit<8> kind;
213 | bit<8> length;
214 | }
215 |
216 | #endif // HEADERS_P4
217 |
--------------------------------------------------------------------------------
/src/p4/include/parsers.p4:
--------------------------------------------------------------------------------
1 | #ifndef PARSERS_P4
2 | #define PARSERS_P4
3 |
4 | #include "headers.p4"
5 |
6 | // modified version of https://github.com/jafingerhut/p4-guide/tree/master/tcp-options-parser
7 | // enables parser to process tcp header options
8 | parser Tcp_option_parser(packet_in b,
9 | in bit<4> tcp_hdr_data_offset,
10 | out Tcp_option_stack vec,
11 | out Tcp_option_padding_h padding)
12 | {
13 | bit<7> tcp_hdr_bytes_left;
14 |
15 | state start {
16 | // RFC 793 - the Data Offset field is the length of the TCP
17 | // header in units of 32-bit words. It must be at least 5 for
18 | // the minimum length TCP header, and since it is 4 bits in
19 | // size, can be at most 15, for a maximum TCP header length of
20 | // 15*4 = 60 bytes.
21 | verify(tcp_hdr_data_offset >= 5, error.TcpDataOffsetTooSmall);
22 | tcp_hdr_bytes_left = 4 * (bit<7>) (tcp_hdr_data_offset - 5);
23 | // always true here: 0 <= tcp_hdr_bytes_left <= 40
24 | transition next_option;
25 | }
26 | state next_option {
27 | transition select(tcp_hdr_bytes_left) {
28 | 0 : accept; // no TCP header bytes left
29 | default : next_option_part2;
30 | }
31 | }
32 | state next_option_part2 {
33 | // precondition: tcp_hdr_bytes_left >= 1
34 | transition select(b.lookahead<bit<8>>()) {
35 | 0: parse_tcp_option_end;
36 | 1: parse_tcp_option_nop;
37 | 2: parse_tcp_option_ss;
38 | 3: parse_tcp_option_s;
39 | 4: parse_tcp_option_sack_p;
40 | 5: parse_tcp_option_sack;
41 | 8: parse_tcp_option_timestamp;
42 | }
43 | }
44 | state parse_tcp_option_end {
45 | b.extract(vec.next.end);
46 | // TBD: This code is an example demonstrating why it would be
47 | // useful to have sizeof(vec.next.end) instead of having to
48 | // put in a hard-coded length for each TCP option.
49 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 1;
50 | transition consume_remaining_tcp_hdr_and_accept;
51 | }
52 | state consume_remaining_tcp_hdr_and_accept {
53 | // A more picky sub-parser implementation would verify that
54 | // all of the remaining bytes are 0, as specified in RFC 793,
55 | // setting an error and rejecting if not. This one skips past
56 | // the rest of the TCP header without checking this.
57 |
58 | // tcp_hdr_bytes_left might be as large as 40, so multiplying
59 | // it by 8 it may be up to 320, which requires 9 bits to avoid
60 | // losing any information.
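// worked example: a maximum-length TCP header (data offset 15) reaching
// this state immediately would leave 40 option bytes, i.e. 8 * 40 = 320
// bits extracted as padding below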
61 | b.extract(padding, (bit<32>) (8 * (bit<9>) tcp_hdr_bytes_left));
62 | transition accept;
63 | }
64 | state parse_tcp_option_nop {
65 | b.extract(vec.next.nop);
66 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 1;
67 | transition next_option;
68 | }
69 | state parse_tcp_option_ss {
70 | verify(tcp_hdr_bytes_left >= 4, error.TcpOptionTooLongForHeader);
71 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 4;
72 | b.extract(vec.next.ss);
73 | transition next_option;
74 | }
75 | state parse_tcp_option_s {
76 | verify(tcp_hdr_bytes_left >= 3, error.TcpOptionTooLongForHeader);
77 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 3;
78 | b.extract(vec.next.s);
79 | transition next_option;
80 | }
81 | state parse_tcp_option_sack_p {
82 | verify(tcp_hdr_bytes_left >= 2, error.TcpOptionTooLongForHeader);
83 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 2;
84 | b.extract(vec.next.sack_p);
85 | transition next_option;
86 | }
87 | state parse_tcp_option_sack {
88 | bit<8> n_sack_bytes = b.lookahead<Tcp_option_sack_top>().length;
89 | // I do not have global knowledge of all TCP SACK
90 | // implementations, but from reading the RFC, it appears that
91 | // the only SACK option lengths that are legal are 2+8*n for
92 | // n=1, 2, 3, or 4, so set an error if anything else is seen.
93 | verify(n_sack_bytes == 10 || n_sack_bytes == 18 ||
94 | n_sack_bytes == 26 || n_sack_bytes == 34,
95 | error.TcpBadSackOptionLength);
96 | verify(tcp_hdr_bytes_left >= (bit<7>) n_sack_bytes,
97 | error.TcpOptionTooLongForHeader);
98 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - (bit<7>) n_sack_bytes;
99 | b.extract(vec.next.sack, (bit<32>) (8 * n_sack_bytes - 16));
100 | transition next_option;
101 | }
102 |
103 | state parse_tcp_option_timestamp {
104 | verify(tcp_hdr_bytes_left >= 10, error.TcpOptionTooLongForHeader);
105 | tcp_hdr_bytes_left = tcp_hdr_bytes_left - 10;
106 | b.extract(vec.next.ts);
107 | transition next_option;
108 | }
109 | }
110 |
111 | parser MyParser(packet_in packet, out headers hdr, inout metadata meta,
112 | inout standard_metadata_t standard_metadata) {
113 |
114 | state start {
115 | transition parse_ethernet;
116 | }
117 |
118 | state parse_ethernet {
119 | packet.extract(hdr.ethernet);
120 | transition select(hdr.ethernet.etherType){
121 | TYPE_IPV4: parse_ipv4;
122 | default: accept;
123 | }
124 | }
125 |
126 | state parse_ipv4 {
127 | packet.extract(hdr.ipv4);
128 | transition select(hdr.ipv4.protocol){
129 | TYPE_TCP : parse_tcp;
130 | TYPE_UDP : parse_udp;
131 | default: accept;
132 | }
133 | }
134 |
135 | state parse_tcp {
136 | packet.extract(hdr.tcp);
137 | meta.tcpLength = hdr.ipv4.totalLen - 4 * (bit<16>) hdr.ipv4.ihl;
138 |
139 | transition select(hdr.tcp.dataOffset, hdr.tcp.dstPort, hdr.tcp.srcPort) {
140 | (5, NETCACHE_PORT, _): parse_netcache;
141 | (5, _, NETCACHE_PORT): parse_netcache;
142 | (5, _, _) : accept;
143 | default: parse_tcp_options;
144 | }
145 |
146 | /*
147 | Tcp_option_parser.apply(packet, hdr.tcp.dataOffset,
148 | //hdr.tcp_options_vec, hdr.tcp_options_padding);
149 |
150 |
151 | bit<16> tcp_payload_len = hdr.ipv4.totalLen - 4 * (bit<16>) hdr.ipv4.ihl - 4 * (bit<16>) hdr.tcp.dataOffset;
152 | transition select(tcp_payload_len, hdr.tcp.dstPort, hdr.tcp.srcPort) {
153 | (0, _, _) : accept;
154 | (_, _, NETCACHE_PORT): parse_netcache;
155 | (_, NETCACHE_PORT,_): parse_netcache;
156 | default: accept;
157 | }
158 | */
159 | }
160 |
161 | state parse_tcp_options {
162 | bit<10> len = ((bit<10>) (hdr.tcp.dataOffset - 5) * 4 * 8);
163 | packet.extract(hdr.tcp_options, (bit<32>) len);
164 |
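// worked example: dataOffset = 8 gives (8 - 5) * 4 = 12 option bytes,
// so len = 96 bits are extracted into the varbit field above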
165 | transition select (hdr.tcp.dstPort, hdr.tcp.srcPort) {
166 | (NETCACHE_PORT, _) : parse_netcache;
167 | (_, NETCACHE_PORT) : parse_netcache;
168 | default: accept;
169 | }
170 | }
171 |
172 | state parse_udp {
173 | packet.extract(hdr.udp);
174 | transition select(hdr.udp.dstPort, hdr.udp.srcPort) {
175 | (NETCACHE_PORT, _) : parse_netcache;
176 | (_, NETCACHE_PORT) : parse_netcache;
177 | default: accept;
178 | }
179 | }
180 |
181 | state parse_netcache {
182 | /* TODO #1(dimlek): enforce in some way that write queries are TCP */
183 | /* TODO #2(dimlek): decide how many bytes to extract for value field */
184 | packet.extract(hdr.netcache);
185 | transition accept;
186 | }
187 |
188 | }
189 |
190 | /*************************************************************************
191 | *********************** D E P A R S E R *****************************
192 | *************************************************************************/
193 |
194 | control MyDeparser(packet_out packet, in headers hdr) {
195 | apply {
196 |
197 | packet.emit(hdr.ethernet);
198 | packet.emit(hdr.ipv4);
199 | packet.emit(hdr.tcp);
200 | packet.emit(hdr.tcp_options);
201 | //packet.emit(hdr.tcp_options_vec);
202 | //packet.emit(hdr.tcp_options_padding);
203 | packet.emit(hdr.udp);
204 | packet.emit(hdr.netcache);
205 |
206 | }
207 | }
208 |
209 |
210 | #endif // PARSERS_P4
211 |
--------------------------------------------------------------------------------
/src/p4/init_servers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | NCACHE_DIR=../../
4 |
5 | PYTHON="python3"
6 |
7 | usage="${0} <num-servers> [<server-flags>]"
8 |
9 | n_servers=$1
10 | server_flags=$2
11 |
12 | if [ -z "$n_servers" ]; then
13 | echo "Error: invalid input: ${usage}"
14 | exit 1
15 | fi
16 |
17 |
18 | for i in $(seq $n_servers); do
19 | server_data="$NCACHE_DIR/src/kv_store/data/server${i}.txt"
20 | mx server$i $PYTHON $NCACHE_DIR/src/kv_store/server.py $server_flags --input $server_data &
21 | done
--------------------------------------------------------------------------------
/src/p4/p4app.json:
--------------------------------------------------------------------------------
1 | {
2 | "program": "core/netcache.p4",
3 | "switch": "simple_switch",
4 | "compiler": "p4c",
5 | "options": "--target bmv2 --arch v1model --std p4-16",
6 | "switch_cli": "simple_switch_CLI",
7 | "cli": true,
8 | "pcap_dump": false,
9 | "enable_log": true,
10 | "topo_module": {
11 | "file_path": "",
12 | "module_name": "p4utils.mininetlib.apptopo",
13 | "object_name": "AppTopoStrategies"
14 | },
15 | "controller_module": null,
16 | "topodb_module": {
17 | "file_path": "",
18 | "module_name": "p4utils.utils.topology",
19 | "object_name": "Topology"
20 | },
21 | "mininet_module": {
22 | "file_path": "",
23 | "module_name": "p4utils.mininetlib.p4net",
24 | "object_name": "P4Mininet"
25 | },
26 | "topology": {
27 | "assignment_strategy" : "l2",
28 | "links": [
29 | ["server1", "s1"],
30 | ["client1", "s1"]
31 | ],
32 | "hosts": {
33 | "server1": { },
34 | "client1": { }
35 | },
36 | "switches": {
37 | "s1": {
38 | "cli_input": "s1-commands.txt",
39 | "program": "core/netcache.p4",
40 | "cpu_port": true
41 | }
42 | }
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/p4/p4app_4_1.json:
--------------------------------------------------------------------------------
1 | {
2 | "program": "core/netcache.p4",
3 | "switch": "simple_switch",
4 | "compiler": "p4c",
5 | "options": "--target bmv2 --arch v1model --std p4-16",
6 | "switch_cli": "simple_switch_CLI",
7 | "cli": true,
8 | "pcap_dump": false,
9 | "enable_log": true,
10 | "topo_module": {
11 | "file_path": "",
12 | "module_name": "p4utils.mininetlib.apptopo",
13 | "object_name": "AppTopoStrategies"
14 | },
15 | "controller_module": null,
16 | "topodb_module": {
17 | "file_path": "",
18 | "module_name": "p4utils.utils.topology",
19 | "object_name": "Topology"
20 | },
21 | "mininet_module": {
22 | "file_path": "",
23 | "module_name": "p4utils.mininetlib.p4net",
24 | "object_name": "P4Mininet"
25 | },
26 | "topology": {
27 | "assignment_strategy" : "l2",
28 | "links": [
29 | ["server1", "s1"],
30 | ["server2", "s1"],
31 | ["server3", "s1"],
32 | ["server4", "s1"],
33 | ["client1", "s1"]
34 | ],
35 | "hosts": {
36 | "server1": { },
37 | "server2": { },
38 | "server3": { },
39 | "server4": { },
40 | "client1": { }
41 | },
42 | "switches": {
43 | "s1": {
44 | "cli_input": "s1-commands.txt",
45 | "program": "core/netcache.p4",
46 | "cpu_port": true
47 | }
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/p4/p4app_8_1.json:
--------------------------------------------------------------------------------
1 | {
2 | "program": "core/netcache.p4",
3 | "switch": "simple_switch",
4 | "compiler": "p4c",
5 | "options": "--target bmv2 --arch v1model --std p4-16",
6 | "switch_cli": "simple_switch_CLI",
7 | "cli": true,
8 | "pcap_dump": false,
9 | "enable_log": true,
10 | "topo_module": {
11 | "file_path": "",
12 | "module_name": "p4utils.mininetlib.apptopo",
13 | "object_name": "AppTopoStrategies"
14 | },
15 | "controller_module": null,
16 | "topodb_module": {
17 | "file_path": "",
18 | "module_name": "p4utils.utils.topology",
19 | "object_name": "Topology"
20 | },
21 | "mininet_module": {
22 | "file_path": "",
23 | "module_name": "p4utils.mininetlib.p4net",
24 | "object_name": "P4Mininet"
25 | },
26 | "topology": {
27 | "assignment_strategy" : "l2",
28 | "links": [
29 | ["server1", "s1"],
30 | ["server2", "s1"],
31 | ["server3", "s1"],
32 | ["server4", "s1"],
33 | ["server5", "s1"],
34 | ["server6", "s1"],
35 | ["server7", "s1"],
36 | ["server8", "s1"],
37 | ["client1", "s1"]
38 | ],
39 | "hosts": {
40 | "server1": { },
41 | "server2": { },
42 | "server3": { },
43 | "server4": { },
44 | "server5": { },
45 | "server6": { },
46 | "server7": { },
47 | "server8": { },
48 | "client1": { }
49 | },
50 | "switches": {
51 | "s1": {
52 | "cli_input": "s1-commands.txt",
53 | "program": "core/netcache.p4",
54 | "cpu_port": true
55 | }
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/p4/p4app_gen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # This script serves the purpose of creating well-formed p4app.json documents
5 | # with the specified numbers of servers and clients given as options.
6 | # This functionality allows fast iteration between experiments with
7 | # different numbers of running servers/clients.
8 | #
9 | # Usage: ./p4app_gen.sh [-s <num-servers>] [-c <num-clients>]
10 | # E.g. './p4app_gen.sh -s 4 -c 1' generates p4app_4_1.json (4 servers, 1 client).
11 |
12 | n_servers=1
13 | n_clients=1
14 |
15 | usage="${0} [-s <num-servers>] [-c <num-clients>]"
16 | while getopts 's:c:' opt
17 | do
18 | case $opt in
19 | s) n_servers=$OPTARG;;
20 | c) n_clients=$OPTARG;;
21 | \?)
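# getopts sets opt to '?' when it encounters an unrecognized flag,
# so this arm reports the correct usage and aborts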
echo "Error: invalid input: ${usage}" 22 | exit 1 23 | esac 24 | done 25 | 26 | 27 | tmp_file='tmp_x' 28 | if [ -f "$tmp_file" ]; then 29 | echo "Error: Temporary file ${tmp_file} already exists." 30 | exit 1 31 | fi 32 | 33 | # populate all the server entries in "links" field 34 | for i in $(seq $n_servers); do 35 | echo "\t\t[\"server${i}\", \"s1\"]," >> ${tmp_file} 36 | done 37 | 38 | # populate all the client entires in "links" field 39 | for i in $(seq $(($n_clients-1))); do 40 | echo "\t\t[\"client${i}\", \"s1\"]," >> ${tmp_file} 41 | done 42 | 43 | # last entry should omit a trailing comma 44 | echo "\t\t[\"client${n_clients}\", \"s1\"]" >> ${tmp_file} 45 | 46 | 47 | tmp_file_2='tmp_x2' 48 | if [ -f "$tmp_file_2" ]; then 49 | echo "Error: Temporary file ${tmp_file_2} already exists." 50 | exit 1 51 | fi 52 | 53 | # populate all the server entires in "hosts" field 54 | for i in $(seq $n_servers); do 55 | echo "\t\t\"server${i}\": { }," >> ${tmp_file_2} 56 | done 57 | 58 | # populate all the client entires in "links" field 59 | for i in $(seq $(($n_clients-1))); do 60 | echo "\t\t\"client${i}\": { }," >> ${tmp_file_2} 61 | done 62 | 63 | # last entry should omit a trailing comma 64 | echo "\t\t\"client${n_clients}\": { }" >> ${tmp_file_2} 65 | 66 | 67 | prototype='p4app.json' 68 | if ! [ -f "$prototype" ]; then 69 | echo "Error: File ${prototype} does not exist." 70 | exit 1 71 | fi 72 | 73 | 74 | generated_p4app="p4app_${n_servers}_${n_clients}.json" 75 | 76 | sed '/"links":/{n;N;d}' ${prototype} > ${generated_p4app} 77 | sed -i "/\"links\":/r ${tmp_file}" ${generated_p4app} 78 | sed -i '/"hosts":/{n;N;d}' ${generated_p4app} 79 | sed -i "/\"hosts\":/r ${tmp_file_2}" ${generated_p4app} 80 | 81 | 82 | # clean up temp files 83 | rm -f ${tmp_file} ${tmp_file_2} 84 | 85 | --------------------------------------------------------------------------------