├── README.md ├── dataplanes ├── CModel │ ├── README.md │ ├── example.pcap │ ├── makefile │ ├── runExample.sh │ └── turboflow.cpp ├── P4Generic │ ├── README.md │ └── p4src │ │ ├── mainTurboFlow.p4 │ │ ├── parser.p4 │ │ ├── statefulFunctions.p4 │ │ └── turboFlow.p4 ├── P4Netronome │ ├── README.md │ ├── headers.p4 │ ├── main.p4 │ ├── primitive_actions.c │ └── turboflow.p4 └── tofino │ └── p4src │ ├── mainTurboFlow.p4 │ ├── parser.p4 │ └── turboFlow.p4 └── hashtables ├── aggregator_batch.cc ├── aggregator_flat.cc ├── aggregator_intkey.cc ├── aggregator_std.cc ├── benchmarkAll.sh ├── common.h ├── flat_hash_map.hpp ├── makefile ├── optimizations.h ├── readme.md └── xxhash.hpp /README.md: -------------------------------------------------------------------------------- 1 | ### TurboFlow ### 2 | 3 | This repo contains turboflow source code. Eventually, a refactored end to end implementation will be here. For now, this repo contains individual components used in the paper. 4 | 5 | contents: 6 | - dataplanes/ -- implementations of the P4 dataplane component. 7 | - hashtables/ -- implementation of the core hash table data structures for the CPU component. 8 | -------------------------------------------------------------------------------- /dataplanes/CModel/README.md: -------------------------------------------------------------------------------- 1 | #### TurboFlow generic C dataplane 2 | 3 | This is a reference C implementation of the TurboFlow dataplane (i.e., microflow generator). 4 | 5 | Usage: 6 | ./turboflow *input pcap* *cache size* 7 | 8 | - modify dumpMicroflowRecord if you want to save evicted microflows to a file. 9 | - see runExample.sh for an example of running it.
10 | - In the paper, this implementation was used to evaluate eviction rates with CAIDA header traces: https://www.caida.org/data/passive/passive_dataset.xml 11 | -------------------------------------------------------------------------------- /dataplanes/CModel/example.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jsonch/TurboFlow/fb33a27fbddfc4a0b4702019a972891e75fa2228/dataplanes/CModel/example.pcap -------------------------------------------------------------------------------- /dataplanes/CModel/makefile: -------------------------------------------------------------------------------- 1 | all: turboflow 2 | turboflow: turboflow.cpp 3 | g++ turboflow.cpp -o turboflow -lpcap -std=c++11 4 | clean: 5 | rm turboflow 6 | -------------------------------------------------------------------------------- /dataplanes/CModel/runExample.sh: -------------------------------------------------------------------------------- 1 | make turboflow 2 | ./turboflow example.pcap 1024 3 | -------------------------------------------------------------------------------- /dataplanes/CModel/turboflow.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include // for ostringstream 15 | using namespace std; 16 | 17 | // Generate 18 | // Dump the microflow records the TurboFlow microflow generator 19 | // would send to the switch CPU. 
20 | // parameters: 21 | // 1 -- input pcap trace (expects IP trace, change traceType for eth) 22 | // 2 -- height of the hash table (number of flow slots) 23 | 24 | // g++ turboflow.cpp -o turboflow -lpcap -std=c++11 25 | // ./turboflow ~/datasets/caida2015/caida2015_02_dirA.pcap 13 26 | 27 | 28 | uint64_t dbg_packetCt; 29 | uint64_t dbg_evictCt; 30 | 31 | uint64_t dbg_collisionCt; 32 | uint64_t dbg_removeFlowCt; 33 | uint64_t dbg_addFlowCt; 34 | 35 | 36 | // Static options. 37 | #define traceType 1 // 0 = ethernet pcap, 1 = ip4v pcap (i.e., caida datasets) 38 | #define KEYLEN 12 // key length in bytes. 39 | #define STOP_CT 10000000 // stop execution after STOP_CT packets. 40 | #define LOG_CT 1000000 // print info every LOG_CT packets. 41 | 42 | char nullKey[KEYLEN] = { 0 }; 43 | 44 | // Internal structures. 45 | struct MicroflowRecord { 46 | char key[KEYLEN]; 47 | uint32_t byteCount; 48 | uint16_t packetCount; 49 | }; 50 | 51 | struct Metadata { 52 | std::string key; 53 | unsigned hash; 54 | uint32_t byteCount; 55 | uint64_t ts; 56 | }; 57 | 58 | // Global state. 59 | uint32_t TABLELEN; 60 | uint32_t packetSinceEvict = 0; 61 | uint32_t packetsPerEvict; 62 | // Table of hash -> keys. 63 | char ** keyTable; 64 | // Table of hash -> byte counts. 65 | uint32_t * byteCountTable; 66 | // Table of hash -> packet counts. 67 | uint16_t * packetCountTable; 68 | 69 | uint64_t startTs = 0; 70 | uint64_t dur = 0; 71 | 72 | // init global state for turboflow. 73 | void stateInit(int tableSize); 74 | 75 | // Handle packets. 76 | void packetHandler(u_char *userData, const struct pcap_pkthdr* pkthdr, const u_char* packet); 77 | 78 | // update microflow tables. 79 | void updateTables(Metadata md); 80 | 81 | // Write a micro flow record to a file or std out. 82 | void dumpMicroflowRecord(MicroflowRecord mfr, bool collision); 83 | 84 | // Helper functions. 
85 | void printHeader(); 86 | unsigned simpleHash(const char* s, int len, int maxHashVal); 87 | std::string getKey(const struct ip* ipHeader, const struct tcphdr* tcpHeader); 88 | uint64_t getMicrosecondTs(uint32_t seconds, uint32_t microSeconds); 89 | void printStats(); 90 | 91 | 92 | int main(int argc, char *argv[]) { 93 | if (argc != 3){ 94 | cout << "incorrect number of arguments. Need 2. (filename, hash size)." << endl; 95 | return 0; 96 | } 97 | char * inputFile = argv[1]; 98 | cout << "reading from file: " << inputFile << endl; 99 | // Setup state. 100 | int tableSize = atoi(argv[2]); 101 | stateInit(tableSize); 102 | 103 | pcap_t *descr; 104 | char errbuf[PCAP_ERRBUF_SIZE]; 105 | // open capture file for offline processing 106 | descr = pcap_open_offline(inputFile, errbuf); 107 | printHeader(); 108 | if (descr == NULL) { 109 | cerr << "pcap_open_live() failed: " << errbuf << endl; 110 | return 1; 111 | } 112 | // start packet processing loop, just like live capture 113 | if (pcap_loop(descr, 0, packetHandler, NULL) < 0) { 114 | cerr << "pcap_loop() failed: " << pcap_geterr(descr); 115 | return 1; 116 | } 117 | cout << "FINAL STATS:" << endl; 118 | printStats(); 119 | 120 | return 0; 121 | } 122 | 123 | void stateInit(int tableSize){ 124 | TABLELEN = tableSize; 125 | cout << "initializing hash tables to size: " << TABLELEN << endl; 126 | // Keys. 127 | keyTable = new char*[tableSize]; 128 | for (int i = 0; i< tableSize; i++){ 129 | keyTable[i] = new char[KEYLEN]; 130 | } 131 | // Counters. 132 | byteCountTable = new uint32_t[tableSize]; 133 | packetCountTable = new uint16_t[tableSize]; 134 | 135 | // cout << "setting to attempt eviction once every " << pktsPerEvict << " packets " << endl; 136 | // packetsPerEvict = pktsPerEvict; 137 | return; 138 | } 139 | 140 | 141 | // The packet handler that implements the flow record generator. 
142 | void packetHandler(u_char *userData, const struct pcap_pkthdr* pkthdr, const u_char* packet) { 143 | const struct ether_header* ethernetHeader; 144 | const struct ip* ipHeader; 145 | const struct tcphdr* tcpHeader; 146 | 147 | if (traceType == 0){ 148 | ethernetHeader = (struct ether_header*)packet; 149 | if (ntohs(ethernetHeader->ether_type) == ETHERTYPE_IP) { 150 | ipHeader = (struct ip*)(packet + sizeof(struct ether_header)); 151 | } 152 | } 153 | else if (traceType == 1) { 154 | ipHeader = (struct ip*)(packet); 155 | 156 | } 157 | tcpHeader = (tcphdr*)((u_char*)ipHeader + sizeof(struct ip)); 158 | 159 | // Build metadata. 160 | Metadata md; 161 | md.key = getKey(ipHeader, tcpHeader); 162 | md.ts = getMicrosecondTs(pkthdr->ts.tv_sec, pkthdr->ts.tv_usec); 163 | md.byteCount = pkthdr->len; 164 | md.hash = simpleHash(md.key.c_str(), KEYLEN, TABLELEN); 165 | if (startTs == 0){ 166 | startTs = md.ts; 167 | } 168 | dur = md.ts - startTs; 169 | 170 | // Update microflow tables. 171 | updateTables(md); 172 | 173 | // break after STOP_CT packets. 174 | dbg_packetCt++; 175 | #ifdef STOP_CT 176 | if (dbg_packetCt > STOP_CT){ 177 | printStats(); 178 | exit(1); 179 | } 180 | #endif 181 | #ifdef LOG_CT 182 | if (dbg_packetCt % LOG_CT == 0){ 183 | printStats(); 184 | } 185 | #endif 186 | } 187 | 188 | void updateTables(Metadata md){ 189 | // increment packet counter. 190 | packetSinceEvict++; 191 | // update key table. 192 | // read key at hash. 193 | char curKey[KEYLEN]; 194 | memcpy(curKey, keyTable[md.hash], KEYLEN); 195 | bool evictedFlow = false; 196 | MicroflowRecord evictedMfr; 197 | // cout << "hash: " << md.hash << endl; 198 | // if the key is null, insert new entry. 199 | if (memcmp(curKey, nullKey, KEYLEN) == 0){ 200 | dbg_addFlowCt++; 201 | // cout << "inserting new. 
" << endl; 202 | memcpy(keyTable[md.hash], md.key.c_str(), KEYLEN); 203 | packetCountTable[md.hash] = 1; 204 | byteCountTable[md.hash] = md.byteCount; 205 | } 206 | else { 207 | // if key matches packet's key, update. 208 | if (memcmp(curKey, md.key.c_str(), KEYLEN) == 0){ 209 | packetCountTable[md.hash]++; 210 | byteCountTable[md.hash]+= md.byteCount; 211 | } 212 | // otherwise, it is a collision. Evict and then replace. 213 | else { 214 | // Evict. 215 | evictedFlow = true; 216 | memcpy(evictedMfr.key, curKey, KEYLEN); 217 | evictedMfr.packetCount = packetCountTable[md.hash]; 218 | evictedMfr.byteCount = byteCountTable[md.hash]; 219 | // Replace. 220 | memcpy(keyTable[md.hash], md.key.c_str(), KEYLEN); 221 | packetCountTable[md.hash] = 1; 222 | byteCountTable[md.hash] = md.byteCount; 223 | 224 | } 225 | } 226 | // write microflow record if anything was evicted. 227 | if (evictedFlow){ 228 | dumpMicroflowRecord(evictedMfr, true); 229 | } 230 | return; 231 | } 232 | 233 | 234 | void printHeader(){ 235 | cout << "packet counts, trace time (ms), packets per microflow" << endl; 236 | } 237 | 238 | void printStats(){ 239 | float packetsPerMicroflow = float(dbg_packetCt) / float(dbg_evictCt); 240 | cout << dbg_packetCt << "," << dur/1000 << "," << packetsPerMicroflow << endl; 241 | // fwrite(&mfr, 1, sizeof(mfr), stdout); 242 | return; 243 | } 244 | 245 | 246 | void dumpMicroflowRecord(MicroflowRecord mfr, bool collision){ 247 | if (collision) dbg_collisionCt++; 248 | else dbg_removeFlowCt++; 249 | 250 | dbg_evictCt++; 251 | // Just write the microflow record to stdout. 
252 | // fwrite(&mfr, 1, sizeof(mfr), stdout); 253 | return; 254 | } 255 | 256 | 257 | std::string getKey(const struct ip* ipHeader, const struct tcphdr* tcpHeader){ 258 | char keyBuf[KEYLEN]; 259 | memcpy(&(keyBuf[0]), &ipHeader->ip_src, 4); 260 | memcpy(&(keyBuf[4]), &ipHeader->ip_dst, 4); 261 | memcpy(&(keyBuf[8]), &tcpHeader->source, 2); 262 | memcpy(&(keyBuf[10]), &tcpHeader->dest, 2); 263 | std::string key = string(keyBuf, KEYLEN); 264 | return key; 265 | } 266 | 267 | // Get 64 bit timestamp. 268 | uint64_t getMicrosecondTs(uint32_t seconds, uint32_t microSeconds){ 269 | uint64_t sec64, ms64; 270 | sec64 = (uint64_t) seconds; 271 | ms64 = (uint64_t) microSeconds; 272 | uint64_t ts = sec64 * 1000000 + ms64; 273 | return ts; 274 | } 275 | // A simple hashing function. 276 | unsigned simpleHash(const char* s, int len, int maxHashVal) 277 | { 278 | unsigned h = 0; 279 | for (int i=0; i removeTfHeader() 243 | // (port == cpuPort, isMirror == false) --> clone_e2e to truncator and drop. 244 | // (port == cpuPort, isMirror == true) --> do nothing. 245 | table teProcessTfHeader { 246 | reads { 247 | eg_intr_md.egress_port : exact; 248 | tfMeta.isClone : exact; 249 | } 250 | actions { aeDoNothing; aeRemoveTfHeader; aeCloneToTruncator;} 251 | default_action : aeRemoveTfHeader(); 252 | } 253 | 254 | action aeDoNothing() { 255 | no_op(); 256 | } 257 | 258 | action aeRemoveTfHeader() { 259 | modify_field(ethernet.etherType, tfExportStart.realEtherType); 260 | remove_header(tfExportStart); 261 | remove_header(tfExportKey); 262 | remove_header(tfExportFeatures); 263 | } 264 | 265 | action aeCloneToTruncator() { 266 | modify_field(tfMeta.isClone, 1); 267 | clone_e2e(TF_CLONE_MID, flCloneMeta); 268 | // sample_e2e(TF_COAL_MID, 72); 269 | drop(); 270 | } 271 | field_list flCloneMeta { 272 | tfMeta.isClone; 273 | } 274 | 275 | /*===== End of TurboFlow Egress Pipeline. 
======*/ 276 | 277 | 278 | -------------------------------------------------------------------------------- /dataplanes/P4Netronome/README.md: -------------------------------------------------------------------------------- 1 | #### TurboFlow netronome P4_14 dataplane 2 | This is a netronome P4 implementation of the TurboFlow dataplane (i.e., microflow generator). 3 | 4 | These files are a refactoring of previous prototypes, and may not be fully functional. 5 | There are still a few pieces missing: 6 | 7 | 1. netronome project files 8 | 2. calling micro-c semaphores from P4 9 | 3. correctness testing 10 | 4. benchmark scripts 11 | 5. exporting data to the CPU 12 | -------------------------------------------------------------------------------- /dataplanes/P4Netronome/headers.p4: -------------------------------------------------------------------------------- 1 | // Headers: Eth, IP, and UDP headers. 2 | header_type intrinsic_metadata_t { 3 | fields { 4 | mcast_grp : 4; 5 | egress_rid : 4; 6 | mcast_hash : 16; 7 | lf_field_list : 32; 8 | resubmit_flag : 16; 9 | recirculate_flag : 16; 10 | ingress_global_timestamp : 32; 11 | } 12 | } 13 | metadata intrinsic_metadata_t intrinsic_metadata; 14 | 15 | 16 | header_type ethernet_t { 17 | fields { 18 | dstAddr : 48; 19 | srcAddr : 48; 20 | etherType : 16; 21 | } 22 | } 23 | // 20 byte header. 24 | header_type ipv4_t { 25 | fields { 26 | version : 4; 27 | ihl : 4; 28 | diffserv : 8; 29 | totalLen : 16; 30 | identification : 16; 31 | flags : 3; 32 | fragOffset : 13; 33 | ttl : 8; 34 | protocol : 8; 35 | hdrChecksum : 16; 36 | srcAddr : 32; 37 | dstAddr: 32; 38 | } 39 | } 40 | 41 | // 20 byte tcp header. 20 + 20 + 14 = 54 bytes of network headers.
42 | header_type tcp_t { 43 | fields { 44 | ports : 32; 45 | seqNo : 32; 46 | ackNo : 32; 47 | dataOffset : 4; 48 | res : 3; 49 | ecn : 3; 50 | ctrl : 6; 51 | window : 16; 52 | checksum : 16; 53 | urgentPtr : 16; 54 | } 55 | } 56 | 57 | parser parse_ethernet { 58 | extract(ethernet); 59 | return parse_ipv4; 60 | } 61 | parser parse_ipv4 { 62 | extract(ipv4); 63 | // return parse_udp; 64 | return parse_tcp; 65 | } 66 | 67 | parser parse_tcp { 68 | // return ingress; 69 | extract(tcp); 70 | return parse_custom; 71 | } 72 | -------------------------------------------------------------------------------- /dataplanes/P4Netronome/main.p4: -------------------------------------------------------------------------------- 1 | #include "headers.p4" 2 | #include "turboflow_v1.p4" 3 | 4 | 5 | 6 | header ethernet_t ethernet; 7 | header ipv4_t ipv4; 8 | header tcp_t tcp; 9 | 10 | parser start { return parse_ethernet; } 11 | parser parse_custom { return ingress; } 12 | 13 | 14 | control ingress { 15 | forwarding_logic(); 16 | tracking_logic(); 17 | apply(profiling_table); 18 | } 19 | 20 | control forwarding_logic { 21 | apply(forward_table); 22 | } 23 | 24 | // forward packets out of a port. 
25 | table forward_table { 26 | reads { standard_metadata.ingress_port: exact; } 27 | actions { do_forward; } 28 | } 29 | action do_forward(egress_port) { 30 | modify_field(standard_metadata.egress_spec, egress_port); 31 | } 32 | 33 | table profiling_table { 34 | actions {do_nothing;} 35 | } 36 | 37 | action do_nothing() { 38 | modify_field(ipv4.ttl, ipv4.ttl); 39 | } -------------------------------------------------------------------------------- /dataplanes/P4Netronome/primitive_actions.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #define SEM_COUNT 256 10 | 11 | __declspec(ctm export aligned(64)) int my_semaphore = 1; 12 | 13 | __declspec(ctm export aligned(64)) long long int my_data = 0; 14 | 15 | __declspec(imem export aligned(64)) int global_semaphores[SEM_COUNT] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; 16 | 17 | //__declspec(shared scope(global) export imem aligned(64)) int global_semaphores[SEM_COUNT] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; 18 | 19 | // Just for debugging. 20 | //__declspec(shared scope(global) export imem aligned(64)) long long int global_counters[SEM_COUNT]; 21 | 22 | void semaphore_down(volatile __declspec(mem addr40) void * addr) { 23 | /* semaphore "DOWN" = claim = wait */ 24 | unsigned int addr_hi, addr_lo; 25 | __declspec(read_write_reg) int xfer; 26 | SIGNAL_PAIR my_signal_pair; 27 | addr_hi = ((unsigned long long int)addr >> 8) & 0xff000000; 28 | addr_lo = (unsigned long long int)addr & 0xffffffff; 29 | do { 30 | xfer = 1; 31 | __asm { 32 | mem[test_subsat, xfer, addr_hi, <<8, addr_lo, 1],\ 33 | sig_done[my_signal_pair]; 34 | ctx_arb[my_signal_pair] 35 | } 36 | } while (xfer == 0); 37 | } 38 | 39 | void semaphore_up(volatile __declspec(mem addr40) void * addr) { 40 | /* semaphore "UP" = release = signal */ 41 | unsigned int addr_hi, addr_lo; 42 | __declspec(read_write_reg) int xfer; 43 | addr_hi = ((unsigned long long int)addr >> 8) & 0xff000000; 44 | addr_lo = (unsigned long long int)addr & 0xffffffff; 45 | 46 | __asm { 47 | mem[incr, --, addr_hi, <<8, addr_lo, 1]; 48 | } 49 | 50 | } 51 | 52 | 53 | // lock a semaphore based on an ID in the metadata. 54 | int pif_plugin_semaphore_lock(EXTRACTED_HEADERS_T *headers, MATCH_DATA_T *data){ 55 | // Get the ID of the semaphore to lock from the packet header. 56 | __declspec(local_mem) int sem_sid; 57 | sem_sid = (int)pif_plugin_meta_get__sm__sid(headers); 58 | 59 | // Lock that semaphore. 
60 | semaphore_down( &global_semaphores[sem_sid]); 61 | 62 | return PIF_PLUGIN_RETURN_FORWARD; 63 | } 64 | // Release a semaphore based on an ID in the metadata. 65 | int pif_plugin_semaphore_release(EXTRACTED_HEADERS_T *headers, MATCH_DATA_T *data){ 66 | // Get the ID of the semaphore to lock from the packet header. 67 | // Would like to store this in metadata, but.. 68 | __declspec(local_mem) int sem_sid; 69 | sem_sid = (int)pif_plugin_meta_get__sm__sid(headers); 70 | 71 | // Release that semaphore. 72 | semaphore_up( &global_semaphores[sem_sid]); 73 | 74 | return PIF_PLUGIN_RETURN_FORWARD; 75 | } 76 | 77 | 78 | 79 | void copyRegister(){ 80 | volatile __declspec( mem addr40) unsigned int *payload; 81 | int i, copyCt, mu_offset; 82 | mu_offset =(256 << pif_pkt_info_global.ctm_size); 83 | payload = (__declspec( mem addr40) unsigned int *)(((uint64_t)pif_pkt_info_global.muptr << 11) + mu_offset); 84 | // Manually copy 300 words to payload (i.e. 50 flow records @ 6 words each). 85 | copyCt = 300; 86 | for (i=0; i> (WORDBITS -1 ))); 180 | modify_field(em.evictMask, (em.evictMask-1)); 181 | 182 | } -------------------------------------------------------------------------------- /dataplanes/tofino/p4src/mainTurboFlow.p4: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * mainTurboFlow.p4 -- Simple 2 hop switch with TurboFlow. 4 | * 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | #include "parser.p4" 14 | #include "turboFlow.p4" 15 | 16 | #include "../../modules/p4/miscUtils.p4" 17 | #include "../../modules/p4/forwardL2.p4" 18 | 19 | // #include "latencyMirror.p4" 20 | 21 | 22 | control ingress { 23 | // Port to index mapping. Always do this. 24 | if (ig_intr_md.resubmit_flag == 0){ 25 | apply(tiPortToIndex); 26 | } 27 | // Stage 0: apply L2 forwarding. 
28 | ciForwardPacket(); // (forwardL2.p4) 29 | // Next stages: apply TurboFlow (only to IPv4 packets) 30 | if (valid(ipv4)) { 31 | ciTurboFlow(); 32 | } 33 | 34 | } 35 | 36 | control egress { 37 | // Strip TurboFlow headers unless its an eviction packet to the CPU. 38 | ceTurboFlow(); 39 | } 40 | 41 | 42 | 43 | control ciTurboFlow { 44 | 45 | // Setup TurboFlow headers. 46 | apply(tiAddTfHeaders); 47 | 48 | // Update key fields. 49 | apply(tiUpdateSrcAddr); 50 | apply(tiUpdateDstAddr); 51 | apply(tiUpdatePorts); 52 | apply(tiUpdateProtocol); 53 | 54 | // Set match flag if all key fields are equal. 55 | if (ipv4.srcAddr == tfExportKey.srcAddr) { 56 | if (ipv4.dstAddr == tfExportKey.dstAddr) { 57 | if (l4_ports.ports == tfExportKey.ports) { 58 | if (ipv4.protocol == tfExportKey.protocol) { 59 | apply(tiSetMatch); 60 | } 61 | } 62 | } 63 | } 64 | 65 | // update features (depending on match flag). 66 | if (tfMeta.matchFlag == 1) { 67 | apply(tiIncrementPktCt); 68 | apply(tiIncrementByteCt); 69 | } 70 | else { 71 | apply(tiResetPktCt); 72 | apply(tiResetByteCt); 73 | apply(tiResetStartTs); 74 | // No table for endTs, endTs is now, set in tiInitFr. 75 | } 76 | 77 | // If match flag == 0, multicast to the TurboFlow monitoring port. 78 | if (tfMeta.matchFlag == 0) { 79 | apply(tiMcToCpu); 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /dataplanes/tofino/p4src/parser.p4: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * Headers, metadata, and parser. 4 | * 5 | */ 6 | 7 | #define ETHERTYPE_IPV4 0x0800 8 | #define ETHERTYPE_TURBOFLOW 0x081A 9 | 10 | // Metadata for processing. 11 | metadata tfMeta_t tfMeta; 12 | 13 | // Headers for exporting an evicted flow record. 
14 | header tfExportStart_t tfExportStart; 15 | header tfExportKey_t tfExportKey; 16 | header tfExportFeatures_t tfExportFeatures; 17 | 18 | /*========================================== 19 | = TurboFlow Headers. = 20 | ==========================================*/ 21 | 22 | header_type tfMeta_t { 23 | fields { 24 | curTs : 32; 25 | hashVal : 16; 26 | matchFlag : 8; 27 | isClone : 8; 28 | } 29 | } 30 | 31 | header_type tfExportStart_t { 32 | fields { 33 | realEtherType : 16; 34 | } 35 | } 36 | 37 | header_type tfExportKey_t { 38 | fields { 39 | srcAddr : 32; 40 | dstAddr : 32; 41 | ports : 32; 42 | protocol : 8; 43 | } 44 | } 45 | 46 | header_type tfExportFeatures_t { 47 | fields { 48 | byteCt : 32; 49 | startTs : 32; 50 | endTs : 32; 51 | pktCt : 16; 52 | } 53 | } 54 | 55 | /*===== End of TurboFlow Headers. ======*/ 56 | 57 | /*=========================================== 58 | = Forwarding Headers. = 59 | ===========================================*/ 60 | 61 | header_type ethernet_t { 62 | fields { 63 | dstAddr : 48; 64 | srcAddr : 48; 65 | etherType : 16; 66 | } 67 | } 68 | header ethernet_t ethernet; 69 | 70 | header_type ipv4_t { 71 | fields { 72 | version : 4; 73 | ihl : 4; 74 | diffserv : 8; 75 | totalLen : 16; 76 | identification : 16; 77 | flags : 3; 78 | fragOffset : 13; 79 | ttl : 8; 80 | protocol : 8; 81 | hdrChecksum : 16; // here 82 | srcAddr : 32; 83 | dstAddr: 32; 84 | } 85 | } 86 | header ipv4_t ipv4; 87 | 88 | header_type l4_ports_t { 89 | fields { 90 | ports : 32; 91 | // srcPort : 16; 92 | // dstPort : 16; 93 | } 94 | } 95 | header l4_ports_t l4_ports; 96 | 97 | 98 | /*===== End of Forwarding Headers. ======*/ 99 | 100 | 101 | 102 | 103 | parser start { 104 | return parse_ethernet; 105 | } 106 | 107 | parser parse_ethernet { 108 | extract(ethernet); 109 | return select(latest.etherType) { 110 | ETHERTYPE_TURBOFLOW : parse_turboflow; 111 | ETHERTYPE_IPV4 : parse_ipv4; 112 | default : ingress; 113 | } 114 | } 115 | 116 | // IP. 
117 | parser parse_ipv4 { 118 | extract(ipv4); 119 | return parse_l4; 120 | } 121 | 122 | // TCP / UDP ports. 123 | parser parse_l4 { 124 | extract(l4_ports); 125 | 126 | return ingress; 127 | } 128 | 129 | parser parse_turboflow { 130 | extract(tfExportStart); 131 | extract(tfExportKey); 132 | extract(tfExportFeatures); 133 | return select(tfExportStart.realEtherType) { 134 | ETHERTYPE_IPV4 : parse_ipv4; 135 | default : ingress; 136 | } 137 | } 138 | 139 | // e2e mirrored is always (in this example) a ethernet TurboFlow packet. 140 | @pragma packet_entry 141 | parser start_e2e_mirrored { 142 | extract(ethernet); 143 | extract(tfExportStart); 144 | extract(tfExportKey); 145 | extract(tfExportFeatures); 146 | // set_metadata(tfMeta.isClone, 1); 147 | return select(tfExportStart.realEtherType) { 148 | ETHERTYPE_IPV4 : parse_ipv4; 149 | default : ingress; 150 | } 151 | } 152 | 153 | 154 | // @pragma packet_entry 155 | // parser start_coalesced { 156 | // // extract(ethernet); 157 | // set_metadata(tfMeta.isClone, 1); 158 | // return ingress; 159 | // } -------------------------------------------------------------------------------- /dataplanes/tofino/p4src/turboFlow.p4: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * turboFlow.p4 -- Turboflow data plane. 4 | * 5 | */ 6 | 7 | // Max number of flows to track at once. 8 | #define TF_HASH_TBL_SIZE 65536 9 | #define TF_HASH_BIT_WIDTH 16 10 | 11 | #define TF_MC_GID 666 12 | #define TF_CLONE_MID 66 13 | #define TF_COAL_MID 1016 14 | 15 | 16 | // Setup TurboFlow Header. Needs controller rule. 17 | @pragma stage 2 18 | table tiAddTfHeaders { 19 | reads {ethernet.etherType : exact;} 20 | actions {aiAddTfHeaders; aeDoNothing;} 21 | default_action : aeDoNothing(); 22 | } 23 | 24 | action aiAddTfHeaders() { 25 | // TurboFlow Header. 
26 | add_header(tfExportStart); 27 | modify_field(tfExportStart.realEtherType, ethernet.etherType); 28 | modify_field(ethernet.etherType, ETHERTYPE_TURBOFLOW); 29 | 30 | // TurboFlow data -- the exported flow's key. 31 | add_header(tfExportKey); 32 | modify_field(tfExportKey.srcAddr, 0); 33 | modify_field(tfExportKey.dstAddr, 0); 34 | modify_field(tfExportKey.ports, 0); 35 | modify_field(tfExportKey.protocol, 0); 36 | 37 | // TurboFlow data -- the exported flow's features. 38 | add_header(tfExportFeatures); 39 | 40 | // Compute hash of key. 41 | modify_field_with_hash_based_offset(tfMeta.hashVal, 0, flowKeyHashCalc, 65536); 42 | // Get 32 bit timestamp. 43 | modify_field(tfMeta.curTs, ig_intr_md.ingress_mac_tstamp); 44 | modify_field(tfExportFeatures.endTs, ig_intr_md.ingress_mac_tstamp); 45 | } 46 | 47 | field_list flKeyFields { 48 | ipv4.srcAddr; 49 | ipv4.dstAddr; 50 | l4_ports.ports; 51 | ipv4.protocol; 52 | } 53 | 54 | field_list_calculation flowKeyHashCalc { 55 | input { flKeyFields; } 56 | algorithm : crc16; 57 | output_width : TF_HASH_BIT_WIDTH; 58 | } 59 | 60 | /*========================================================= 61 | = Update stateful keys, load evict FR. 
= 62 | =========================================================*/ 63 | 64 | 65 | table tiUpdateSrcAddr { 66 | actions {aiUpdateSrcAddr;} 67 | default_action : aiUpdateSrcAddr(); 68 | } 69 | action aiUpdateSrcAddr() { 70 | sUpdateSrcAddr.execute_stateful_alu(tfMeta.hashVal); 71 | } 72 | 73 | // evictFr.srcAddr = entry 74 | // If new != entry: 75 | // entry = new.srcAddr 76 | blackbox stateful_alu sUpdateSrcAddr { 77 | reg : rSrcAddr; 78 | condition_lo : ipv4.srcAddr == register_lo; 79 | 80 | update_lo_1_predicate : not condition_lo; 81 | update_lo_1_value : ipv4.srcAddr; 82 | 83 | // output_predicate : not condition_lo; 84 | output_dst : tfExportKey.srcAddr; 85 | output_value : register_lo; 86 | } 87 | 88 | register rSrcAddr { 89 | width : 32; 90 | instance_count : TF_HASH_TBL_SIZE; 91 | } 92 | 93 | table tiUpdateDstAddr { 94 | actions {aiUpdateDstAddr;} 95 | default_action : aiUpdateDstAddr(); 96 | } 97 | action aiUpdateDstAddr() { 98 | sUpdateDstAddr.execute_stateful_alu(tfMeta.hashVal); 99 | } 100 | 101 | blackbox stateful_alu sUpdateDstAddr { 102 | reg : rDstAddr; 103 | condition_lo : ipv4.dstAddr == register_lo; 104 | 105 | update_lo_1_predicate : not condition_lo; 106 | update_lo_1_value : ipv4.dstAddr; 107 | 108 | // output_predicate : not condition_lo; 109 | output_dst : tfExportKey.dstAddr; 110 | output_value : register_lo; 111 | } 112 | 113 | register rDstAddr { 114 | width : 32; 115 | instance_count : TF_HASH_TBL_SIZE; 116 | } 117 | 118 | 119 | table tiUpdatePorts { 120 | actions {aiUpdatePorts;} 121 | default_action : aiUpdatePorts(); 122 | } 123 | action aiUpdatePorts() { 124 | sUpdatePorts.execute_stateful_alu(tfMeta.hashVal); 125 | } 126 | 127 | blackbox stateful_alu sUpdatePorts { 128 | reg : rPorts; 129 | condition_lo : l4_ports.ports == register_lo; 130 | 131 | update_lo_1_predicate : not condition_lo; 132 | update_lo_1_value : l4_ports.ports; 133 | 134 | // output_predicate : not condition_lo; 135 | output_dst : tfExportKey.ports; 136 | 
output_value : register_lo; 137 | } 138 | 139 | register rPorts { 140 | width : 32; 141 | instance_count : TF_HASH_TBL_SIZE; 142 | } 143 | 144 | 145 | table tiUpdateProtocol { 146 | actions {aiUpdateProtocol;} 147 | default_action : aiUpdateProtocol(); 148 | } 149 | action aiUpdateProtocol() { 150 | sUpdateProtocol.execute_stateful_alu(tfMeta.hashVal); 151 | } 152 | 153 | blackbox stateful_alu sUpdateProtocol { 154 | reg : rProtocol; 155 | condition_lo : ipv4.protocol == register_lo; 156 | 157 | update_lo_1_predicate : not condition_lo; 158 | update_lo_1_value : ipv4.protocol; 159 | 160 | // output_predicate : not condition_lo; 161 | output_dst : tfExportKey.protocol; 162 | output_value : register_lo; 163 | } 164 | 165 | register rProtocol { 166 | width : 32; 167 | instance_count : TF_HASH_TBL_SIZE; 168 | } 169 | 170 | 171 | /*===== End of Update stateful keys, load evict FR. ======*/ 172 | 173 | 174 | /*======================================= 175 | = Set evict flag. = 176 | =======================================*/ 177 | 178 | @pragma stage 5 179 | table tiSetMatch { 180 | actions { aiSetMatch;} 181 | default_action : aiSetMatch(); 182 | } 183 | action aiSetMatch(){ 184 | modify_field(tfMeta.matchFlag, 1); 185 | } 186 | 187 | /*===== End of Set evict flag. ======*/ 188 | 189 | 190 | /*======================================= 191 | = Update Features = 192 | =======================================*/ 193 | 194 | // Packet count. 
195 | table tiIncrementPktCt { 196 | actions {aiIncrementPktCt;} 197 | default_action : aiIncrementPktCt(); 198 | } 199 | action aiIncrementPktCt() { 200 | sIncrementPktCt.execute_stateful_alu(tfMeta.hashVal); 201 | } 202 | 203 | blackbox stateful_alu sIncrementPktCt { 204 | reg : rPktCt; 205 | update_lo_1_value : register_lo + 1; 206 | } 207 | register rPktCt { 208 | width : 16; 209 | instance_count : TF_HASH_TBL_SIZE; 210 | } 211 | 212 | 213 | table tiResetPktCt { 214 | actions {aiResetPktCt;} 215 | default_action : aiResetPktCt(); 216 | } 217 | action aiResetPktCt() { 218 | sResetPktCt.execute_stateful_alu(tfMeta.hashVal); 219 | } 220 | 221 | blackbox stateful_alu sResetPktCt { 222 | reg : rPktCt; 223 | update_lo_1_value : 1; 224 | output_dst : tfExportFeatures.pktCt; 225 | output_value : register_lo; 226 | } 227 | 228 | 229 | // Byte count. 230 | table tiIncrementByteCt { 231 | actions {aiIncrementByteCt;} 232 | default_action : aiIncrementByteCt(); 233 | } 234 | action aiIncrementByteCt() { 235 | sIncrementByteCt.execute_stateful_alu(tfMeta.hashVal); 236 | } 237 | 238 | blackbox stateful_alu sIncrementByteCt { 239 | reg : rByteCt; 240 | update_lo_1_value : register_lo + ipv4.totalLen; 241 | } 242 | register rByteCt { 243 | width : 32; 244 | instance_count : TF_HASH_TBL_SIZE; 245 | } 246 | 247 | 248 | table tiResetByteCt { 249 | actions {aiResetByteCt;} 250 | default_action : aiResetByteCt(); 251 | } 252 | action aiResetByteCt() { 253 | sResetByteCt.execute_stateful_alu(tfMeta.hashVal); 254 | } 255 | 256 | blackbox stateful_alu sResetByteCt { 257 | reg : rByteCt; 258 | update_lo_1_value : ipv4.totalLen; 259 | output_dst : tfExportFeatures.byteCt; 260 | output_value : register_lo; 261 | } 262 | 263 | // Start timestamp. 
// State: one 32-bit start timestamp per hash-table slot, indexed by
// tfMeta.hashVal.
register rStartTs {
    width          : 32;
    instance_count : TF_HASH_TBL_SIZE;
}

// Store the current timestamp (tfMeta.curTs) as the slot's new start time and
// write register_lo into the export header.
// NOTE(review): register_lo as an output is presumably the value read before
// the update, i.e. the evicted flow's start time -- confirm with the target docs.
blackbox stateful_alu sResetStartTs {
    reg               : rStartTs;
    update_lo_1_value : tfMeta.curTs;
    output_dst        : tfExportFeatures.startTs;
    output_value      : register_lo;
}

// No match key: applying the table always runs the reset action.
table tiResetStartTs {
    actions { aiResetStartTs; }
    default_action : aiResetStartTs();
}

action aiResetStartTs() {
    sResetStartTs.execute_stateful_alu(tfMeta.hashVal);
}


// End timestamp -- no state needed, end Ts is just when the eviction happens.



/*=====  End of Update Features  ======*/


// Multicast to CPU port using TurboFlow GID.
// The action only sets the traffic manager's multicast group field
// (ig_intr_md_for_tm.mcast_grp_a) to TF_MC_GID.
table tiMcToCpu {
    actions { aiMcToCpu; }
    default_action : aiMcToCpu();
}

action aiMcToCpu() {
    modify_field(ig_intr_md_for_tm.mcast_grp_a, TF_MC_GID);
}


/*==================================================
=            TurboFlow Egress Pipeline.            =
==================================================*/

// Egress entry point: only packets whose ethernet.etherType equals
// ETHERTYPE_TURBOFLOW go through teProcessTfHeader; all others skip it.
control ceTurboFlow {
    if (ethernet.etherType == ETHERTYPE_TURBOFLOW) {
        apply(teProcessTfHeader);
    }
}


// default: (port == other) --> removeTfHeader()
// (port == cpuPort, isMirror == false) --> clone_e2e to truncator and drop.
// (port == cpuPort, isMirror == true) --> do nothing.
315 | table teProcessTfHeader { 316 | reads { 317 | eg_intr_md.egress_port : exact; 318 | tfMeta.isClone : exact; 319 | } 320 | actions { aeDoNothing; aeRemoveTfHeader; aeCloneToTruncator;} 321 | default_action : aeRemoveTfHeader(); 322 | } 323 | 324 | action aeDoNothing() { 325 | no_op(); 326 | } 327 | 328 | action aeRemoveTfHeader() { 329 | modify_field(ethernet.etherType, tfExportStart.realEtherType); 330 | remove_header(tfExportStart); 331 | remove_header(tfExportKey); 332 | remove_header(tfExportFeatures); 333 | } 334 | 335 | action aeCloneToTruncator() { 336 | modify_field(tfMeta.isClone, 1); 337 | clone_e2e(TF_CLONE_MID, flCloneMeta); 338 | // sample_e2e(TF_COAL_MID, 72); 339 | drop(); 340 | } 341 | field_list flCloneMeta { 342 | tfMeta.isClone; 343 | } 344 | 345 | /*===== End of TurboFlow Egress Pipeline. ======*/ 346 | 347 | 348 | -------------------------------------------------------------------------------- /hashtables/aggregator_batch.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "common.h" 10 | 11 | // Aggregator implementation using flat map hash table with 128b int key and batching. 12 | #define BATCHING TRUE 13 | #include "optimizations.h" 14 | 15 | 16 | void initHashTable(); 17 | Result runBenchmark(); 18 | int main(int argc, char *argv[]) { 19 | printf("version: batching map -- batch size: %i\n",PREFETCHCT); 20 | printf("flows: %i records: %i\n", FLOW_COUNT, MESSAGE_COUNT); 21 | 22 | genRandomMicroflows(MESSAGE_COUNT, FLOW_COUNT); 23 | initHashTable(); 24 | 25 | Result r = runBenchmark(); 26 | printResults(r); 27 | free(inArr); 28 | } 29 | 30 | void initHashTable(){ 31 | // reserve at least enough for the expected number of active flows. 32 | // A prime number gives better performance. 
33 | flow_map.reserve(8296553); 34 | } 35 | 36 | 37 | inline void parseFcn(int parsej){ 38 | // cast the keys in the batch to 128 bit ints. 39 | v[parsej] = (__m128i*)(inArr+curPos+parsej); 40 | // calculate the addresses where the records would be stored in the hash table. 41 | ptrs[parsej] = flow_map.getPtr(*v[parsej]); 42 | 43 | // ptrs[parsej] = flow_map.getPtr(((__m128i*)(inArr+curPos+parsej))[0]); 44 | // fetch records for all keysin the batch. 45 | __builtin_prefetch((const void*)(ptrs[parsej]),0,0); 46 | } 47 | 48 | 49 | inline void updateFcn(int updatej){ 50 | // update the records in the batch. 51 | // emplace. 52 | __m128i *k = v[updatej]; 53 | auto v = flow_map.emplace(*k, 0); 54 | // update. 55 | flow_map[*k]++; 56 | } 57 | 58 | Result runBenchmark() 59 | { 60 | Result r; 61 | uint matchCount = 0; 62 | uint missCount = 0; 63 | long int stime, ftime; 64 | 65 | stime = cTime(); 66 | for (curPos=0; curPos(),parseFcn); // parse the batch in an unrolled loop. 68 | unroll_f(_int(),updateFcn); // update the batch in an unrolled loop. 69 | } 70 | ftime = cTime(); 71 | 72 | r.processedMfCt = loadedMfCt; 73 | r.flowCount = flow_map.size(); 74 | r.executionTime = ftime - stime; 75 | return r; 76 | } 77 | -------------------------------------------------------------------------------- /hashtables/aggregator_flat.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // #include 10 | #include "flat_hash_map.hpp" 11 | #include "common.h" 12 | 13 | // Aggregator implementation using flat map hash table. 
14 | // std::unordered_map flow_map; 15 | ska::flat_hash_map flow_map; 16 | 17 | 18 | void initHashTable(); 19 | Result runBenchmark(); 20 | int main(int argc, char *argv[]) { 21 | printf("version: flat hash table\n"); 22 | printf("flows: %i records: %i\n", FLOW_COUNT, MESSAGE_COUNT); 23 | 24 | genRandomMicroflows(MESSAGE_COUNT, FLOW_COUNT); 25 | initHashTable(); 26 | 27 | Result r = runBenchmark(); 28 | printResults(r); 29 | free(inArr); 30 | } 31 | 32 | void initHashTable(){ 33 | // reserve at least enough for the expected number of active flows. 34 | // Using a prime number gives best performance. 35 | flow_map.reserve(8296553); 36 | } 37 | 38 | // The actual table update function. Just emplace and increment counter. 39 | inline void updateFcn(){ 40 | std::string keyStr = std::string((char *)&inArr[curPos], sizeof(MicroflowBin)); 41 | // emplace. 42 | flow_map.emplace(keyStr, 0); 43 | // update. 44 | flow_map[keyStr]++; 45 | 46 | } 47 | 48 | 49 | Result runBenchmark() 50 | { 51 | uint matchCount = 0; 52 | uint missCount = 0; 53 | long int stime, ftime; 54 | 55 | // do the benchmark. 56 | stime = cTime(); 57 | // Run update once for each loaded record. 58 | for (curPos=0; curPos 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "common.h" 10 | 11 | // Aggregator implementation using flat map hash table with 128b int key. 12 | #include "optimizations.h" 13 | 14 | 15 | 16 | void initHashTable(); 17 | Result runBenchmark(); 18 | int main(int argc, char *argv[]) { 19 | printf("version: integer keys\n"); 20 | printf("flows: %i records: %i\n", FLOW_COUNT, MESSAGE_COUNT); 21 | 22 | genRandomMicroflows(MESSAGE_COUNT, FLOW_COUNT); 23 | initHashTable(); 24 | 25 | Result r = runBenchmark(); 26 | printResults(r); 27 | free(inArr); 28 | } 29 | 30 | void initHashTable(){ 31 | // reserve at least enough for the expected number of active flows. 32 | // A prime number gives better performance. 
33 | flow_map.reserve(8296553); 34 | } 35 | 36 | // The actual table update function. Just emplace and increment counter. 37 | inline void updateFcn(){ 38 | // emplace. 39 | __m128i *k = (__m128i *)&inArr[curPos]; 40 | flow_map.emplace(*k, 0); 41 | // update. 42 | flow_map[*k]++; 43 | } 44 | 45 | Result runBenchmark() 46 | { 47 | uint matchCount = 0; 48 | uint missCount = 0; 49 | long int stime, ftime; 50 | 51 | stime = cTime(); 52 | for (curPos=0; curPos 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "common.h" 12 | 13 | // Aggregator implementation using std c++ hash table. 14 | std::unordered_map flow_map; 15 | 16 | 17 | void initHashTable(); 18 | Result runBenchmark(); 19 | 20 | int main(int argc, char *argv[]) { 21 | printf("version: std unordered map\n"); 22 | printf("flows: %i records: %i\n", FLOW_COUNT, MESSAGE_COUNT); 23 | 24 | genRandomMicroflows(MESSAGE_COUNT, FLOW_COUNT); 25 | initHashTable(); 26 | 27 | Result r = runBenchmark(); 28 | printResults(r); 29 | free(inArr); 30 | } 31 | 32 | void initHashTable(){ 33 | // nothing for std. 34 | // flow_map.reserve(8296553); 35 | } 36 | 37 | // The actual table update function. Just emplace and increment counter. 38 | inline void updateFcn(){ 39 | std::string keyStr = std::string((char *)&inArr[curPos], sizeof(MicroflowBin)); 40 | // emplace. 41 | flow_map.emplace(keyStr, 0); 42 | // update. 43 | flow_map[keyStr]++; 44 | 45 | } 46 | 47 | 48 | Result runBenchmark() 49 | { 50 | uint matchCount = 0; 51 | uint missCount = 0; 52 | long int stime, ftime; 53 | 54 | // do the benchmark. 55 | stime = cTime(); 56 | for (curPos=0; curPos 3 | /*====================================== 4 | = Configuration. = 5 | ======================================*/ 6 | 7 | #define MESSAGE_COUNT 10000000 8 | #define FLOW_COUNT 100000 9 | 10 | /*===== End of Configuration. 
======*/ 11 | 12 | 13 | struct Result{ 14 | long int processedMfCt; 15 | long int executionTime; 16 | long int flowCount; 17 | }; 18 | 19 | struct MicroflowBin { 20 | uint64_t addrs; 21 | uint64_t ports; 22 | }; 23 | 24 | 25 | MicroflowBin * inArr; 26 | uint64_t loadedMfCt; 27 | uint64_t curPos; 28 | 29 | 30 | long int cTime(){ 31 | struct timeval tp; 32 | gettimeofday(&tp, NULL); 33 | long int ms = tp.tv_sec * 1000 + tp.tv_usec / 1000; 34 | return ms; 35 | } 36 | 37 | // Generate maxMessages microflows from numFlows distinct flows. 38 | void genRandomMicroflows(long int maxMessages, long int numFlows){ 39 | // Generate random flow keys. 40 | MicroflowBin * flowKeys = (MicroflowBin *)malloc(numFlows * sizeof(MicroflowBin)); 41 | for (int i = 0; i < numFlows; i++) { 42 | flowKeys[i].addrs = rand(); 43 | flowKeys[i].ports = rand(); 44 | } 45 | // Generate messages from flows uniformly selected from the keys. 46 | inArr = (MicroflowBin *)malloc(maxMessages * sizeof(MicroflowBin)); 47 | for (int i = 0; i < maxMessages; i++) { 48 | int selectedFid = rand()%numFlows; 49 | // printf("(%i)\n",selectedFid); 50 | inArr[i].addrs = flowKeys[selectedFid].addrs; 51 | inArr[i].ports = flowKeys[selectedFid].ports; 52 | } 53 | // Clean up flow keys. 54 | free(flowKeys); 55 | loadedMfCt = maxMessages; 56 | } 57 | 58 | void printResults(Result r){ 59 | int mf_ps = int(float(r.processedMfCt) / float(r.executionTime/1000.0)); 60 | printf("microflows per second: %i\n",mf_ps); 61 | printf("flow records collected: %lu\n",r.flowCount); 62 | } -------------------------------------------------------------------------------- /hashtables/flat_hash_map.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Malte Skarupke 2017. 2 | // Distributed under the Boost Software License, Version 1.0. 
3 | // (See http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #ifdef _MSC_VER 20 | #define SKA_NOINLINE(...) __declspec(noinline) __VA_ARGS__ 21 | #else 22 | #define SKA_NOINLINE(...) __VA_ARGS__ __attribute__((noinline)) 23 | #endif 24 | 25 | namespace ska 26 | { 27 | struct prime_number_hash_policy; 28 | struct power_of_two_hash_policy; 29 | 30 | namespace detailv3 31 | { 32 | template 33 | struct functor_storage : Functor 34 | { 35 | functor_storage() = default; 36 | functor_storage(const Functor & functor) 37 | : Functor(functor) 38 | { 39 | } 40 | template 41 | Result operator()(Args &&... args) 42 | { 43 | return static_cast(*this)(std::forward(args)...); 44 | } 45 | template 46 | Result operator()(Args &&... args) const 47 | { 48 | return static_cast(*this)(std::forward(args)...); 49 | } 50 | }; 51 | template 52 | struct functor_storage 53 | { 54 | typedef Result (*function_ptr)(Args...); 55 | function_ptr function; 56 | functor_storage(function_ptr function) 57 | : function(function) 58 | { 59 | } 60 | Result operator()(Args... 
args) const 61 | { 62 | return function(std::forward(args)...); 63 | } 64 | operator function_ptr &() 65 | { 66 | return function; 67 | } 68 | operator const function_ptr &() 69 | { 70 | return function; 71 | } 72 | }; 73 | template 74 | struct KeyOrValueHasher : functor_storage 75 | { 76 | typedef functor_storage hasher_storage; 77 | KeyOrValueHasher() = default; 78 | KeyOrValueHasher(const hasher & hash) 79 | : hasher_storage(hash) 80 | { 81 | } 82 | size_t operator()(const key_type & key) 83 | { 84 | return static_cast(*this)(key); 85 | } 86 | size_t operator()(const key_type & key) const 87 | { 88 | return static_cast(*this)(key); 89 | } 90 | size_t operator()(const value_type & value) 91 | { 92 | return static_cast(*this)(value.first); 93 | } 94 | size_t operator()(const value_type & value) const 95 | { 96 | return static_cast(*this)(value.first); 97 | } 98 | template 99 | size_t operator()(const std::pair & value) 100 | { 101 | return static_cast(*this)(value.first); 102 | } 103 | template 104 | size_t operator()(const std::pair & value) const 105 | { 106 | return static_cast(*this)(value.first); 107 | } 108 | }; 109 | template 110 | struct KeyOrValueEquality : functor_storage 111 | { 112 | typedef functor_storage equality_storage; 113 | KeyOrValueEquality() = default; 114 | KeyOrValueEquality(const key_equal & equality) 115 | : equality_storage(equality) 116 | { 117 | } 118 | bool operator()(const key_type & lhs, const key_type & rhs) 119 | { 120 | return static_cast(*this)(lhs, rhs); 121 | } 122 | bool operator()(const key_type & lhs, const value_type & rhs) 123 | { 124 | return static_cast(*this)(lhs, rhs.first); 125 | } 126 | bool operator()(const value_type & lhs, const key_type & rhs) 127 | { 128 | return static_cast(*this)(lhs.first, rhs); 129 | } 130 | bool operator()(const value_type & lhs, const value_type & rhs) 131 | { 132 | return static_cast(*this)(lhs.first, rhs.first); 133 | } 134 | template 135 | bool operator()(const key_type & lhs, const 
std::pair & rhs) 136 | { 137 | return static_cast(*this)(lhs, rhs.first); 138 | } 139 | template 140 | bool operator()(const std::pair & lhs, const key_type & rhs) 141 | { 142 | return static_cast(*this)(lhs.first, rhs); 143 | } 144 | template 145 | bool operator()(const value_type & lhs, const std::pair & rhs) 146 | { 147 | return static_cast(*this)(lhs.first, rhs.first); 148 | } 149 | template 150 | bool operator()(const std::pair & lhs, const value_type & rhs) 151 | { 152 | return static_cast(*this)(lhs.first, rhs.first); 153 | } 154 | template 155 | bool operator()(const std::pair & lhs, const std::pair & rhs) 156 | { 157 | return static_cast(*this)(lhs.first, rhs.first); 158 | } 159 | }; 160 | template 161 | struct sherwood_v3_entry 162 | { 163 | static constexpr sherwood_v3_entry special_end_entry() 164 | { 165 | sherwood_v3_entry end; 166 | end.distance_from_desired = special_end_value; 167 | return end; 168 | } 169 | 170 | bool has_value() const 171 | { 172 | return distance_from_desired >= 0; 173 | } 174 | bool is_empty() const 175 | { 176 | return distance_from_desired < 0; 177 | } 178 | bool is_at_desired_position() const 179 | { 180 | return distance_from_desired <= 0; 181 | } 182 | template 183 | void emplace(int8_t distance, Args &&... 
args) 184 | { 185 | new (std::addressof(value)) T(std::forward(args)...); 186 | distance_from_desired = distance; 187 | } 188 | 189 | void destroy_value() 190 | { 191 | value.~T(); 192 | distance_from_desired = -1; 193 | } 194 | 195 | int8_t distance_from_desired = -1; 196 | static constexpr int8_t special_end_value = 0; 197 | union { T value; }; 198 | }; 199 | template 200 | struct sherwood_v3_entry_constexpr 201 | { 202 | static constexpr sherwood_v3_entry_constexpr special_end_entry() 203 | { 204 | sherwood_v3_entry_constexpr end; 205 | end.distance_from_desired = sherwood_v3_entry::special_end_value; 206 | return end; 207 | } 208 | 209 | int8_t distance_from_desired = -1; 210 | typename std::aligned_storage::type bytes = {}; 211 | }; 212 | static constexpr int8_t min_lookups = 4; 213 | template 214 | struct EntryDefaultTable 215 | { 216 | static constexpr const sherwood_v3_entry_constexpr table[min_lookups] = 217 | { 218 | {}, {}, {}, sherwood_v3_entry_constexpr::special_end_entry() 219 | }; 220 | }; 221 | template 222 | constexpr const sherwood_v3_entry_constexpr EntryDefaultTable::table[min_lookups]; 223 | 224 | inline int8_t log2(size_t value) 225 | { 226 | static constexpr int8_t table[64] = 227 | { 228 | 63, 0, 58, 1, 59, 47, 53, 2, 229 | 60, 39, 48, 27, 54, 33, 42, 3, 230 | 61, 51, 37, 40, 49, 18, 28, 20, 231 | 55, 30, 34, 11, 43, 14, 22, 4, 232 | 62, 57, 46, 52, 38, 26, 32, 41, 233 | 50, 36, 17, 19, 29, 10, 13, 21, 234 | 56, 45, 25, 31, 35, 16, 9, 12, 235 | 44, 24, 15, 8, 23, 7, 6, 5 236 | }; 237 | value |= value >> 1; 238 | value |= value >> 2; 239 | value |= value >> 4; 240 | value |= value >> 8; 241 | value |= value >> 16; 242 | value |= value >> 32; 243 | return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58]; 244 | } 245 | void throw_out_of_range(); 246 | 247 | template 248 | struct AssignIfTrue 249 | { 250 | void operator()(T & lhs, const T & rhs) 251 | { 252 | lhs = rhs; 253 | } 254 | void operator()(T & lhs, T && rhs) 255 | { 256 | lhs 
= std::move(rhs); 257 | } 258 | }; 259 | template 260 | struct AssignIfTrue 261 | { 262 | void operator()(T &, const T &) 263 | { 264 | } 265 | void operator()(T &, T &&) 266 | { 267 | } 268 | }; 269 | 270 | inline size_t next_power_of_two(size_t i) 271 | { 272 | --i; 273 | i |= i >> 1; 274 | i |= i >> 2; 275 | i |= i >> 4; 276 | i |= i >> 8; 277 | i |= i >> 16; 278 | i |= i >> 32; 279 | ++i; 280 | return i; 281 | } 282 | 283 | template using void_t = void; 284 | 285 | template 286 | struct HashPolicySelector 287 | { 288 | typedef prime_number_hash_policy type; 289 | }; 290 | template 291 | struct HashPolicySelector> 292 | { 293 | typedef typename T::hash_policy type; 294 | }; 295 | 296 | template 297 | class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal 298 | { 299 | using Entry = detailv3::sherwood_v3_entry; 300 | using AllocatorTraits = std::allocator_traits; 301 | using EntryPointer = typename AllocatorTraits::pointer; 302 | struct convertible_to_iterator; 303 | 304 | public: 305 | 306 | using value_type = T; 307 | using size_type = size_t; 308 | using difference_type = std::ptrdiff_t; 309 | using hasher = ArgumentHash; 310 | using key_equal = ArgumentEqual; 311 | using allocator_type = EntryAlloc; 312 | using reference = value_type &; 313 | using const_reference = const value_type &; 314 | using pointer = value_type *; 315 | using const_pointer = const value_type *; 316 | 317 | sherwood_v3_table() 318 | { 319 | } 320 | explicit sherwood_v3_table(size_type bucket_count, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) 321 | : EntryAlloc(alloc), Hasher(hash), Equal(equal) 322 | { 323 | rehash(bucket_count); 324 | } 325 | sherwood_v3_table(size_type bucket_count, const ArgumentAlloc & alloc) 326 | : sherwood_v3_table(bucket_count, ArgumentHash(), ArgumentEqual(), alloc) 327 | { 328 | } 329 | sherwood_v3_table(size_type bucket_count, const ArgumentHash 
& hash, const ArgumentAlloc & alloc) 330 | : sherwood_v3_table(bucket_count, hash, ArgumentEqual(), alloc) 331 | { 332 | } 333 | explicit sherwood_v3_table(const ArgumentAlloc & alloc) 334 | : EntryAlloc(alloc) 335 | { 336 | } 337 | template 338 | sherwood_v3_table(It first, It last, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) 339 | : sherwood_v3_table(bucket_count, hash, equal, alloc) 340 | { 341 | insert(first, last); 342 | } 343 | template 344 | sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentAlloc & alloc) 345 | : sherwood_v3_table(first, last, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) 346 | { 347 | } 348 | template 349 | sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) 350 | : sherwood_v3_table(first, last, bucket_count, hash, ArgumentEqual(), alloc) 351 | { 352 | } 353 | sherwood_v3_table(std::initializer_list il, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) 354 | : sherwood_v3_table(bucket_count, hash, equal, alloc) 355 | { 356 | if (bucket_count == 0) 357 | rehash(il.size()); 358 | insert(il.begin(), il.end()); 359 | } 360 | sherwood_v3_table(std::initializer_list il, size_type bucket_count, const ArgumentAlloc & alloc) 361 | : sherwood_v3_table(il, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) 362 | { 363 | } 364 | sherwood_v3_table(std::initializer_list il, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) 365 | : sherwood_v3_table(il, bucket_count, hash, ArgumentEqual(), alloc) 366 | { 367 | } 368 | sherwood_v3_table(const sherwood_v3_table & other) 369 | : sherwood_v3_table(other, AllocatorTraits::select_on_container_copy_construction(other.get_allocator())) 370 | { 371 | 
} 372 | sherwood_v3_table(const sherwood_v3_table & other, const ArgumentAlloc & alloc) 373 | : EntryAlloc(alloc), Hasher(other), Equal(other), _max_load_factor(other._max_load_factor) 374 | { 375 | rehash_for_other_container(other); 376 | try 377 | { 378 | insert(other.begin(), other.end()); 379 | } 380 | catch(...) 381 | { 382 | clear(); 383 | deallocate_data(entries, num_slots_minus_one, max_lookups); 384 | throw; 385 | } 386 | } 387 | sherwood_v3_table(sherwood_v3_table && other) noexcept 388 | : EntryAlloc(std::move(other)), Hasher(std::move(other)), Equal(std::move(other)) 389 | { 390 | swap_pointers(other); 391 | } 392 | sherwood_v3_table(sherwood_v3_table && other, const ArgumentAlloc & alloc) noexcept 393 | : EntryAlloc(alloc), Hasher(std::move(other)), Equal(std::move(other)) 394 | { 395 | swap_pointers(other); 396 | } 397 | sherwood_v3_table & operator=(const sherwood_v3_table & other) 398 | { 399 | if (this == std::addressof(other)) 400 | return *this; 401 | 402 | clear(); 403 | if (AllocatorTraits::propagate_on_container_copy_assignment::value) 404 | { 405 | if (static_cast(*this) != static_cast(other)) 406 | { 407 | reset_to_empty_state(); 408 | } 409 | AssignIfTrue()(*this, other); 410 | } 411 | _max_load_factor = other._max_load_factor; 412 | static_cast(*this) = other; 413 | static_cast(*this) = other; 414 | rehash_for_other_container(other); 415 | insert(other.begin(), other.end()); 416 | return *this; 417 | } 418 | sherwood_v3_table & operator=(sherwood_v3_table && other) noexcept 419 | { 420 | if (this == std::addressof(other)) 421 | return *this; 422 | else if (AllocatorTraits::propagate_on_container_move_assignment::value) 423 | { 424 | clear(); 425 | reset_to_empty_state(); 426 | AssignIfTrue()(*this, std::move(other)); 427 | swap_pointers(other); 428 | } 429 | else if (static_cast(*this) == static_cast(other)) 430 | { 431 | swap_pointers(other); 432 | } 433 | else 434 | { 435 | clear(); 436 | _max_load_factor = other._max_load_factor; 437 | 
rehash_for_other_container(other); 438 | for (T & elem : other) 439 | emplace(std::move(elem)); 440 | other.clear(); 441 | } 442 | static_cast(*this) = std::move(other); 443 | static_cast(*this) = std::move(other); 444 | return *this; 445 | } 446 | ~sherwood_v3_table() 447 | { 448 | clear(); 449 | deallocate_data(entries, num_slots_minus_one, max_lookups); 450 | } 451 | 452 | const allocator_type & get_allocator() const 453 | { 454 | return static_cast(*this); 455 | } 456 | const ArgumentEqual & key_eq() const 457 | { 458 | return static_cast(*this); 459 | } 460 | const ArgumentHash & hash_function() const 461 | { 462 | return static_cast(*this); 463 | } 464 | 465 | template 466 | struct templated_iterator 467 | { 468 | EntryPointer current = EntryPointer(); 469 | 470 | using iterator_category = std::forward_iterator_tag; 471 | using value_type = ValueType; 472 | using difference_type = ptrdiff_t; 473 | using pointer = ValueType *; 474 | using reference = ValueType &; 475 | 476 | friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs) 477 | { 478 | return lhs.current == rhs.current; 479 | } 480 | friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs) 481 | { 482 | return !(lhs == rhs); 483 | } 484 | 485 | templated_iterator & operator++() 486 | { 487 | do 488 | { 489 | ++current; 490 | } 491 | while(current->is_empty()); 492 | return *this; 493 | } 494 | templated_iterator operator++(int) 495 | { 496 | templated_iterator copy(*this); 497 | ++*this; 498 | return copy; 499 | } 500 | 501 | ValueType & operator*() const 502 | { 503 | return current->value; 504 | } 505 | ValueType * operator->() const 506 | { 507 | return std::addressof(current->value); 508 | } 509 | 510 | operator templated_iterator() const 511 | { 512 | return { current }; 513 | } 514 | }; 515 | using iterator = templated_iterator; 516 | using const_iterator = templated_iterator; 517 | 518 | iterator begin() 519 | { 520 | for (EntryPointer 
it = entries;; ++it) 521 | { 522 | if (it->has_value()) 523 | return { it }; 524 | } 525 | } 526 | const_iterator begin() const 527 | { 528 | for (EntryPointer it = entries;; ++it) 529 | { 530 | if (it->has_value()) 531 | return { it }; 532 | } 533 | } 534 | const_iterator cbegin() const 535 | { 536 | return begin(); 537 | } 538 | iterator end() 539 | { 540 | return { entries + static_cast(num_slots_minus_one + max_lookups) }; 541 | } 542 | const_iterator end() const 543 | { 544 | return { entries + static_cast(num_slots_minus_one + max_lookups) }; 545 | } 546 | const_iterator cend() const 547 | { 548 | return end(); 549 | } 550 | 551 | // Get pointer to position, for prefetching. 552 | void * getPtr(const FindKey & key) 553 | { 554 | size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); 555 | EntryPointer it = entries + ptrdiff_t(index); 556 | return (void *)it; 557 | } 558 | 559 | iterator find(const FindKey & key) 560 | { 561 | size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); 562 | EntryPointer it = entries + ptrdiff_t(index); 563 | for (int8_t distance = 0; it->distance_from_desired >= distance; ++distance, ++it) 564 | { 565 | if (compares_equal(key, it->value)) 566 | return { it }; 567 | } 568 | return end(); 569 | } 570 | const_iterator find(const FindKey & key) const 571 | { 572 | return const_cast(this)->find(key); 573 | } 574 | size_t count(const FindKey & key) const 575 | { 576 | return find(key) == end() ? 
0 : 1; 577 | } 578 | std::pair equal_range(const FindKey & key) 579 | { 580 | iterator found = find(key); 581 | if (found == end()) 582 | return { found, found }; 583 | else 584 | return { found, std::next(found) }; 585 | } 586 | std::pair equal_range(const FindKey & key) const 587 | { 588 | const_iterator found = find(key); 589 | if (found == end()) 590 | return { found, found }; 591 | else 592 | return { found, std::next(found) }; 593 | } 594 | 595 | template 596 | std::pair emplace(Key && key, Args &&... args) 597 | { 598 | size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); 599 | EntryPointer current_entry = entries + ptrdiff_t(index); 600 | int8_t distance_from_desired = 0; 601 | for (; current_entry->distance_from_desired >= distance_from_desired; ++current_entry, ++distance_from_desired) 602 | { 603 | if (compares_equal(key, current_entry->value)) 604 | return { { current_entry }, false }; 605 | } 606 | return emplace_new_key(distance_from_desired, current_entry, std::forward(key), std::forward(args)...); 607 | } 608 | 609 | std::pair insert(const value_type & value) 610 | { 611 | return emplace(value); 612 | } 613 | std::pair insert(value_type && value) 614 | { 615 | return emplace(std::move(value)); 616 | } 617 | template 618 | iterator emplace_hint(const_iterator, Args &&... 
args) 619 | { 620 | return emplace(std::forward(args)...).first; 621 | } 622 | iterator insert(const_iterator, const value_type & value) 623 | { 624 | return emplace(value).first; 625 | } 626 | iterator insert(const_iterator, value_type && value) 627 | { 628 | return emplace(std::move(value)).first; 629 | } 630 | 631 | template 632 | void insert(It begin, It end) 633 | { 634 | for (; begin != end; ++begin) 635 | { 636 | emplace(*begin); 637 | } 638 | } 639 | void insert(std::initializer_list il) 640 | { 641 | insert(il.begin(), il.end()); 642 | } 643 | 644 | void rehash(size_t num_buckets) 645 | { 646 | num_buckets = std::max(num_buckets, static_cast(std::ceil(num_elements / static_cast(_max_load_factor)))); 647 | if (num_buckets == 0) 648 | { 649 | reset_to_empty_state(); 650 | return; 651 | } 652 | auto new_prime_index = hash_policy.next_size_over(num_buckets); 653 | if (num_buckets == bucket_count()) 654 | return; 655 | int8_t new_max_lookups = compute_max_lookups(num_buckets); 656 | EntryPointer new_buckets(AllocatorTraits::allocate(*this, num_buckets + new_max_lookups)); 657 | for (EntryPointer it = new_buckets, real_end = it + static_cast(num_buckets + new_max_lookups - 1); it != real_end; ++it) 658 | { 659 | it->distance_from_desired = -1; 660 | } 661 | new_buckets[num_buckets + new_max_lookups - 1].distance_from_desired = Entry::special_end_value; 662 | std::swap(entries, new_buckets); 663 | std::swap(num_slots_minus_one, num_buckets); 664 | --num_slots_minus_one; 665 | hash_policy.commit(new_prime_index); 666 | int8_t old_max_lookups = max_lookups; 667 | max_lookups = new_max_lookups; 668 | num_elements = 0; 669 | for (EntryPointer it = new_buckets, end = it + static_cast(num_buckets + old_max_lookups); it != end; ++it) 670 | { 671 | if (it->has_value()) 672 | { 673 | emplace(std::move(it->value)); 674 | it->destroy_value(); 675 | } 676 | } 677 | deallocate_data(new_buckets, num_buckets, old_max_lookups); 678 | } 679 | 680 | void reserve(size_t 
num_elements) 681 | { 682 | size_t required_buckets = num_buckets_for_reserve(num_elements); 683 | if (required_buckets > bucket_count()) 684 | rehash(required_buckets); 685 | } 686 | 687 | // the return value is a type that can be converted to an iterator 688 | // the reason for doing this is that it's not free to find the 689 | // iterator pointing at the next element. if you care about the 690 | // next iterator, turn the return value into an iterator 691 | convertible_to_iterator erase(const_iterator to_erase) 692 | { 693 | EntryPointer current = to_erase.current; 694 | current->destroy_value(); 695 | --num_elements; 696 | for (EntryPointer next = current + ptrdiff_t(1); !next->is_at_desired_position(); ++current, ++next) 697 | { 698 | current->emplace(next->distance_from_desired - 1, std::move(next->value)); 699 | next->destroy_value(); 700 | } 701 | return { to_erase.current }; 702 | } 703 | 704 | iterator erase(const_iterator begin_it, const_iterator end_it) 705 | { 706 | for (EntryPointer it = begin_it.current, end = end_it.current; it != end; ++it) 707 | { 708 | if (it->has_value()) 709 | { 710 | it->destroy_value(); 711 | --num_elements; 712 | } 713 | } 714 | if (end_it == this->end()) 715 | return this->end(); 716 | ptrdiff_t num_to_move = std::min(static_cast(end_it.current->distance_from_desired), end_it.current - begin_it.current); 717 | EntryPointer to_return = end_it.current - num_to_move; 718 | for (EntryPointer it = end_it.current; !it->is_at_desired_position();) 719 | { 720 | EntryPointer target = it - num_to_move; 721 | target->emplace(it->distance_from_desired - num_to_move, std::move(it->value)); 722 | it->destroy_value(); 723 | ++it; 724 | num_to_move = std::min(static_cast(it->distance_from_desired), num_to_move); 725 | } 726 | return { to_return }; 727 | } 728 | 729 | size_t erase(const FindKey & key) 730 | { 731 | auto found = find(key); 732 | if (found == end()) 733 | return 0; 734 | else 735 | { 736 | erase(found); 737 | return 1; 738 | 
} 739 | } 740 | 741 | void clear() 742 | { 743 | for (EntryPointer it = entries, end = it + static_cast(num_slots_minus_one + max_lookups); it != end; ++it) 744 | { 745 | if (it->has_value()) 746 | it->destroy_value(); 747 | } 748 | num_elements = 0; 749 | } 750 | 751 | void shrink_to_fit() 752 | { 753 | rehash_for_other_container(*this); 754 | } 755 | 756 | void swap(sherwood_v3_table & other) 757 | { 758 | using std::swap; 759 | swap_pointers(other); 760 | swap(static_cast(*this), static_cast(other)); 761 | swap(static_cast(*this), static_cast(other)); 762 | if (AllocatorTraits::propagate_on_container_swap::value) 763 | swap(static_cast(*this), static_cast(other)); 764 | } 765 | 766 | size_t size() const 767 | { 768 | return num_elements; 769 | } 770 | size_t max_size() const 771 | { 772 | return (AllocatorTraits::max_size(*this)) / sizeof(Entry); 773 | } 774 | size_t bucket_count() const 775 | { 776 | return num_slots_minus_one + 1; 777 | } 778 | size_type max_bucket_count() const 779 | { 780 | return (AllocatorTraits::max_size(*this) - min_lookups) / sizeof(Entry); 781 | } 782 | size_t bucket(const FindKey & key) const 783 | { 784 | return hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); 785 | } 786 | float load_factor() const 787 | { 788 | size_t buckets = bucket_count(); 789 | if (buckets) 790 | return static_cast(num_elements) / bucket_count(); 791 | else 792 | return 0; 793 | } 794 | void max_load_factor(float value) 795 | { 796 | _max_load_factor = value; 797 | } 798 | float max_load_factor() const 799 | { 800 | return _max_load_factor; 801 | } 802 | 803 | bool empty() const 804 | { 805 | return num_elements == 0; 806 | } 807 | 808 | using DefaultTable = detailv3::EntryDefaultTable; 809 | EntryPointer entries = const_cast(reinterpret_cast(DefaultTable::table)); 810 | private: 811 | size_t num_slots_minus_one = 0; 812 | typename HashPolicySelector::type hash_policy; 813 | int8_t max_lookups = detailv3::min_lookups - 1; 814 | float 
_max_load_factor = 0.5f; 815 | size_t num_elements = 0; 816 | 817 | static int8_t compute_max_lookups(size_t num_buckets) 818 | { 819 | int8_t desired = detailv3::log2(num_buckets); 820 | return std::max(detailv3::min_lookups, desired); 821 | } 822 | 823 | size_t num_buckets_for_reserve(size_t num_elements) const 824 | { 825 | return static_cast(std::ceil(num_elements / std::min(0.5, static_cast(_max_load_factor)))); 826 | } 827 | void rehash_for_other_container(const sherwood_v3_table & other) 828 | { 829 | rehash(std::min(num_buckets_for_reserve(other.size()), other.bucket_count())); 830 | } 831 | 832 | void swap_pointers(sherwood_v3_table & other) 833 | { 834 | using std::swap; 835 | swap(hash_policy, other.hash_policy); 836 | swap(entries, other.entries); 837 | swap(num_slots_minus_one, other.num_slots_minus_one); 838 | swap(num_elements, other.num_elements); 839 | swap(max_lookups, other.max_lookups); 840 | swap(_max_load_factor, other._max_load_factor); 841 | } 842 | 843 | template 844 | SKA_NOINLINE(std::pair) emplace_new_key(int8_t distance_from_desired, EntryPointer current_entry, Key && key, Args &&... 
args) 845 | { 846 | using std::swap; 847 | if (num_slots_minus_one == 0 || distance_from_desired == max_lookups || static_cast(num_elements + 1) / static_cast(bucket_count()) > _max_load_factor) 848 | { 849 | grow(); 850 | return emplace(std::forward(key), std::forward(args)...); 851 | } 852 | else if (current_entry->is_empty()) 853 | { 854 | current_entry->emplace(distance_from_desired, std::forward(key), std::forward(args)...); 855 | ++num_elements; 856 | return { { current_entry }, true }; 857 | } 858 | value_type to_insert(std::forward(key), std::forward(args)...); 859 | swap(distance_from_desired, current_entry->distance_from_desired); 860 | swap(to_insert, current_entry->value); 861 | iterator result = { current_entry }; 862 | for (++distance_from_desired, ++current_entry;; ++current_entry) 863 | { 864 | if (current_entry->is_empty()) 865 | { 866 | current_entry->emplace(distance_from_desired, std::move(to_insert)); 867 | ++num_elements; 868 | return { result, true }; 869 | } 870 | else if (current_entry->distance_from_desired < distance_from_desired) 871 | { 872 | swap(distance_from_desired, current_entry->distance_from_desired); 873 | swap(to_insert, current_entry->value); 874 | ++distance_from_desired; 875 | } 876 | else 877 | { 878 | ++distance_from_desired; 879 | if (distance_from_desired == max_lookups) 880 | { 881 | swap(to_insert, result.current->value); 882 | grow(); 883 | return emplace(std::move(to_insert)); 884 | } 885 | } 886 | } 887 | } 888 | 889 | void grow() 890 | { 891 | rehash(std::max(size_t(4), 2 * bucket_count())); 892 | } 893 | 894 | void deallocate_data(EntryPointer begin, size_t num_slots_minus_one, int8_t max_lookups) 895 | { 896 | if (begin != const_cast(reinterpret_cast(DefaultTable::table))) 897 | { 898 | AllocatorTraits::deallocate(*this, begin, num_slots_minus_one + max_lookups + 1); 899 | } 900 | } 901 | 902 | void reset_to_empty_state() 903 | { 904 | deallocate_data(entries, num_slots_minus_one, max_lookups); 905 | entries = 
const_cast(reinterpret_cast(DefaultTable::table)); 906 | num_slots_minus_one = 0; 907 | hash_policy.reset(); 908 | max_lookups = detailv3::min_lookups - 1; 909 | } 910 | 911 | template 912 | size_t hash_object(const U & key) 913 | { 914 | return static_cast(*this)(key); 915 | } 916 | template 917 | size_t hash_object(const U & key) const 918 | { 919 | return static_cast(*this)(key); 920 | } 921 | template 922 | bool compares_equal(const L & lhs, const R & rhs) 923 | { 924 | // compare equals start. 925 | return static_cast(*this)(lhs, rhs); 926 | // compared equals end. 927 | } 928 | 929 | struct convertible_to_iterator 930 | { 931 | EntryPointer it; 932 | 933 | operator iterator() 934 | { 935 | if (it->has_value()) 936 | return { it }; 937 | else 938 | return ++iterator{it}; 939 | } 940 | operator const_iterator() 941 | { 942 | if (it->has_value()) 943 | return { it }; 944 | else 945 | return ++const_iterator{it}; 946 | } 947 | }; 948 | 949 | }; 950 | } 951 | 952 | struct prime_number_hash_policy 953 | { 954 | static size_t mod0(size_t) { return 0llu; } 955 | static size_t mod2(size_t hash) { return hash % 2llu; } 956 | static size_t mod3(size_t hash) { return hash % 3llu; } 957 | static size_t mod5(size_t hash) { return hash % 5llu; } 958 | static size_t mod7(size_t hash) { return hash % 7llu; } 959 | static size_t mod11(size_t hash) { return hash % 11llu; } 960 | static size_t mod13(size_t hash) { return hash % 13llu; } 961 | static size_t mod17(size_t hash) { return hash % 17llu; } 962 | static size_t mod23(size_t hash) { return hash % 23llu; } 963 | static size_t mod29(size_t hash) { return hash % 29llu; } 964 | static size_t mod37(size_t hash) { return hash % 37llu; } 965 | static size_t mod47(size_t hash) { return hash % 47llu; } 966 | static size_t mod59(size_t hash) { return hash % 59llu; } 967 | static size_t mod73(size_t hash) { return hash % 73llu; } 968 | static size_t mod97(size_t hash) { return hash % 97llu; } 969 | static size_t mod127(size_t hash) 
{ return hash % 127llu; } 970 | static size_t mod151(size_t hash) { return hash % 151llu; } 971 | static size_t mod197(size_t hash) { return hash % 197llu; } 972 | static size_t mod251(size_t hash) { return hash % 251llu; } 973 | static size_t mod313(size_t hash) { return hash % 313llu; } 974 | static size_t mod397(size_t hash) { return hash % 397llu; } 975 | static size_t mod499(size_t hash) { return hash % 499llu; } 976 | static size_t mod631(size_t hash) { return hash % 631llu; } 977 | static size_t mod797(size_t hash) { return hash % 797llu; } 978 | static size_t mod1009(size_t hash) { return hash % 1009llu; } 979 | static size_t mod1259(size_t hash) { return hash % 1259llu; } 980 | static size_t mod1597(size_t hash) { return hash % 1597llu; } 981 | static size_t mod2011(size_t hash) { return hash % 2011llu; } 982 | static size_t mod2539(size_t hash) { return hash % 2539llu; } 983 | static size_t mod3203(size_t hash) { return hash % 3203llu; } 984 | static size_t mod4027(size_t hash) { return hash % 4027llu; } 985 | static size_t mod5087(size_t hash) { return hash % 5087llu; } 986 | static size_t mod6421(size_t hash) { return hash % 6421llu; } 987 | static size_t mod8089(size_t hash) { return hash % 8089llu; } 988 | static size_t mod10193(size_t hash) { return hash % 10193llu; } 989 | static size_t mod12853(size_t hash) { return hash % 12853llu; } 990 | static size_t mod16193(size_t hash) { return hash % 16193llu; } 991 | static size_t mod20399(size_t hash) { return hash % 20399llu; } 992 | static size_t mod25717(size_t hash) { return hash % 25717llu; } 993 | static size_t mod32401(size_t hash) { return hash % 32401llu; } 994 | static size_t mod40823(size_t hash) { return hash % 40823llu; } 995 | static size_t mod51437(size_t hash) { return hash % 51437llu; } 996 | static size_t mod64811(size_t hash) { return hash % 64811llu; } 997 | static size_t mod81649(size_t hash) { return hash % 81649llu; } 998 | static size_t mod102877(size_t hash) { return hash % 
102877llu; } 999 | static size_t mod129607(size_t hash) { return hash % 129607llu; } 1000 | static size_t mod163307(size_t hash) { return hash % 163307llu; } 1001 | static size_t mod205759(size_t hash) { return hash % 205759llu; } 1002 | static size_t mod259229(size_t hash) { return hash % 259229llu; } 1003 | static size_t mod326617(size_t hash) { return hash % 326617llu; } 1004 | static size_t mod411527(size_t hash) { return hash % 411527llu; } 1005 | static size_t mod518509(size_t hash) { return hash % 518509llu; } 1006 | static size_t mod653267(size_t hash) { return hash % 653267llu; } 1007 | static size_t mod823117(size_t hash) { return hash % 823117llu; } 1008 | static size_t mod1037059(size_t hash) { return hash % 1037059llu; } 1009 | static size_t mod1306601(size_t hash) { return hash % 1306601llu; } 1010 | static size_t mod1646237(size_t hash) { return hash % 1646237llu; } 1011 | static size_t mod2074129(size_t hash) { return hash % 2074129llu; } 1012 | static size_t mod2613229(size_t hash) { return hash % 2613229llu; } 1013 | static size_t mod3292489(size_t hash) { return hash % 3292489llu; } 1014 | static size_t mod4148279(size_t hash) { return hash % 4148279llu; } 1015 | static size_t mod5226491(size_t hash) { return hash % 5226491llu; } 1016 | static size_t mod6584983(size_t hash) { return hash % 6584983llu; } 1017 | static size_t mod8296553(size_t hash) { return hash % 8296553llu; } 1018 | static size_t mod10453007(size_t hash) { return hash % 10453007llu; } 1019 | static size_t mod13169977(size_t hash) { return hash % 13169977llu; } 1020 | static size_t mod16593127(size_t hash) { return hash % 16593127llu; } 1021 | static size_t mod20906033(size_t hash) { return hash % 20906033llu; } 1022 | static size_t mod26339969(size_t hash) { return hash % 26339969llu; } 1023 | static size_t mod33186281(size_t hash) { return hash % 33186281llu; } 1024 | static size_t mod41812097(size_t hash) { return hash % 41812097llu; } 1025 | static size_t mod52679969(size_t 
hash) { return hash % 52679969llu; } 1026 | static size_t mod66372617(size_t hash) { return hash % 66372617llu; } 1027 | static size_t mod83624237(size_t hash) { return hash % 83624237llu; } 1028 | static size_t mod105359939(size_t hash) { return hash % 105359939llu; } 1029 | static size_t mod132745199(size_t hash) { return hash % 132745199llu; } 1030 | static size_t mod167248483(size_t hash) { return hash % 167248483llu; } 1031 | static size_t mod210719881(size_t hash) { return hash % 210719881llu; } 1032 | static size_t mod265490441(size_t hash) { return hash % 265490441llu; } 1033 | static size_t mod334496971(size_t hash) { return hash % 334496971llu; } 1034 | static size_t mod421439783(size_t hash) { return hash % 421439783llu; } 1035 | static size_t mod530980861(size_t hash) { return hash % 530980861llu; } 1036 | static size_t mod668993977(size_t hash) { return hash % 668993977llu; } 1037 | static size_t mod842879579(size_t hash) { return hash % 842879579llu; } 1038 | static size_t mod1061961721(size_t hash) { return hash % 1061961721llu; } 1039 | static size_t mod1337987929(size_t hash) { return hash % 1337987929llu; } 1040 | static size_t mod1685759167(size_t hash) { return hash % 1685759167llu; } 1041 | static size_t mod2123923447(size_t hash) { return hash % 2123923447llu; } 1042 | static size_t mod2675975881(size_t hash) { return hash % 2675975881llu; } 1043 | static size_t mod3371518343(size_t hash) { return hash % 3371518343llu; } 1044 | static size_t mod4247846927(size_t hash) { return hash % 4247846927llu; } 1045 | static size_t mod5351951779(size_t hash) { return hash % 5351951779llu; } 1046 | static size_t mod6743036717(size_t hash) { return hash % 6743036717llu; } 1047 | static size_t mod8495693897(size_t hash) { return hash % 8495693897llu; } 1048 | static size_t mod10703903591(size_t hash) { return hash % 10703903591llu; } 1049 | static size_t mod13486073473(size_t hash) { return hash % 13486073473llu; } 1050 | static size_t mod16991387857(size_t 
hash) { return hash % 16991387857llu; } 1051 | static size_t mod21407807219(size_t hash) { return hash % 21407807219llu; } 1052 | static size_t mod26972146961(size_t hash) { return hash % 26972146961llu; } 1053 | static size_t mod33982775741(size_t hash) { return hash % 33982775741llu; } 1054 | static size_t mod42815614441(size_t hash) { return hash % 42815614441llu; } 1055 | static size_t mod53944293929(size_t hash) { return hash % 53944293929llu; } 1056 | static size_t mod67965551447(size_t hash) { return hash % 67965551447llu; } 1057 | static size_t mod85631228929(size_t hash) { return hash % 85631228929llu; } 1058 | static size_t mod107888587883(size_t hash) { return hash % 107888587883llu; } 1059 | static size_t mod135931102921(size_t hash) { return hash % 135931102921llu; } 1060 | static size_t mod171262457903(size_t hash) { return hash % 171262457903llu; } 1061 | static size_t mod215777175787(size_t hash) { return hash % 215777175787llu; } 1062 | static size_t mod271862205833(size_t hash) { return hash % 271862205833llu; } 1063 | static size_t mod342524915839(size_t hash) { return hash % 342524915839llu; } 1064 | static size_t mod431554351609(size_t hash) { return hash % 431554351609llu; } 1065 | static size_t mod543724411781(size_t hash) { return hash % 543724411781llu; } 1066 | static size_t mod685049831731(size_t hash) { return hash % 685049831731llu; } 1067 | static size_t mod863108703229(size_t hash) { return hash % 863108703229llu; } 1068 | static size_t mod1087448823553(size_t hash) { return hash % 1087448823553llu; } 1069 | static size_t mod1370099663459(size_t hash) { return hash % 1370099663459llu; } 1070 | static size_t mod1726217406467(size_t hash) { return hash % 1726217406467llu; } 1071 | static size_t mod2174897647073(size_t hash) { return hash % 2174897647073llu; } 1072 | static size_t mod2740199326961(size_t hash) { return hash % 2740199326961llu; } 1073 | static size_t mod3452434812973(size_t hash) { return hash % 3452434812973llu; } 1074 | 
static size_t mod4349795294267(size_t hash) { return hash % 4349795294267llu; } 1075 | static size_t mod5480398654009(size_t hash) { return hash % 5480398654009llu; } 1076 | static size_t mod6904869625999(size_t hash) { return hash % 6904869625999llu; } 1077 | static size_t mod8699590588571(size_t hash) { return hash % 8699590588571llu; } 1078 | static size_t mod10960797308051(size_t hash) { return hash % 10960797308051llu; } 1079 | static size_t mod13809739252051(size_t hash) { return hash % 13809739252051llu; } 1080 | static size_t mod17399181177241(size_t hash) { return hash % 17399181177241llu; } 1081 | static size_t mod21921594616111(size_t hash) { return hash % 21921594616111llu; } 1082 | static size_t mod27619478504183(size_t hash) { return hash % 27619478504183llu; } 1083 | static size_t mod34798362354533(size_t hash) { return hash % 34798362354533llu; } 1084 | static size_t mod43843189232363(size_t hash) { return hash % 43843189232363llu; } 1085 | static size_t mod55238957008387(size_t hash) { return hash % 55238957008387llu; } 1086 | static size_t mod69596724709081(size_t hash) { return hash % 69596724709081llu; } 1087 | static size_t mod87686378464759(size_t hash) { return hash % 87686378464759llu; } 1088 | static size_t mod110477914016779(size_t hash) { return hash % 110477914016779llu; } 1089 | static size_t mod139193449418173(size_t hash) { return hash % 139193449418173llu; } 1090 | static size_t mod175372756929481(size_t hash) { return hash % 175372756929481llu; } 1091 | static size_t mod220955828033581(size_t hash) { return hash % 220955828033581llu; } 1092 | static size_t mod278386898836457(size_t hash) { return hash % 278386898836457llu; } 1093 | static size_t mod350745513859007(size_t hash) { return hash % 350745513859007llu; } 1094 | static size_t mod441911656067171(size_t hash) { return hash % 441911656067171llu; } 1095 | static size_t mod556773797672909(size_t hash) { return hash % 556773797672909llu; } 1096 | static size_t 
mod701491027718027(size_t hash) { return hash % 701491027718027llu; } 1097 | static size_t mod883823312134381(size_t hash) { return hash % 883823312134381llu; } 1098 | static size_t mod1113547595345903(size_t hash) { return hash % 1113547595345903llu; } 1099 | static size_t mod1402982055436147(size_t hash) { return hash % 1402982055436147llu; } 1100 | static size_t mod1767646624268779(size_t hash) { return hash % 1767646624268779llu; } 1101 | static size_t mod2227095190691797(size_t hash) { return hash % 2227095190691797llu; } 1102 | static size_t mod2805964110872297(size_t hash) { return hash % 2805964110872297llu; } 1103 | static size_t mod3535293248537579(size_t hash) { return hash % 3535293248537579llu; } 1104 | static size_t mod4454190381383713(size_t hash) { return hash % 4454190381383713llu; } 1105 | static size_t mod5611928221744609(size_t hash) { return hash % 5611928221744609llu; } 1106 | static size_t mod7070586497075177(size_t hash) { return hash % 7070586497075177llu; } 1107 | static size_t mod8908380762767489(size_t hash) { return hash % 8908380762767489llu; } 1108 | static size_t mod11223856443489329(size_t hash) { return hash % 11223856443489329llu; } 1109 | static size_t mod14141172994150357(size_t hash) { return hash % 14141172994150357llu; } 1110 | static size_t mod17816761525534927(size_t hash) { return hash % 17816761525534927llu; } 1111 | static size_t mod22447712886978529(size_t hash) { return hash % 22447712886978529llu; } 1112 | static size_t mod28282345988300791(size_t hash) { return hash % 28282345988300791llu; } 1113 | static size_t mod35633523051069991(size_t hash) { return hash % 35633523051069991llu; } 1114 | static size_t mod44895425773957261(size_t hash) { return hash % 44895425773957261llu; } 1115 | static size_t mod56564691976601587(size_t hash) { return hash % 56564691976601587llu; } 1116 | static size_t mod71267046102139967(size_t hash) { return hash % 71267046102139967llu; } 1117 | static size_t mod89790851547914507(size_t 
hash) { return hash % 89790851547914507llu; } 1118 | static size_t mod113129383953203213(size_t hash) { return hash % 113129383953203213llu; } 1119 | static size_t mod142534092204280003(size_t hash) { return hash % 142534092204280003llu; } 1120 | static size_t mod179581703095829107(size_t hash) { return hash % 179581703095829107llu; } 1121 | static size_t mod226258767906406483(size_t hash) { return hash % 226258767906406483llu; } 1122 | static size_t mod285068184408560057(size_t hash) { return hash % 285068184408560057llu; } 1123 | static size_t mod359163406191658253(size_t hash) { return hash % 359163406191658253llu; } 1124 | static size_t mod452517535812813007(size_t hash) { return hash % 452517535812813007llu; } 1125 | static size_t mod570136368817120201(size_t hash) { return hash % 570136368817120201llu; } 1126 | static size_t mod718326812383316683(size_t hash) { return hash % 718326812383316683llu; } 1127 | static size_t mod905035071625626043(size_t hash) { return hash % 905035071625626043llu; } 1128 | static size_t mod1140272737634240411(size_t hash) { return hash % 1140272737634240411llu; } 1129 | static size_t mod1436653624766633509(size_t hash) { return hash % 1436653624766633509llu; } 1130 | static size_t mod1810070143251252131(size_t hash) { return hash % 1810070143251252131llu; } 1131 | static size_t mod2280545475268481167(size_t hash) { return hash % 2280545475268481167llu; } 1132 | static size_t mod2873307249533267101(size_t hash) { return hash % 2873307249533267101llu; } 1133 | static size_t mod3620140286502504283(size_t hash) { return hash % 3620140286502504283llu; } 1134 | static size_t mod4561090950536962147(size_t hash) { return hash % 4561090950536962147llu; } 1135 | static size_t mod5746614499066534157(size_t hash) { return hash % 5746614499066534157llu; } 1136 | static size_t mod7240280573005008577(size_t hash) { return hash % 7240280573005008577llu; } 1137 | static size_t mod9122181901073924329(size_t hash) { return hash % 
9122181901073924329llu; } 1138 | static size_t mod11493228998133068689(size_t hash) { return hash % 11493228998133068689llu; } 1139 | static size_t mod14480561146010017169(size_t hash) { return hash % 14480561146010017169llu; } 1140 | static size_t mod18446744073709551557(size_t hash) { return hash % 18446744073709551557llu; } 1141 | 1142 | size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const 1143 | { 1144 | static constexpr size_t (* const mod_functions[])(size_t) = 1145 | { 1146 | &mod0, &mod2, &mod3, &mod5, &mod7, &mod11, &mod13, &mod17, &mod23, &mod29, &mod37, 1147 | &mod47, &mod59, &mod73, &mod97, &mod127, &mod151, &mod197, &mod251, &mod313, &mod397, 1148 | &mod499, &mod631, &mod797, &mod1009, &mod1259, &mod1597, &mod2011, &mod2539, &mod3203, 1149 | &mod4027, &mod5087, &mod6421, &mod8089, &mod10193, &mod12853, &mod16193, &mod20399, 1150 | &mod25717, &mod32401, &mod40823, &mod51437, &mod64811, &mod81649, &mod102877, 1151 | &mod129607, &mod163307, &mod205759, &mod259229, &mod326617, &mod411527, &mod518509, 1152 | &mod653267, &mod823117, &mod1037059, &mod1306601, &mod1646237, &mod2074129, 1153 | &mod2613229, &mod3292489, &mod4148279, &mod5226491, &mod6584983, &mod8296553, 1154 | &mod10453007, &mod13169977, &mod16593127, &mod20906033, &mod26339969, &mod33186281, 1155 | &mod41812097, &mod52679969, &mod66372617, &mod83624237, &mod105359939, &mod132745199, 1156 | &mod167248483, &mod210719881, &mod265490441, &mod334496971, &mod421439783, 1157 | &mod530980861, &mod668993977, &mod842879579, &mod1061961721, &mod1337987929, 1158 | &mod1685759167, &mod2123923447, &mod2675975881, &mod3371518343, &mod4247846927, 1159 | &mod5351951779, &mod6743036717, &mod8495693897, &mod10703903591, &mod13486073473, 1160 | &mod16991387857, &mod21407807219, &mod26972146961, &mod33982775741, &mod42815614441, 1161 | &mod53944293929, &mod67965551447, &mod85631228929, &mod107888587883, &mod135931102921, 1162 | &mod171262457903, &mod215777175787, &mod271862205833, 
&mod342524915839, 1163 | &mod431554351609, &mod543724411781, &mod685049831731, &mod863108703229, 1164 | &mod1087448823553, &mod1370099663459, &mod1726217406467, &mod2174897647073, 1165 | &mod2740199326961, &mod3452434812973, &mod4349795294267, &mod5480398654009, 1166 | &mod6904869625999, &mod8699590588571, &mod10960797308051, &mod13809739252051, 1167 | &mod17399181177241, &mod21921594616111, &mod27619478504183, &mod34798362354533, 1168 | &mod43843189232363, &mod55238957008387, &mod69596724709081, &mod87686378464759, 1169 | &mod110477914016779, &mod139193449418173, &mod175372756929481, &mod220955828033581, 1170 | &mod278386898836457, &mod350745513859007, &mod441911656067171, &mod556773797672909, 1171 | &mod701491027718027, &mod883823312134381, &mod1113547595345903, &mod1402982055436147, 1172 | &mod1767646624268779, &mod2227095190691797, &mod2805964110872297, &mod3535293248537579, 1173 | &mod4454190381383713, &mod5611928221744609, &mod7070586497075177, &mod8908380762767489, 1174 | &mod11223856443489329, &mod14141172994150357, &mod17816761525534927, 1175 | &mod22447712886978529, &mod28282345988300791, &mod35633523051069991, 1176 | &mod44895425773957261, &mod56564691976601587, &mod71267046102139967, 1177 | &mod89790851547914507, &mod113129383953203213, &mod142534092204280003, 1178 | &mod179581703095829107, &mod226258767906406483, &mod285068184408560057, 1179 | &mod359163406191658253, &mod452517535812813007, &mod570136368817120201, 1180 | &mod718326812383316683, &mod905035071625626043, &mod1140272737634240411, 1181 | &mod1436653624766633509, &mod1810070143251252131, &mod2280545475268481167, 1182 | &mod2873307249533267101, &mod3620140286502504283, &mod4561090950536962147, 1183 | &mod5746614499066534157, &mod7240280573005008577, &mod9122181901073924329, 1184 | &mod11493228998133068689, &mod14480561146010017169, &mod18446744073709551557 1185 | }; 1186 | return mod_functions[prime_index](hash); 1187 | } 1188 | uint8_t next_size_over(size_t & size) const 1189 | { 1190 | // 
prime numbers generated by the following method: 1191 | // 1. start with a prime p = 2 1192 | // 2. go to wolfram alpha and get p = NextPrime(2 * p) 1193 | // 3. repeat 2. until you overflow 64 bits 1194 | // you now have large gaps which you would hit if somebody called reserve() with an unlucky number. 1195 | // 4. to fill the gaps for every prime p go to wolfram alpha and get ClosestPrime(p * 2^(1/3)) and ClosestPrime(p * 2^(2/3)) and put those in the gaps 1196 | // 5. get PrevPrime(2^64) and put it at the end 1197 | static constexpr const size_t prime_list[] = 1198 | { 1199 | 2llu, 3llu, 5llu, 7llu, 11llu, 13llu, 17llu, 23llu, 29llu, 37llu, 47llu, 1200 | 59llu, 73llu, 97llu, 127llu, 151llu, 197llu, 251llu, 313llu, 397llu, 1201 | 499llu, 631llu, 797llu, 1009llu, 1259llu, 1597llu, 2011llu, 2539llu, 1202 | 3203llu, 4027llu, 5087llu, 6421llu, 8089llu, 10193llu, 12853llu, 16193llu, 1203 | 20399llu, 25717llu, 32401llu, 40823llu, 51437llu, 64811llu, 81649llu, 1204 | 102877llu, 129607llu, 163307llu, 205759llu, 259229llu, 326617llu, 1205 | 411527llu, 518509llu, 653267llu, 823117llu, 1037059llu, 1306601llu, 1206 | 1646237llu, 2074129llu, 2613229llu, 3292489llu, 4148279llu, 5226491llu, 1207 | 6584983llu, 8296553llu, 10453007llu, 13169977llu, 16593127llu, 20906033llu, 1208 | 26339969llu, 33186281llu, 41812097llu, 52679969llu, 66372617llu, 1209 | 83624237llu, 105359939llu, 132745199llu, 167248483llu, 210719881llu, 1210 | 265490441llu, 334496971llu, 421439783llu, 530980861llu, 668993977llu, 1211 | 842879579llu, 1061961721llu, 1337987929llu, 1685759167llu, 2123923447llu, 1212 | 2675975881llu, 3371518343llu, 4247846927llu, 5351951779llu, 6743036717llu, 1213 | 8495693897llu, 10703903591llu, 13486073473llu, 16991387857llu, 1214 | 21407807219llu, 26972146961llu, 33982775741llu, 42815614441llu, 1215 | 53944293929llu, 67965551447llu, 85631228929llu, 107888587883llu, 1216 | 135931102921llu, 171262457903llu, 215777175787llu, 271862205833llu, 1217 | 342524915839llu, 431554351609llu, 
543724411781llu, 685049831731llu, 1218 | 863108703229llu, 1087448823553llu, 1370099663459llu, 1726217406467llu, 1219 | 2174897647073llu, 2740199326961llu, 3452434812973llu, 4349795294267llu, 1220 | 5480398654009llu, 6904869625999llu, 8699590588571llu, 10960797308051llu, 1221 | 13809739252051llu, 17399181177241llu, 21921594616111llu, 27619478504183llu, 1222 | 34798362354533llu, 43843189232363llu, 55238957008387llu, 69596724709081llu, 1223 | 87686378464759llu, 110477914016779llu, 139193449418173llu, 1224 | 175372756929481llu, 220955828033581llu, 278386898836457llu, 1225 | 350745513859007llu, 441911656067171llu, 556773797672909llu, 1226 | 701491027718027llu, 883823312134381llu, 1113547595345903llu, 1227 | 1402982055436147llu, 1767646624268779llu, 2227095190691797llu, 1228 | 2805964110872297llu, 3535293248537579llu, 4454190381383713llu, 1229 | 5611928221744609llu, 7070586497075177llu, 8908380762767489llu, 1230 | 11223856443489329llu, 14141172994150357llu, 17816761525534927llu, 1231 | 22447712886978529llu, 28282345988300791llu, 35633523051069991llu, 1232 | 44895425773957261llu, 56564691976601587llu, 71267046102139967llu, 1233 | 89790851547914507llu, 113129383953203213llu, 142534092204280003llu, 1234 | 179581703095829107llu, 226258767906406483llu, 285068184408560057llu, 1235 | 359163406191658253llu, 452517535812813007llu, 570136368817120201llu, 1236 | 718326812383316683llu, 905035071625626043llu, 1140272737634240411llu, 1237 | 1436653624766633509llu, 1810070143251252131llu, 2280545475268481167llu, 1238 | 2873307249533267101llu, 3620140286502504283llu, 4561090950536962147llu, 1239 | 5746614499066534157llu, 7240280573005008577llu, 9122181901073924329llu, 1240 | 11493228998133068689llu, 14480561146010017169llu, 18446744073709551557llu 1241 | }; 1242 | const size_t * found = std::lower_bound(std::begin(prime_list), std::end(prime_list) - 1, size); 1243 | size = *found; 1244 | return static_cast(1 + found - prime_list); 1245 | } 1246 | void commit(uint8_t new_prime_index) 
1247 | { 1248 | prime_index = new_prime_index; 1249 | } 1250 | void reset() 1251 | { 1252 | prime_index = 0; 1253 | } 1254 | 1255 | private: 1256 | uint8_t prime_index = 0; 1257 | }; 1258 | 1259 | struct power_of_two_hash_policy 1260 | { 1261 | size_t index_for_hash(size_t hash, size_t num_slots_minus_one) const 1262 | { 1263 | return hash & num_slots_minus_one; 1264 | } 1265 | int8_t next_size_over(size_t & size) const 1266 | { 1267 | size = detailv3::next_power_of_two(size); 1268 | return 0; 1269 | } 1270 | void commit(int8_t) 1271 | { 1272 | } 1273 | void reset() 1274 | { 1275 | } 1276 | 1277 | }; 1278 | 1279 | template, typename E = std::equal_to, typename A = std::allocator > > 1280 | class flat_hash_map 1281 | : public detailv3::sherwood_v3_table 1282 | < 1283 | std::pair, 1284 | K, 1285 | H, 1286 | detailv3::KeyOrValueHasher, H>, 1287 | E, 1288 | detailv3::KeyOrValueEquality, E>, 1289 | A, 1290 | typename std::allocator_traits::template rebind_alloc>> 1291 | > 1292 | { 1293 | using Table = detailv3::sherwood_v3_table 1294 | < 1295 | std::pair, 1296 | K, 1297 | H, 1298 | detailv3::KeyOrValueHasher, H>, 1299 | E, 1300 | detailv3::KeyOrValueEquality, E>, 1301 | A, 1302 | typename std::allocator_traits::template rebind_alloc>> 1303 | >; 1304 | public: 1305 | 1306 | using key_type = K; 1307 | using mapped_type = V; 1308 | 1309 | using Table::Table; 1310 | flat_hash_map() 1311 | { 1312 | } 1313 | 1314 | V & operator[](const K & key) 1315 | { 1316 | return emplace(key, convertible_to_value()).first->second; 1317 | } 1318 | V & operator[](K && key) 1319 | { 1320 | return emplace(std::move(key), convertible_to_value()).first->second; 1321 | } 1322 | V & at(const K & key) 1323 | { 1324 | auto found = this->find(key); 1325 | if (found == this->end()) 1326 | throw std::out_of_range("Argument passed to at() was not in the map."); 1327 | return found->second; 1328 | } 1329 | const V & at(const K & key) const 1330 | { 1331 | auto found = this->find(key); 1332 | if (found 
== this->end()) 1333 | throw std::out_of_range("Argument passed to at() was not in the map."); 1334 | return found->second; 1335 | } 1336 | 1337 | using Table::emplace; 1338 | std::pair emplace() 1339 | { 1340 | return emplace(key_type(), convertible_to_value()); 1341 | } 1342 | 1343 | friend bool operator==(const flat_hash_map & lhs, const flat_hash_map & rhs) 1344 | { 1345 | if (lhs.size() != rhs.size()) 1346 | return false; 1347 | for (const typename Table::value_type & value : lhs) 1348 | { 1349 | auto found = rhs.find(value.first); 1350 | if (found == rhs.end()) 1351 | return false; 1352 | else if (value.second != found->second) 1353 | return false; 1354 | } 1355 | return true; 1356 | } 1357 | friend bool operator!=(const flat_hash_map & lhs, const flat_hash_map & rhs) 1358 | { 1359 | return !(lhs == rhs); 1360 | } 1361 | 1362 | private: 1363 | struct convertible_to_value 1364 | { 1365 | operator V() const 1366 | { 1367 | return V(); 1368 | } 1369 | }; 1370 | }; 1371 | 1372 | template, typename E = std::equal_to, typename A = std::allocator > 1373 | class flat_hash_set 1374 | : public detailv3::sherwood_v3_table 1375 | < 1376 | T, 1377 | T, 1378 | H, 1379 | detailv3::functor_storage, 1380 | E, 1381 | detailv3::functor_storage, 1382 | A, 1383 | typename std::allocator_traits::template rebind_alloc> 1384 | > 1385 | { 1386 | using Table = detailv3::sherwood_v3_table 1387 | < 1388 | T, 1389 | T, 1390 | H, 1391 | detailv3::functor_storage, 1392 | E, 1393 | detailv3::functor_storage, 1394 | A, 1395 | typename std::allocator_traits::template rebind_alloc> 1396 | >; 1397 | public: 1398 | 1399 | using key_type = T; 1400 | 1401 | using Table::Table; 1402 | flat_hash_set() 1403 | { 1404 | } 1405 | 1406 | template 1407 | std::pair emplace(Args &&... 
args) 1408 | { 1409 | return Table::emplace(T(std::forward(args)...)); 1410 | } 1411 | std::pair emplace(const key_type & arg) 1412 | { 1413 | return Table::emplace(arg); 1414 | } 1415 | std::pair emplace(key_type & arg) 1416 | { 1417 | return Table::emplace(arg); 1418 | } 1419 | std::pair emplace(const key_type && arg) 1420 | { 1421 | return Table::emplace(std::move(arg)); 1422 | } 1423 | std::pair emplace(key_type && arg) 1424 | { 1425 | return Table::emplace(std::move(arg)); 1426 | } 1427 | 1428 | friend bool operator==(const flat_hash_set & lhs, const flat_hash_set & rhs) 1429 | { 1430 | if (lhs.size() != rhs.size()) 1431 | return false; 1432 | for (const T & value : lhs) 1433 | { 1434 | if (rhs.find(value) == rhs.end()) 1435 | return false; 1436 | } 1437 | return true; 1438 | } 1439 | friend bool operator!=(const flat_hash_set & lhs, const flat_hash_set & rhs) 1440 | { 1441 | return !(lhs == rhs); 1442 | } 1443 | }; 1444 | 1445 | 1446 | template 1447 | struct power_of_two_std_hash : std::hash 1448 | { 1449 | typedef ska::power_of_two_hash_policy hash_policy; 1450 | }; 1451 | 1452 | } // end namespace ska -------------------------------------------------------------------------------- /hashtables/makefile: -------------------------------------------------------------------------------- 1 | all: aggregator_std aggregator_flat aggregator_intkey aggregator_batch 2 | 3 | aggregator_std: aggregator_std.cc 4 | g++ aggregator_std.cc -o aggregator_std -std=c++14 -Ofast -msse4.1 #-march=broadwell 5 | 6 | aggregator_flat: aggregator_flat.cc 7 | g++ aggregator_flat.cc -o aggregator_flat -std=c++14 -Ofast -msse4.1 #-march=broadwell 8 | 9 | aggregator_intkey: aggregator_intkey.cc 10 | g++ aggregator_intkey.cc -o aggregator_intkey -std=c++14 -Ofast -msse4.1 #-march=broadwell 11 | 12 | aggregator_batch: aggregator_batch.cc 13 | g++ aggregator_batch.cc -o aggregator_batch -std=c++14 -Ofast -msse4.1 #-march=broadwell 14 | 15 | clean: 16 | rm aggregator_std aggregator_flat 
aggregator_intkey aggregator_batch -------------------------------------------------------------------------------- /hashtables/optimizations.h: -------------------------------------------------------------------------------- 1 | #include "flat_hash_map.hpp" 2 | #include "xxhash.hpp" 3 | 4 | /*============================================== 5 | = 128 bit key comparison = 6 | ==============================================*/ 7 | namespace std 8 | { 9 | template <> 10 | struct hash<__m128i> 11 | { 12 | size_t operator()(const __m128i& k) const 13 | { 14 | return XXHash32::hash((void *)&k, 16, 1); 15 | } 16 | }; 17 | } 18 | 19 | // https://stackoverflow.com/questions/44511386/sse-addition-and-conversion 20 | static const __m128i zero = {0}; 21 | inline bool compare128(__m128i a, __m128i b) { 22 | __m128i c = _mm_xor_si128(a, b); 23 | return _mm_testc_si128(zero, c); 24 | } 25 | 26 | typedef struct 27 | { 28 | bool operator() (const __m128i &x, const __m128i &y) const { 29 | // __m128i* a = (__m128i*) &x; 30 | // __m128i* b = (__m128i*) &y; 31 | 32 | return compare128 (x, y); 33 | } 34 | } AggregateKeyEq; 35 | 36 | ska::flat_hash_map<__m128i, int, std::hash<__m128i>, AggregateKeyEq> flow_map; 37 | 38 | 39 | /*===== End of 128 bit key comparison ======*/ 40 | 41 | #ifdef BATCHING 42 | /*==================================================== 43 | = Loop unrolling and batching. = 44 | ====================================================*/ 45 | #define PREFETCHCT 10 46 | 47 | 48 | 49 | // Loop unrolling. 50 | template struct _int{ }; 51 | 52 | template 53 | inline void unroll_f(_int, F&& f, Args&&... args) { 54 | unroll_f(_int(),std::forward(f),std::forward(args)...); 55 | f(N,args...); 56 | } 57 | template 58 | inline void unroll_f(_int<0>, F&& f, Args&&... args) { 59 | f(0,args...); 60 | } 61 | __m128i * v[PREFETCHCT]; 62 | void * ptrs[PREFETCHCT]; 63 | 64 | /*===== End of Loop unrolling and batching. 
======*/ 65 | 66 | #endif 67 | 68 | 69 | 70 | /*======================================= 71 | = Cycle counting. = 72 | =======================================*/ 73 | 74 | typedef unsigned long long ticks; 75 | 76 | static __inline__ ticks tickStart (void) { 77 | unsigned cycles_low, cycles_high; 78 | asm volatile ("CPUID\n\t" 79 | "RDTSC\n\t" 80 | "mov %%edx, %0\n\t" 81 | "mov %%eax, %1\n\t": "=r" (cycles_high), "=r" (cycles_low):: 82 | "%rax", "%rbx", "%rcx", "%rdx"); 83 | return ((ticks)cycles_high << 32) | cycles_low; 84 | } 85 | 86 | static __inline__ ticks tickStop (void) { 87 | unsigned cycles_low, cycles_high; 88 | asm volatile("RDTSCP\n\t" 89 | "mov %%edx, %0\n\t" 90 | "mov %%eax, %1\n\t" 91 | "CPUID\n\t": "=r" (cycles_high), "=r" (cycles_low):: "%rax", 92 | "%rbx", "%rcx", "%rdx"); 93 | return ((ticks)cycles_high << 32) | cycles_low; 94 | } 95 | 96 | /*===== End of Cycle counting. ======*/ 97 | -------------------------------------------------------------------------------- /hashtables/readme.md: -------------------------------------------------------------------------------- 1 | ### Hash table implementations from TurboFlow ### 2 | 3 | 4 | #### Overview #### 5 | This directory contains simple implementations of the core hash table data-structures evaluated with TurboFlow. 6 | 7 | Each program contains a variant of the data-structure: 8 | - ```aggregator_std```: uses c++ unordered_map as the hash table. 9 | - ```aggregator_flat```: uses a custom flat hash table implementation. 10 | - ```aggregator_intkey```: uses 128 bit integer keys with custom key comparison and hashing operations. 11 | - ```aggregator_batch```: batches updates to the hash table. 12 | 13 | Each program also contains a simple benchmarking wrapper that: 14 | 1) generates random microflow-like messages, randomly distributed across a number of flows. 15 | 2) uses the hash table to count the number of microflows in each flow. 16 | 3) measures hash table throughput in messages per second. 
17 | 18 | #### Usage #### 19 | ``` 20 | jsonch@johnshack:~/gits/turboflow/hashtables$ ./benchmarkAll.sh 21 | --------- building all --------- 22 | rm aggregator_std aggregator_flat aggregator_intkey aggregator_batch 23 | g++ aggregator_std.cc -o aggregator_std -std=c++14 -Ofast -msse4.1 #-march=broadwell 24 | g++ aggregator_flat.cc -o aggregator_flat -std=c++14 -Ofast -msse4.1 #-march=broadwell 25 | g++ aggregator_intkey.cc -o aggregator_intkey -std=c++14 -Ofast -msse4.1 #-march=broadwell 26 | g++ aggregator_batch.cc -o aggregator_batch -std=c++14 -Ofast -msse4.1 #-march=broadwell 27 | -------------------------------- 28 | ---- running aggregator_std ---- 29 | version: std unordered map 30 | flows: 100000 records: 10000000 31 | microflows per second: 1848770 32 | flow records collected: 100000 33 | -------------------------------- 34 | ---- running aggregator_flat ---- 35 | version: flat hash table 36 | flows: 100000 records: 10000000 37 | microflows per second: 2541942 38 | flow records collected: 100000 39 | -------------------------------- 40 | ---- running aggregator_intkey ---- 41 | version: integer keys 42 | flows: 100000 records: 10000000 43 | microflows per second: 5479452 44 | flow records collected: 100000 45 | -------------------------------- 46 | ---- running aggregator_batch ---- 47 | version: batching map -- batch size: 10 48 | flows: 100000 records: 10000000 49 | microflows per second: 7800312 50 | flow records collected: 100000 51 | -------------------------------- 52 | ``` 53 | 54 | #### Notes #### 55 | - The 128 bit integer operations require gcc >= 5.5. 56 | - On many systems, the -march= flag can improve performance. 57 | - Optimal batch size depends on the system. Change PREFETCHCT (default 10) in optimizations.h. 
// File: hashtables/xxhash.hpp
// //////////////////////////////////////////////////////////
// xxhash32.h
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
// see http://create.stephan-brumme.com/disclaimer.html
//
#pragma once
#include <stdint.h> // for uint32_t and uint64_t

/// XXHash (32 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/
/** How to use:
    uint32_t myseed = 0;
    XXHash32 myhash(myseed);
    myhash.add(pointerToSomeBytes,     numberOfBytes);
    myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ...
    // and compute hash:
    uint32_t result = myhash.hash();
    // or all of the above in one single line:
    uint32_t result2 = XXHash32::hash(mypointer, numBytes, myseed);

    Note: input words are assembled byte by byte in little-endian order, so the
          input pointer needs no particular alignment and the result does not
          depend on the byte order of the host.
**/
class XXHash32
{
public:
  /// create new XXHash (32 bit)
  /** @param seed your seed value, even zero is a valid seed and e.g. used by LZ4 **/
  explicit XXHash32(uint32_t seed)
  {
    state[0] = seed + Prime1 + Prime2;
    state[1] = seed + Prime2;
    state[2] = seed;
    state[3] = seed - Prime1;
    bufferSize  = 0;
    totalLength = 0;
  }

  /// add a chunk of bytes
  /** @param  input  pointer to a continuous block of data
      @param  length number of bytes
      @return false if parameters are invalid / zero **/
  bool add(const void* input, uint64_t length)
  {
    // no data ?
    if (!input || length == 0)
      return false;

    totalLength += length;

    // byte-wise access
    const unsigned char* data = static_cast<const unsigned char*>(input);

    // unprocessed old data plus new data still fit in temporary buffer ?
    if (bufferSize + length < MaxBufferSize)
    {
      // just add new data
      while (length-- > 0)
        buffer[bufferSize++] = *data++;
      return true;
    }

    // From here on `length` counts the bytes of this call that are still
    // unconsumed. Tracking a count (instead of a "stop - 16" pointer) avoids
    // forming a pointer in front of the caller's array for short inputs.

    // some data left from previous update ?
    if (bufferSize > 0)
    {
      // top up the temporary buffer to a full block (16 bytes); the check
      // above guarantees that enough input bytes are available
      while (bufferSize < MaxBufferSize)
      {
        buffer[bufferSize++] = *data++;
        --length;
      }

      // process these 16 bytes (4x4)
      process(buffer, state[0], state[1], state[2], state[3]);
    }

    // copying state to local variables helps optimizer A LOT
    uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];

    // consume all complete 16 byte blocks directly from the input
    while (length >= MaxBufferSize)
    {
      process(data, s0, s1, s2, s3);
      data   += MaxBufferSize;
      length -= MaxBufferSize;
    }

    // copy back
    state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;

    // keep the remaining 0..15 bytes for the next add() / hash()
    bufferSize = static_cast<unsigned int>(length);
    for (unsigned int i = 0; i < bufferSize; i++)
      buffer[i] = data[i];

    // done
    return true;
  }

  /// get current hash
  /** @return 32 bit XXHash **/
  uint32_t hash() const
  {
    uint32_t result = static_cast<uint32_t>(totalLength);

    // fold 128 bit state into one single 32 bit value
    if (totalLength >= MaxBufferSize)
      result += rotateLeft(state[0],  1) +
                rotateLeft(state[1],  7) +
                rotateLeft(state[2], 12) +
                rotateLeft(state[3], 18);
    else
      // internal state wasn't set in add(), therefore original seed is still stored in state2
      result += state[2] + Prime5;

    // process remaining bytes in temporary buffer
    const unsigned char* data = buffer;
    // point beyond last byte
    const unsigned char* stop = data + bufferSize;

    // at least 4 bytes left ? => eat 4 bytes per step
    for (; data + 4 <= stop; data += 4)
      result = rotateLeft(result + read32(data) * Prime3, 17) * Prime4;

    // take care of remaining 0..3 bytes, eat 1 byte per step
    while (data != stop)
      result = rotateLeft(result + (*data++) * Prime5, 11) * Prime1;

    // mix bits
    result ^= result >> 15;
    result *= Prime2;
    result ^= result >> 13;
    result *= Prime3;
    result ^= result >> 16;
    return result;
  }

  /// combine constructor, add() and hash() in one static function (C style)
  /** @param  input  pointer to a continuous block of data
      @param  length number of bytes
      @param  seed your seed value, e.g. zero is a valid seed and used by LZ4
      @return 32 bit XXHash **/
  static uint32_t hash(const void* input, uint64_t length, uint32_t seed)
  {
    XXHash32 hasher(seed);
    hasher.add(input, length);
    return hasher.hash();
  }

private:
  /// magic constants :-)
  static const uint32_t Prime1 = 2654435761U;
  static const uint32_t Prime2 = 2246822519U;
  static const uint32_t Prime3 = 3266489917U;
  static const uint32_t Prime4 =  668265263U;
  static const uint32_t Prime5 =  374761393U;

  /// temporarily store up to 15 bytes between multiple add() calls
  static const uint32_t MaxBufferSize = 15+1;

  // internal state and temporary buffer
  uint32_t      state[4]; // state[2] == seed if totalLength < MaxBufferSize
  unsigned char buffer[MaxBufferSize];
  unsigned int  bufferSize;
  uint64_t      totalLength;

  /// rotate bits, should compile to a single CPU instruction (ROL)
  /** @param bits rotation distance, must be in 1..31 (all callers pass constants in that range) **/
  static inline uint32_t rotateLeft(uint32_t x, unsigned char bits)
  {
    return (x << bits) | (x >> (32 - bits));
  }

  /// read 4 bytes as a little-endian 32 bit word
  /** Built from single bytes on purpose: dereferencing a casted uint32_t*
      would be a misaligned, aliasing-violating access for arbitrary input. **/
  static inline uint32_t read32(const unsigned char* bytes)
  {
    return  static_cast<uint32_t>(bytes[0])        |
           (static_cast<uint32_t>(bytes[1]) <<  8) |
           (static_cast<uint32_t>(bytes[2]) << 16) |
           (static_cast<uint32_t>(bytes[3]) << 24);
  }

  /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm
  static inline void process(const void* data, uint32_t& state0, uint32_t& state1, uint32_t& state2, uint32_t& state3)
  {
    const unsigned char* block = static_cast<const unsigned char*>(data);
    state0 = rotateLeft(state0 + read32(block     ) * Prime2, 13) * Prime1;
    state1 = rotateLeft(state1 + read32(block +  4) * Prime2, 13) * Prime1;
    state2 = rotateLeft(state2 + read32(block +  8) * Prime2, 13) * Prime1;
    state3 = rotateLeft(state3 + read32(block + 12) * Prime2, 13) * Prime1;
  }
};