├── BOBHash32.h ├── BOBHash64.h ├── CSS.h ├── ColdFilter ├── CF.cpp ├── SC.h ├── SC_spacesaving.h └── SPA.h ├── CounterTree ├── BOBHash32.h ├── Counter_Tree.h ├── Sketchpheap.h ├── main.cpp └── params.h ├── ElasticSketch ├── ElasticSketch.h ├── HeavyPart.h ├── HeavyPart.h.gch ├── LightPart.h ├── param.h └── throughput.cpp ├── LossyCounting.h ├── README.md ├── heavykeeper.h ├── main.cpp ├── params.h ├── spacesaving.h ├── ssummary.h └── technical_report.pdf /BOBHash32.h: -------------------------------------------------------------------------------- 1 | #ifndef _BOBHASH32_H 2 | #define _BOBHASH32_H 3 | #include 4 | using namespace std; 5 | 6 | typedef unsigned int uint; 7 | typedef unsigned long long int uint64; 8 | 9 | 10 | #define MAX_PRIME32 1229 11 | #define MAX_BIG_PRIME32 50 12 | 13 | class BOBHash32 14 | { 15 | public: 16 | BOBHash32(); 17 | ~BOBHash32(); 18 | BOBHash32(uint prime32Num); 19 | void initialize(uint prime32Num); 20 | uint run(const char * str, uint len); 21 | private: 22 | uint prime32Num; 23 | }; 24 | 25 | uint big_prime3232[MAX_BIG_PRIME32] = { 26 | 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269, 20287, 27 | 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359, 20369, 28 | 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477, 20479, 29 | 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563, 20593, 30 | 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707, 20717 31 | }; 32 | uint prime32[MAX_PRIME32] = { 33 | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 34 | 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 35 | 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 36 | 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 37 | 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 38 | 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 39 | 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 40 | 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 41 | 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 42 | 467, 479, 487, 491, 499, 
503, 509, 521, 523, 541, 43 | 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 44 | 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 45 | 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 46 | 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 47 | 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 48 | 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 49 | 947, 953, 967, 971, 977, 983, 991, 997, 50 | 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 51 | 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 52 | 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 53 | 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 54 | 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 55 | 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 56 | 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 57 | 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 58 | 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 59 | 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 60 | 1723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 61 | 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 62 | 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 63 | 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 64 | 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 65 | 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 66 | 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 67 | 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 68 | 2351, 2357, 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 69 | 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 70 | 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 71 | 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 72 | 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 73 | 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 74 | 2803, 2819, 2833, 2837, 2843, 
2851, 2857, 2861, 2879, 2887, 75 | 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 76 | 2971, 2999, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 77 | 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 78 | 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 79 | 3253, 3257, 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 80 | 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 81 | 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 82 | 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 83 | 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 84 | 3637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 85 | 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 86 | 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 87 | 3889, 3907, 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 88 | 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 89 | 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 90 | 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 91 | 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 92 | 4289, 4297, 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 93 | 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 94 | 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 95 | 4567, 4583, 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 96 | 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 97 | 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 98 | 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 99 | 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 100 | 4999, 5003, 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 101 | 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 102 | 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 103 | 5273, 5279, 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 104 | 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 105 | 5441, 
5443, 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 106 | 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 107 | 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 108 | 5689, 5693, 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 109 | 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 110 | 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 111 | 5927, 5939, 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 112 | 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 113 | 6131, 6133, 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 114 | 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 115 | 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 116 | 6361, 6367, 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 117 | 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 118 | 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 119 | 6661, 6673, 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 120 | 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 121 | 6829, 6833, 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 122 | 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 123 | 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 124 | 7079, 7103, 7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 125 | 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 126 | 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 127 | 7393, 7411, 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 128 | 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 129 | 7559, 7561, 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 130 | 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 131 | 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 132 | 7823, 7829, 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 133 | 7907, 7919, 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 134 | 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 135 | 8101, 8111, 8117, 8123, 
8147, 8161, 8167, 8171, 8179, 8191, 136 | 8209, 8219, 8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 137 | 8287, 8291, 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 138 | 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 139 | 8467, 8501, 8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 140 | 8581, 8597, 8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 141 | 8669, 8677, 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 142 | 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 143 | 8821, 8831, 8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 144 | 8923, 8929, 8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 145 | 9007, 9011, 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 146 | 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 147 | 9187, 9199, 9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 148 | 9281, 9283, 9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 149 | 9371, 9377, 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 150 | 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 151 | 9521, 9533, 9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 152 | 9629, 9631, 9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 153 | 9721, 9733, 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 154 | 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 155 | 9883, 9887, 9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 156 | 9973 157 | }; 158 | 159 | #define mix(a,b,c) \ 160 | { \ 161 | a -= b; a -= c; a ^= (c>>13); \ 162 | b -= c; b -= a; b ^= (a<<8); \ 163 | c -= a; c -= b; c ^= (b>>13); \ 164 | a -= b; a -= c; a ^= (c>>12); \ 165 | b -= c; b -= a; b ^= (a<<16); \ 166 | c -= a; c -= b; c ^= (b>>5); \ 167 | a -= b; a -= c; a ^= (c>>3); \ 168 | b -= c; b -= a; b ^= (a<<10); \ 169 | c -= a; c -= b; c ^= (b>>15); \ 170 | } 171 | 172 | BOBHash32::BOBHash32() 173 | { 174 | this->prime32Num = 0; 175 | } 176 | 177 | BOBHash32::BOBHash32(uint prime32Num) 178 | { 179 | this->prime32Num = prime32Num; 180 | } 181 | 182 | void BOBHash32::initialize(uint 
prime32Num) 183 | { 184 | this->prime32Num = prime32Num; 185 | } 186 | 187 | uint BOBHash32::run(const char * str, uint len) 188 | { 189 | //register ub4 a,b,c,len; 190 | uint a,b,c; 191 | uint initval = 0; 192 | /* Set up the internal state */ 193 | //len = length; 194 | a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ 195 | c = prime32[this->prime32Num]; /* the previous hash value */ 196 | 197 | /*---------------------------------------- handle most of the key */ 198 | while (len >= 12) 199 | { 200 | a += (str[0] +((uint)str[1]<<8) +((uint)str[2]<<16) +((uint)str[3]<<24)); 201 | b += (str[4] +((uint)str[5]<<8) +((uint)str[6]<<16) +((uint)str[7]<<24)); 202 | c += (str[8] +((uint)str[9]<<8) +((uint)str[10]<<16)+((uint)str[11]<<24)); 203 | mix(a,b,c); 204 | str += 12; len -= 12; 205 | } 206 | 207 | /*------------------------------------- handle the last 11 bytes */ 208 | c += len; 209 | switch(len) /* all the case statements fall through */ 210 | { 211 | case 11: c+=((uint)str[10]<<24); 212 | case 10: c+=((uint)str[9]<<16); 213 | case 9 : c+=((uint)str[8]<<8); 214 | /* the first byte of c is reserved for the length */ 215 | case 8 : b+=((uint)str[7]<<24); 216 | case 7 : b+=((uint)str[6]<<16); 217 | case 6 : b+=((uint)str[5]<<8); 218 | case 5 : b+=str[4]; 219 | case 4 : a+=((uint)str[3]<<24); 220 | case 3 : a+=((uint)str[2]<<16); 221 | case 2 : a+=((uint)str[1]<<8); 222 | case 1 : a+=str[0]; 223 | /* case 0: nothing left to add */ 224 | } 225 | mix(a,b,c); 226 | /*-------------------------------------------- report the result */ 227 | return c; 228 | } 229 | 230 | BOBHash32::~BOBHash32() 231 | { 232 | 233 | } 234 | #endif //_BOBHASH32_H 235 | -------------------------------------------------------------------------------- /BOBHash64.h: -------------------------------------------------------------------------------- 1 | #ifndef _BOBHASH64_H 2 | #define _BOBHASH64_H 3 | #include 4 | using namespace std; 5 | 6 | typedef unsigned int uint; 7 | typedef 
unsigned long long int uint64; 8 | 9 | #define MAX_PRIME64 1229 10 | #define MAX_BIG_PRIME64 50 11 | 12 | class BOBHash64 13 | { 14 | public: 15 | BOBHash64(); 16 | ~BOBHash64(); 17 | BOBHash64(uint prime64Num); 18 | void initialize(uint prime64Num); 19 | uint64 run(const char * str, uint len); 20 | private: 21 | uint prime64Num; 22 | }; 23 | 24 | uint64 big_prime64[MAX_BIG_PRIME64] = { 25 | 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269, 20287, 26 | 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359, 20369, 27 | 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477, 20479, 28 | 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563, 20593, 29 | 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707, 20717 30 | }; 31 | uint64 prime64[MAX_PRIME64] = { 32 | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 33 | 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 34 | 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 35 | 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 36 | 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 37 | 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 38 | 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 39 | 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 40 | 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 41 | 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 42 | 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 43 | 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 44 | 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 45 | 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 46 | 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 47 | 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 48 | 947, 953, 967, 971, 977, 983, 991, 997, 49 | 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 50 | 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 51 | 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 52 | 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 53 | 1289, 1291, 1297, 1301, 1303, 
1307, 1319, 1321, 1327, 1361, 54 | 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 55 | 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 56 | 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 57 | 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 58 | 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 59 | 1723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 60 | 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 61 | 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 62 | 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 63 | 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 64 | 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 65 | 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 66 | 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 67 | 2351, 2357, 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 68 | 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 69 | 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 70 | 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 71 | 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 72 | 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 73 | 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 74 | 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 75 | 2971, 2999, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 76 | 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 77 | 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 78 | 3253, 3257, 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 79 | 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 80 | 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 81 | 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 82 | 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 83 | 3637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 84 | 3719, 3727, 
3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 85 | 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 86 | 3889, 3907, 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 87 | 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 88 | 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 89 | 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 90 | 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 91 | 4289, 4297, 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 92 | 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 93 | 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 94 | 4567, 4583, 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 95 | 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 96 | 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 97 | 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 98 | 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 99 | 4999, 5003, 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 100 | 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 101 | 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 102 | 5273, 5279, 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 103 | 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 104 | 5441, 5443, 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 105 | 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 106 | 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 107 | 5689, 5693, 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 108 | 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 109 | 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 110 | 5927, 5939, 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 111 | 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 112 | 6131, 6133, 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 113 | 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 114 | 6299, 6301, 6311, 6317, 6323, 6329, 6337, 
6343, 6353, 6359, 115 | 6361, 6367, 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 116 | 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 117 | 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 118 | 6661, 6673, 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 119 | 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 120 | 6829, 6833, 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 121 | 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 122 | 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 123 | 7079, 7103, 7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 124 | 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 125 | 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 126 | 7393, 7411, 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 127 | 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 128 | 7559, 7561, 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 129 | 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 130 | 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 131 | 7823, 7829, 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 132 | 7907, 7919, 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 133 | 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 134 | 8101, 8111, 8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 135 | 8209, 8219, 8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 136 | 8287, 8291, 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 137 | 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 138 | 8467, 8501, 8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 139 | 8581, 8597, 8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 140 | 8669, 8677, 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 141 | 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 142 | 8821, 8831, 8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 143 | 8923, 8929, 8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 144 | 9007, 9011, 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 
145 | 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 146 | 9187, 9199, 9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 147 | 9281, 9283, 9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 148 | 9371, 9377, 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 149 | 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 150 | 9521, 9533, 9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 151 | 9629, 9631, 9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 152 | 9721, 9733, 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 153 | 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, 9859, 9871, 154 | 9883, 9887, 9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 155 | 9973 156 | }; 157 | 158 | #define mix64(a,b,c) \ 159 | { \ 160 | a -= b; a -= c; a ^= (c>>43); \ 161 | b -= c; b -= a; b ^= (a<<9); \ 162 | c -= a; c -= b; c ^= (b>>8); \ 163 | a -= b; a -= c; a ^= (c>>38); \ 164 | b -= c; b -= a; b ^= (a<<23); \ 165 | c -= a; c -= b; c ^= (b>>5); \ 166 | a -= b; a -= c; a ^= (c>>35); \ 167 | b -= c; b -= a; b ^= (a<<49); \ 168 | c -= a; c -= b; c ^= (b>>11); \ 169 | a -= b; a -= c; a ^= (c>>12); \ 170 | b -= c; b -= a; b ^= (a<<18); \ 171 | c -= a; c -= b; c ^= (b>>22); \ 172 | } 173 | 174 | BOBHash64::BOBHash64() 175 | { 176 | this->prime64Num = 0; 177 | } 178 | 179 | BOBHash64::BOBHash64(uint prime64Num) 180 | { 181 | this->prime64Num = prime64Num; 182 | } 183 | 184 | void BOBHash64::initialize(uint prime64Num) 185 | { 186 | this->prime64Num = prime64Num; 187 | } 188 | 189 | uint64 BOBHash64::run(const char * str, uint len) 190 | { 191 | //register ub4 a,b,c,len; 192 | uint64 a,b,c; 193 | /* Set up the internal state */ 194 | //len = length; 195 | a = b = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ 196 | 197 | c = prime64[this->prime64Num]; /* the previous hash value */ 198 | 199 | /*---------------------------------------- handle most of the key */ 200 | 201 | while (len >= 24) 202 | { 203 | a += (str[0]+((uint64)str[1]<< 
8)+((uint64)str[2]<<16)+((uint64)str[3]<<24) 204 | +((uint64)str[4]<<32)+((uint64)str[5]<<40)+((uint64)str[6]<<48)+((uint64)str[7]<<56)); 205 | b += (str[8]+((uint64)str[9]<< 8)+((uint64)str[10]<<16)+((uint64)str[11]<<24) 206 | +((uint64)str[12]<<32)+((uint64)str[13]<<40)+((uint64)str[14]<<48)+((uint64)str[15]<<56)); 207 | c += (str[16]+((uint64)str[17]<< 8)+((uint64)str[18]<<16)+((uint64)str[19]<<24) 208 | +((uint64)str[20]<<32)+((uint64)str[21]<<40)+((uint64)str[22]<<48)+((uint64)str[23]<<56)); 209 | mix64(a,b,c); 210 | str += 24; 211 | len -= 24; 212 | } 213 | 214 | /*------------------------------------- handle the last 11 bytes */ 215 | c += len; 216 | switch(len) /* all the case statements fall through */ 217 | { 218 | case 23: c+=((uint64)str[22]<<56); 219 | case 22: c+=((uint64)str[21]<<48); 220 | case 21: c+=((uint64)str[20]<<40); 221 | case 20: c+=((uint64)str[19]<<32); 222 | case 19: c+=((uint64)str[18]<<24); 223 | case 18: c+=((uint64)str[17]<<16); 224 | case 17: c+=((uint64)str[16]<<8); 225 | /* the first byte of c is reserved for the length */ 226 | case 16: b+=((uint64)str[15]<<56); 227 | case 15: b+=((uint64)str[14]<<48); 228 | case 14: b+=((uint64)str[13]<<40); 229 | case 13: b+=((uint64)str[12]<<32); 230 | case 12: b+=((uint64)str[11]<<24); 231 | case 11: b+=((uint64)str[10]<<16); 232 | case 10: b+=((uint64)str[ 9]<<8); 233 | case 9: b+=((uint64)str[ 8]); 234 | case 8: a+=((uint64)str[ 7]<<56); 235 | case 7: a+=((uint64)str[ 6]<<48); 236 | case 6: a+=((uint64)str[ 5]<<40); 237 | case 5: a+=((uint64)str[ 4]<<32); 238 | case 4: a+=((uint64)str[ 3]<<24); 239 | case 3: a+=((uint64)str[ 2]<<16); 240 | case 2: a+=((uint64)str[ 1]<<8); 241 | case 1: a+=((uint64)str[ 0]); 242 | /* case 0: nothing left to add */ 243 | } 244 | mix64(a,b,c); 245 | /*-------------------------------------------- report the result */ 246 | // static int cnt = 0; 247 | // if(cnt == 0) 248 | // { 249 | // printf("%x", c); 250 | // cnt++; 251 | // } 252 | // printf("%llx\n", c); 
253 | return c; 254 | } 255 | 256 | BOBHash64::~BOBHash64() 257 | { 258 | 259 | } 260 | #endif //_BOBHASH64_H 261 | -------------------------------------------------------------------------------- /CSS.h: -------------------------------------------------------------------------------- 1 | #ifndef _CSS_H 2 | #define _CSS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "BOBHASH32.h" 12 | #include "params.h" 13 | #include "BOBHASH64.h" 14 | #define rep(i,a,n) for(int i=a;i<=n;i++) 15 | using namespace std; 16 | class CSS 17 | { 18 | private: 19 | int head[M+10],Next[M+10],M2,K; 20 | struct node {int wz; string ID;} ID_index[M+10]; 21 | int R[MAX_MEM+10],Counter_Array[MAX_MEM+10],m,Last; 22 | int Value_Index[MAX_MEM+10]; 23 | BOBHash32 * bobhash; 24 | public: 25 | CSS(int M2,int K):M2(M2),K(K) {bobhash=new BOBHash32(1001);} 26 | void clear() 27 | { 28 | Value_Index[0]=M2-1; ID_index[0].wz=-1; 29 | for (int i=0; irun(ST.c_str(),ST.size()))%M2; 34 | } 35 | int Find(string x) 36 | { 37 | int X=Hash(x); 38 | int now=head[X]; 39 | while (now && ID_index[now].ID!=x) now=Next[now]; 40 | if (!now) return -1; else 41 | return ID_index[now].wz; 42 | } 43 | void Change(int x,int y,int z) 44 | { 45 | int now=head[x]; 46 | while (now && ID_index[now].wz!=y) now=Next[now]; 47 | ID_index[now].wz=z; 48 | } 49 | void Change(int x,int y,int z,string a) 50 | { 51 | int now=head[x]; 52 | while (now && (ID_index[now].wz!=y || ID_index[now].ID!=a)) now=Next[now]; 53 | ID_index[now].wz=z; 54 | } 55 | void Insert_Hash(int x,string y,int z) 56 | { 57 | ID_index[Last].ID=y; ID_index[Last].wz=z; 58 | Next[Last]=head[x]; 59 | head[x]=Last; 60 | } 61 | void Delete(int x,int y) 62 | { 63 | if (ID_index[head[x]].wz==y) {Last=head[x]; head[x]=Next[head[x]]; return;} 64 | int now=head[x]; 65 | while (now && ID_index[Next[now]].wz!=y) now=Next[now]; 66 | Last=Next[now]; 67 | Next[now]=Next[Next[now]]; 68 | } 69 | void Insert(string x) 70 | 
{ 71 | int p=Find(x); 72 | if (p!=-1) 73 | { 74 | int Q=Counter_Array[p]+1; 75 | int z=Value_Index[Counter_Array[p]]; 76 | if (z!=p) 77 | { 78 | Change(R[z],z,p); 79 | R[p]=R[z]; 80 | Value_Index[Counter_Array[p]]=z-1; 81 | } 82 | else 83 | if (z && Counter_Array[z-1]==Counter_Array[z]) Value_Index[Counter_Array[z]]=z-1; else Value_Index[Counter_Array[z]]=0; 84 | Value_Index[Q]=max(Value_Index[Q],z); 85 | R[z]=Hash(x); 86 | Counter_Array[z]=Q; 87 | Change(R[z],p,z,x); 88 | } 89 | else 90 | { 91 | Delete(R[0],0); int Q=Counter_Array[0]+1; 92 | int z=Value_Index[Counter_Array[0]]; 93 | if (z) 94 | { 95 | Change(R[z],z,0); 96 | R[0]=R[z]; 97 | Value_Index[Counter_Array[0]]=z-1; 98 | } else 99 | Value_Index[Counter_Array[0]]=0; 100 | Value_Index[Q]=max(Value_Index[Q],z); 101 | R[z]=Hash(x); 102 | Counter_Array[z]=Q; 103 | Insert_Hash(R[z],x,z); 104 | } 105 | } 106 | struct Node {string x; int y;} q[MAX_MEM+10]; 107 | static int cmp(Node i,Node j) {return i.y>j.y;} 108 | void work() 109 | { 110 | int CNT=0; 111 | for (int i=M2-1; i>=M2-K; i--) 112 | { 113 | int now=head[R[i]]; 114 | while (1) {if (ID_index[now].wz==i) {q[CNT].x=ID_index[now].ID;q[CNT].y=Counter_Array[i]; CNT++; break;} now=Next[now]; } 115 | } 116 | sort(q,q+K,cmp); 117 | } 118 | pair Query(int k) 119 | { 120 | return make_pair(q[k].x,q[k].y); 121 | } 122 | }; 123 | #endif 124 | -------------------------------------------------------------------------------- /ColdFilter/CF.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "SC_SpaceSaving.h" 3 | 4 | #include 5 | #include 6 | 7 | #define MAX_INSERT_PACKAGE 10000000 8 | 9 | unordered_map ground_truth; 10 | uint32_t insert_data[MAX_INSERT_PACKAGE]; 11 | uint32_t query_data[MAX_INSERT_PACKAGE]; 12 | 13 | int load_data(const char *filename) { 14 | FILE *pf = fopen(filename, "rb"); 15 | if (!pf) { 16 | cerr << filename << " not found." 
<< endl; 17 | exit(-1); 18 | } 19 | 20 | ground_truth.clear(); 21 | 22 | char ip[13]; 23 | int ret = 0; 24 | while (fread(ip, 1, 13, pf)) { 25 | uint32_t key = *(uint32_t *) ip; 26 | insert_data[ret] = key; 27 | ground_truth[key]++; 28 | ret++; 29 | if (ret == MAX_INSERT_PACKAGE) 30 | break; 31 | } 32 | fclose(pf); 33 | 34 | int i = 0; 35 | for (auto itr: ground_truth) { 36 | query_data[i++] = itr.first; 37 | } 38 | 39 | printf("Total stream size = %d\n", ret); 40 | printf("Distinct item number = %d\n", ground_truth.size()); 41 | 42 | int max_freq = 0; 43 | for (auto itr: ground_truth) { 44 | max_freq = std::max(max_freq, itr.second); 45 | } 46 | printf("Max frequency = %d\n", max_freq); 47 | 48 | return ret; 49 | } 50 | // return accuary and AAE 51 | pair ss_compare_value_with_ground_truth(uint32_t k, vector> & result) 52 | { 53 | // prepare top-k ground truth 54 | vector> gt_ordered; 55 | for (auto itr: ground_truth) { 56 | gt_ordered.emplace_back(itr); 57 | } 58 | std::sort(gt_ordered.begin(), gt_ordered.end(), [](const std::pair &left, const std::pair &right) { 59 | return left.second > right.second; 60 | }); 61 | set set_gt; 62 | int i = 0; 63 | int th; 64 | for (auto itr: gt_ordered) { 65 | if (i >= k && itr.second < th) { 66 | break; 67 | } 68 | set_gt.insert(itr.first); 69 | i++; 70 | if (i == k) { 71 | th = itr.second; 72 | } 73 | } 74 | 75 | double aae = 0; 76 | 77 | set set_rp; 78 | unordered_map mp_rp; 79 | 80 | int lp = 0; 81 | for (lp = 0; lp < k; ++lp) { 82 | set_rp.insert(result[lp].first); 83 | mp_rp[result[lp].first] = result[lp].second; 84 | } 85 | 86 | vector intersection(k); 87 | auto end_itr = std::set_intersection( 88 | set_gt.begin(), set_gt.end(), 89 | set_rp.begin(), set_rp.end(), 90 | intersection.begin() 91 | ); 92 | 93 | for (auto itr = intersection.begin(); itr != end_itr; ++itr) { 94 | int diff = int(mp_rp[*itr]) - int(ground_truth[*itr]); 95 | aae += abs(diff); 96 | } 97 | 98 | int num = end_itr - intersection.begin(); 99 | num = num 
> 0 ? num : 1; 100 | 101 | return make_pair(double(num) / k, aae / num); 102 | } 103 | 104 | void demo_ss(int packet_num) 105 | { 106 | printf("\nExp for top-k query:\n"); 107 | 108 | const int k = 32; 109 | printf("\tk = %d\n", k); 110 | 111 | auto sc_ss = new SC_SpaceSaving(); 112 | 113 | sc_ss->build(insert_data, packet_num); 114 | 115 | sc_ss->get_top_k_with_frequency(k, result); 116 | ret = ss_compare_value_with_ground_truth(k, result); 117 | printf("\tSpaceSaving with SC: 32 buckets, 5KB SC\n"); 118 | printf("\t accuracy %lf, AAE %lf\n", ret.first, ret.second); 119 | } 120 | int main() 121 | { 122 | int packet_num = load_data("u1"); 123 | demo_cu(packet_num); 124 | demo_ss(packet_num); 125 | demo_flow_radar(packet_num); 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /ColdFilter/SC.h: -------------------------------------------------------------------------------- 1 | #ifndef _SC_H 2 | #define _SC_H 3 | 4 | //#include "params.h" 5 | #include "BOBHash32.h" 6 | #include "SPA.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef UNIX 12 | #include 13 | #else 14 | #include 15 | #endif 16 | using namespace std; 17 | 18 | #define MAX_HASH_NUM 4 19 | 20 | template 21 | class StreamClassifier 22 | { 23 | // static constexpr int bucket_num = 1000; 24 | // static constexpr int counter_num = 16; 25 | 26 | static constexpr int buffer_size = bucket_num * counter_num * 8; 27 | static constexpr int remained = memory_in_bytes - buffer_size; 28 | 29 | static constexpr int d1 = 3; 30 | static constexpr int m1_in_bytes = int(remained * l1_ratio / 100.0); 31 | static constexpr int d2 = 3; 32 | static constexpr int m2_in_bytes = int(remained * (100 - l1_ratio) / 100.0); 33 | 34 | uint32_t ID[bucket_num][counter_num] __attribute__ ((aligned (16))); 35 | int counter[bucket_num][counter_num]; 36 | int cur_pos[bucket_num]; 37 | 38 | static constexpr int w1 = m1_in_bytes * 8 / 4; 39 | static constexpr int 
w_word = m1_in_bytes * 8 / 4 / 16; 40 | static constexpr int w2 = m2_in_bytes * 8 / 16; 41 | 42 | uint64_t L1[m1_in_bytes * 8 / 4 / 16]; // Layer 1 is organized as word, one word contains 16 counter, one counter consist of 4 bit 43 | uint16_t L2[m2_in_bytes * 8 / 16]; // Layer 2 is organized as counter, one counter consist of 16 bit 44 | 45 | SPA * spa; 46 | 47 | BOBHash32 * bobhash1; 48 | BOBHash32 * bobhash2[d2]; 49 | 50 | int cur_kick; 51 | 52 | void clear_data() 53 | { 54 | cur_kick = 0; 55 | memset(ID, 0, sizeof(ID)); 56 | memset(counter, 0, sizeof(counter)); 57 | memset(cur_pos, 0, sizeof(cur_pos)); 58 | memset(L1, 0, sizeof(L1)); 59 | memset(L2, 0, sizeof(L2)); 60 | } 61 | public: 62 | StreamClassifier() 63 | { 64 | bobhash1 = new BOBHash32(500); 65 | for (int i = 0; i < d2; i++) { 66 | bobhash2[i] = new BOBHash32(1000 + i); 67 | } 68 | clear_data(); 69 | spa = NULL; 70 | } 71 | 72 | void print_basic_info() 73 | { 74 | printf("Stream Classifer\n"); 75 | printf("\tSIMD buffer: %d counters, %.4lf MB occupies\n", bucket_num * counter_num, bucket_num * counter_num * 8.0 / 1024 / 1024); 76 | printf("\tL1: %d counters, %.4lf MB occupies\n", w1, w1 * 0.5 / 1024 / 1024); 77 | printf("\tL2: %d counters, %.4lf MB occupies\n", w2, w2 * 2.0 / 1024 / 1024); 78 | } 79 | 80 | ~StreamClassifier() 81 | { 82 | delete bobhash1; 83 | for (int i = 0; i < d2; i++) 84 | delete bobhash2[i]; 85 | } 86 | 87 | //periodical refreshing for continuous top-k; 88 | void init_array_period() 89 | { 90 | for (int i = 0; i < w_word; i++) { 91 | uint64_t temp = L1[i]; 92 | 93 | temp = (temp & (0xF)) == 0xF ? temp : (temp & 0xFFFFFFFFFFFFFFF0); 94 | temp = (temp & (0xF0)) == 0xF0 ? temp : (temp & 0xFFFFFFFFFFFFFF0F); 95 | temp = (temp & (0xF00)) == 0xF00 ? temp : (temp & 0xFFFFFFFFFFFFF0FF); 96 | temp = (temp & (0xF000)) == 0xF000 ? temp : (temp & 0xFFFFFFFFFFFF0FFF); 97 | 98 | temp = (temp & (0xF0000)) == 0xF0000 ? 
temp : (temp & 0xFFFFFFFFFFF0FFFF); 99 | temp = (temp & (0xF00000)) == 0xF00000 ? temp : (temp & 0xFFFFFFFFFF0FFFFF); 100 | temp = (temp & (0xF000000)) == 0xF000000 ? temp : (temp & 0xFFFFFFFFF0FFFFFF); 101 | temp = (temp & (0xF0000000)) == 0xF0000000 ? temp : (temp & 0xFFFFFFFF0FFFFFFF); 102 | 103 | temp = (temp & (0xF00000000)) == 0xF00000000 ? temp : (temp & 0xFFFFFFF0FFFFFFFF); 104 | temp = (temp & (0xF000000000)) == 0xF000000000 ? temp : (temp & 0xFFFFFF0FFFFFFFFF); 105 | temp = (temp & (0xF0000000000)) == 0xF0000000000 ? temp : (temp & 0xFFFFF0FFFFFFFFFF); 106 | temp = (temp & (0xF00000000000)) == 0xF00000000000 ? temp : (temp & 0xFFFF0FFFFFFFFFFF); 107 | 108 | temp = (temp & (0xF000000000000)) == 0xF000000000000 ? temp : (temp & 0xFFF0FFFFFFFFFFFF); 109 | temp = (temp & (0xF0000000000000)) == 0xF0000000000000 ? temp : (temp & 0xFF0FFFFFFFFFFFFF); 110 | temp = (temp & (0xF00000000000000)) == 0xF00000000000000 ? temp : (temp & 0xF0FFFFFFFFFFFFFF); 111 | temp = (temp & (0xF000000000000000)) == 0xF000000000000000 ? temp : (temp & 0x0FFFFFFFFFFFFFFF); 112 | 113 | L1[i] = temp; 114 | } 115 | 116 | for (int i = 0; i < w2; i++) { 117 | short int temp = L2[i]; 118 | L2[i] = (temp == threshold) ? 
temp : 0; 119 | } 120 | } 121 | 122 | void init_array_all() 123 | { 124 | memset(L1, 0, sizeof(uint64_t) * w_word); 125 | memset(L2, 0, sizeof(short int) * w2); 126 | } 127 | 128 | void init_spa(SPA * _spa) 129 | { 130 | spa = _spa; 131 | } 132 | 133 | void insert(uint32_t key) 134 | { 135 | int bucket_id = key % bucket_num; 136 | // int bucket_id = key & 0x2FF; 137 | 138 | // the code below assume counter per buckets is 16 139 | 140 | const __m128i item = _mm_set1_epi32((int)key); 141 | int matched; 142 | 143 | if (counter_num == 16) { 144 | __m128i *keys_p = (__m128i *)ID[bucket_id]; 145 | 146 | __m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]); 147 | __m128i b_comp = _mm_cmpeq_epi32(item, keys_p[1]); 148 | __m128i c_comp = _mm_cmpeq_epi32(item, keys_p[2]); 149 | __m128i d_comp = _mm_cmpeq_epi32(item, keys_p[3]); 150 | 151 | a_comp = _mm_packs_epi32(a_comp, b_comp); 152 | c_comp = _mm_packs_epi32(c_comp, d_comp); 153 | a_comp = _mm_packs_epi32(a_comp, c_comp); 154 | 155 | matched = _mm_movemask_epi8(a_comp); 156 | } else if (counter_num == 4) { 157 | __m128i *keys_p = (__m128i *)ID[bucket_id]; 158 | __m128i a_comp = _mm_cmpeq_epi32(item, keys_p[0]); 159 | matched = _mm_movemask_ps(*(__m128 *)&a_comp); 160 | } else { 161 | throw std::logic_error("Not implemented."); 162 | } 163 | 164 | if (matched != 0) { 165 | //return 32 if input is zero; 166 | int matched_index = _tzcnt_u32((uint32_t)matched); 167 | 168 | ++counter[bucket_id][matched_index]; 169 | return; 170 | } 171 | 172 | int cur_pos_now = cur_pos[bucket_id]; 173 | if (cur_pos_now != counter_num) { 174 | // printf("%d\n", cur_pos_now); 175 | ID[bucket_id][cur_pos_now] = key; 176 | ++counter[bucket_id][cur_pos_now] ; 177 | ++cur_pos[bucket_id]; 178 | return; 179 | } 180 | 181 | /****************randomly choose one counter to kick!******************/ 182 | insert_SC(ID[bucket_id][cur_kick], counter[bucket_id][cur_kick]); 183 | ID[bucket_id][cur_kick] = key; 184 | counter[bucket_id][cur_kick] = 1; 185 | 186 | 
cur_kick = (cur_kick + 1) % counter_num; 187 | } 188 | 189 | void insert_SC(uint32_t kick_ID, int kick_f) 190 | { 191 | int v1 = 1 << 30; 192 | 193 | int value[MAX_HASH_NUM]; 194 | int index[MAX_HASH_NUM]; 195 | int offset[MAX_HASH_NUM]; 196 | 197 | uint64_t hash_value = (bobhash1->run((const char *)&kick_ID, 4)); 198 | int word_index = hash_value % w_word; 199 | hash_value >>= 16; 200 | 201 | uint64_t temp = L1[word_index]; 202 | for (int i = 0; i < d1; i++) { 203 | offset[i] = (hash_value & 0xF); 204 | value[i] = (temp >> (offset[i] << 2)) & 0xF; 205 | v1 = std::min(v1, value[i]); 206 | 207 | hash_value >>= 4; 208 | } 209 | 210 | int temp2 = v1 + kick_f; 211 | if (temp2 <= 15) { // maybe optimized use SIMD? 212 | for (int i = 0; i < d1; i++) { 213 | int temp3 = ((temp >> (offset[i] << 2)) & 0xF); 214 | if (temp3 < temp2) { 215 | temp += ((uint64)(temp2 - temp3) << (offset[i] << 2)); 216 | } 217 | } 218 | L1[word_index] = temp; 219 | return; 220 | } 221 | 222 | for (int i = 0; i < d1; i++) { 223 | temp |= ((uint64_t)0xF << (offset[i] << 2)); 224 | } 225 | L1[word_index] = temp; 226 | 227 | int delta1 = 15 - v1; 228 | kick_f -= delta1; 229 | 230 | int v2 = 1 << 30; 231 | for (int i = 0; i < d2; i++) { 232 | index[i] = (bobhash2[i]->run((const char *)&kick_ID, 4)) % w2; 233 | value[i] = L2[index[i]]; 234 | v2 = std::min(value[i], v2); 235 | } 236 | 237 | temp2 = v2 + kick_f; 238 | if (temp2 <= threshold) { 239 | for (int i = 0; i < d2; i++) { 240 | L2[index[i]] = (L2[index[i]] > temp2) ? 
L2[index[i]] : temp2; 241 | } 242 | return; 243 | } 244 | 245 | for (int i = 0; i < d2; i++) { 246 | L2[index[i]] = threshold; 247 | } 248 | 249 | int delta2 = threshold - v2; 250 | kick_f -= delta2; 251 | 252 | spa->insert(kick_ID, kick_f); 253 | } 254 | 255 | void refresh() 256 | { 257 | for (int i = 0; i < bucket_num; i++) { 258 | for (int j = 0; j < counter_num; j++) { 259 | insert_SC(ID[i][j], counter[i][j]); 260 | ID[i][j] = counter[i][j] = 0; 261 | } 262 | cur_pos[i] = 0; 263 | } 264 | return; 265 | } 266 | 267 | int query(uint32_t key) 268 | { 269 | int v1 = 1 << 30; 270 | 271 | // constexpr int max_d = d1 > d2 ? d1 : d2; 272 | // int value[max_d]; 273 | // int index[max_d]; 274 | // int offset[max_d]; 275 | 276 | uint32_t hash_value = (bobhash1->run((const char *)&key, 4)); 277 | int word_index = hash_value % w_word; 278 | hash_value >>= 16; 279 | 280 | uint64_t temp = L1[word_index]; 281 | for (int i = 0; i < d1; i++) { 282 | int of, val; 283 | of = (hash_value & 0xF); 284 | val = (temp >> (of << 2)) & 0xF; 285 | v1 = std::min(val, v1); 286 | hash_value >>= 4; 287 | } 288 | 289 | if (v1 != 15) 290 | return v1; 291 | 292 | int v2 = 1 << 30; 293 | for (int i = 0; i < d2; i++) { 294 | int index = (bobhash2[i]->run((const char *)&key, 4)) % w2; 295 | int value = L2[index]; 296 | v2 = std::min(value, v2); 297 | } 298 | 299 | return v1 + v2; 300 | } 301 | }; 302 | 303 | #endif//_SC_H 304 | -------------------------------------------------------------------------------- /ColdFilter/SC_spacesaving.h: -------------------------------------------------------------------------------- 1 | #ifndef STREAMCLASSIFIER_SC_SPACESAVING_H 2 | #define STREAMCLASSIFIER_SC_SPACESAVING_H 3 | 4 | #include "SC.h" 5 | #include 6 | 7 | template 8 | class SC_SpaceSaving 9 | { 10 | StreamClassifier 65535 ? 
65535 : (threshold - 15)), 35> sc; 11 | SpaceSaving ss; 12 | public: 13 | SC_SpaceSaving() { 14 | sc.init_spa(&ss); 15 | // sc.print_basic_info(); 16 | } 17 | 18 | inline void build(uint32_t * items, int n) 19 | { 20 | for (int i = 0; i < n; ++i) { 21 | sc.insert(items[i]); 22 | } 23 | 24 | sc.refresh(); 25 | } 26 | 27 | void get_top_k(uint32_t k, uint32_t items[]) 28 | { 29 | ss.get_top_k(k, items); 30 | } 31 | 32 | void get_top_k_with_frequency(uint32_t k, vector> & result) 33 | { 34 | ss.get_top_k_with_frequency(k, result); 35 | // for (auto itr: result) { 36 | for (int i = 0; i < k; ++i) { 37 | result[i].second += std::min(65535u + 15u, threshold); 38 | } 39 | } 40 | }; 41 | 42 | #endif //STREAMCLASSIFIER_SC_SPACESAVING_H 43 | -------------------------------------------------------------------------------- /ColdFilter/SPA.h: -------------------------------------------------------------------------------- 1 | #ifndef _SPA_H 2 | #define _SPA_H 3 | using namespace std; 4 | 5 | class SPA 6 | { 7 | public: 8 | virtual void insert(unsigned int key, int f) = 0; 9 | }; 10 | 11 | #endif // _SPA_H 12 | -------------------------------------------------------------------------------- /CounterTree/BOBHash32.h: -------------------------------------------------------------------------------- 1 | #ifndef _BOBHASH32_H 2 | #define _BOBHASH32_H 3 | #include 4 | using namespace std; 5 | 6 | typedef unsigned int uint; 7 | typedef unsigned long long int uint64; 8 | 9 | 10 | #define MAX_PRIME32 1229 11 | #define MAX_BIG_PRIME32 50 12 | 13 | class BOBHash32 14 | { 15 | public: 16 | BOBHash32(); 17 | ~BOBHash32(); 18 | BOBHash32(uint prime32Num); 19 | void initialize(uint prime32Num); 20 | uint run(const char * str, uint len); 21 | private: 22 | uint prime32Num; 23 | }; 24 | 25 | uint big_prime3232[MAX_BIG_PRIME32] = { 26 | 20177, 20183, 20201, 20219, 20231, 20233, 20249, 20261, 20269, 20287, 27 | 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359, 20369, 28 | 20389, 20393, 
20399, 20407, 20411, 20431, 20441, 20443, 20477, 20479, 29 | 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563, 20593, 30 | 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707, 20717 31 | }; 32 | uint prime32[MAX_PRIME32] = { 33 | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 34 | 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 35 | 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 36 | 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 37 | 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 38 | 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 39 | 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 40 | 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 41 | 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 42 | 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 43 | 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 44 | 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 45 | 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 46 | 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 47 | 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 48 | 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 49 | 947, 953, 967, 971, 977, 983, 991, 997, 50 | 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 51 | 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 52 | 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 53 | 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 54 | 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 55 | 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 56 | 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 57 | 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 58 | 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 59 | 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 60 | 1723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 61 | 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 62 | 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 63 | 1979, 
1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 64 | 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 65 | 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 66 | 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 67 | 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 68 | 2351, 2357, 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 69 | 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 70 | 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 71 | 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 72 | 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 73 | 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 74 | 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 75 | 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 76 | 2971, 2999, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 77 | 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 78 | 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 79 | 3253, 3257, 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 80 | 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 81 | 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 82 | 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 83 | 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 84 | 3637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 85 | 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 86 | 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 87 | 3889, 3907, 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 88 | 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 89 | 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 90 | 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 91 | 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 92 | 4289, 4297, 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 93 | 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 
4481, 94 | 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 95 | 4567, 4583, 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 96 | 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 97 | 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 98 | 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 99 | 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 100 | 4999, 5003, 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 101 | 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 102 | 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 103 | 5273, 5279, 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 104 | 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 105 | 5441, 5443, 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 106 | 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 107 | 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 108 | 5689, 5693, 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 109 | 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 110 | 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 111 | 5927, 5939, 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 112 | 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 113 | 6131, 6133, 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 114 | 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 115 | 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 116 | 6361, 6367, 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 117 | 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 118 | 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 119 | 6661, 6673, 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 120 | 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 121 | 6829, 6833, 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 122 | 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 123 | 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 124 | 7079, 7103, 
7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 125 | 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 126 | 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 127 | 7393, 7411, 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 128 | 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 129 | 7559, 7561, 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 130 | 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 131 | 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 132 | 7823, 7829, 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 133 | 7907, 7919, 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 134 | 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 135 | 8101, 8111, 8117, 8123, 8147, 8161, 8167, 8171, 8179, 8191, 136 | 8209, 8219, 8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 137 | 8287, 8291, 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 138 | 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, 8447, 8461, 139 | 8467, 8501, 8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 140 | 8581, 8597, 8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 141 | 8669, 8677, 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 142 | 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 143 | 8821, 8831, 8837, 8839, 8849, 8861, 8863, 8867, 8887, 8893, 144 | 8923, 8929, 8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 145 | 9007, 9011, 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 146 | 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, 9173, 9181, 147 | 9187, 9199, 9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 148 | 9281, 9283, 9293, 9311, 9319, 9323, 9337, 9341, 9343, 9349, 149 | 9371, 9377, 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 150 | 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 151 | 9521, 9533, 9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 152 | 9629, 9631, 9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 153 | 9721, 9733, 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 154 | 9803, 9811, 9817, 9829, 9833, 
9839, 9851, 9857, 9859, 9871, 155 | 9883, 9887, 9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 156 | 9973 157 | }; 158 | 159 | #define mix(a,b,c) \ 160 | { \ 161 | a -= b; a -= c; a ^= (c>>13); \ 162 | b -= c; b -= a; b ^= (a<<8); \ 163 | c -= a; c -= b; c ^= (b>>13); \ 164 | a -= b; a -= c; a ^= (c>>12); \ 165 | b -= c; b -= a; b ^= (a<<16); \ 166 | c -= a; c -= b; c ^= (b>>5); \ 167 | a -= b; a -= c; a ^= (c>>3); \ 168 | b -= c; b -= a; b ^= (a<<10); \ 169 | c -= a; c -= b; c ^= (b>>15); \ 170 | } 171 | 172 | BOBHash32::BOBHash32() 173 | { 174 | this->prime32Num = 0; 175 | } 176 | 177 | BOBHash32::BOBHash32(uint prime32Num) 178 | { 179 | this->prime32Num = prime32Num; 180 | } 181 | 182 | void BOBHash32::initialize(uint prime32Num) 183 | { 184 | this->prime32Num = prime32Num; 185 | } 186 | 187 | uint BOBHash32::run(const char * str, uint len) 188 | { 189 | //register ub4 a,b,c,len; 190 | uint a,b,c; 191 | uint initval = 0; 192 | /* Set up the internal state */ 193 | //len = length; 194 | a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ 195 | c = prime32[this->prime32Num]; /* the previous hash value */ 196 | 197 | /*---------------------------------------- handle most of the key */ 198 | while (len >= 12) 199 | { 200 | a += (str[0] +((uint)str[1]<<8) +((uint)str[2]<<16) +((uint)str[3]<<24)); 201 | b += (str[4] +((uint)str[5]<<8) +((uint)str[6]<<16) +((uint)str[7]<<24)); 202 | c += (str[8] +((uint)str[9]<<8) +((uint)str[10]<<16)+((uint)str[11]<<24)); 203 | mix(a,b,c); 204 | str += 12; len -= 12; 205 | } 206 | 207 | /*------------------------------------- handle the last 11 bytes */ 208 | c += len; 209 | switch(len) /* all the case statements fall through */ 210 | { 211 | case 11: c+=((uint)str[10]<<24); 212 | case 10: c+=((uint)str[9]<<16); 213 | case 9 : c+=((uint)str[8]<<8); 214 | /* the first byte of c is reserved for the length */ 215 | case 8 : b+=((uint)str[7]<<24); 216 | case 7 : b+=((uint)str[6]<<16); 217 | case 6 : b+=((uint)str[5]<<8); 218 | 
case 5 : b+=str[4]; 219 | case 4 : a+=((uint)str[3]<<24); 220 | case 3 : a+=((uint)str[2]<<16); 221 | case 2 : a+=((uint)str[1]<<8); 222 | case 1 : a+=str[0]; 223 | /* case 0: nothing left to add */ 224 | } 225 | mix(a,b,c); 226 | /*-------------------------------------------- report the result */ 227 | return c; 228 | } 229 | 230 | BOBHash32::~BOBHash32() 231 | { 232 | 233 | } 234 | #endif //_BOBHASH32_H 235 | -------------------------------------------------------------------------------- /CounterTree/Counter_Tree.h: -------------------------------------------------------------------------------- 1 | #ifndef _Counter_Tree_H 2 | #define _Counter_Tree_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "params.h" 12 | #include "BOBHASH32.h" 13 | #define Total 10000005 14 | #define Count 4 15 | using namespace std; 16 | class Counter_Tree 17 | { 18 | private: 19 | BOBHash32 * bobhash[10]; 20 | struct node {int val,idx; string ID;} Heap[Total]; 21 | int tot[13][100000],head[N],next[Total],k,WZ,Q,p,cnt,o,MIN,M2,K,NUM,M,M_,m,n,r,sum[13][100000]; 22 | struct Node {int wz; string ID;} ID_index[Total]; 23 | 24 | public: 25 | Counter_Tree(int MEM,int K):K(K) 26 | { 27 | M=MEM*1024*8-432*K; 28 | M_=M*2/3; 29 | m=M_/4; 30 | r=3; 31 | M2=1000000; 32 | n=0; 33 | for (int i=0; i<10; i++) bobhash[i]=new BOBHash32(i+1000); 34 | } 35 | int Find(int x,string y) 36 | { 37 | int now=head[x]; 38 | while (ID_index[now].ID!=y && now) now=next[now]; 39 | if (ID_index[now].ID==y) return ID_index[now].wz; 40 | return -1; 41 | } 42 | void Delete(int x,string y) 43 | { 44 | if (ID_index[head[x]].ID==y) {head[x]=next[head[x]]; return;} 45 | int now=head[x],Last; 46 | while (ID_index[now].ID!=y && now) {Last=now; now=next[now];} 47 | if (!head[x]) return; 48 | next[Last]=next[next[Last]]; 49 | } 50 | void Change(int x,int y) 51 | { 52 | swap(ID_index[Heap[x].idx].wz,ID_index[Heap[y].idx].wz); 53 | swap(Heap[x].val,Heap[y].val); 54 | 
swap(Heap[x].idx,Heap[y].idx); 55 | swap(Heap[x].ID,Heap[y].ID); 56 | } 57 | int Estimate(string A) 58 | { 59 | int WZ,ans=0; 60 | int k=1; 61 | for (int i=0; irun(A.c_str(),A.size()))%m; 64 | int now=0; 65 | int WZ2=WZ; 66 | int MAX=0; 67 | for (int i=1; i<13; i++) 68 | { 69 | WZ/=3; 70 | now++; 71 | if (tot[now][WZ]) {WZ2=WZ; MAX=now;} 72 | } 73 | int k=1; 74 | for (int i=1; i<=MAX; i++) k*=3; 75 | ans+=sum[MAX][WZ2] - n*r*k/m; 76 | } 77 | return ans; 78 | } 79 | void Insert(string A) 80 | { 81 | n++; 82 | int MIN=1000000000; 83 | int i=rand()%r; 84 | { 85 | int WZ=(bobhash[i]->run(A.c_str(),A.size()))%m; 86 | sum[0][WZ]++; 87 | int now = 0; int WZ2=WZ; 88 | while (now<12) 89 | { 90 | now++; 91 | WZ2/=3; 92 | sum[now][WZ2]++; 93 | } 94 | tot[0][WZ]++; 95 | now = 0; 96 | while (tot[now][WZ]==16) 97 | { 98 | tot[now][WZ]=0; 99 | now++; WZ/=3; 100 | tot[now][WZ]++; 101 | } 102 | MIN = Estimate(A); 103 | } 104 | 105 | Q=(bobhash[Count]->run(A.c_str(),A.size()))%M2; 106 | p=Find(Q,A); 107 | if (p==-1) 108 | { 109 | Heap[++cnt].val=MIN; Heap[cnt].ID=A; 110 | o++; ID_index[o].ID=A; ID_index[o].wz=cnt; next[o]=head[Q]; head[Q]=o; 111 | Heap[cnt].idx=o; 112 | int now=cnt; 113 | while (now>1 && Heap[now].valK) 115 | { 116 | Change(1,cnt); Delete((bobhash[Count]->run(Heap[cnt].ID.c_str(),Heap[cnt].ID.size()))%M2,Heap[cnt].ID); 117 | int now=1; cnt--; 118 | while (now*2<=cnt && Heap[now].val>Heap[now*2].val || now*2+1<=cnt && Heap[now].val>Heap[now*2+1].val) 119 | { 120 | if (Heap[now*2].val1 && Heap[now].valHeap[now*2].val || now*2+1<=cnt && Heap[now].val>Heap[now*2+1].val) 133 | { 134 | if (Heap[now*2].val Query(int k) 143 | { 144 | //printf("%s %s\n",Heap[1].ID,Heap[2].ID); 145 | return make_pair(Heap[k+1].ID,Heap[k+1].val); 146 | } 147 | }; 148 | #endif 149 | -------------------------------------------------------------------------------- /CounterTree/Sketchpheap.h: -------------------------------------------------------------------------------- 1 | #ifndef 
_Sketchpheap_H 2 | #define _Sketchpheap_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "params.h" 12 | #include "BOBHASH32.h" 13 | #define Total 10000005 14 | #define Count 4 15 | using namespace std; 16 | class Sketchpheap 17 | { 18 | private: 19 | BOBHash32 * bobhash[Count+2]; 20 | struct node {int val,idx; string ID;} Heap[Total]; 21 | int tot[Count][MAX_MEM],head[N],next[Total],m,k,WZ,Q,p,cnt,o,MIN,M2,K,NUM; 22 | struct Node {int wz; string ID;} ID_index[Total]; 23 | 24 | public: 25 | Sketchpheap(int NUM,int K):NUM(NUM),K(K) 26 | { 27 | M2=1000000; 28 | for (int i=0; i<=Count; i++) bobhash[i]=new BOBHash32(i+1000); 29 | } 30 | int Find(int x,string y) 31 | { 32 | int now=head[x]; 33 | while (ID_index[now].ID!=y && now) now=next[now]; 34 | if (ID_index[now].ID==y) return ID_index[now].wz; 35 | return -1; 36 | } 37 | void Delete(int x,string y) 38 | { 39 | if (ID_index[head[x]].ID==y) {head[x]=next[head[x]]; return;} 40 | int now=head[x],Last; 41 | while (ID_index[now].ID!=y && now) {Last=now; now=next[now];} 42 | if (!head[x]) return; 43 | next[Last]=next[next[Last]]; 44 | } 45 | void Change(int x,int y) 46 | { 47 | swap(ID_index[Heap[x].idx].wz,ID_index[Heap[y].idx].wz); 48 | swap(Heap[x].val,Heap[y].val); 49 | swap(Heap[x].idx,Heap[y].idx); 50 | swap(Heap[x].ID,Heap[y].ID); 51 | } 52 | void Insert(string A) 53 | { 54 | MIN=1000000000; 55 | for (int i=0; irun(A.c_str(),A.size()))%NUM; 58 | tot[i][WZ]++; 59 | MIN=min(MIN,tot[i][WZ]); 60 | } 61 | Q=(bobhash[Count]->run(A.c_str(),A.size()))%M2; 62 | p=Find(Q,A); 63 | if (p==-1) 64 | { 65 | Heap[++cnt].val=MIN; Heap[cnt].ID=A; 66 | o++; ID_index[o].ID=A; ID_index[o].wz=cnt; next[o]=head[Q]; head[Q]=o; 67 | Heap[cnt].idx=o; 68 | int now=cnt; 69 | while (now>1 && Heap[now].valK) 71 | { 72 | Change(1,cnt); Delete((bobhash[Count]->run(Heap[cnt].ID.c_str(),Heap[cnt].ID.size()))%M2,Heap[cnt].ID); 73 | int now=1; cnt--; 74 | while (now*2<=cnt && 
Heap[now].val>Heap[now*2].val || now*2+1<=cnt && Heap[now].val>Heap[now*2+1].val) 75 | { 76 | if (Heap[now*2].val1 && Heap[now].valHeap[now*2].val || now*2+1<=cnt && Heap[now].val>Heap[now*2+1].val) 89 | { 90 | if (Heap[now*2].val Query(int k) 99 | { 100 | //printf("%s %s\n",Heap[1].ID,Heap[2].ID); 101 | return make_pair(Heap[k+1].ID,Heap[k+1].val); 102 | } 103 | }; 104 | #endif 105 | -------------------------------------------------------------------------------- /CounterTree/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "BOBHASH32.h" 11 | #include "params.h" 12 | #include "Counter_Tree.h" 13 | #include 14 | using namespace std; 15 | map B,C,Rank; 16 | struct node {string x;int y;} p[10000005]; 17 | ifstream fin("u1",ios::in|ios::binary); 18 | char a[105]; 19 | string Read() 20 | { 21 | fin.read(a,13); 22 | a[13]='\0'; 23 | string tmp=a; 24 | return tmp; 25 | } 26 | string s[10000005]; 27 | int cmp(node i,node j) {return i.y>j.y;} 28 | int main(int argv, char **argc) 29 | { 30 | srand((unsigned)time(NULL)); 31 | //freopen("D:\\pkuѧϰ\\sliding\\stack-new.txt","r",stdin); 32 | int MEM,K; 33 | //sscanf(argc[1],"%d",&MEM); 34 | //sscanf(argc[2],"%d",&K); 35 | MEM=50; K=100; 36 | //fin.close(); 37 | //fin.open(argc[3],ios::in|ios::binary); 38 | //char f[1000]; 39 | //sprintf(f,"Other_MEM=%d_K=%d_%c%c.txt",MEM,K,argc[3][1],argc[3][2]); 40 | //freopen(f,"w",stdout); 41 | // K=100; 42 | int m=2000000; // the number of flows in stream 43 | // prepare for sketch plus heap 44 | Counter_Tree *SH; 45 | // Insertion 46 | for (int i=1; i<=m; i++) 47 | { 48 | char aa[105]; 49 | //scanf("%s",aa); 50 | //s[i]=aa; 51 | //scanf("%s",aa); 52 | s[i]=Read(); 53 | B[s[i]]++; 54 | } 55 | 56 | cout<<"preparing true flow"<::iterator sit=B.begin(); sit!=B.end(); sit++) 60 | { 61 | p[++cnt].x=sit->first; 62 | 
p[cnt].y=sit->second; 63 | } 64 | sort(p+1,p+cnt+1,cmp); 65 | for (int i=1; i<=K; i++) {Rank[p[i].x]=i; C[p[i].x]=p[i].y;} 66 | 67 | 68 | int sh_M; 69 | 70 | { 71 | // for (sh_M=1; 432*K+sh_M*4*16<=MEM*1024*8; sh_M++); 72 | SH=new Counter_Tree(MEM,K); 73 | cout<Insert(s[i]); 78 | } 79 | int SH_sum=0,SH_AAE=0; double SH_ARE=0; 80 | string SH_string; int SH_num; double MAP=0; 81 | for (int i=0; iQuery(i)).first; SH_num=(SH->Query(i)).second; 84 | SH_AAE+=abs(B[SH_string]-SH_num); SH_ARE+=abs(B[SH_string]-SH_num)/(B[SH_string]+0.0); 85 | if (C[SH_string]) SH_sum++;// else cout<0 && Rank[SH_string]<=i+1) summ++; 90 | MAP+=(double)summ/(i+1); 91 | } 92 | MAP/=K; 93 | 94 | printf("%dKB top %d:\nAccepted: %d/%d %.10f\nARE: %.10f\nAAE: %.10f\nMAP: %.10f\n\n",MEM,K,SH_sum,K,(SH_sum/(K+0.0)),SH_ARE/K,SH_AAE/(K+0.0),MAP); 95 | } 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /CounterTree/params.h: -------------------------------------------------------------------------------- 1 | #ifndef _PARAMS_H 2 | #define _PARAMS_H 3 | 4 | #define N 1000000 // maximum flow 5 | //#define M 1000000 // maximum size of stream-summary or CSS 6 | #define MAX_MEM 1000000 // maximum memory size 7 | #define HK_d 2 // maximum memory size 8 | 9 | #endif //_PARAMS_H 10 | -------------------------------------------------------------------------------- /ElasticSketch/ElasticSketch.h: -------------------------------------------------------------------------------- 1 | #ifndef _ELASTIC_SKETCH_H_ 2 | #define _ELASTIC_SKETCH_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "HeavyPart.h" 8 | #include "LightPart.h" 9 | 10 | template 11 | class ElasticSketch 12 | { 13 | static constexpr int heavy_mem = bucket_num * COUNTER_PER_BUCKET * 8; 14 | static constexpr int light_mem = tot_memory_in_bytes - heavy_mem; 15 | 16 | HeavyPart heavy_part; 17 | LightPart light_part; 18 | 19 | public: 20 | ElasticSketch(){} 21 | ~ElasticSketch(){} 22 | 
void clear() 23 | { 24 | heavy_part.clear(); 25 | light_part.clear(); 26 | } 27 | 28 | void insert(uint8_t *key, int f = 1) 29 | { 30 | uint8_t swap_key[KEY_LENGTH_4]; 31 | uint32_t swap_val = 0; 32 | int result = heavy_part.insert(key, swap_key, swap_val, f); 33 | 34 | switch(result) 35 | { 36 | case 0: return; 37 | case 1:{ 38 | if(HIGHEST_BIT_IS_1(swap_val)) 39 | light_part.insert(swap_key, GetCounterVal(swap_val)); 40 | else 41 | light_part.swap_insert(swap_key, swap_val); 42 | return; 43 | } 44 | case 2: light_part.insert(key, 1); return; 45 | default: 46 | printf("error return value !\n"); 47 | exit(1); 48 | } 49 | } 50 | 51 | void quick_insert(uint8_t *key, int f = 1) 52 | { 53 | heavy_part.quick_insert(key, f); 54 | } 55 | 56 | int query(uint8_t *key) 57 | { 58 | uint32_t heavy_result = heavy_part.query(key); 59 | if(heavy_result == 0 || HIGHEST_BIT_IS_1(heavy_result)) 60 | { 61 | int light_result = light_part.query(key); 62 | return (int)GetCounterVal(heavy_result) + light_result; 63 | } 64 | return heavy_result; 65 | } 66 | 67 | int query_compressed_part(uint8_t *key, uint8_t *compress_part, int compress_counter_num) 68 | { 69 | uint32_t heavy_result = heavy_part.query(key); 70 | if(heavy_result == 0 || HIGHEST_BIT_IS_1(heavy_result)) 71 | { 72 | int light_result = light_part.query_compressed_part(key, compress_part, compress_counter_num); 73 | return (int)GetCounterVal(heavy_result) + light_result; 74 | } 75 | return heavy_result; 76 | } 77 | 78 | struct node {int x; string y;} t[10005]; 79 | static int cmp(node i,node j) {return i.x>j.x;} 80 | void get_heavy_hitters(int threshold, vector> & results) 81 | { 82 | int cnt = 0; 83 | for (int i = 0; i < bucket_num; ++i) 84 | for (int j = 0; j < MAX_VALID_COUNTER; ++j) 85 | { 86 | uint32_t key = heavy_part.buckets[i].key[j]; 87 | int val = query((uint8_t *)&key); 88 | t[cnt].x=val; t[cnt++].y=string((const char*)&key, 4); 89 | } 90 | std::sort(t,t+cnt,cmp); 91 | for (int i = 0; i < threshold; i++) 
results.push_back(make_pair(t[i].y, t[i].x)); 92 | } 93 | 94 | /* interface */ 95 | int get_compress_width(int ratio) { return light_part.get_compress_width(ratio);} 96 | void compress(int ratio, uint8_t *dst) { light_part.compress(ratio, dst); } 97 | int get_bucket_num() { return heavy_part.get_bucket_num(); } 98 | double get_bandwidth(int compress_ratio) 99 | { 100 | int result = heavy_part.get_memory_usage(); 101 | result += get_compress_width(compress_ratio) * sizeof(uint8_t); 102 | return result * 1.0 / 1024 / 1024; 103 | } 104 | 105 | int get_cardinality() 106 | { 107 | int card = light_part.get_cardinality(); 108 | for(int i = 0; i < bucket_num; ++i) 109 | for(int j = 0; j < MAX_VALID_COUNTER; ++j) 110 | { 111 | uint8_t key[KEY_LENGTH_4]; 112 | *(uint32_t*)key = heavy_part.buckets[i].key[j]; 113 | int val = heavy_part.buckets[i].val[j]; 114 | int ex_val = light_part.query(key); 115 | 116 | if(HIGHEST_BIT_IS_1(val) && ex_val) 117 | { 118 | val += ex_val; 119 | card--; 120 | } 121 | if(GetCounterVal(val)) 122 | card++; 123 | } 124 | return card; 125 | } 126 | 127 | double get_entropy() 128 | { 129 | int tot = 0; 130 | double entr = 0; 131 | 132 | light_part.get_entropy(tot, entr); 133 | 134 | for(int i = 0; i < bucket_num; ++i) 135 | for(int j = 0; j < MAX_VALID_COUNTER; ++j) 136 | { 137 | uint8_t key[KEY_LENGTH_4]; 138 | *(uint32_t*)key = heavy_part.buckets[i].key[j]; 139 | int val = heavy_part.buckets[i].val[j]; 140 | 141 | int ex_val = light_part.query(key); 142 | 143 | if(HIGHEST_BIT_IS_1(val) && ex_val) 144 | { 145 | val += ex_val; 146 | 147 | tot -= ex_val; 148 | 149 | entr -= ex_val * log2(ex_val); 150 | } 151 | val = GetCounterVal(val); 152 | if(val) 153 | { 154 | tot += val; 155 | entr += val * log2(val); 156 | } 157 | } 158 | return -entr / tot + log2(tot); 159 | } 160 | 161 | void get_distribution(vector &dist) 162 | { 163 | light_part.get_distribution(dist); 164 | 165 | for(int i = 0; i < bucket_num; ++i) 166 | for(int j = 0; j < MAX_VALID_COUNTER; 
++j) 167 | { 168 | uint8_t key[KEY_LENGTH_4]; 169 | *(uint32_t*)key = heavy_part.buckets[i].key[j]; 170 | int val = heavy_part.buckets[i].val[j]; 171 | 172 | int ex_val = light_part.query(key); 173 | 174 | if(HIGHEST_BIT_IS_1(val) && ex_val != 0) 175 | { 176 | val += ex_val; 177 | dist[ex_val]--; 178 | } 179 | val = GetCounterVal(val); 180 | if(val) 181 | { 182 | if(val + 1 > dist.size()) 183 | dist.resize(val + 1); 184 | dist[val]++; 185 | } 186 | } 187 | } 188 | 189 | /* 190 | void *operator new(size_t sz) 191 | { 192 | constexpr uint32_t alignment = 64; 193 | size_t alloc_size = (2 * alignment + sz) / alignment * alignment; 194 | void *ptr = ::operator new(alloc_size); 195 | void *old_ptr = ptr; 196 | void *new_ptr = ((char*)std::align(alignment, sz, ptr, alloc_size) + alignment); 197 | ((void **)new_ptr)[-1] = old_ptr; 198 | 199 | return new_ptr; 200 | } 201 | void operator delete(void *p) 202 | { 203 | ::operator delete(((void**)p)[-1]); 204 | } 205 | */ 206 | }; 207 | 208 | 209 | 210 | #endif 211 | -------------------------------------------------------------------------------- /ElasticSketch/HeavyPart.h: -------------------------------------------------------------------------------- 1 | #ifndef _HEAVYPART_H_ 2 | #define _HEAVYPART_H_ 3 | 4 | #include "param.h" 5 | 6 | 7 | 8 | 9 | template 10 | class HeavyPart 11 | { 12 | public: 13 | alignas(64) Bucket buckets[bucket_num]; 14 | 15 | HeavyPart() 16 | { 17 | clear(); 18 | } 19 | ~HeavyPart(){} 20 | 21 | void clear() 22 | { 23 | memset(buckets, 0, sizeof(Bucket) * bucket_num); 24 | } 25 | 26 | 27 | /* insertion */ 28 | int insert(uint8_t *key, uint8_t *swap_key, uint32_t &swap_val, uint32_t f = 1) 29 | { 30 | uint32_t fp; 31 | int pos = CalculateFP(key, fp); 32 | 33 | 34 | 35 | int matched = 0, match_index = -1; 36 | for (int i = 0; i < MAX_VALID_COUNTER; ++i) 37 | { 38 | if (buckets[pos].key[i] == fp) 39 | { 40 | matched = 1; 41 | match_index = i; 42 | break; 43 | } 44 | } 45 | if (matched != 0) 46 | { 47 | 
buckets[pos].val[match_index] += f; 48 | return 0; 49 | } 50 | /* 51 | const __m256i item = _mm256_set1_epi32((int)fp); 52 | __m256i *keys_p = (__m256i *)(buckets[pos].key); 53 | int matched = 0; 54 | 55 | __m256i a_comp = _mm256_cmpeq_epi32(item, keys_p[0]); 56 | matched = _mm256_movemask_ps((__m256)a_comp); 57 | 58 | if (matched != 0) 59 | { 60 | //return 32 if input is zero; 61 | int matched_index = _tzcnt_u32((uint32_t)matched); 62 | buckets[pos].val[matched_index] += f; 63 | return 0; 64 | } 65 | */ 66 | 67 | 68 | 69 | int min_counter = -1; 70 | uint32_t min_counter_val = 0x7FFFFFFF; 71 | for(int i = 0; i < MAX_VALID_COUNTER; ++i) 72 | { 73 | int tmp_val = GetCounterVal(buckets[pos].val[i]); 74 | 75 | if(min_counter_val > tmp_val) 76 | { 77 | min_counter_val = tmp_val; 78 | min_counter = i; 79 | } 80 | } 81 | /* 82 | const uint32_t mask_base = 0x7FFFFFFF; 83 | const __m256i *counters = (__m256i *)(buckets[pos].val); 84 | __m256 masks = (__m256)_mm256_set1_epi32(mask_base); 85 | __m256 results = (_mm256_and_ps(*(__m256*)counters, masks)); 86 | __m256 mask2 = (__m256)_mm256_set_epi32(mask_base, 0, 0, 0, 0, 0, 0, 0); 87 | results = _mm256_or_ps(results, mask2); 88 | 89 | __m128i low_part = _mm_castps_si128(_mm256_extractf128_ps(results, 0)); 90 | __m128i high_part = _mm_castps_si128(_mm256_extractf128_ps(results, 1)); 91 | 92 | __m128i x = _mm_min_epi32(low_part, high_part); 93 | __m128i min1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(0,0,3,2)); 94 | __m128i min2 = _mm_min_epi32(x,min1); 95 | __m128i min3 = _mm_shuffle_epi32(min2, _MM_SHUFFLE(0,0,0,1)); 96 | __m128i min4 = _mm_min_epi32(min2,min3); 97 | int min_counter_val = _mm_cvtsi128_si32(min4); 98 | 99 | const __m256i ct_item = _mm256_set1_epi32(min_counter_val); 100 | int ct_matched = 0; 101 | 102 | __m256i ct_a_comp = _mm256_cmpeq_epi32(ct_item, (__m256i)results); 103 | matched = _mm256_movemask_ps((__m256)ct_a_comp); 104 | int min_counter = _tzcnt_u32((uint32_t)matched); 105 | */ 106 | 107 | if(min_counter_val == 
0) // empty counter 108 | { 109 | buckets[pos].key[min_counter] = fp; 110 | buckets[pos].val[min_counter] = f; 111 | return 0; 112 | } 113 | 114 | 115 | uint32_t guard_val = buckets[pos].val[MAX_VALID_COUNTER]; 116 | guard_val = UPDATE_GUARD_VAL(guard_val); 117 | 118 | 119 | if(!JUDGE_IF_SWAP(GetCounterVal(min_counter_val), guard_val)) 120 | { 121 | buckets[pos].val[MAX_VALID_COUNTER] = guard_val; 122 | return 2; 123 | } 124 | 125 | 126 | *((uint32_t*)swap_key) = buckets[pos].key[min_counter]; 127 | swap_val = buckets[pos].val[min_counter]; 128 | 129 | 130 | buckets[pos].val[MAX_VALID_COUNTER] = 0; 131 | 132 | 133 | buckets[pos].key[min_counter] = fp; 134 | buckets[pos].val[min_counter] = 0x80000001; 135 | 136 | return 1; 137 | } 138 | 139 | int quick_insert(uint8_t *key, uint32_t f = 1) 140 | { 141 | uint32_t fp; 142 | int pos = CalculateFP(key, fp); 143 | 144 | 145 | int matched = 0, match_index = -1; 146 | for (int i = 0; i < MAX_VALID_COUNTER; ++i) 147 | { 148 | if (buckets[pos].key[i] == fp) 149 | { 150 | matched = 1; 151 | match_index = i; 152 | break; 153 | } 154 | } 155 | if (matched != 0) 156 | { 157 | buckets[pos].val[match_index] += f; 158 | return 0; 159 | } 160 | 161 | /* 162 | const __m256i item = _mm256_set1_epi32((int)fp); 163 | __m256i *keys_p = (__m256i *)(buckets[pos].key); 164 | int matched = 0; 165 | 166 | __m256i a_comp = _mm256_cmpeq_epi32(item, keys_p[0]); 167 | matched = _mm256_movemask_ps((__m256)a_comp); 168 | 169 | if (matched != 0) 170 | { 171 | int matched_index = _tzcnt_u32((uint32_t)matched); 172 | buckets[pos].val[matched_index] += f; 173 | return 0; 174 | } 175 | */ 176 | int min_counter = -1; 177 | uint32_t min_counter_val = 0x7FFFFFFF; 178 | for(int i = 0; i < MAX_VALID_COUNTER; ++i) 179 | { 180 | int tmp_val = GetCounterVal(buckets[pos].val[i]); 181 | 182 | if(min_counter_val > tmp_val) 183 | { 184 | min_counter_val = tmp_val; 185 | min_counter = i; 186 | } 187 | } 188 | 189 | /* 190 | const uint32_t mask_base = 0x7FFFFFFF; 191 
| const __m256i *counters = (__m256i *)(buckets[pos].val); 192 | __m256 masks = (__m256)_mm256_set1_epi32(mask_base); 193 | __m256 results = (_mm256_and_ps(*(__m256*)counters, masks)); 194 | __m256 mask2 = (__m256)_mm256_set_epi32(mask_base, 0, 0, 0, 0, 0, 0, 0); 195 | results = _mm256_or_ps(results, mask2); 196 | 197 | __m128i low_part = _mm_castps_si128(_mm256_extractf128_ps(results, 0)); 198 | __m128i high_part = _mm_castps_si128(_mm256_extractf128_ps(results, 1)); 199 | 200 | __m128i x = _mm_min_epi32(low_part, high_part); 201 | __m128i min1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(0,0,3,2)); 202 | __m128i min2 = _mm_min_epi32(x,min1); 203 | __m128i min3 = _mm_shuffle_epi32(min2, _MM_SHUFFLE(0,0,0,1)); 204 | __m128i min4 = _mm_min_epi32(min2,min3); 205 | int min_counter_val = _mm_cvtsi128_si32(min4); 206 | 207 | const __m256i ct_item = _mm256_set1_epi32(min_counter_val); 208 | int ct_matched = 0; 209 | 210 | __m256i ct_a_comp = _mm256_cmpeq_epi32(ct_item, (__m256i)results); 211 | matched = _mm256_movemask_ps((__m256)ct_a_comp); 212 | int min_counter = _tzcnt_u32((uint32_t)matched); 213 | */ 214 | 215 | if(min_counter_val == 0) 216 | { 217 | buckets[pos].key[min_counter] = fp; 218 | buckets[pos].val[min_counter] = f; 219 | return 0; 220 | } 221 | 222 | 223 | uint32_t guard_val = buckets[pos].val[MAX_VALID_COUNTER]; 224 | guard_val = UPDATE_GUARD_VAL(guard_val); 225 | 226 | if(!JUDGE_IF_SWAP(min_counter_val, guard_val)) 227 | { 228 | buckets[pos].val[MAX_VALID_COUNTER] = guard_val; 229 | return 2; 230 | } 231 | 232 | 233 | buckets[pos].val[MAX_VALID_COUNTER] = 0; 234 | 235 | buckets[pos].key[min_counter] = fp; 236 | return 1; 237 | } 238 | 239 | /* query */ 240 | uint32_t query(uint8_t *key) 241 | { 242 | uint32_t fp; 243 | int pos = CalculateFP(key, fp); 244 | 245 | for(int i = 0; i < MAX_VALID_COUNTER; ++i) 246 | if(buckets[pos].key[i] == fp) 247 | return buckets[pos].val[i]; 248 | 249 | return 0; 250 | } 251 | 252 | 253 | /* interface */ 254 | int get_memory_usage() 
255 | { 256 | return bucket_num * sizeof(Bucket); 257 | } 258 | int get_bucket_num() 259 | { 260 | return bucket_num; 261 | } 262 | 263 | private: 264 | int CalculateFP(uint8_t *key, uint32_t &fp) 265 | { 266 | fp = *((uint32_t*)key); 267 | return CalculateBucketPos(fp) % bucket_num; 268 | } 269 | }; 270 | 271 | #endif 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | -------------------------------------------------------------------------------- /ElasticSketch/HeavyPart.h.gch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/papergitkeeper/heavy-keeper-project/c07aab7a13b08f565d4090f7be17d805d5b6ba85/ElasticSketch/HeavyPart.h.gch -------------------------------------------------------------------------------- /ElasticSketch/LightPart.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIGHT_PART_H_ 2 | #define _LIGHT_PART_H_ 3 | 4 | #include "param.h" 5 | 6 | template 7 | class LightPart 8 | { 9 | static constexpr int counter_num = init_mem_in_bytes; 10 | BOBHash32 *bobhash = NULL; 11 | 12 | public: 13 | uint8_t counters[counter_num]; 14 | int mice_dist[256]; 15 | 16 | LightPart() 17 | { 18 | clear(); 19 | std::random_device rd; 20 | bobhash = new BOBHash32(rd() % MAX_PRIME32); 21 | } 22 | ~LightPart() 23 | { 24 | delete bobhash; 25 | } 26 | 27 | void clear() 28 | { 29 | memset(counters, 0, counter_num); 30 | memset(mice_dist, 0, sizeof(int) * 256); 31 | } 32 | 33 | 34 | /* insertion */ 35 | void insert(uint8_t *key, int f = 1) 36 | { 37 | uint32_t hash_val = (uint32_t)bobhash->run((const char*)key, KEY_LENGTH_4); 38 | uint32_t pos = hash_val % (uint32_t)counter_num; 39 | 40 | int old_val = (int)counters[pos]; 41 | int new_val = (int)counters[pos] + f; 42 | 43 | new_val = new_val < 255 ? 
new_val : 255; 44 | counters[pos] = (uint8_t)new_val; 45 | 46 | mice_dist[old_val]--; 47 | mice_dist[new_val]++; 48 | } 49 | 50 | void swap_insert(uint8_t *key, int f) 51 | { 52 | uint32_t hash_val = (uint32_t)bobhash->run((const char*)key, KEY_LENGTH_4); 53 | uint32_t pos = hash_val % (uint32_t)counter_num; 54 | 55 | f = f < 255 ? f : 255; 56 | if (counters[pos] < f) 57 | { 58 | int old_val = (int)counters[pos]; 59 | counters[pos] = (uint8_t)f; 60 | int new_val = (int)counters[pos]; 61 | 62 | mice_dist[old_val]--; 63 | mice_dist[new_val]++; 64 | } 65 | } 66 | 67 | 68 | /* query */ 69 | int query(uint8_t *key) 70 | { 71 | uint32_t hash_val = (uint32_t)bobhash->run((const char*)key, KEY_LENGTH_4); 72 | uint32_t pos = hash_val % (uint32_t)counter_num; 73 | 74 | return (int)counters[pos]; 75 | } 76 | 77 | 78 | /* compress */ 79 | void compress(int ratio, uint8_t *dst) 80 | { 81 | int width = get_compress_width(ratio); 82 | 83 | for (int i = 0; i < width && i < counter_num; ++i) 84 | { 85 | uint8_t max_val = 0; 86 | for (int j = i; j < counter_num; j += width) 87 | max_val = counters[j] > max_val ? 
counters[j] : max_val; 88 | dst[i] = max_val; 89 | } 90 | } 91 | 92 | int query_compressed_part(uint8_t *key, uint8_t *compress_part, int compress_counter_num) 93 | { 94 | uint32_t hash_val = (uint32_t)bobhash->run((const char *)key, KEY_LENGTH_4); 95 | uint32_t pos = (hash_val % (uint32_t)counter_num) % compress_counter_num; 96 | 97 | return (int)compress_part[pos]; 98 | } 99 | 100 | 101 | /* other measurement task */ 102 | int get_compress_width(int ratio) { return (counter_num / ratio); } 103 | int get_compress_memory(int ratio) { return (uint32_t)(counter_num / ratio); } 104 | int get_memory_usage() { return counter_num; } 105 | 106 | int get_cardinality() 107 | { 108 | int mice_card = 0; 109 | for (int i = 1; i < 256; i++) 110 | mice_card += mice_dist[i]; 111 | 112 | double rate = (counter_num - mice_card) / (double)counter_num; 113 | return counter_num * log(1 / rate); 114 | } 115 | 116 | void get_entropy(int &tot, double &entr) 117 | { 118 | for (int i = 1; i < 256; i++) 119 | { 120 | tot += mice_dist[i] * i; 121 | entr += mice_dist[i] * i * log2(i); 122 | } 123 | } 124 | }; 125 | 126 | 127 | 128 | 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /ElasticSketch/param.h: -------------------------------------------------------------------------------- 1 | #ifndef _PARAM_H_ 2 | #define _PARAM_H_ 3 | 4 | #include "../common/BOBHash32.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define COUNTER_PER_BUCKET 8 17 | #define MAX_VALID_COUNTER 7 18 | 19 | #define ALIGNMENT 64 20 | 21 | #define COUNTER_PER_WORD 8 22 | #define BIT_TO_DETERMINE_COUNTER 3 23 | #define K_HASH_WORD 1 24 | 25 | 26 | #define KEY_LENGTH_4 4 27 | #define KEY_LENGTH_13 13 28 | 29 | #define CONSTANT_NUMBER 2654435761u 30 | #define CalculateBucketPos(fp) (((fp) * CONSTANT_NUMBER) >> 15) 31 | 32 | #define GetCounterVal(val) ((uint32_t)((val) & 
0x7FFFFFFF)) 33 | 34 | #define JUDGE_IF_SWAP(min_val, guard_val) ((guard_val) > ((min_val) << 3)) 35 | 36 | #define UPDATE_GUARD_VAL(guard_val) ((guard_val) + 1) 37 | 38 | #define SWAP_MIN_VAL_THRESHOLD 5 39 | 40 | #define HIGHEST_BIT_IS_1(val) ((val) & 0x80000000) 41 | 42 | 43 | 44 | struct Bucket 45 | { 46 | uint32_t key[COUNTER_PER_BUCKET]; 47 | uint32_t val[COUNTER_PER_BUCKET]; 48 | }; 49 | 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /ElasticSketch/throughput.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ElasticSketch.h" 10 | using namespace std; 11 | 12 | #define START_FILE_NO 1 13 | #define END_FILE_NO 10 14 | 15 | 16 | struct FIVE_TUPLE{ char key[13]; }; 17 | typedef vector TRACE; 18 | TRACE traces[END_FILE_NO - START_FILE_NO + 1]; 19 | 20 | void ReadInTraces(const char *trace_prefix) 21 | { 22 | int datafileCnt = 1; 23 | // for(int datafileCnt = START_FILE_NO; datafileCnt <= END_FILE_NO; ++datafileCnt) 24 | { 25 | char datafileName[100]; 26 | sprintf(datafileName, "%s", trace_prefix); 27 | FILE *fin = fopen(datafileName, "rb"); 28 | 29 | FIVE_TUPLE tmp_five_tuple; 30 | traces[datafileCnt - 1].clear(); 31 | int tot_num = 0; 32 | while(fread(&tmp_five_tuple, 1, 13, fin) == 13 && tot_num < 10000000) 33 | { 34 | traces[datafileCnt - 1].push_back(tmp_five_tuple); 35 | tot_num ++; 36 | } 37 | fclose(fin); 38 | 39 | printf("Successfully read in %s, %ld packets\n", datafileName, traces[datafileCnt - 1].size()); 40 | } 41 | printf("\n"); 42 | } 43 | 44 | map mp; 45 | struct node {string x;int y;} tt[10000005]; 46 | int cmp(node i,node j) {return i.y>j.y;} 47 | 48 | int CalculateFP(uint8_t *key, uint32_t &fp) 49 | { 50 | fp = *((uint32_t*)key); 51 | // return CalculateBucketPos(fp) % bucket_num; 52 | } 53 | 54 | int insert2(uint8_t *key, uint8_t *swap_key, 
uint32_t &swap_val, uint32_t f = 1) 55 | { 56 | uint32_t fp; 57 | int pos = CalculateFP(key, fp); 58 | uint32_t key2 = fp; 59 | 60 | mp[string((const char*)&key2, 4)] ++; 61 | } 62 | 63 | void insert(uint8_t *key, int f = 1) 64 | { 65 | uint8_t swap_key[KEY_LENGTH_4]; 66 | uint32_t swap_val = 0; 67 | int result = insert2(key, swap_key, swap_val, f); 68 | } 69 | 70 | 71 | int main() 72 | { 73 | ReadInTraces("u1"); 74 | 75 | 76 | #define HEAVY_MEM (int(50 * 1024 / 4)) 77 | #define BUCKET_NUM (HEAVY_MEM / 64) 78 | #define TOT_MEM_IN_BYTES (50 * 1024) 79 | ElasticSketch *elastic = NULL; 80 | 81 | 82 | 83 | // for(int datafileCnt = START_FILE_NO; datafileCnt <= END_FILE_NO; ++datafileCnt) 84 | int datafileCnt = 1; 85 | { 86 | elastic = NULL; 87 | 88 | timespec time1, time2; 89 | long long resns; 90 | int packet_cnt = (int)traces[datafileCnt - 1].size(); 91 | 92 | uint8_t **keys = new uint8_t*[(int)traces[datafileCnt - 1].size()]; 93 | for(int i = 0; i < (int)traces[datafileCnt - 1].size(); ++i) 94 | { 95 | keys[i] = new uint8_t[13]; 96 | memcpy(keys[i], traces[datafileCnt - 1][i].key, 13); 97 | } 98 | 99 | clock_gettime(CLOCK_MONOTONIC, &time1); 100 | int test_cycles = 1; 101 | for(int t = 0; t < test_cycles; ++t) 102 | { 103 | elastic = new ElasticSketch(); 104 | for(int i = 0; i < packet_cnt; ++i) 105 | { 106 | if (i%1000000==0) printf("%d\n",i); 107 | insert(keys[i]); 108 | elastic->insert(keys[i]); 109 | } 110 | 111 | int cnt = 0; 112 | for (map :: iterator sit = (--mp.end()); sit!=mp.begin(); sit--) 113 | { 114 | tt[cnt].x=sit->first; 115 | tt[cnt++].y=sit->second; 116 | } 117 | sort(tt,tt+cnt,cmp); 118 | cout<<2< OK; 120 | for (int i = 0 ; i < 100; i++) 121 | { 122 | OK[tt[i].x] = 1; 123 | } 124 | 125 | vector> results; 126 | elastic->get_heavy_hitters(100, results); 127 | double AAE=0,ARE=0,accuracy = 0; 128 | cout<<3< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "BOBHASH32.h" 12 | #include "params.h" 13 | #include 
"ssummary.h" 14 | #include "BOBHASH64.h" 15 | #define rep(i,a,n) for(int i=a;i<=n;i++) 16 | using namespace std; 17 | class LossyCounting 18 | { 19 | private: 20 | ssummary *ss; 21 | int K; 22 | public: 23 | LossyCounting(int K):K(K) {ss=new ssummary(0); ss->clear();} 24 | void Insert(string x,int c) 25 | { 26 | bool mon=false; 27 | int p=ss->find(x); 28 | if (p) mon=true; 29 | if (!mon) 30 | { 31 | int q=c+1; 32 | int i=ss->getid(); 33 | ss->add2(ss->location(x),i); 34 | ss->str[i]=x; 35 | ss->sum[i]=q; 36 | ss->link(i,0); 37 | } else 38 | { 39 | int tmp=ss->Left[ss->sum[p]]; 40 | ss->cut(p); 41 | if(ss->head[ss->sum[p]])tmp=ss->sum[p]; 42 | ss->sum[p]++; 43 | ss->link(p,tmp); 44 | } 45 | } 46 | void clear(int c) 47 | { 48 | while (ss->getmin()Right[0]; 51 | int tmp=ss->head[t]; 52 | ss->cut(ss->head[t]); 53 | ss->recycling(tmp); 54 | } 55 | } 56 | struct Node {string x; int y;} q[MAX_MEM+10]; 57 | static int cmp(Node i,Node j) {return i.y>j.y;} 58 | void work() 59 | { 60 | int CNT=0; 61 | for(int i=N;i;i=ss->Left[i]) 62 | for(int j=ss->head[i];j;j=ss->Next[j]) {q[CNT].x=ss->str[j]; q[CNT].y=ss->sum[j]; CNT++; } 63 | sort(q,q+CNT,cmp); 64 | } 65 | pair Query(int k) 66 | { 67 | return make_pair(q[k].x,q[k].y); 68 | } 69 | }; 70 | #endif 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Heavykeeper 2 | 3 | Finding top-k elephant flows is a critical task in network traffic measurement. As the line rate increases in today's network, designing accurate and fast algorithms for this task becomes more and more challenging. There are several well-known algorithms, including Lossy counting, Space-Saving, CSS, etc. However, the performances of all existing algorithms are poor. In this paper, we propose a novel data structure, named Heavykeeper, which achieves high precision in finding top-k elephant flows. 
It also works at a fast and constant speed. The key idea of heavykeeper is to intelligently record the frequencies of elephant flows and omit mice flows. Experimental results show that our heavykeeper algorithm achieves almost 100% precision with a small memory size, and reduces the error by around 3 orders of magnitude on average compared to the state-of-the-art. 4 | 5 | # About the source code, dataset and parameter settings 6 | 7 | The source code contains C++ implementations of Space-Saving, Lossycounting, CSS, heavykeeper and stream-summary (which is used in Space-Saving, Lossycounting and heavykeeper). The code was developed on Windows 10 and compiles successfully with g++ 4.8.4. 8 | 9 | The file u1 consists of IP packets captured from our campus network. The hash functions are 64-bit and 32-bit Bob hash functions, obtained from http://burtleburtle.net/bob/hash/evahash.html. 10 | 11 | You can also use your own dataset and other hash functions. 12 | 13 | # How to run 14 | 15 | Suppose you've already cloned the repository and are starting from the Codes directory. 16 | 17 | You just need to compile and run main.cpp. 18 | 19 | # Input format 20 | 21 | You need to input two integers MEM and K, which mean "memory = MEM KB" and "top-K". 22 | 23 | # Output format 24 | 25 | Our program will print the PRECISION, ARE and AAE of these sketches on the screen. 
26 | -------------------------------------------------------------------------------- /heavykeeper.h: -------------------------------------------------------------------------------- 1 | #ifndef _heavykeeper_H 2 | #define _heavykeeper_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "BOBHASH32.h" 12 | #include "params.h" 13 | #include "ssummary.h" 14 | #include "BOBHASH64.h" 15 | #define HK_d 2 16 | #define HK_b 1.08 17 | #define rep(i,a,n) for(int i=a;i<=n;i++) 18 | using namespace std; 19 | class heavykeeper 20 | { 21 | private: 22 | ssummary *ss; 23 | struct node {int C,FP;} HK[HK_d][MAX_MEM+10]; 24 | BOBHash64 * bobhash; 25 | int K,M2; 26 | public: 27 | heavykeeper(int M2,int K):M2(M2),K(K) {ss=new ssummary(K); ss->clear(); bobhash=new BOBHash64(1005);} 28 | void clear() 29 | { 30 | for (int i=0; irun(ST.c_str(),ST.size())); 36 | } 37 | void Insert(string x) 38 | { 39 | bool mon=false; 40 | int p=ss->find(x); 41 | if (p) mon=true; 42 | int maxv=0; 43 | unsigned long long H=Hash(x); int FP=(H>>48); 44 | for (int j=0; jgetmin()) 51 | HK[j][Hsh].C++; 52 | maxv=max(maxv,HK[j][Hsh].C); 53 | } else 54 | { 55 | if (!(rand()%int(pow(HK_b,HK[j][Hsh].C)))) 56 | { 57 | HK[j][Hsh].C--; 58 | if (HK[j][Hsh].C<=0) 59 | { 60 | HK[j][Hsh].FP=FP; 61 | HK[j][Hsh].C=1; 62 | maxv=max(maxv,1); 63 | } 64 | } 65 | } 66 | } 67 | if (!mon) 68 | { 69 | if (maxv-(ss->getmin())==1 || ss->totgetid(); 72 | ss->add2(ss->location(x),i); 73 | ss->str[i]=x; 74 | ss->sum[i]=maxv; 75 | ss->link(i,0); 76 | while(ss->tot>K) 77 | { 78 | int t=ss->Right[0]; 79 | int tmp=ss->head[t]; 80 | ss->cut(ss->head[t]); 81 | ss->recycling(tmp); 82 | } 83 | } 84 | } else 85 | if (maxv>ss->sum[p]) 86 | { 87 | int tmp=ss->Left[ss->sum[p]]; 88 | ss->cut(p); 89 | if(ss->head[ss->sum[p]]) tmp=ss->sum[p]; 90 | ss->sum[p]=maxv; 91 | ss->link(p,tmp); 92 | } 93 | } 94 | struct Node {string x; int y;} q[MAX_MEM+10]; 95 | static int cmp(Node i,Node j) {return 
i.y>j.y;} 96 | void work() 97 | { 98 | int CNT=0; 99 | for(int i=N;i;i=ss->Left[i]) 100 | for(int j=ss->head[i];j;j=ss->Next[j]) {q[CNT].x=ss->str[j]; q[CNT].y=ss->sum[j]; CNT++; } 101 | sort(q,q+CNT,cmp); 102 | } 103 | pair Query(int k) 104 | { 105 | return make_pair(q[k].x,q[k].y); 106 | } 107 | }; 108 | #endif 109 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "BOBHASH32.h" 11 | #include "params.h" 12 | #include "ssummary.h" 13 | #include "heavykeeper.h" 14 | #include "spacesaving.h" 15 | #include "LossyCounting.h" 16 | #include "CSS.h" 17 | using namespace std; 18 | map B,C; 19 | struct node {string x;int y;} p[10000005]; 20 | ifstream fin("u1",ios::in|ios::binary); 21 | char a[105]; 22 | string Read() 23 | { 24 | fin.read(a,13); 25 | a[13]='\0'; 26 | string tmp=a; 27 | return tmp; 28 | } 29 | int cmp(node i,node j) {return i.y>j.y;} 30 | int main() 31 | { 32 | int MEM,K; 33 | cin>>MEM>>K; 34 | cout<<"MEM="<clear(); 41 | 42 | // preparing spacesaving 43 | int ss_M; 44 | for (ss_M=1; 432*ss_M<=MEM*1024*8; ss_M++); 45 | spacesaving *ss; ss=new spacesaving(ss_M,K); 46 | 47 | // preparing LossyCounting 48 | int LC_M; 49 | for (LC_M=1; 227*LC_M<=MEM*1024*8; LC_M++); 50 | LossyCounting *LC; LC=new LossyCounting(K); 51 | 52 | // preparing CSS 53 | int css_M; 54 | for (css_M=1; 179*css_M+4*css_M*log(css_M)/log(2)<=MEM*1024*8; css_M++); 55 | CSS *css; css=new CSS(css_M,K); css->clear(); 56 | // Inserting 57 | for (int i=1; i<=m; i++) 58 | { 59 | if (i%(m/10)==0) cout<<"Insert "<Insert(s); 63 | ss->Insert(s); 64 | LC->Insert(s,i/LC_M); if (i%LC_M==0) LC->clear(i/LC_M); 65 | css->Insert(s); 66 | } 67 | hk->work(); 68 | ss->work(); 69 | LC->work(); 70 | css->work(); 71 | 72 | cout<<"preparing true flow"<::iterator 
sit=B.begin(); sit!=B.end(); sit++) 76 | { 77 | p[++cnt].x=sit->first; 78 | p[cnt].y=sit->second; 79 | } 80 | sort(p+1,p+cnt+1,cmp); 81 | for (int i=1; i<=K; i++) C[p[i].x]=p[i].y; 82 | 83 | // Calculating PRE, ARE, AAE 84 | cout<<"Calculating"<Query(i)).first; hk_num=(hk->Query(i)).second; 90 | hk_AAE+=abs(B[hk_string]-hk_num); hk_ARE+=abs(B[hk_string]-hk_num)/(B[hk_string]+0.0); 91 | if (C[hk_string]) hk_sum++; 92 | } 93 | 94 | int LC_sum=0,LC_AAE=0; double LC_ARE=0; 95 | string LC_string; int LC_num; 96 | for (int i=0; iQuery(i)).first; LC_num=(LC->Query(i)).second; 99 | LC_AAE+=abs(B[LC_string]-LC_num); LC_ARE+=abs(B[LC_string]-LC_num)/(B[LC_string]+0.0); 100 | if (C[LC_string]) LC_sum++; 101 | } 102 | 103 | int ss_sum=0,ss_AAE=0; double ss_ARE=0; 104 | string ss_string; int ss_num; 105 | for (int i=0; iQuery(i)).first; ss_num=(ss->Query(i)).second; 108 | ss_AAE+=abs(B[ss_string]-ss_num); ss_ARE+=abs(B[ss_string]-ss_num)/(B[ss_string]+0.0); 109 | if (C[ss_string]) ss_sum++; 110 | } 111 | 112 | int css_sum=0,css_AAE=0; double css_ARE=0; 113 | string css_string; int css_num; 114 | for (int i=0; iQuery(i)).first; css_num=(css->Query(i)).second; 117 | css_AAE+=abs(B[css_string]-css_num); css_ARE+=abs(B[css_string]-css_num)/(B[css_string]+0.0); 118 | if (C[css_string]) css_sum++; 119 | } 120 | printf("heavkeeper:\nAccepted: %d/%d %.10f\nARE: %.10f\nAAE: %.10f\n\n",hk_sum,K,(hk_sum/(K+0.0)),hk_ARE/K,hk_AAE/(K+0.0)); 121 | printf("LossyCounting:\nAccepted: %d/%d %.10f\nARE: %.10f\nAAE: %.10f\n\n",LC_sum,K,(LC_sum/(K+0.0)),LC_ARE/K,LC_AAE/(K+0.0)); 122 | printf("spacesaving:\nAccepted: %d/%d %.10f\nARE: %.10f\nAAE: %.10f\n\n",ss_sum,K,(ss_sum/(K+0.0)),ss_ARE/K,ss_AAE/(K+0.0)); 123 | printf("CSS:\nAccepted: %d/%d %.10f\nARE: %.10f\nAAE: %.10f\n\n",css_sum,K,(css_sum/(K+0.0)),css_ARE/K,css_AAE/(K+0.0)); 124 | return 0; 125 | } 126 | -------------------------------------------------------------------------------- /params.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _PARAMS_H 2 | #define _PARAMS_H 3 | 4 | #define N 1000000 // maximum flow 5 | #define M 1000000 // maximum size of stream-summary or CSS 6 | #define MAX_MEM 1000000 // maximum memory size 7 | #define HK_d 2 // number of rows in the HeavyKeeper counter table (first dimension of HK[HK_d][MAX_MEM+10]) 8 | 9 | #endif //_PARAMS_H 10 | -------------------------------------------------------------------------------- /spacesaving.h: -------------------------------------------------------------------------------- 1 | #ifndef _spacesaving_H 2 | #define _spacesaving_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "BOBHASH32.h" 12 | #include "params.h" 13 | #include "ssummary.h" 14 | #include "BOBHASH64.h" 15 | #define rep(i,a,n) for(int i=a;i<=n;i++) 16 | using namespace std; 17 | class spacesaving /* top-K tracker that keeps at most M2 entries in an ssummary */ 18 | { 19 | private: 20 | ssummary *ss; 21 | int K,M2; 22 | public: 23 | spacesaving(int M2,int K):M2(M2),K(K) {ss=new ssummary(M2); ss->clear();} 24 | void Insert(string x) /* record one occurrence of x: add it as a new entry if untracked, otherwise move it to the bucket for count+1 */ 25 | { 26 | bool mon=false; 27 | int p=ss->find(x); 28 | if (p) mon=true; 29 | if (!mon) 30 | { 31 | int q; 32 | if (ss->totgetmin()+1; 34 | int i=ss->getid(); 35 | ss->add2(ss->location(x),i); 36 | ss->str[i]=x; 37 | ss->sum[i]=q; 38 | ss->link(i,0); 39 | while(ss->tot>M2) /* over capacity: drop entries from the bucket at Right[0] until tot<=M2 */ 40 | { 41 | int t=ss->Right[0]; 42 | int tmp=ss->head[t]; 43 | ss->cut(ss->head[t]); 44 | ss->recycling(tmp); 45 | } 46 | } else 47 | { 48 | int tmp=ss->Left[ss->sum[p]]; 49 | ss->cut(p); 50 | if(ss->head[ss->sum[p]]) tmp=ss->sum[p]; 51 | ss->sum[p]++; 52 | ss->link(p,tmp); 53 | } 54 | } 55 | struct Node {string x; int y;} q[MAX_MEM+10]; 56 | static int cmp(Node i,Node j) {return i.y>j.y;} 57 | void work() /* copy every tracked (str,sum) pair into q, then sort q by count in descending order */ 58 | { 59 | int CNT=0; 60 | for(int i=N;i;i=ss->Left[i]) 61 | for(int j=ss->head[i];j;j=ss->Next[j]) {q[CNT].x=ss->str[j]; q[CNT].y=ss->sum[j]; CNT++; } 62 | sort(q,q+CNT,cmp); 63 | } 64 | pair Query(int k) /* k-th entry of the array sorted by work() (0-based) */ 65 | { 66 | return make_pair(q[k].x,q[k].y); 67 
| } 68 | }; 69 | #endif 70 | -------------------------------------------------------------------------------- /ssummary.h: -------------------------------------------------------------------------------- 1 | #ifndef _ssummary_H 2 | #define _ssummary_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "BOBHASH32.h" 12 | #include "params.h" 13 | #define len2 9973 14 | #define rep(i,a,n) for(int i=a;i<=n;i++) 15 | using namespace std; 16 | class ssummary 17 | { 18 | public: 19 | int tot; 20 | int sum[M+10],K,last[M+10],Next[M+10],ID[M+10]; 21 | int head[N+10],Left[N+10],Right[N+10],num; 22 | string str[M+10]; 23 | int head2[len2+10],Next2[M+10]; 24 | BOBHash32 * bobhash; 25 | 26 | ssummary(int K):K(K) {bobhash=new BOBHash32(1000);} 27 | void clear() 28 | { 29 | memset(sum,0,sizeof(sum)); 30 | memset(last,0,sizeof(Next)); 31 | memset(Next2,0,sizeof(Next2)); 32 | rep(i,0,N)head[i]=Left[i]=Right[i]=0; 33 | rep(i,0,len2-1)head2[0]=0; 34 | tot=0; 35 | rep(i,1,M+2)ID[i]=i; 36 | num=M+2; 37 | Right[0]=N; 38 | Left[N]=0; 39 | } 40 | int getid() 41 | { 42 | int i=ID[num--]; 43 | last[i]=Next[i]=sum[i]=Next2[i]=0; 44 | return i; 45 | } 46 | int location(string ST) 47 | { 48 | return (bobhash->run(ST.c_str(),ST.size()))%len2; 49 | } 50 | void add2(int x,int y) 51 | { 52 | Next2[y]=head2[x]; 53 | head2[x]=y; 54 | } 55 | int find(string s) 56 | { 57 | for(int i=head2[location(s)];i;i=Next2[i]) 58 | if(str[i]==s)return i; 59 | return 0; 60 | } 61 | void linkhead(int i,int j) 62 | { 63 | Left[i]=j; 64 | Right[i]=Right[j]; 65 | Right[j]=i; 66 | Left[Right[i]]=i; 67 | } 68 | void cuthead(int i) 69 | { 70 | int t1=Left[i],t2=Right[i]; 71 | Right[t1]=t2; 72 | Left[t2]=t1; 73 | } 74 | int getmin() 75 | { 76 | if (tot0 && j>sum[i]-10;j--) 91 | if(head[j]){linkhead(sum[i],j);return;} 92 | linkhead(sum[i],ww); 93 | } 94 | } 95 | void cut(int i) 96 | { 97 | --tot; 98 | if(head[sum[i]]==i)head[sum[i]]=Next[i]; 99 | 
if(head[sum[i]]==0)cuthead(sum[i]); 100 | int t1=last[i],t2=Next[i]; 101 | if(t1)Next[t1]=t2; 102 | if(t2)last[t2]=t1; 103 | } 104 | void recycling(int i) 105 | { 106 | int w=location(str[i]); 107 | if (head2[w]==i) 108 | head2[w]=Next2[i]; 109 | else 110 | { 111 | for(int j=head2[w];j;j=Next2[j]) 112 | if(Next2[j]==i) 113 | { 114 | Next2[j]=Next2[i]; 115 | break; 116 | } 117 | } 118 | ID[++num]=i; 119 | } 120 | }; 121 | #endif 122 | -------------------------------------------------------------------------------- /technical_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/papergitkeeper/heavy-keeper-project/c07aab7a13b08f565d4090f7be17d805d5b6ba85/technical_report.pdf --------------------------------------------------------------------------------