├── misc └── crc32c_sse42_u64.h ├── mem ├── speed │ ├── Makefile │ ├── speed_test_plots.py │ ├── speed_test.py │ └── speed_test.cpp ├── AnchorHash.hpp ├── AnchorHash.cpp └── README.md ├── tests ├── balance │ ├── Makefile │ ├── balance_test.py │ └── balance.cpp └── speed │ ├── Makefile │ ├── speed_test_plots.py │ ├── speed_test.py │ └── speed_test.cpp ├── AnchorHashQre.hpp ├── LICENSE ├── AnchorHashQre.cpp └── README.md /misc/crc32c_sse42_u64.h: -------------------------------------------------------------------------------- 1 | // Harware based crc calculation 2 | 3 | static inline uint32_t crc32c_sse42_u64(uint64_t key, uint64_t seed) { 4 | __asm__ volatile( 5 | "crc32q %[key], %[seed];" 6 | : [seed] "+r" (seed) 7 | : [key] "rm" (key)); 8 | return seed; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /mem/speed/Makefile: -------------------------------------------------------------------------------- 1 | all: speed_test 2 | 3 | speed_test: speed_test.cpp ../AnchorHash.cpp ../AnchorHash.hpp 4 | g++ -Wall -o2 speed_test.cpp ../AnchorHash.cpp -o speed_test -std=c++11 5 | 6 | # Cleaning old files before new make 7 | clean: 8 | rm -f *.o 9 | rm -f *.txt 10 | rm -f speed_test 11 | -------------------------------------------------------------------------------- /tests/balance/Makefile: -------------------------------------------------------------------------------- 1 | all: balance 2 | 3 | balance: balance.cpp ../../AnchorHashQre.cpp ../../AnchorHashQre.hpp 4 | g++ -Wall -o2 balance.cpp ../../AnchorHashQre.cpp -o balance -std=c++11 5 | 6 | # Cleaning old files before new make 7 | clean: 8 | rm -f *.o 9 | rm -f *.txt 10 | rm -f balance 11 | -------------------------------------------------------------------------------- /tests/speed/Makefile: -------------------------------------------------------------------------------- 1 | all: speed_test 2 | 3 | speed_test: speed_test.cpp ../../AnchorHashQre.cpp ../../AnchorHashQre.hpp 
4 | g++ -Wall -o2 speed_test.cpp ../../AnchorHashQre.cpp -o speed_test -std=c++11 5 | 6 | # Cleaning old files before new make 7 | clean: 8 | rm -f *.o 9 | rm -f *.txt 10 | rm -f speed_test 11 | -------------------------------------------------------------------------------- /mem/AnchorHash.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /** Class declaration */ 6 | class AnchorHash { 7 | 8 | private: 9 | 10 | // Anchor 11 | uint32_t *A; 12 | 13 | // "Map diagonal" 14 | uint32_t *K; 15 | 16 | // Size of the anchor 17 | uint32_t M; 18 | 19 | // Size of the working 20 | uint32_t N; 21 | 22 | // Removed buckets 23 | std::stack r; 24 | 25 | 26 | public: 27 | 28 | AnchorHash (uint32_t, uint32_t); 29 | 30 | ~AnchorHash(); 31 | 32 | uint32_t ComputeBucket(uint64_t, uint64_t); 33 | 34 | uint32_t UpdateRemoval(uint32_t); 35 | 36 | uint32_t UpdateNewBucket(); 37 | 38 | private: 39 | uint32_t ComputeTranslation(uint32_t, uint32_t); 40 | 41 | 42 | }; 43 | -------------------------------------------------------------------------------- /AnchorHashQre.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /** Class declaration */ 6 | class AnchorHashQre { 7 | 8 | private: 9 | 10 | // Anchor 11 | uint32_t *A; 12 | 13 | // Working 14 | uint32_t *W; 15 | 16 | // Last appearance 17 | uint32_t *L; 18 | 19 | // "Map diagonal" 20 | uint32_t *K; 21 | 22 | // Size of the anchor 23 | uint32_t M; 24 | 25 | // Size of the working 26 | uint32_t N; 27 | 28 | // Removed buckets 29 | std::stack r; 30 | 31 | // Translation oracle 32 | uint32_t ComputeTranslation(uint32_t i , uint32_t j); 33 | 34 | public: 35 | 36 | AnchorHashQre (uint32_t, uint32_t); 37 | 38 | ~AnchorHashQre(); 39 | 40 | uint32_t ComputeBucket(uint64_t, uint64_t); 41 | 42 | uint32_t UpdateRemoval(uint32_t); 43 | 44 | uint32_t UpdateNewBucket(); 45 | 46 | }; 47 | 
-------------------------------------------------------------------------------- /tests/balance/balance_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import numpy as np 5 | from subprocess import call 6 | 7 | ''' 8 | AnchoHash: 9 | if (argc < 6) { 10 | cout << "Usage Error:\n"; 11 | cout << "argv[1]: int AcnhorSet\n"; 12 | cout << "argv[2]: int WorkingSet\n"; 13 | cout << "argv[3]: int NumRemovals\n"; 14 | cout << "argv[4]: int NumKeys\n"; 15 | cout << "argv[5]: int ResFileName\n"; 16 | return 1; 17 | } 18 | 19 | ''' 20 | 21 | try: 22 | 23 | dir_name = "./" 24 | test = os.listdir(dir_name) 25 | 26 | for item in test: 27 | if item.endswith(".txt"): 28 | os.remove(os.path.join(dir_name, item)) 29 | 30 | 31 | except OSError: 32 | pass 33 | 34 | numkeys = [10**i for i in range(3,10)] 35 | numremovals = 100 36 | 37 | for i in numkeys: 38 | 39 | call(["./balance", str(1000), str(1000), str(numremovals), str(i), 'anchor_balance.txt']) 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 anchorhash 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/speed/speed_test_plots.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import math 5 | 6 | def read_anchor_res(fn): 7 | 8 | with open(fn) as fp: 9 | anchor_lines = fp.readlines() 10 | 11 | achor_res = [x.split() for x in anchor_lines] 12 | 13 | achor_num_buckets = [float(i[4]) for i in achor_res] 14 | achor_rates = [float(i[8]) for i in achor_res] 15 | 16 | return (achor_num_buckets, achor_rates) 17 | 18 | buckets1, rates1 = read_anchor_res("anchor_0.txt") 19 | buckets2, rates2 = read_anchor_res("anchor_10.txt") 20 | buckets3, rates3 = read_anchor_res("anchor_100.txt") 21 | buckets4, rates4 = read_anchor_res("anchor_1000.txt") 22 | buckets5, rates5 = read_anchor_res("anchor_10000.txt") 23 | 24 | plt.semilogx(buckets1, rates1, label= 'AnchorHash 0%') 25 | plt.semilogx(buckets2, rates2, label= 'AnchorHash 10%') 26 | plt.semilogx(buckets3, rates3, label= 'AnchorHash 100%') 27 | plt.semilogx(buckets4, rates4, label= 'AnchorHash 1000%') 28 | plt.semilogx(buckets5, rates5, label= 'AnchorHash 10000%') 29 | 30 | plt.xlabel('Number of buckets') 31 | plt.ylabel('Rate [Mkps]') 32 | plt.legend() 33 | plt.savefig("speed.pdf") 34 | plt.show() 35 | -------------------------------------------------------------------------------- /mem/speed/speed_test_plots.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import math 5 | 6 | def read_anchor_res(fn): 7 | 8 | with open(fn) as fp: 9 | anchor_lines = fp.readlines() 10 | 11 | achor_res = [x.split() for x in anchor_lines] 12 | 13 | achor_num_buckets = [float(i[4])-float(i[6]) for i in achor_res] 14 | achor_rates = [float(i[8]) for i in achor_res] 15 | 16 | return (achor_num_buckets, achor_rates) 17 | 18 | buckets1, rates1 = read_anchor_res("anchor_0.txt") 19 | buckets2, rates2 = read_anchor_res("anchor_10.txt") 20 | buckets3, rates3 = read_anchor_res("anchor_100.txt") 21 | buckets4, rates4 = read_anchor_res("anchor_1000.txt") 22 | buckets5, rates5 = read_anchor_res("anchor_10000.txt") 23 | 24 | plt.semilogx(buckets1, rates1, label= 'AnchorHash 0%') 25 | plt.semilogx(buckets2, rates2, label= 'AnchorHash 10%') 26 | plt.semilogx(buckets3, rates3, label= 'AnchorHash 100%') 27 | plt.semilogx(buckets4, rates4, label= 'AnchorHash 1000%') 28 | plt.semilogx(buckets5, rates5, label= 'AnchorHash 10000%') 29 | 30 | plt.xlabel('Number of buckets') 31 | plt.ylabel('Rate [Mkps]') 32 | plt.legend() 33 | plt.grid(which='major', axis='both', linestyle=':', linewidth=0.5) 34 | plt.savefig("speed.pdf") 35 | plt.show() 36 | -------------------------------------------------------------------------------- /tests/speed/speed_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import numpy as np 5 | from subprocess import call 6 | 7 | ''' 8 | AnchoHash: 9 | if (argc < 6) { 10 | cout << "Usage Error:\n"; 11 | cout << "argv[1]: int AcnhorSet\n"; 12 | cout << "argv[2]: int WorkingSet\n"; 13 | cout << "argv[3]: int NumRemovals\n"; 14 | cout << "argv[4]: int NumKeys\n"; 15 | cout << "argv[5]: int ResFileName\n"; 16 | return 1; 17 | } 18 | 19 | ''' 20 | 21 | try: 22 | 23 | dir_name = "./" 24 | test = 
os.listdir(dir_name) 25 | 26 | for item in test: 27 | if item.endswith(".txt"): 28 | os.remove(os.path.join(dir_name, item)) 29 | 30 | 31 | except OSError: 32 | pass 33 | 34 | up = 6 35 | 36 | workingset = [np.ceil(1.0*(10**i)) for i in range(1,up)] 37 | 38 | acnhorset_0 = [np.ceil(1.0*(10**i)) for i in range(1,up)] 39 | acnhorset_10 = [np.ceil(1.1*(10**i)) for i in range(1,up)] 40 | acnhorset_100 = [np.ceil(2.0*(10**i)) for i in range(1,up)] 41 | acnhorset_1000 = [np.ceil(10.0*(10**i)) for i in range(1,up)] 42 | acnhorset_10000 = [np.ceil(100.0*(10**i)) for i in range(1,up)] 43 | 44 | 45 | 46 | numkeys = 100000000 47 | numremovals = 0 48 | 49 | for i in range(len(workingset)): 50 | 51 | call(["./speed_test", str(acnhorset_0[i]), str(acnhorset_0[i]), str(acnhorset_0[i]-workingset[i]), str(numkeys), 'anchor_0.txt']) 52 | call(["./speed_test", str(acnhorset_10[i]), str(acnhorset_10[i]), str(acnhorset_10[i]-workingset[i]), str(numkeys), 'anchor_10.txt']) 53 | call(["./speed_test", str(acnhorset_100[i]), str(acnhorset_100[i]), str(acnhorset_100[i]-workingset[i]), str(numkeys), 'anchor_100.txt']) 54 | call(["./speed_test", str(acnhorset_1000[i]), str(acnhorset_1000[i]), str(acnhorset_1000[i]-workingset[i]), str(numkeys), 'anchor_1000.txt']) 55 | call(["./speed_test", str(acnhorset_10000[i]), str(acnhorset_10000[i]), str(acnhorset_10000[i]-workingset[i]), str(numkeys), 'anchor_10000.txt']) 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /mem/speed/speed_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import numpy as np 5 | from subprocess import call 6 | 7 | ''' 8 | AnchoHash: 9 | if (argc < 6) { 10 | cout << "Usage Error:\n"; 11 | cout << "argv[1]: int AcnhorSet\n"; 12 | cout << "argv[2]: int WorkingSet\n"; 13 | cout << "argv[3]: int NumRemovals\n"; 14 | cout << "argv[4]: int NumKeys\n"; 15 | cout << 
"argv[5]: int ResFileName\n"; 16 | return 1; 17 | } 18 | 19 | ''' 20 | 21 | try: 22 | 23 | dir_name = "./" 24 | test = os.listdir(dir_name) 25 | 26 | for item in test: 27 | if item.endswith(".txt"): 28 | os.remove(os.path.join(dir_name, item)) 29 | 30 | 31 | except OSError: 32 | pass 33 | 34 | up = 6 35 | 36 | workingset = [np.ceil(1.0*(10**i)) for i in range(1,up)] 37 | 38 | acnhorset_0 = [np.ceil(1.0*(10**i)) for i in range(1,up)] 39 | acnhorset_10 = [np.ceil(1.1*(10**i)) for i in range(1,up)] 40 | acnhorset_100 = [np.ceil(2.0*(10**i)) for i in range(1,up)] 41 | acnhorset_1000 = [np.ceil(10.0*(10**i)) for i in range(1,up)] 42 | acnhorset_10000 = [np.ceil(100.0*(10**i)) for i in range(1,up)] 43 | 44 | 45 | 46 | numkeys = 100000000 47 | numremovals = 0 48 | 49 | for i in range(len(workingset)): 50 | 51 | call(["./speed_test", str(acnhorset_0[i]), str(acnhorset_0[i]), str(acnhorset_0[i]-workingset[i]), str(numkeys), 'anchor_0.txt']) 52 | call(["./speed_test", str(acnhorset_10[i]), str(acnhorset_10[i]), str(acnhorset_10[i]-workingset[i]), str(numkeys), 'anchor_10.txt']) 53 | call(["./speed_test", str(acnhorset_100[i]), str(acnhorset_100[i]), str(acnhorset_100[i]-workingset[i]), str(numkeys), 'anchor_100.txt']) 54 | call(["./speed_test", str(acnhorset_1000[i]), str(acnhorset_1000[i]), str(acnhorset_1000[i]-workingset[i]), str(numkeys), 'anchor_1000.txt']) 55 | call(["./speed_test", str(acnhorset_10000[i]), str(acnhorset_10000[i]), str(acnhorset_10000[i]-workingset[i]), str(numkeys), 'anchor_10000.txt']) 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /tests/speed/speed_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../../AnchorHashQre.hpp" 5 | 6 | using namespace std; 7 | 8 | int main(int argc, char *argv[]) { 9 | 10 | if (argc < 6) { 11 | cout << "Usage Error:\n"; 12 | cout << "argv[1]: int AcnhorSet\n"; 
13 | cout << "argv[2]: int WorkingSet\n"; 14 | cout << "argv[3]: int NumRemovals\n"; 15 | cout << "argv[4]: int NumKeys\n"; 16 | cout << "argv[5]: int ResFileName\n"; 17 | return 1; 18 | } 19 | 20 | uint32_t anchor_set = atoi(argv[1]); 21 | uint32_t working_set = atoi(argv[2]); 22 | uint32_t num_removals = atoi(argv[3]); 23 | uint32_t num_keys = atoi(argv[4]); 24 | 25 | string filename = argv[5]; 26 | 27 | srand(time(NULL)); 28 | 29 | AnchorHashQre anchorhashqre(anchor_set, working_set); 30 | 31 | // random removals 32 | uint32_t* bucket_status = new uint32_t [anchor_set](); 33 | 34 | for (uint32_t i=0;i= w; --i) { 22 | A[i] = i; 23 | r.push(i); 24 | } 25 | 26 | // Set initial set sizes 27 | M = a; 28 | N = w; 29 | 30 | } 31 | 32 | /** Destructor */ 33 | AnchorHash::~AnchorHash () { 34 | 35 | delete [] A; 36 | delete [] K; 37 | 38 | } 39 | 40 | inline uint32_t AnchorHash::ComputeTranslation(uint32_t h, uint32_t view_size) { 41 | while (A[h] >= view_size) { 42 | h = K[h]; 43 | } 44 | return h; 45 | } 46 | 47 | uint32_t AnchorHash::ComputeBucket(uint64_t key1 , uint64_t key2) { 48 | 49 | // First hash is uniform on the anchor set 50 | uint32_t bs = crc32c_sse42_u64(key1, key2); 51 | uint32_t b = bs % M; 52 | 53 | // Loop until hitting a working bucket 54 | while (A[b] != 0) { 55 | bs = crc32c_sse42_u64(key1 - bs, key2 + bs); 56 | uint32_t h = bs % A[b]; 57 | 58 | // update h to reflect b's view 59 | b = ComputeTranslation(h, A[b]); 60 | } 61 | 62 | return b; 63 | 64 | } 65 | 66 | uint32_t AnchorHash::UpdateRemoval(uint32_t b) { 67 | 68 | // update reserved stack 69 | r.push(b); 70 | 71 | // find replacement for b at slot N-1 72 | uint32_t h = ComputeTranslation(N-1, N); 73 | 74 | // Update "replaced by" map 75 | K[b] = h; 76 | 77 | // update live set size 78 | N--; 79 | 80 | // Update removal 81 | A[b] = N; 82 | 83 | return 0; 84 | 85 | } 86 | 87 | uint32_t AnchorHash::UpdateNewBucket() { 88 | 89 | // Who was removed last? 
90 | uint32_t b = r.top(); 91 | r.pop(); 92 | 93 | // update live set size 94 | N++; 95 | 96 | // Ressurect 97 | A[b] = 0; 98 | 99 | // Restore in "replaced by" map 100 | K[b] = b; 101 | 102 | return b; 103 | 104 | } 105 | -------------------------------------------------------------------------------- /mem/README.md: -------------------------------------------------------------------------------- 1 | # AnchorHash - using less memory 2 | 3 | This is a version that uses only half the memory for storing state, compared to the version in the [paper](https://doi.org/10.1109/TNET.2020.3039547). It makes exactly the same hashing decisions and has exactly the same key lookup speed. The only difference is that each bucket removal takes slightly longer (on the order of a key lookup operation, versus O(1)). Bucket addition time is unchanged. 4 | 5 | ## Code 6 | This code is a direct replacement for `AnchorHashQre.cpp` and `AnchorHashQre.hpp`. 7 | 8 | ### Try it 9 | Go into the `speed` directory, run `make`, run `speed_test.py`, and plot the results with `speed_test_plots.py`. 10 | 11 | # Algorithm 12 | 13 | ```hs 14 | INITWRAPPER(a,S) // a anchor capacity, S list of resources, a>=|S| 15 | M←∅ 16 | for i∈(0,1,...,|S|−1) do 17 | M←M∪{(i,S[i])} // mapping from bucket to resource 18 | INITANCHOR(a,|S|) 19 | 20 | GETRESOURCE(k) // compute resource for key k 21 | b←GETBUCKET(hash(k)) // convert key to int (e.g., rand(seed=k)) and call anchorHash 22 | ξ←M(b) 23 | return ξ 24 | 25 | ADDRESOURCE(ξ) 26 | b←ADDBUCKET( ) 27 | M←M∪{(b,ξ)} 28 | 29 | REMOVERESOURCE(ξ) 30 | b←INV_M(ξ) 31 | M←M\{(b,ξ)} 32 | REMOVEBUCKET(b) 33 | ``` 34 | 35 | ```hs 36 | INITANCHOR(a,w) // a anchor size (capacity), w number of workers (size) 37 | A[b]←0 for b=0,1,...,a−1 // W_b←0 for b∈A 38 | R←∅ // empty stack 39 | N←w // number of initially working buckets 40 | K[b]←b for b=0,1,...,a−1 41 | for b=a−1 downto w do // remove initially unused buckets 42 | R.push(b) 43 | A[b]←b 44 | 45 | BUCKETATVIEW(b,v) // find who replaced b at view size v 46 | while A[b]>=v 
do // b is removed for view size v 47 | b←K(b) // search for W_v[b] 48 | return b 49 | 50 | GETBUCKET(k) 51 | b←hash(k) mod a // can use k if calling through wrapper as it is already hash(key) 52 | while A[b]>0 do // b is removed 53 | h←h_b(k) // h←hash(b,k) mod A[b] OR k←rand(seed=k), h←k mod A[b] 54 | b←BUCKETATVIEW(h,A[b]) 55 | return b 56 | 57 | ADDBUCKET( ) 58 | b←R.pop() 59 | N←N+ 1 60 | A[b]←0 // W←W ∪ {b}, delete W_b 61 | K[b]←b 62 | return b 63 | 64 | REMOVEBUCKET(b) 65 | R.push(b) 66 | h←BUCKETATVIEW(N-1,N) 67 | K[b]←h 68 | N←N−1 69 | A[b]←N // W_b←W\b, A[b]←|W_b| 70 | ``` 71 | -------------------------------------------------------------------------------- /mem/speed/speed_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../AnchorHash.hpp" 5 | 6 | using namespace std; 7 | 8 | int main(int argc, char *argv[]) { 9 | 10 | if (argc < 6) { 11 | cout << "Usage Error:\n"; 12 | cout << "argv[1]: int AcnhorSet\n"; 13 | cout << "argv[2]: int WorkingSet\n"; 14 | cout << "argv[3]: int NumRemovals\n"; 15 | cout << "argv[4]: int NumKeys\n"; 16 | cout << "argv[5]: int ResFileName\n"; 17 | return 1; 18 | } 19 | 20 | uint32_t anchor_set = atoi(argv[1]); 21 | uint32_t working_set = atoi(argv[2]); 22 | uint32_t num_removals = atoi(argv[3]); 23 | uint32_t num_keys = atoi(argv[4]); 24 | 25 | string filename = argv[5]; 26 | 27 | srand(time(NULL)); 28 | 29 | AnchorHash anchorhash(anchor_set, working_set); 30 | 31 | // random removals 32 | uint32_t* bucket_status = new uint32_t [anchor_set](); 33 | 34 | // test keys 35 | uint32_t* keys = new uint32_t [2*num_keys](); 36 | 37 | for (uint32_t i=0;i 2 | #include 3 | 4 | #include "../../AnchorHashQre.hpp" 5 | 6 | using namespace std; 7 | 8 | int main(int argc, char *argv[]) { 9 | 10 | if (argc < 6) { 11 | cout << "Usage Error:\n"; 12 | cout << "argv[1]: int AcnhorSet\n"; 13 | cout << "argv[2]: int WorkingSet\n"; 14 | cout << "argv[3]: int 
NumRemovals\n"; 15 | cout << "argv[4]: int NumKeys\n"; 16 | cout << "argv[5]: int ResFileName\n"; 17 | return 1; 18 | } 19 | 20 | uint32_t anchor_set = atoi(argv[1]); 21 | uint32_t working_set = atoi(argv[2]); 22 | uint32_t num_removals = atoi(argv[3]); 23 | uint32_t num_keys = atoi(argv[4]); 24 | 25 | string filename = argv[5]; 26 | 27 | srand(time(NULL)); 28 | 29 | AnchorHashQre anchorhashqre(anchor_set, working_set); 30 | 31 | // for lb 32 | uint32_t* anchor_ansorbed_keys = new uint32_t [anchor_set](); 33 | 34 | // random removals 35 | uint32_t* bucket_status = new uint32_t [anchor_set](); 36 | 37 | for (uint32_t i=0;i lb) { 69 | lb = anchor_ansorbed_keys[i]/mean; 70 | } 71 | 72 | } 73 | 74 | else { 75 | if (anchor_ansorbed_keys[i] > 0) { 76 | printf("Anchor: crazy bug!\n"); 77 | } 78 | } 79 | 80 | } 81 | 82 | // print lb res 83 | printf("Anchor: LB is %lf\n", lb); 84 | 85 | results_file << "AnchorHashQre: " << "Balance: " << lb << "\n"; 86 | //////////////////////////////////////////////////////////////////// 87 | 88 | results_file.close(); 89 | 90 | delete [] bucket_status; 91 | delete [] anchor_ansorbed_keys; 92 | 93 | return 0; 94 | 95 | } 96 | -------------------------------------------------------------------------------- /AnchorHashQre.cpp: -------------------------------------------------------------------------------- 1 | #include "AnchorHashQre.hpp" 2 | #include "./misc/crc32c_sse42_u64.h" 3 | 4 | using namespace std; 5 | 6 | /** Constructor */ 7 | AnchorHashQre::AnchorHashQre (uint32_t a, uint32_t w) { 8 | 9 | // Allocate the anchor array 10 | A = new uint32_t [a](); 11 | 12 | // Allocate the working array 13 | W = new uint32_t [a](); 14 | 15 | // Allocate the last apperance array 16 | L = new uint32_t [a](); 17 | 18 | // Allocate the "map diagonal" 19 | K = new uint32_t [a](); 20 | 21 | // Initialize "swap" arrays 22 | for(uint32_t i = 0; i < a; ++i) { 23 | L[i] = i; 24 | W[i] = i; 25 | K[i] = i; 26 | } 27 | 28 | // We treat initial removals as 
ordered removals 29 | for(uint32_t i = a - 1; i >= w; --i) { 30 | A[i] = i; 31 | r.push(i); 32 | } 33 | 34 | // Set initial set sizes 35 | M = a; 36 | N = w; 37 | 38 | } 39 | 40 | /** Destructor */ 41 | AnchorHashQre::~AnchorHashQre () { 42 | 43 | delete [] A; 44 | delete [] W; 45 | delete [] L; 46 | delete [] K; 47 | 48 | } 49 | 50 | uint32_t AnchorHashQre::ComputeTranslation(uint32_t i , uint32_t j) { 51 | 52 | if (i == j) return K[i]; 53 | 54 | uint32_t b = j; 55 | 56 | while (A[i] <= A[b]) { 57 | b = K[b]; 58 | } 59 | 60 | return b; 61 | 62 | } 63 | 64 | uint32_t AnchorHashQre::ComputeBucket(uint64_t key1 , uint64_t key2) { 65 | 66 | // First hash is uniform on the anchor set 67 | uint32_t bs = crc32c_sse42_u64(key1, key2); 68 | uint32_t b = bs % M; 69 | 70 | // Loop until hitting a working bucket 71 | while (A[b] != 0) { 72 | 73 | // New candidate (bs - for better balance - avoid patterns) 74 | bs = crc32c_sse42_u64(key1 - bs, key2 + bs); 75 | uint32_t h = bs % A[b]; 76 | 77 | // h is working or observed by bucket 78 | if ((A[h] == 0) || (A[h] < A[b])) { 79 | b = h; 80 | } 81 | 82 | // need translation for (bucket, h) 83 | else { 84 | b = ComputeTranslation(b,h); 85 | } 86 | 87 | } 88 | 89 | return b; 90 | 91 | } 92 | 93 | uint32_t AnchorHashQre::UpdateRemoval(uint32_t b) { 94 | 95 | // update reserved stack 96 | r.push(b); 97 | 98 | // update live set size 99 | N--; 100 | 101 | // who is the replacement 102 | W[L[b]] = W[N]; 103 | L[W[N]] = L[b]; 104 | 105 | // Update map diagonal 106 | K[b] = W[N]; 107 | 108 | // Update removal 109 | A[b] = N; 110 | 111 | return 0; 112 | 113 | } 114 | 115 | uint32_t AnchorHashQre::UpdateNewBucket() { 116 | 117 | // Who was removed last? 
118 | uint32_t b = r.top(); 119 | r.pop(); 120 | 121 | // Restore in observed_set 122 | L[W[N]] = N; 123 | W[L[b]] = b; 124 | 125 | // update live set size 126 | N++; 127 | 128 | // Ressurect 129 | A[b] = 0; 130 | 131 | // Restore in diagonal 132 | K[b] = b; 133 | 134 | return b; 135 | 136 | } 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AnchorHash - A Scalable Consistent Hash 2 | 3 | AnchorHash is described in our paper [AnchorHash: A Scalable Consistent Hash](https://doi.org/10.1109/TNET.2020.3039547). 4 | 5 | Consistent hashing (CH) is a central building block in many networking applications, from datacenter load-balancing to distributed storage. Unfortunately, state-of-the-art CH solutions cannot ensure full consistency under arbitrary changes and/or cannot scale while maintaining reasonable memory footprints and update times. We present AnchorHash, a scalable and fully-consistent hashing algorithm. AnchorHash achieves high key lookup rates, a low memory footprint, and low update times. We formally establish its strong theoretical guarantees, and present advanced implementations with a memory footprint of only a few bytes per resource. Moreover, extensive evaluations indicate that it outperforms state-of-the-art algorithms, and that it can scale on a single core to 100 million resources while still achieving a key lookup rate of more than 15 million keys per second. 6 | 7 | ## Code 8 | This repository contains the code used to create the figures for the evaluation section of the paper. See also the [memory optimized variant](#memory-optimized-c). 9 | 10 | ### Try it 11 | Go into the `tests/speed` or `tests/balance` directory, run `make`, and run the Python test script (`speed_test.py` or `balance_test.py`). For the speed test, plot the results with `speed_test_plots.py`. 12 | 13 | ### System Requirements 14 | This implementation makes use of the `CRC32` CPU instruction of the *Streaming SIMD Extensions 4.2 (SSE4.2)*. 
To use a different hash function, replace the implementation in `misc/crc32c_sse42_u64.h`. 15 | 16 | ## Other implementations 17 | 18 | ### Memory optimized C++ 19 | 20 | - The `mem` directory contains a [variant](./mem/README.md) of the code that uses only half the memory. 21 | 22 | ### Go 23 | 24 | - https://github.com/anchorhash/go-anchorhash 25 | - https://github.com/OmerBenHayun/go-anchorhash 26 | - https://github.com/wdamron/go-anchorhash 27 | 28 | ### Python 29 | - https://github.com/anchorhash/py-anchorhash 30 | 31 | # Notes 32 | 33 | * In a distributed system, all nodes must maintain consensus on the ordering of changes to the working set and on the seed for key hashing (digest) 34 | 35 | * Hash functions must be independent for different values of k and b 36 | 37 | 38 | # Algorithm 39 | 40 | ```hs 41 | INITWRAPPER(a,S) // a anchor capacity, S list of resources, a>=|S| 42 | M←∅ 43 | for i∈(0,1,...,|S|−1) do 44 | M←M∪{(i,S[i])} // mapping from bucket to resource 45 | INITANCHOR(a,|S|) 46 | 47 | GETRESOURCE(k) // compute resource for key k 48 | b←GETBUCKET(hash(k)) // convert key to int (e.g., rand(seed=k)) and call anchorHash 49 | ξ←M(b) 50 | return ξ 51 | 52 | ADDRESOURCE(ξ) 53 | b←ADDBUCKET( ) 54 | M←M∪{(b,ξ)} 55 | 56 | REMOVERESOURCE(ξ) 57 | b←INV_M(ξ) 58 | M←M\{(b,ξ)} 59 | REMOVEBUCKET(b) 60 | ``` 61 | 62 | ```hs 63 | INITANCHOR(a,w) // a anchor size (capacity), w number of workers (size) 64 | A[b]←0 for b=0,1,...,a−1 // W_b←0 for b∈A 65 | R←∅ // empty stack 66 | N←w // number of initially working buckets 67 | K[b]←L[b]←W[b]←b for b=0,1,...,a−1 68 | for b=a−1 downto w do // remove initially unused buckets 69 | R.push(b) 70 | A[b]←b 71 | 72 | GETBUCKET(k) 73 | b←hash(k) mod a // can use k if calling through wrapper as it is already hash(key) 74 | while A[b]>0 do // b is removed 75 | h←h_b(k) // h←hash(b,k) mod A[b] OR k←rand(seed=k), h←k mod A[b] 76 | while A[h]≥A[b] do // W_b[h] != h, b removed prior to h 77 | h←K[h] // search for W_b[h] 78 | b←h // b←H_W_b(k) 79 | return b 80 | 81 | ADDBUCKET( ) 82 | 
b←R.pop() 83 | A[b]←0 // W←W ∪ {b}, delete W_b 84 | L[W[N]]←N 85 | W[L[b]]←K[b]←b 86 | N←N+ 1 87 | return b 88 | 89 | REMOVEBUCKET(b) 90 | R.push(b) 91 | N←N−1 92 | A[b]←N // W_b←W\b, A[b]←|W_b| 93 | W[L[b]]←K[b]←W[N] 94 | L[W[N]]←L[b] 95 | ``` 96 | --------------------------------------------------------------------------------