├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── backshift_hashmap.cc ├── backshift_hashmap.h ├── bitmap_hashmap.cc ├── bitmap_hashmap.h ├── hamming.cc ├── hamming.h ├── hashmap.h ├── main.cc ├── monitoring.cc ├── monitoring.h ├── murmurhash3.cc ├── murmurhash3.h ├── plot.py ├── probing_hashmap.cc ├── probing_hashmap.h ├── shadow_hashmap.cc ├── shadow_hashmap.h ├── testcase.cc ├── testcase.h ├── tombstone_hashmap.cc └── tombstone_hashmap.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | 15 | *~ 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Emmanuel Goossaert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -c -Wall -g 3 | LDFLAGS=-g 4 | SOURCES=bitmap_hashmap.cc shadow_hashmap.cc probing_hashmap.cc tombstone_hashmap.cc backshift_hashmap.cc testcase.cc monitoring.cc murmurhash3.cc hamming.cc 5 | SOURCES_MAIN=main.cc 6 | OBJECTS=$(SOURCES:.cc=.o) 7 | OBJECTS_MAIN=$(SOURCES_MAIN:.cc=.o) 8 | EXECUTABLE=hashmap 9 | 10 | all: $(SOURCES) $(EXECUTABLE) 11 | 12 | $(EXECUTABLE): $(OBJECTS) $(OBJECTS_MAIN) 13 | $(CC) $(LDFLAGS) $(OBJECTS) $(OBJECTS_MAIN) -o $@ 14 | 15 | .cc.o: 16 | $(CC) $(CFLAGS) $< -o $@ 17 | 18 | clean: 19 | rm -f *~ *.o $(EXECUTABLE) 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hashmap 2 | ======= 3 | 4 | Implementation of open addressing hash table algorithms in C++. 5 | 6 | Most of the code is under development, but the main() function in main.cc 7 | should provide enough information as to how to use the algorithms. 
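For quick reference, the snippet below is a minimal usage sketch distilled from the calls made in main.cc; it is not a file in the repository, and the bucket count of 10000 simply matches the default used there:

```cpp
#include <iostream>
#include <string>

#include "backshift_hashmap.h"

int main() {
  hashmap::BackshiftHashMap hm(10000);  // 10000 buckets
  hm.Open();                            // allocates the bucket array

  hm.Put("key", "value");               // returns 0 on success, 1 if the table is full

  std::string value_out;
  if (hm.Get("key", &value_out) == 0) {
    std::cout << value_out << std::endl;  // prints "value"
  }

  hm.Remove("key");                     // returns 0 on success, 1 if the key is absent
  return 0;
}
```

The other tables (ProbingHashMap, TombstoneHashMap, BitmapHashMap, ShadowHashMap) expose the same Open/Put/Get/Remove interface declared in hashmap.h.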
8 | -------------------------------------------------------------------------------- /backshift_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "backshift_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | int BackshiftHashMap::Open() { 6 | buckets_ = new Bucket[num_buckets_]; 7 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 8 | monitoring_ = new hashmap::Monitoring(num_buckets_, num_buckets_, static_cast(this)); 9 | num_buckets_used_ = 0; 10 | return 0; 11 | } 12 | 13 | int BackshiftHashMap::Close() { 14 | if (buckets_ != NULL) { 15 | for (uint32_t i = 0; i < num_buckets_; i++) { 16 | if (buckets_[i].entry != NULL) { 17 | delete[] buckets_[i].entry->data; 18 | delete buckets_[i].entry; 19 | } 20 | } 21 | delete[] buckets_; 22 | } 23 | 24 | if (monitoring_ != NULL) { 25 | delete monitoring_; 26 | } 27 | return 0; 28 | } 29 | 30 | 31 | 32 | int BackshiftHashMap::Get(const std::string& key, std::string* value) { 33 | uint64_t hash = hash_function(key); 34 | uint64_t index_init = hash % num_buckets_; 35 | uint64_t probe_distance = 0; 36 | bool found = false; 37 | uint32_t i; 38 | for (i = 0; i < probing_max_; i++) { 39 | uint64_t index_current = (index_init + i) % num_buckets_; 40 | FillDistanceToInitIndex(index_current, &probe_distance); 41 | if ( buckets_[index_current].entry == NULL 42 | || i > probe_distance) { 43 | break; 44 | } 45 | 46 | if ( key.size() == buckets_[index_current].entry->size_key 47 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 48 | *value = std::string(buckets_[index_current].entry->data + key.size(), 49 | buckets_[index_current].entry->size_value); 50 | found = true; 51 | break; 52 | } 53 | } 54 | 55 | if (found) return 0; 56 | 57 | monitoring_->AddDMB(i); 58 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 59 | return 1; 60 | } 61 | 62 | 63 | 64 | 65 | int BackshiftHashMap::Put(const std::string& key, const std::string& value) { 66 | if (num_buckets_used_ == num_buckets_) { 67 | return 1; 68 | } 69 | num_buckets_used_ += 1; 70 | 71 | uint64_t hash = hash_function(key); 72 | uint64_t index_init = hash % num_buckets_; 73 | 74 | char *data = new char[key.size() + value.size()]; 75 | memcpy(data, key.c_str(), key.size()); 76 | memcpy(data + key.size(), value.c_str(), value.size()); 77 | 78 | BackshiftHashMap::Entry *entry = new BackshiftHashMap::Entry; 79 | entry->size_key = key.size(); 80 | entry->size_value = value.size(); 81 | entry->data = data; 82 | 83 | uint64_t index_current = index_init; 84 | uint64_t probe_distance = 0; 85 | uint64_t probe_current = 0; 86 | BackshiftHashMap::Entry *entry_temp = NULL; 87 | uint64_t hash_temp = 0; 88 | uint64_t i; 89 | int num_swaps = 0; 90 | 91 | for (i = 0; i < probing_max_; i++) { 92 | index_current = (index_init + i) % num_buckets_; 93 | if (buckets_[index_current].entry == NULL) { 94 | monitoring_->SetDIB(index_current, probe_current); 95 | buckets_[index_current].entry = entry; 96 | buckets_[index_current].hash = hash; 97 | break; 98 | } else { 99 | FillDistanceToInitIndex(index_current, &probe_distance); 100 | if (probe_current > probe_distance) { 101 | // Swapping the current bucket with the one to insert 102 | entry_temp = buckets_[index_current].entry; 103 | hash_temp = buckets_[index_current].hash; 104 | buckets_[index_current].entry = entry; 105 | buckets_[index_current].hash = hash; 106 | entry = entry_temp; 107 | hash = hash_temp; 108 | monitoring_->SetDIB(index_current, probe_current); 109 | 
probe_current = probe_distance; 110 | num_swaps += 1; 111 | } 112 | } 113 | probe_current++; 114 | } 115 | 116 | monitoring_->AddDFB(i); 117 | monitoring_->AddAlignedDFB(index_init, index_current); 118 | monitoring_->AddNumberOfSwaps(num_swaps); 119 | 120 | return 0; 121 | } 122 | 123 | 124 | int BackshiftHashMap::Exists(const std::string& key) { 125 | // TODO: implement 126 | return 0; 127 | } 128 | 129 | 130 | int BackshiftHashMap::Remove(const std::string& key) { 131 | uint64_t hash = hash_function(key); 132 | uint64_t index_init = hash % num_buckets_; 133 | bool found = false; 134 | uint64_t index_current = 0; 135 | uint64_t probe_distance = 0; 136 | 137 | for (uint64_t i = 0; i < num_buckets_; i++) { 138 | index_current = (index_init + i) % num_buckets_; 139 | FillDistanceToInitIndex(index_current, &probe_distance); 140 | if ( buckets_[index_current].entry == NULL 141 | || i > probe_distance) { 142 | break; 143 | } 144 | 145 | if ( key.size() == buckets_[index_current].entry->size_key 146 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 147 | found = true; 148 | break; 149 | } 150 | } 151 | 152 | if (found) { 153 | delete[] buckets_[index_current].entry->data; 154 | delete buckets_[index_current].entry; 155 | monitoring_->RemoveDIB(index_current); 156 | uint64_t i = 1; 157 | uint64_t index_previous = 0, index_swap = 0; 158 | for (i = 1; i < num_buckets_; i++) { 159 | index_previous = (index_current + i - 1) % num_buckets_; 160 | index_swap = (index_current + i) % num_buckets_; 161 | if (buckets_[index_swap].entry == NULL) { 162 | buckets_[index_previous].entry = NULL; 163 | monitoring_->RemoveDIB(index_previous); 164 | break; 165 | } 166 | uint64_t distance; 167 | if (FillDistanceToInitIndex(index_swap, &distance) != 0) { 168 | fprintf(stderr, "Error in FillDistanceToInitIndex()"); 169 | } 170 | if (distance == 0) { 171 | buckets_[index_previous].entry = NULL; 172 | monitoring_->RemoveDIB(index_previous); 173 | break; 174 | } 175 | buckets_[index_previous].entry = buckets_[index_swap].entry; 176 | buckets_[index_previous].hash = buckets_[index_swap].hash; 177 | monitoring_->SetDIB(index_previous, distance-1); 178 | } 179 | monitoring_->AddDSB(i); 180 | monitoring_->AddAlignedDSB(index_current, index_swap); 181 | num_buckets_used_ -= 1; 182 | return 0; 183 | } 184 | 185 | return 1; 186 | } 187 | 188 | 189 | 190 | int BackshiftHashMap::Resize() { 191 | // TODO: implement 192 | return 0; 193 | } 194 | 195 | 196 | // For debugging 197 | int BackshiftHashMap::CheckDensity() { 198 | return 0; 199 | } 200 | 201 | int BackshiftHashMap::BucketCounts() { 202 | return 0; 203 | } 204 | 205 | int BackshiftHashMap::Dump() { 206 | return 0; 207 | } 208 | 209 | 210 | int BackshiftHashMap::GetBucketState(int index) { 211 | //printf("GetBucketState %d\n", index); 212 | if (buckets_[index].entry == NULL) { 213 | return 0; 214 | } 215 | 216 | return 1; 217 | } 218 | 219 | int BackshiftHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 220 | if(buckets_[index_stored].entry == NULL) return -1; 221 | *index_init = buckets_[index_stored].hash % num_buckets_; 222 | return 0; 223 | } 224 | 225 | int BackshiftHashMap::FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance) { 226 | if(buckets_[index_stored].entry == NULL) return -1; 227 | uint64_t index_init = buckets_[index_stored].hash % num_buckets_; 228 | if (index_init <= index_stored) { 229 | *distance = index_stored - index_init; 230 | } else { 231 | *distance = index_stored + (num_buckets_ - 
index_init); 232 | } 233 | return 0; 234 | } 235 | 236 | 237 | void BackshiftHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 238 | metadata["name"] = "backshift"; 239 | char buffer[1024]; 240 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %" PRIu64 "}", num_buckets_, probing_max_); 241 | metadata["parameters_hashmap"] = buffer; 242 | sprintf(buffer, "nb%" PRIu64 "-pm%" PRIu64 "", num_buckets_, probing_max_); 243 | metadata["parameters_hashmap_string"] = buffer; 244 | } 245 | 246 | }; // end namespace hashmap 247 | -------------------------------------------------------------------------------- /backshift_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_BACKSHIFT 2 | #define HASHMAP_BACKSHIFT 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "murmurhash3.h" 16 | #include "hamming.h" 17 | #include "hashmap.h" 18 | 19 | #include "monitoring.h" 20 | 21 | namespace hashmap 22 | { 23 | 24 | 25 | 26 | class BackshiftHashMap: public HashMap 27 | { 28 | public: 29 | 30 | BackshiftHashMap(uint64_t size) { 31 | buckets_ = NULL; 32 | num_buckets_ = size; 33 | probing_max_ = size; 34 | } 35 | 36 | virtual ~BackshiftHashMap() { 37 | Close(); 38 | } 39 | 40 | int Open(); 41 | int Close(); 42 | 43 | struct Entry 44 | { 45 | uint32_t size_key; 46 | uint32_t size_value; 47 | char *data; 48 | }; 49 | 50 | struct Bucket 51 | { 52 | uint64_t hash; 53 | struct Entry* entry; 54 | }; 55 | 56 | int Get(const std::string& key, std::string* value); 57 | int Put(const std::string& key, const std::string& value); 58 | int Exists(const std::string& key); 59 | int Remove(const std::string& key); 60 | int Resize(); 61 | int Dump(); 62 | int CheckDensity(); 63 | int BucketCounts(); 64 | int GetBucketState(int index); 65 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 66 | int FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance); 67 | void GetMetadata(std::map< std::string, std::string >& metadata); 68 | uint64_t GetMinInitDistance(); 69 | uint64_t GetMaxInitDistance(); 70 | 71 | private: 72 | Bucket* buckets_; 73 | uint64_t num_buckets_; 74 | uint64_t num_buckets_used_; 75 | 76 | uint64_t hash_function(const std::string& key) { 77 | static char hash[16]; 78 | static uint64_t output; 79 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 80 | memcpy(&output, hash, 8); 81 | return output; 82 | } 83 | 84 | uint64_t probing_max_; 85 | }; 86 | 87 | 88 | }; // end namespace hashmap 89 | 90 | #endif // HASHMAP_BACKSHIFT 91 | -------------------------------------------------------------------------------- /bitmap_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "bitmap_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | 7 | int BitmapHashMap::Open() { 8 | buckets_ = new Bucket[num_buckets_ + size_neighborhood_]; 9 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_ + size_neighborhood_)); 10 | monitoring_ = new hashmap::Monitoring(num_buckets_, size_neighborhood_, static_cast(this)); 11 | return 0; 12 | } 13 | 14 | 15 | int BitmapHashMap::Close() { 16 | if (buckets_ != NULL) { 17 | for (uint32_t i = 0; i < num_buckets_; i++) { 18 | if (buckets_[i].entry != NULL) { 19 | delete[] buckets_[i].entry->data; 20 | delete buckets_[i].entry; 21 | } 22 | } 23 | delete[] buckets_; 24 | } 25 | 26 | if 
(monitoring_ != NULL) { 27 | delete monitoring_; 28 | } 29 | return 0; 30 | } 31 | 32 | 33 | 34 | 35 | int BitmapHashMap::Get(const std::string& key, std::string* value) { 36 | uint64_t hash = hash_function(key); 37 | uint64_t index_init = hash % num_buckets_; 38 | uint32_t mask = 1 << (size_neighborhood_-1); 39 | bool found = false; 40 | uint32_t i; 41 | uint32_t dmb = 0; 42 | for (i = 0; i < size_neighborhood_; i++) { 43 | if (buckets_[index_init].bitmap & mask) { 44 | dmb = i; 45 | uint64_t index_current = (index_init + i) % num_buckets_; 46 | if ( buckets_[index_current].entry != NULL 47 | && key.size() == buckets_[index_current].entry->size_key 48 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 49 | *value = std::string(buckets_[index_current].entry->data + key.size(), 50 | buckets_[index_current].entry->size_value); 51 | found = true; 52 | break; 53 | } 54 | } 55 | mask = mask >> 1; 56 | } 57 | 58 | if (found) return 0; 59 | 60 | monitoring_->AddDMB(dmb); 61 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 62 | return 1; 63 | } 64 | 65 | uint64_t BitmapHashMap::FindEmptyBucketAndDoSwaps(uint64_t index_init) { 66 | bool found = false; 67 | uint64_t index_current = index_init; 68 | for (uint32_t i = 0; i < size_probing_; i++) { 69 | index_current = (index_init + i) % num_buckets_; 70 | if (buckets_[index_current].entry == NULL) { 71 | found = true; 72 | monitoring_->AddDFB(i); 73 | monitoring_->AddAlignedDFB(index_init, index_current); 74 | break; 75 | } 76 | } 77 | 78 | if (!found) { 79 | return num_buckets_; 80 | } 81 | 82 | int num_swaps = 0; 83 | uint32_t index_base = 0; 84 | 85 | uint64_t index_empty = index_current; 86 | while ( (index_empty >= index_init && (index_empty - index_init) >= size_neighborhood_) 87 | || (index_empty < index_init && (index_empty + num_buckets_ - index_init) >= size_neighborhood_)) { 88 | uint64_t index_base_init = (num_buckets_ + index_empty - (size_neighborhood_ - 1)) % num_buckets_; 89 | // For each candidate base bucket 90 | bool found_swap = false; 91 | for (uint32_t i = 0; i < size_neighborhood_ - 1; i++) { 92 | // -1 because no need to test the bucket at index_empty 93 | // For each mask position 94 | index_base = (index_base_init + i) % num_buckets_; 95 | uint32_t mask = 1 << (size_neighborhood_-1); 96 | for (uint32_t j = 0; j < size_neighborhood_ - i - 1; j++) { 97 | if (buckets_[index_base].bitmap & mask) { 98 | // Found, so now we swap buckets and update the bitmap 99 | uint32_t index_candidate = (index_base + j) % num_buckets_; 100 | buckets_[index_empty].entry = buckets_[index_candidate].entry; 101 | buckets_[index_base].bitmap &= ~mask; 102 | uint32_t mask_new = 1 << i; 103 | buckets_[index_base].bitmap |= mask_new; 104 | 105 | // Move PSL monitoring 106 | uint64_t dib = monitoring_->GetDIB(index_candidate); 107 | monitoring_->RemoveDIB(index_candidate); 108 | monitoring_->SetDIB(index_empty, dib); 109 | 110 | // Prepare for next iteration 111 | index_empty = index_candidate; 112 | found_swap = true; 113 | 114 | num_swaps += 1; 115 | break; 116 | } 117 | mask = mask >> 1; 118 | } 119 | if (found_swap) break; 120 | } 121 | if (!found_swap) { 122 | // This is a dirty hack in case no reordering worked but we already had a 123 | // few swaps, we want to avoid having the same entry pointer in two 124 | // different buckets, which would make the program crash when freeing 125 | // the memory in Close(). 126 | // This should be changed whenever the Resize() method is implemented. 
127 | buckets_[index_empty].entry = NULL; 128 | return num_buckets_; 129 | } 130 | } 131 | 132 | // Monitoring 133 | uint64_t dib; 134 | if (index_empty >= index_init) { 135 | dib = index_empty - index_init; 136 | } else { 137 | dib = index_empty + num_buckets_ - index_init; 138 | } 139 | monitoring_->SetDIB(index_empty, dib); 140 | monitoring_->AddNumberOfSwaps(num_swaps); 141 | 142 | return index_empty; 143 | } 144 | 145 | int BitmapHashMap::Put(const std::string& key, const std::string& value) { 146 | uint64_t hash = hash_function(key); 147 | uint64_t index_init = hash % num_buckets_; 148 | uint64_t index_empty = FindEmptyBucketAndDoSwaps(index_init); 149 | 150 | if (index_empty == num_buckets_) { 151 | return 1; 152 | } 153 | 154 | char *data = new char[key.size() + value.size()]; 155 | memcpy(data, key.c_str(), key.size()); 156 | memcpy(data + key.size(), value.c_str(), value.size()); 157 | 158 | BitmapHashMap::Entry *entry = new BitmapHashMap::Entry; 159 | entry->size_key = key.size(); 160 | entry->size_value = value.size(); 161 | entry->data = data; 162 | buckets_[index_empty].entry = entry; 163 | 164 | uint32_t mask; 165 | if (index_empty >= index_init) { 166 | mask = 1 << (size_neighborhood_ - ((index_empty - index_init) + 1)); 167 | } else { 168 | mask = 1 << (size_neighborhood_ - ((index_empty + num_buckets_ - index_init) + 1)); 169 | } 170 | buckets_[index_init].bitmap |= mask; 171 | return 0; 172 | } 173 | 174 | 175 | int BitmapHashMap::Exists(const std::string& key) { 176 | // TODO: implement 177 | return 0; 178 | } 179 | 180 | 181 | int BitmapHashMap::Remove(const std::string& key) { 182 | uint64_t hash = hash_function(key); 183 | uint64_t index_init = hash % num_buckets_; 184 | uint32_t mask = 1 << (size_neighborhood_-1); 185 | bool found = false; 186 | uint64_t index_current; 187 | for (uint32_t i = 0; i < size_neighborhood_; i++) { 188 | if (buckets_[index_init].bitmap & mask) { 189 | index_current = (index_init + i) % num_buckets_; 190 | if ( key.size() == buckets_[index_current].entry->size_key 191 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 192 | found = true; 193 | break; 194 | } 195 | } 196 | mask = mask >> 1; 197 | } 198 | 199 | if (found) { 200 | //fprintf(stderr, "Remove() [%s] %" PRIu64 " %" PRIu64 "\n", key.c_str(), index_init, index_current); 201 | delete[] buckets_[index_current].entry->data; 202 | delete buckets_[index_current].entry; 203 | buckets_[index_current].entry = NULL; 204 | buckets_[index_init].bitmap = buckets_[index_init].bitmap & (~mask); 205 | monitoring_->RemoveDIB(index_current); 206 | return 0; 207 | } 208 | 209 | return 1; 210 | } 211 | 212 | 213 | int BitmapHashMap::Resize() { 214 | // TODO: implement 215 | // If the resize is called when FindEmptyBucketAndDoSwaps() cannot perform 216 | // the necessary swaps, then make sure that the item being inserted 217 | // or swapped is not nullified and that it is correctly inserted 218 | // after the resize. 
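  // One possible approach (sketch only, not implemented): allocate a larger
  // bucket array, walk the current one, and re-insert every non-NULL entry's
  // key/value with Put() against the new array before freeing the old one.
  // The bitmaps cannot be copied as-is, because neighborhood offsets depend
  // on hash % num_buckets_ and therefore change with the new table size.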
219 | return 0; 220 | } 221 | 222 | 223 | // For debugging 224 | int BitmapHashMap::CheckDensity() { 225 | int num_pages = 0; 226 | int count_empty = 0; 227 | int count_probe = 0; 228 | 229 | int level = 32; 230 | 231 | for (uint32_t i = 0; i < num_buckets_; i++) { 232 | if (buckets_[i].entry == NULL) { 233 | count_empty += 1; 234 | } else { 235 | count_probe += 1; 236 | } 237 | 238 | if (i > 0 && i % level == 0) { 239 | if (count_probe < 0.25 * level) { 240 | std::cout << "."; 241 | } else if (count_probe < 0.5 * level) { 242 | std::cout << ":"; 243 | } else if (count_probe < 0.75 * level) { 244 | std::cout << "|"; 245 | } else if (count_probe < 0.85 * level) { 246 | std::cout << "o"; 247 | } else if (count_probe < 0.95 * level) { 248 | std::cout << "U"; 249 | } else if (count_probe < level) { 250 | std::cout << "O"; 251 | } else { 252 | std::cout << "0"; 253 | } 254 | count_probe = 0; 255 | num_pages += 1; 256 | } 257 | } 258 | std::cout << std::endl; 259 | 260 | std::cout << "Count empty: " << count_empty << "/" << num_buckets_ << std::endl; 261 | std::cout << "Pages: " << num_pages << " | " << num_pages * level << std::endl; 262 | return 0; 263 | } 264 | 265 | 266 | int BitmapHashMap::BucketCounts() { 267 | int counts[33]; 268 | for (int i = 0; i <= 32; i++) { 269 | counts[i] = 0; 270 | } 271 | 272 | int total = 0; 273 | 274 | for (uint32_t i = 0; i < num_buckets_; i++) { 275 | counts[hamming2(buckets_[i].bitmap)] += 1; 276 | } 277 | 278 | for (int i = 0; i <= 32; i++) { 279 | std::cout << "size " << i << ": " << counts[i] << std::endl; 280 | total += counts[i]; 281 | } 282 | 283 | std::cout << "total: " << total << std::endl; 284 | 285 | return 0; 286 | } 287 | 288 | 289 | 290 | int BitmapHashMap::Dump() { 291 | for (uint32_t i = 0; i < num_buckets_ + size_neighborhood_; i++) { 292 | 293 | std::cout << "bitmap: "; 294 | for (uint32_t j = 0; j < size_neighborhood_; j++) { 295 | uint32_t mask = 1 << (size_neighborhood_-1-j); 296 | if (buckets_[i].bitmap & mask) { 297 | std::cout << "1"; 298 | } else { 299 | std::cout << "0"; 300 | } 301 | } 302 | 303 | if (buckets_[i].entry != NULL) { 304 | std::string key(buckets_[i].entry->data, 305 | buckets_[i].entry->size_key); 306 | std::string value(buckets_[i].entry->data + buckets_[i].entry->size_key, 307 | buckets_[i].entry->size_value); 308 | std::cout << " | index: " << i << " - " << key << " " << value; 309 | } 310 | std::cout << std::endl; 311 | } 312 | return 0; 313 | } 314 | 315 | 316 | 317 | 318 | int BitmapHashMap::GetBucketState(int index) { 319 | if (buckets_[index].entry == NULL) { 320 | return 0; 321 | } 322 | 323 | return 1; 324 | } 325 | 326 | 327 | int BitmapHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 328 | if(buckets_[index_stored].entry == NULL) return -1; 329 | std::string key(buckets_[index_stored].entry->data, 330 | buckets_[index_stored].entry->size_key); 331 | *index_init = hash_function(key) % num_buckets_; 332 | return 0; 333 | } 334 | 335 | 336 | void BitmapHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 337 | metadata["name"] = "bitmap"; 338 | char buffer[1024]; 339 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"size_probing\": %u}", num_buckets_, size_probing_); 340 | metadata["parameters_hashmap"] = buffer; 341 | sprintf(buffer, "nb%" PRIu64 "-sp%u", num_buckets_, size_probing_); 342 | metadata["parameters_hashmap_string"] = buffer; 343 | } 344 | 345 | 346 | 347 | }; 348 | -------------------------------------------------------------------------------- 
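The bitmap encoding used by BitmapHashMap can be hard to follow from the code alone. The standalone sketch below (not part of the repository) decodes a neighborhood bitmap using the same bit layout as Put(), Get() and Dump(): bit (size_neighborhood - 1 - j) of bucket[index_init].bitmap is set when the entry that hashed to index_init is stored j slots away.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t size_neighborhood = 32;  // same default as BitmapHashMap
  uint32_t bitmap = 0;

  // Mark offsets 0 and 3 as occupied, using the same formula as Put():
  // mask = 1 << (size_neighborhood - (offset + 1))
  bitmap |= 1u << (size_neighborhood - (0 + 1));
  bitmap |= 1u << (size_neighborhood - (3 + 1));

  // Scan the neighborhood the same way Get() and Dump() do: the mask starts
  // at the highest bit (offset 0) and shifts right by one for each offset.
  uint32_t mask = 1u << (size_neighborhood - 1);
  for (uint32_t j = 0; j < size_neighborhood; j++) {
    if (bitmap & mask) {
      printf("offset %u from the initial bucket is occupied\n", j);
    }
    mask >>= 1;
  }
  return 0;
}
```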
/bitmap_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_BITMAP 2 | #define HASHMAP_BITMAP 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include "murmurhash3.h" 14 | #include "hamming.h" 15 | #include "hashmap.h" 16 | #include "monitoring.h" 17 | 18 | namespace hashmap 19 | { 20 | 21 | 22 | 23 | class BitmapHashMap: public HashMap 24 | { 25 | public: 26 | 27 | BitmapHashMap(uint64_t size, 28 | uint64_t size_probing 29 | ) { 30 | buckets_ = NULL; 31 | num_buckets_ = size; 32 | size_neighborhood_ = 32; 33 | size_probing_ = size_probing; 34 | } 35 | 36 | virtual ~BitmapHashMap() { 37 | Close(); 38 | } 39 | 40 | int Open(); 41 | int Close(); 42 | 43 | struct Entry 44 | { 45 | uint32_t size_key; 46 | uint32_t size_value; 47 | char *data; 48 | }; 49 | 50 | struct Bucket 51 | { 52 | uint32_t bitmap; 53 | struct Entry* entry; 54 | }; 55 | 56 | 57 | int Get(const std::string& key, std::string* value); 58 | int Put(const std::string& key, const std::string& value); 59 | int Exists(const std::string& key); 60 | int Remove(const std::string& key); 61 | int Resize(); 62 | int Dump(); 63 | int CheckDensity(); 64 | int BucketCounts(); 65 | int GetBucketState(int index); 66 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 67 | void GetMetadata(std::map< std::string, std::string >& metadata); 68 | 69 | 70 | private: 71 | Bucket* buckets_; 72 | uint64_t num_buckets_; 73 | uint32_t size_neighborhood_; 74 | uint32_t size_probing_; 75 | 76 | uint64_t FindEmptyBucketAndDoSwaps(uint64_t index_init); 77 | 78 | uint64_t hash_function(const std::string& key) { 79 | static char hash[16]; 80 | static uint64_t output; 81 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 82 | memcpy(&output, hash, 8); 83 | //std::cout << output << std::endl; 84 | return output; 85 | } 86 | 87 | }; 88 | 89 | 90 | }; // end namespace hashmap 91 | 92 | #endif // HASHMAP_BITMAP 93 | -------------------------------------------------------------------------------- /hamming.cc: -------------------------------------------------------------------------------- 1 | // Code is from: http://en.wikipedia.org/wiki/Hamming_weight 2 | // 3 | //types and constants used in the functions below 4 | 5 | #include "hamming.h" 6 | 7 | const uint64_t m1 = 0x5555555555555555; //binary: 0101... 8 | const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. 9 | const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... 10 | const uint64_t m8 = 0x00ff00ff00ff00ff; //binary: 8 zeros, 8 ones ... 11 | const uint64_t m16 = 0x0000ffff0000ffff; //binary: 16 zeros, 16 ones ... 12 | const uint64_t m32 = 0x00000000ffffffff; //binary: 32 zeros, 32 ones 13 | const uint64_t hff = 0xffffffffffffffff; //binary: all ones 14 | const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... 15 | 16 | //This is a naive implementation, shown for comparison, 17 | //and to help in understanding the better functions. 18 | //It uses 24 arithmetic operations (shift, add, and). 
19 | int hamming1(uint64_t x) { 20 | x = (x & m1 ) + ((x >> 1) & m1 ); //put count of each 2 bits into those 2 bits 21 | x = (x & m2 ) + ((x >> 2) & m2 ); //put count of each 4 bits into those 4 bits 22 | x = (x & m4 ) + ((x >> 4) & m4 ); //put count of each 8 bits into those 8 bits 23 | x = (x & m8 ) + ((x >> 8) & m8 ); //put count of each 16 bits into those 16 bits 24 | x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits 25 | x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits 26 | return x; 27 | } 28 | 29 | //This uses fewer arithmetic operations than any other known 30 | //implementation on machines with slow multiplication. 31 | //It uses 17 arithmetic operations. 32 | int hamming2(uint64_t x) { 33 | x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits 34 | x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 35 | x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits 36 | x += x >> 8; //put count of each 16 bits into their lowest 8 bits 37 | x += x >> 16; //put count of each 32 bits into their lowest 8 bits 38 | x += x >> 32; //put count of each 64 bits into their lowest 8 bits 39 | return x & 0x7f; 40 | } 41 | 42 | //This uses fewer arithmetic operations than any other known 43 | //implementation on machines with fast multiplication. 44 | //It uses 12 arithmetic operations, one of which is a multiply. 45 | int hamming3(uint64_t x) { 46 | x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits 47 | x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 48 | x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits 49 | return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 
50 | } 51 | -------------------------------------------------------------------------------- /hamming.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_HAMMING 2 | #define HASHMAP_HAMMING 3 | #include 4 | #include 5 | 6 | int hamming1(uint64_t x); 7 | int hamming2(uint64_t x); 8 | int hamming3(uint64_t x); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP 2 | #define HASHMAP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "murmurhash3.h" 9 | #include "hamming.h" 10 | 11 | 12 | namespace hashmap 13 | { 14 | 15 | class Monitoring; 16 | 17 | class HashMap 18 | { 19 | public: 20 | 21 | HashMap() { 22 | monitoring_ = NULL; 23 | } 24 | 25 | virtual ~HashMap() { 26 | } 27 | 28 | virtual int Open() = 0; 29 | virtual int Close() = 0; 30 | virtual int Get(const std::string& key, std::string* value) = 0; 31 | virtual int Put(const std::string& key, const std::string& value) = 0; 32 | virtual int Exists(const std::string& key) = 0; 33 | virtual int Remove(const std::string& key) = 0; 34 | virtual int Dump() = 0; 35 | virtual int CheckDensity() = 0; 36 | virtual int BucketCounts() = 0; 37 | virtual int GetBucketState(int index) = 0; 38 | virtual int FillInitIndex(uint64_t index_stored, uint64_t *index_init) = 0; 39 | virtual void GetMetadata(std::map< std::string, std::string >& metadata) = 0; 40 | 41 | Monitoring *monitoring_; 42 | }; 43 | 44 | }; // end namespace hashmap 45 | 46 | #endif // HASHMAP 47 | -------------------------------------------------------------------------------- /main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "hashmap.h" 10 | #include "probing_hashmap.h" 11 | #include "tombstone_hashmap.h" 12 | #include "backshift_hashmap.h" 13 | #include "bitmap_hashmap.h" 14 | #include "shadow_hashmap.h" 15 | 16 | #include "testcase.h" 17 | 18 | 19 | 20 | std::string concatenate(std::string const& str, int i) 21 | { 22 | std::stringstream s; 23 | s << str << i; 24 | return s.str(); 25 | } 26 | 27 | 28 | uint32_t NearestPowerOfTwo(const uint32_t number) { 29 | uint32_t power = 1; 30 | while (power < number) { 31 | power <<= 1; 32 | } 33 | return power; 34 | } 35 | 36 | 37 | int exists_or_mkdir(const char *path) { 38 | struct stat sb; 39 | 40 | if (stat(path, &sb) == 0) { 41 | if (!S_ISDIR(sb.st_mode)) { 42 | return 1; 43 | } 44 | } else if (mkdir(path, 0777) != 0) { 45 | return 1; 46 | } 47 | 48 | return 0; 49 | } 50 | 51 | 52 | void show_usage() { 53 | fprintf(stdout, "Test program for implementations of open addressing hash table algorithms.\n"); 54 | fprintf(stdout, "\n"); 55 | 56 | fprintf(stdout, "General parameters (mandatory):\n"); 57 | fprintf(stdout, " --algo algorithm to use for the hash table. Possible values are:\n"); 58 | fprintf(stdout, " * linear: basic linear probing\n"); 59 | fprintf(stdout, " * tombstone: Robin Hood hashing with tombstone deletion\n"); 60 | fprintf(stdout, " * backshift: Robin Hood hashing with backward shifting deletion\n"); 61 | fprintf(stdout, " * bitmap: hopscotch hashing with bitmap representation\n"); 62 | fprintf(stdout, " * shadow: hopscotch hashing with shadow representation\n"); 63 | fprintf(stdout, " --testcase test case to use. 
Possible values are:\n"); 64 | fprintf(stdout, " * loading: load the table until it is full (does not perform any removals).\n"); 65 | fprintf(stdout, " * batch: load the table, then remove a large batch, and re-insert a large batch.\n"); 66 | fprintf(stdout, " * ripple: load the table, then do a series of removal-insertion operations.\n"); 67 | fprintf(stdout, "\n"); 68 | 69 | fprintf(stdout, "Parameters for linear probing algorithm (optional):\n"); 70 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 71 | fprintf(stdout, "\n"); 72 | 73 | fprintf(stdout, "Parameters for tombstone algorithm (optional):\n"); 74 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 75 | fprintf(stdout, "\n"); 76 | 77 | fprintf(stdout, "Parameters for backshift algorithm (optional):\n"); 78 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 79 | fprintf(stdout, "\n"); 80 | 81 | fprintf(stdout, "Parameters for bitmap algorithm (optional):\n"); 82 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 83 | fprintf(stdout, " --size_probing maximum number of buckets used in the probing (default=4096)\n"); 84 | fprintf(stdout, "\n"); 85 | 86 | fprintf(stdout, "Parameters for shadow algorithm (optional):\n"); 87 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 88 | fprintf(stdout, " --size_probing maximum number of buckets used in the probing (default=4096)\n"); 89 | fprintf(stdout, " --size_nh_start starting size of the neighborhoods (default=32)\n"); 90 | fprintf(stdout, " --size_nh_end ending size of the neighborhoods (default=32)\n"); 91 | fprintf(stdout, "\n"); 92 | 93 | fprintf(stdout, "Parameters for the batch test case (optional):\n"); 94 | fprintf(stdout, " --load_factor_max maxium load factor at which the table should be used (default=.7)\n"); 95 | fprintf(stdout, " --load_factor_step load factor by which items in the table should be removed and inserted (default=.1)\n"); 96 | fprintf(stdout, "\n"); 97 | 98 | fprintf(stdout, "Parameters for the ripple test case (optional):\n"); 99 | fprintf(stdout, " --load_factor_max maxium load factor at which the table should be used (default=.7)\n"); 100 | fprintf(stdout, " --load_factor_step load factor by which items in the table should be removed and inserted (default=.1)\n"); 101 | fprintf(stdout, "\n"); 102 | 103 | fprintf(stdout, "Examples:\n"); 104 | fprintf(stdout, "./hashmap --algo backshift --num_buckets 10000 --testcase batch --load_factor_max 0.8 --load_factor_step 0.1\n"); 105 | fprintf(stdout, "./hashmap --algo shadow --num_buckets 10000 --size_nh_start 4 --size_nh_end 64 --testcase loading\n"); 106 | } 107 | 108 | 109 | 110 | 111 | 112 | int main(int argc, char **argv) { 113 | bool has_error; 114 | 115 | if (argc == 1 || (argc == 2 && strcmp(argv[1], "--help") == 0)) { 116 | show_usage(); 117 | exit(-1); 118 | } 119 | 120 | if (argc % 2 == 0) { 121 | std::cerr << "Error: invalid number of arguments" << std::endl; 122 | exit(-1); 123 | } 124 | 125 | uint32_t size_neighborhood_start = 32; 126 | uint32_t size_neighborhood_end = 32; 127 | uint32_t size_probing = 4096; 128 | uint32_t num_buckets = 10000; 129 | double load_factor_max = 0.7; 130 | double load_factor_step = 0.1; 131 | std::string algorithm = ""; 132 | std::string testcase = ""; 133 | 134 | if (argc > 2) { 135 | for (int i = 1; i < argc; i += 2 ) { 136 | if (strcmp(argv[i], "--algo" ) == 0) { 137 | 
algorithm = std::string(argv[i+1]); 138 | } else if (strcmp(argv[i], "--num_buckets" ) == 0) { 139 | num_buckets = atoi(argv[i+1]); 140 | } else if (strcmp(argv[i], "--size_nh_start" ) == 0) { 141 | size_neighborhood_start = atoi(argv[i+1]); 142 | } else if (strcmp(argv[i], "--size_nh_end" ) == 0) { 143 | size_neighborhood_end = atoi(argv[i+1]); 144 | } else if (strcmp(argv[i], "--size_probing" ) == 0) { 145 | size_probing = atoi(argv[i+1]); 146 | } else if (strcmp(argv[i], "--testcase" ) == 0) { 147 | testcase = std::string(argv[i+1]); 148 | } else if (strcmp(argv[i], "--load_factor_max" ) == 0) { 149 | load_factor_max = atof(argv[i+1]); 150 | } else if (strcmp(argv[i], "--load_factor_step" ) == 0) { 151 | load_factor_step = atof(argv[i+1]); 152 | } else { 153 | fprintf(stderr, "Unknown parameter [%s]\n", argv[i]); 154 | exit(-1); 155 | } 156 | } 157 | } 158 | 159 | int num_items = num_buckets; 160 | //int num_items = NearestPowerOfTwo(num_buckets); 161 | hashmap::HashMap *hm; 162 | if (algorithm == "bitmap") { 163 | hm = new hashmap::BitmapHashMap(num_items, size_probing); 164 | } else if (algorithm == "shadow") { 165 | hm = new hashmap::ShadowHashMap(num_items, size_probing, size_neighborhood_start, size_neighborhood_end); 166 | } else if (algorithm == "linear") { 167 | hm = new hashmap::ProbingHashMap(num_items, size_probing); 168 | } else if (algorithm == "tombstone") { 169 | hm = new hashmap::TombstoneHashMap(num_items); 170 | } else if (algorithm == "backshift") { 171 | hm = new hashmap::BackshiftHashMap(num_items); 172 | } else { 173 | fprintf(stderr, "Unknown algorithm [%s]\n", algorithm.c_str()); 174 | exit(-1); 175 | } 176 | 177 | if (testcase == "loading") { 178 | //run_testcase2(hm, num_items, load_factor_max); 179 | hashmap::LoadingTestCase tc(hm, num_items); 180 | tc.run(); 181 | return 0; 182 | } else if (testcase == "batch") { 183 | //run_testcase2(hm, num_items, load_factor_max); 184 | hashmap::BatchTestCase tc(hm, num_items, load_factor_max, load_factor_step); 185 | tc.run(); 186 | return 0; 187 | } else if (testcase == "ripple") { 188 | hashmap::RippleTestCase tc(hm, num_items, load_factor_max, load_factor_step); 189 | tc.run(); 190 | return 0; 191 | } else if (testcase != "") { 192 | fprintf(stderr, "Error: testcase is unknown [%s]\n", testcase.c_str()); 193 | return 1; 194 | } 195 | 196 | hm->Open(); 197 | std::string value_out("value_out"); 198 | 199 | 200 | 201 | int num_items_reached = 0; 202 | 203 | for (int i = 0; i < num_items; i++) { 204 | value_out = "value_out"; 205 | std::string key = concatenate( "key", i ); 206 | std::string value = concatenate( "value", i ); 207 | int ret_put = hm->Put(key, value); 208 | hm->Get(key, &value_out); 209 | 210 | if (ret_put != 0) { 211 | std::cout << "Insertion stopped due to clustering at step: " << i << std::endl; 212 | std::cout << "Load factor: " << (double)i/num_items << std::endl; 213 | num_items_reached = i; 214 | break; 215 | } 216 | } 217 | 218 | 219 | has_error = false; 220 | for (int i = 0; i < num_items_reached; i++) { 221 | value_out = "value_out"; 222 | std::string key = concatenate( "key", i ); 223 | std::string value = concatenate( "value", i ); 224 | int ret_get = hm->Get(key, &value_out); 225 | if (ret_get != 0 || value != value_out) { 226 | std::cout << "Final check: error at step [" << i << "]" << std::endl; 227 | has_error = true; 228 | break; 229 | } 230 | } 231 | 232 | if (!has_error) { 233 | std::cout << "Final check: OK" << std::endl; 234 | } 235 | 236 | 237 | /* 238 | if (hm->monitoring_ != NULL) { 
239 | std::cout << "Monitoring: OK" << std::endl; 240 | } 241 | 242 | // testcase-algo-metric-runnumber-step.json 243 | // batch50-shadow-density-00001-0001.json 244 | 245 | hm->monitoring_->PrintDensity("density.json"); 246 | std::cout << "Clustering" << std::endl; 247 | hm->monitoring_->PrintClustering(hm); 248 | 249 | hm->monitoring_->PrintDIB("probing_sequence_length_search.json"); 250 | hm->monitoring_->PrintNumScannedBlocks("num_scanned_blocks.json"); 251 | 252 | */ 253 | //hm->CheckDensity(); 254 | //hm->BucketCounts(); 255 | 256 | 257 | has_error = false; 258 | for (int i = 0; i < num_items_reached; i++) { 259 | std::string key = concatenate( "key", i ); 260 | std::string value = concatenate( "value", i ); 261 | int ret_remove = hm->Remove(key); 262 | if (ret_remove != 0) { 263 | std::cout << "Remove: error at step [" << i << "]" << std::endl; 264 | has_error = true; 265 | break; 266 | } 267 | int ret_get = hm->Get(key, &value_out); 268 | if (ret_get == 0) { 269 | std::cout << "Remove: error at step [" << i << "] -- can get after remove" << std::endl; 270 | has_error = true; 271 | break; 272 | } 273 | } 274 | 275 | if (!has_error) { 276 | std::cout << "Removing items: OK" << std::endl; 277 | } 278 | 279 | 280 | return 0; 281 | } 282 | -------------------------------------------------------------------------------- /monitoring.cc: -------------------------------------------------------------------------------- 1 | #include "monitoring.h" 2 | #include "hashmap.h" 3 | 4 | namespace hashmap { 5 | 6 | void Monitoring::PrintInfo(FILE* fd, std::string metric) { 7 | std::map metadata; 8 | hm_->GetMetadata(metadata); 9 | fprintf(fd, " \"algorithm\": \"%s\",\n", metadata["name"].c_str()); 10 | fprintf(fd, " \"testcase\": \"%s\",\n", testcase_.c_str()); 11 | fprintf(fd, " \"metric\": \"%s\",\n", metric.c_str()); 12 | fprintf(fd, " \"parameters_testcase\": %s,\n", parameters_testcase_json_.c_str()); 13 | fprintf(fd, " \"parameters_testcase_string\": \"%s\",\n", parameters_testcase_string_.c_str()); 14 | fprintf(fd, " \"parameters_hashmap\": %s,\n", metadata["parameters_hashmap"].c_str()); 15 | fprintf(fd, " \"parameters_hashmap_string\": \"%s\",\n", metadata["parameters_hashmap_string"].c_str()); 16 | fprintf(fd, " \"instance\": %" PRIu64 ",\n", instance_); 17 | fprintf(fd, " \"cycle\": %" PRIu64 ",\n", cycle_); 18 | } 19 | 20 | 21 | uint64_t** Monitoring::GetClustering(HashMap* hm) { 22 | // This is a O(n^2) solution, but there is a O(n) one. If this gets too slow, 23 | // replace with the O(n) solution. 
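  // Sketch of the O(n) variant (not implemented here): for each window size,
  // keep a running count of occupied buckets; when the window slides from
  // bucket i to bucket i+1, subtract hm->GetBucketState(i) and add
  // hm->GetBucketState(i + size_window), then record the running count in
  // clustering[index_window][count]. Each bucket is then touched a constant
  // number of times per window size instead of size_window times.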
24 | uint64_t sizes_window[8] = { 8, 16, 32, 64, 128, 256, 512, 1024 }; 25 | 26 | uint64_t **clustering = (uint64_t**) new uint64_t*[8]; 27 | for (unsigned int i = 0; i < 8; i++) { 28 | clustering[i] = new uint64_t[ sizes_window[i] + 1 ]; 29 | for (unsigned int j = 0; j < sizes_window[i]; j++) { 30 | clustering[i][j] = 0; 31 | } 32 | } 33 | 34 | for (uint64_t index_bucket = 0; index_bucket < num_buckets_; index_bucket++) { 35 | for (uint64_t index_window = 0; index_window < 8; index_window++) { 36 | if (index_bucket >= num_buckets_ - sizes_window[index_window]) { 37 | continue; 38 | } 39 | 40 | uint64_t count = 0; 41 | for (uint64_t i = 0; i < sizes_window[index_window]; i++) { 42 | uint64_t index_bucket_current = index_bucket + i; 43 | if (hm->GetBucketState(index_bucket_current) == 1) { 44 | count += 1; 45 | } 46 | } 47 | 48 | //if (index_bucket > sizes_window[index_window]) { 49 | //} 50 | clustering[index_window][count] += 1; 51 | } 52 | } 53 | 54 | return clustering; 55 | } 56 | 57 | 58 | void Monitoring::PrintClustering(HashMap *hm) { 59 | int sizes_window[5] = { 8, 16, 32, 64, 128 }; 60 | uint64_t** clustering = hm->monitoring_->GetClustering(hm); 61 | for (int i = 0; i < 5; i++) { 62 | fprintf(stdout, "Cluster for window of size %d:\n", sizes_window[i]); 63 | for (int j = 0; j < sizes_window[i] + 1; j++) { 64 | fprintf(stdout, " %5d: %5" PRIu64 "\n", j, clustering[i][j]); 65 | } 66 | } 67 | 68 | for (int i = 0; i < 8; i++) { 69 | delete[] clustering[i]; 70 | } 71 | delete[] clustering; 72 | } 73 | 74 | 75 | 76 | uint64_t Monitoring::GetDIB(uint64_t index) { 77 | std::map::iterator it; 78 | it = dib_.find(index); 79 | if (it == dib_.end()) { 80 | return num_buckets_; 81 | } 82 | return dib_[index]; 83 | } 84 | 85 | 86 | void Monitoring::SetDIB(uint64_t index, uint64_t dib) { 87 | dib_[index] = dib; 88 | //fprintf(stderr, "SetPSL [%" PRIu64 "]\n", index); 89 | } 90 | 91 | void Monitoring::RemoveDIB(uint64_t index) { 92 | std::map::iterator it; 93 | it = dib_.find(index); 94 | if (it != dib_.end()) { 95 | dib_.erase(it); 96 | } else { 97 | //fprintf(stderr, "RemovePSL error: cannot find index [%" PRIu64 "]\n", index); 98 | } 99 | 100 | } 101 | 102 | 103 | 104 | 105 | 106 | void Monitoring::PrintDIB(std::string filepath) { 107 | std::map counts; 108 | std::map::iterator it_dib, it_count, it_find; 109 | 110 | fprintf(stderr, "dib search size:%zu\n", dib_.size()); 111 | 112 | for (it_dib = dib_.begin(); it_dib != dib_.end(); it_dib++) { 113 | it_find = counts.find(it_dib->second); 114 | if (it_find == counts.end()) { 115 | counts[it_dib->second] = 0; 116 | } 117 | counts[it_dib->second] += 1; 118 | } 119 | 120 | FILE* fd = NULL; 121 | if (filepath == "stdout") { 122 | fd = stdout; 123 | } else { 124 | fd = fopen(filepath.c_str(), "w"); 125 | } 126 | 127 | fprintf(fd, "{\n"); 128 | PrintInfo(fd, "DIB"); 129 | fprintf(fd, " \"datapoints\":\n"); 130 | fprintf(fd, " {\n"); 131 | 132 | bool first_item = true; 133 | for (it_count = counts.begin(); it_count != counts.end(); it_count++) { 134 | if (!first_item) fprintf(fd, ",\n"); 135 | first_item = false; 136 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it_count->first, it_count->second); 137 | } 138 | fprintf(fd, "\n"); 139 | fprintf(fd, " }\n"); 140 | fprintf(fd, "}\n"); 141 | 142 | if (filepath != "stdout") { 143 | fclose(fd); 144 | } 145 | 146 | } 147 | 148 | 149 | 150 | void Monitoring::GetNumScannedBlocks(std::map& out_num_scanned_blocks, HashMap *hm) { 151 | 152 | std::map< uint64_t, uint64_t>::iterator it_find; 153 | for (uint64_t 
index_stored = 0; index_stored < num_buckets_; index_stored++) { 154 | uint64_t index_init; 155 | if (hm->FillInitIndex(index_stored, &index_init) != 0) continue; 156 | 157 | uint64_t index_stored_adjusted; 158 | if (index_init <= index_stored) { 159 | index_stored_adjusted = index_stored; 160 | } else { 161 | index_stored_adjusted = index_stored + num_buckets_; 162 | } 163 | 164 | //for (int i = 10; i > 0; i--) { 165 | int index_selected = 64; 166 | uint64_t chunk_size = 16; 167 | for (int i = 4; i < 64; i++) { 168 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, chunk_size); 169 | uint64_t offset_stored = AlignOffsetToBlock(index_stored_adjusted * size_bucket_, chunk_size); 170 | 171 | if (offset_init == offset_stored) { 172 | index_selected = i; 173 | break; 174 | } 175 | 176 | chunk_size *= 2; 177 | } 178 | 179 | it_find = out_num_scanned_blocks.find(index_selected); 180 | if (it_find == out_num_scanned_blocks.end()) { 181 | out_num_scanned_blocks[index_selected] = 0; 182 | } 183 | out_num_scanned_blocks[index_selected] += 1; 184 | } 185 | } 186 | 187 | 188 | 189 | 190 | void Monitoring::PrintNumScannedBlocks(std::string filepath) { 191 | FILE* fd = NULL; 192 | if (filepath == "stdout") { 193 | fd = stdout; 194 | } else { 195 | fd = fopen(filepath.c_str(), "w"); 196 | } 197 | 198 | char metric[1024]; 199 | std::map num_scanned_blocks; 200 | GetNumScannedBlocks(num_scanned_blocks, hm_); 201 | fprintf(fd, "{\n"); 202 | sprintf(metric, "aligned DIB"); 203 | PrintInfo(fd, metric); 204 | fprintf(fd, " \"datapoints\":\n"); 205 | fprintf(fd, " {"); 206 | std::map::iterator it; 207 | bool first_item = true; 208 | for (it = num_scanned_blocks.begin(); it != num_scanned_blocks.end(); ++it) { 209 | if (!first_item) fprintf(fd, ","); 210 | first_item = false; 211 | fprintf(fd, "\n"); 212 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 213 | } 214 | fprintf(fd, "\n"); 215 | fprintf(fd, " }\n"); 216 | fprintf(fd, "}\n"); 217 | 218 | if (filepath != "stdout") { 219 | fclose(fd); 220 | } 221 | } 222 | 223 | 224 | 225 | void Monitoring::AddDFB(uint64_t distance) { 226 | 227 | std::map::iterator it; 228 | it = dfb_.find(distance); 229 | if (it == dfb_.end()) { 230 | dfb_[distance] = 0; 231 | } 232 | dfb_[distance] += 1; 233 | } 234 | 235 | 236 | void Monitoring::ResetDFB() { 237 | dfb_.clear(); 238 | } 239 | 240 | 241 | void Monitoring::PrintDFB(std::string filepath) { 242 | std::map::iterator it; 243 | 244 | FILE* fd = NULL; 245 | if (filepath == "stdout") { 246 | fd = stdout; 247 | } else { 248 | fd = fopen(filepath.c_str(), "w"); 249 | } 250 | 251 | fprintf(fd, "{\n"); 252 | PrintInfo(fd, "DFB"); 253 | fprintf(fd, " \"datapoints\":\n"); 254 | fprintf(fd, " {\n"); 255 | 256 | bool first_item = true; 257 | for (it = dfb_.begin(); it != dfb_.end(); it++) { 258 | if (!first_item) fprintf(fd, ",\n"); 259 | first_item = false; 260 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 261 | } 262 | fprintf(fd, "\n"); 263 | fprintf(fd, " }\n"); 264 | fprintf(fd, "}\n"); 265 | 266 | if (filepath != "stdout") { 267 | fclose(fd); 268 | } 269 | } 270 | 271 | 272 | void Monitoring::AddAlignedDFB(uint64_t index_init, uint64_t index_free_bucket) { 273 | std::map::iterator it_find; 274 | 275 | if (index_init > index_free_bucket) { 276 | index_free_bucket += num_buckets_; 277 | } 278 | int index_selected = 64; 279 | uint64_t chunk_size = 16; 280 | for (int i = 4; i < 64; i++) { 281 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, 
chunk_size); 282 | uint64_t offset_free_bucket = AlignOffsetToBlock(index_free_bucket * size_bucket_, chunk_size); 283 | if (offset_init == offset_free_bucket) { 284 | index_selected = i; 285 | break; 286 | } 287 | 288 | chunk_size *= 2; 289 | } 290 | 291 | it_find = aligned_dfb_.find(index_selected); 292 | if (it_find == aligned_dfb_.end()) { 293 | aligned_dfb_[index_selected] = 0; 294 | } 295 | aligned_dfb_[index_selected] += 1; 296 | } 297 | 298 | 299 | 300 | void Monitoring::ResetAlignedDFB() { 301 | aligned_dfb_.clear(); 302 | } 303 | 304 | 305 | void Monitoring::PrintAlignedDFB(std::string filepath) { 306 | std::map::iterator it; 307 | 308 | FILE* fd = NULL; 309 | if (filepath == "stdout") { 310 | fd = stdout; 311 | } else { 312 | fd = fopen(filepath.c_str(), "w"); 313 | } 314 | 315 | fprintf(fd, "{\n"); 316 | PrintInfo(fd, "aligned DFB"); 317 | fprintf(fd, " \"datapoints\":\n"); 318 | fprintf(fd, " {\n"); 319 | 320 | bool first_item = true; 321 | for (it = aligned_dfb_.begin(); it != aligned_dfb_.end(); it++) { 322 | if (!first_item) fprintf(fd, ",\n"); 323 | first_item = false; 324 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 325 | } 326 | fprintf(fd, "\n"); 327 | fprintf(fd, " }\n"); 328 | fprintf(fd, "}\n"); 329 | 330 | if (filepath != "stdout") { 331 | fclose(fd); 332 | } 333 | } 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | void Monitoring::AddNumberOfSwaps(uint64_t distance) { 342 | 343 | std::map::iterator it; 344 | it = swaps_.find(distance); 345 | if (it == swaps_.end()) { 346 | swaps_[distance] = 0; 347 | } 348 | swaps_[distance] += 1; 349 | } 350 | 351 | 352 | void Monitoring::ResetNumberOfSwaps() { 353 | swaps_.clear(); 354 | } 355 | 356 | 357 | void Monitoring::PrintNumberOfSwaps(std::string filepath) { 358 | std::map::iterator it; 359 | 360 | FILE* fd = NULL; 361 | if (filepath == "stdout") { 362 | fd = stdout; 363 | } else { 364 | fd = fopen(filepath.c_str(), "w"); 365 | } 366 | 367 | fprintf(fd, "{\n"); 368 | PrintInfo(fd, "swap"); 369 | fprintf(fd, " \"datapoints\":\n"); 370 | fprintf(fd, " {\n"); 371 | 372 | bool first_item = true; 373 | for (it = swaps_.begin(); it != swaps_.end(); it++) { 374 | if (!first_item) fprintf(fd, ",\n"); 375 | first_item = false; 376 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 377 | } 378 | fprintf(fd, "\n"); 379 | fprintf(fd, " }\n"); 380 | fprintf(fd, "}\n"); 381 | 382 | if (filepath != "stdout") { 383 | fclose(fd); 384 | } 385 | } 386 | 387 | 388 | 389 | 390 | void Monitoring::AddDMB(uint64_t distance) { 391 | 392 | std::map::iterator it; 393 | it = dmb_.find(distance); 394 | if (it == dmb_.end()) { 395 | dmb_[distance] = 0; 396 | } 397 | dmb_[distance] += 1; 398 | //printf("Add DMB %" PRIu64 "\n", distance); 399 | } 400 | 401 | 402 | void Monitoring::ResetDMB() { 403 | dmb_.clear(); 404 | } 405 | 406 | 407 | void Monitoring::PrintDMB(std::string filepath) { 408 | std::map::iterator it; 409 | 410 | FILE* fd = NULL; 411 | if (filepath == "stdout") { 412 | fd = stdout; 413 | } else { 414 | fd = fopen(filepath.c_str(), "w"); 415 | } 416 | 417 | fprintf(fd, "{\n"); 418 | PrintInfo(fd, "DMB"); 419 | fprintf(fd, " \"datapoints\":\n"); 420 | fprintf(fd, " {\n"); 421 | 422 | bool first_item = true; 423 | for (it = dmb_.begin(); it != dmb_.end(); it++) { 424 | if (!first_item) fprintf(fd, ",\n"); 425 | first_item = false; 426 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 427 | } 428 | fprintf(fd, "\n"); 429 | fprintf(fd, " }\n"); 430 | fprintf(fd, "}\n"); 431 | 432 | if 
(filepath != "stdout") { 433 | fclose(fd); 434 | } 435 | } 436 | 437 | 438 | 439 | 440 | void Monitoring::AddAlignedDMB(uint64_t index_init, uint64_t index_missing_bucket) { 441 | std::map::iterator it_find; 442 | if (index_init > index_missing_bucket) { 443 | index_missing_bucket += num_buckets_; 444 | } 445 | int index_selected = 64; 446 | uint64_t chunk_size = 16; 447 | for (int i = 4; i < 64; i++) { 448 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, chunk_size); 449 | uint64_t offset_missing_bucket = AlignOffsetToBlock(index_missing_bucket * size_bucket_, chunk_size); 450 | if (offset_init == offset_missing_bucket) { 451 | index_selected = i; 452 | break; 453 | } 454 | 455 | chunk_size *= 2; 456 | } 457 | 458 | it_find = aligned_dmb_.find(index_selected); 459 | if (it_find == aligned_dmb_.end()) { 460 | aligned_dmb_[index_selected] = 0; 461 | } 462 | aligned_dmb_[index_selected] += 1; 463 | 464 | } 465 | 466 | 467 | 468 | 469 | void Monitoring::ResetAlignedDMB() { 470 | aligned_dmb_.clear(); 471 | } 472 | 473 | 474 | void Monitoring::PrintAlignedDMB(std::string filepath) { 475 | std::map::iterator it; 476 | 477 | FILE* fd = NULL; 478 | if (filepath == "stdout") { 479 | fd = stdout; 480 | } else { 481 | fd = fopen(filepath.c_str(), "w"); 482 | } 483 | 484 | fprintf(fd, "{\n"); 485 | PrintInfo(fd, "aligned DMB"); 486 | fprintf(fd, " \"datapoints\":\n"); 487 | fprintf(fd, " {\n"); 488 | 489 | bool first_item = true; 490 | for (it = aligned_dmb_.begin(); it != aligned_dmb_.end(); it++) { 491 | if (!first_item) fprintf(fd, ",\n"); 492 | first_item = false; 493 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 494 | } 495 | fprintf(fd, "\n"); 496 | fprintf(fd, " }\n"); 497 | fprintf(fd, "}\n"); 498 | 499 | if (filepath != "stdout") { 500 | fclose(fd); 501 | } 502 | } 503 | 504 | 505 | 506 | void Monitoring::AddDSB(uint64_t distance) { 507 | 508 | std::map::iterator it; 509 | it = dsb_.find(distance); 510 | if (it == dsb_.end()) { 511 | dsb_[distance] = 0; 512 | } 513 | dsb_[distance] += 1; 514 | //printf("Add DSB %" PRIu64 "\n", distance); 515 | } 516 | 517 | 518 | void Monitoring::ResetDSB() { 519 | dsb_.clear(); 520 | } 521 | 522 | 523 | void Monitoring::PrintDSB(std::string filepath) { 524 | std::map::iterator it; 525 | 526 | FILE* fd = NULL; 527 | if (filepath == "stdout") { 528 | fd = stdout; 529 | } else { 530 | fd = fopen(filepath.c_str(), "w"); 531 | } 532 | 533 | fprintf(fd, "{\n"); 534 | PrintInfo(fd, "DSB"); 535 | fprintf(fd, " \"datapoints\":\n"); 536 | fprintf(fd, " {\n"); 537 | 538 | bool first_item = true; 539 | for (it = dsb_.begin(); it != dsb_.end(); it++) { 540 | if (!first_item) fprintf(fd, ",\n"); 541 | first_item = false; 542 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 543 | } 544 | fprintf(fd, "\n"); 545 | fprintf(fd, " }\n"); 546 | fprintf(fd, "}\n"); 547 | 548 | if (filepath != "stdout") { 549 | fclose(fd); 550 | } 551 | } 552 | 553 | 554 | 555 | 556 | void Monitoring::AddAlignedDSB(uint64_t index_stored, uint64_t index_shift_bucket) { 557 | std::map::iterator it_find; 558 | if (index_stored > index_shift_bucket) { 559 | index_shift_bucket += num_buckets_; 560 | } 561 | int index_selected = 64; 562 | uint64_t chunk_size = 16; 563 | for (int i = 4; i < 64; i++) { 564 | uint64_t offset_stored = AlignOffsetToBlock(index_stored * size_bucket_, chunk_size); 565 | uint64_t offset_shift_bucket = AlignOffsetToBlock(index_shift_bucket * size_bucket_, chunk_size); 566 | if (offset_stored == offset_shift_bucket) 
{ 567 | index_selected = i; 568 | break; 569 | } 570 | 571 | chunk_size *= 2; 572 | } 573 | 574 | it_find = aligned_dsb_.find(index_selected); 575 | if (it_find == aligned_dsb_.end()) { 576 | aligned_dsb_[index_selected] = 0; 577 | } 578 | aligned_dsb_[index_selected] += 1; 579 | 580 | } 581 | 582 | 583 | 584 | 585 | void Monitoring::ResetAlignedDSB() { 586 | aligned_dsb_.clear(); 587 | } 588 | 589 | 590 | void Monitoring::PrintAlignedDSB(std::string filepath) { 591 | std::map::iterator it; 592 | 593 | FILE* fd = NULL; 594 | if (filepath == "stdout") { 595 | fd = stdout; 596 | } else { 597 | fd = fopen(filepath.c_str(), "w"); 598 | } 599 | 600 | fprintf(fd, "{\n"); 601 | PrintInfo(fd, "aligned DSB"); 602 | fprintf(fd, " \"datapoints\":\n"); 603 | fprintf(fd, " {\n"); 604 | 605 | bool first_item = true; 606 | for (it = aligned_dsb_.begin(); it != aligned_dsb_.end(); it++) { 607 | if (!first_item) fprintf(fd, ",\n"); 608 | first_item = false; 609 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 610 | } 611 | fprintf(fd, "\n"); 612 | fprintf(fd, " }\n"); 613 | fprintf(fd, "}\n"); 614 | 615 | if (filepath != "stdout") { 616 | fclose(fd); 617 | } 618 | } 619 | 620 | }; // end namespace hashmap 621 | -------------------------------------------------------------------------------- /monitoring.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_MONITORING 2 | #define HASHMAP_MONITORING 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | 19 | namespace hashmap 20 | { 21 | class HashMap; 22 | 23 | class Monitoring 24 | { 25 | public: 26 | Monitoring(uint64_t num_buckets, 27 | uint64_t max_num_items_in_bucket, 28 | HashMap *hm) { 29 | num_buckets_ = num_buckets; 30 | max_num_items_in_bucket_ = max_num_items_in_bucket; 31 | size_bucket_ = 4; 32 | hm_ = hm; 33 | fprintf(stderr, "starting\n"); 34 | } 35 | 36 | virtual ~Monitoring() { 37 | } 38 | 39 | uint64_t** GetClustering(HashMap* hm); 40 | void PrintClustering(HashMap *hm); 41 | const std::map& GetClustering(); 42 | 43 | uint64_t GetDIB(uint64_t index); 44 | void SetDIB(uint64_t index, uint64_t dib); 45 | void RemoveDIB(uint64_t index); 46 | void PrintDIB(std::string filepath); 47 | 48 | void AddDFB(uint64_t distance); 49 | void ResetDFB(); 50 | void PrintDFB(std::string filepath); 51 | 52 | void AddAlignedDFB(uint64_t index_init, uint64_t index_free_bucket); 53 | void ResetAlignedDFB(); 54 | void PrintAlignedDFB(std::string filepath); 55 | 56 | void AddDMB(uint64_t distance); 57 | void ResetDMB(); 58 | void PrintDMB(std::string filepath); 59 | 60 | void AddAlignedDMB(uint64_t index_init, uint64_t index_free_bucket); 61 | void ResetAlignedDMB(); 62 | void PrintAlignedDMB(std::string filepath); 63 | 64 | void AddNumberOfSwaps(uint64_t distance); 65 | void ResetNumberOfSwaps(); 66 | void PrintNumberOfSwaps(std::string filepath); 67 | 68 | void GetNumScannedBlocks(std::map& out_num_scanned_blocks, HashMap *hm); 69 | void PrintNumScannedBlocks(std::string filepath); 70 | 71 | void AddDSB(uint64_t distance); 72 | void ResetDSB(); 73 | void PrintDSB(std::string filepath); 74 | 75 | void AddAlignedDSB(uint64_t index_stored, uint64_t index_shift_bucket); 76 | void ResetAlignedDSB(); 77 | void PrintAlignedDSB(std::string filepath); 78 | 79 | void PrintInfo(FILE* fd, std::string metric); 80 | void SetCycle(uint64_t cycle) { cycle_ = 
cycle; } 81 | void SetInstance(uint64_t instance) { instance_ = instance; } 82 | 83 | void SetTestcase(std::string str) { 84 | testcase_ = str; 85 | } 86 | 87 | void SetParametersTestcaseString(std::string str) { 88 | parameters_testcase_string_ = str; 89 | } 90 | 91 | void SetParametersTestcaseJson(std::string str) { 92 | parameters_testcase_json_ = str; 93 | } 94 | 95 | 96 | private: 97 | std::map num_items_in_bucket_; 98 | uint64_t num_buckets_; 99 | uint64_t max_num_items_in_bucket_; 100 | uint64_t size_bucket_; 101 | std::map dib_; 102 | std::map dfb_; 103 | std::map aligned_dfb_; 104 | std::map dmb_; 105 | std::map aligned_dmb_; 106 | std::map dsb_; 107 | std::map aligned_dsb_; 108 | std::map swaps_; 109 | HashMap *hm_; 110 | uint64_t cycle_; 111 | uint64_t instance_; 112 | std::string parameters_testcase_string_; 113 | std::string parameters_testcase_json_; 114 | std::string testcase_; 115 | 116 | uint64_t AlignOffsetToBlock(uint64_t offset, uint64_t size_block) { 117 | return offset - offset % size_block; 118 | } 119 | 120 | }; 121 | 122 | 123 | }; // end namespace hashmap 124 | 125 | #endif // HASHMAP_MONITORING 126 | -------------------------------------------------------------------------------- /murmurhash3.cc: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 4 | 5 | // Note - The x86 and x64 versions do _not_ produce the same results, as the 6 | // algorithms are optimized for their respective platforms. You can still 7 | // compile and run any of them on any platform, but your performance with the 8 | // non-native version will be less than optimal. 
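// [Comment added for clarity; a sketch of how this repository consumes the hash,
//  based on the hash_function() helpers visible in probing_hashmap.h and
//  shadow_hashmap.h -- not part of the upstream MurmurHash3 source.]
// Those helpers call MurmurHash3_x64_128() and keep the first 8 bytes of its
// 16-byte output as the 64-bit bucket hash, roughly:
//
//   char hash[16];
//   uint64_t h;
//   MurmurHash3_x64_128(key.c_str(), key.size(), /*seed=*/0, hash);
//   memcpy(&h, hash, 8);   // bucket index is then h % num_buckets_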
9 | 10 | #include "murmurhash3.h" 11 | 12 | //----------------------------------------------------------------------------- 13 | // Platform-specific functions and macros 14 | 15 | // Microsoft Visual Studio 16 | 17 | #if defined(_MSC_VER) 18 | 19 | #define FORCE_INLINE __forceinline 20 | 21 | #include 22 | 23 | #define ROTL32(x,y) _rotl(x,y) 24 | #define ROTL64(x,y) _rotl64(x,y) 25 | 26 | #define BIG_CONSTANT(x) (x) 27 | 28 | // Other compilers 29 | 30 | #else // defined(_MSC_VER) 31 | 32 | #define FORCE_INLINE inline __attribute__((always_inline)) 33 | 34 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 40 | { 41 | return (x << r) | (x >> (64 - r)); 42 | } 43 | 44 | #define ROTL32(x,y) rotl32(x,y) 45 | #define ROTL64(x,y) rotl64(x,y) 46 | 47 | #define BIG_CONSTANT(x) (x##LLU) 48 | 49 | #endif // !defined(_MSC_VER) 50 | 51 | //----------------------------------------------------------------------------- 52 | // Block read - if your platform needs to do endian-swapping or can only 53 | // handle aligned reads, do the conversion here 54 | 55 | FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) 56 | { 57 | return p[i]; 58 | } 59 | 60 | FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) 61 | { 62 | return p[i]; 63 | } 64 | 65 | //----------------------------------------------------------------------------- 66 | // Finalization mix - force all bits of a hash block to avalanche 67 | 68 | FORCE_INLINE uint32_t fmix32 ( uint32_t h ) 69 | { 70 | h ^= h >> 16; 71 | h *= 0x85ebca6b; 72 | h ^= h >> 13; 73 | h *= 0xc2b2ae35; 74 | h ^= h >> 16; 75 | 76 | return h; 77 | } 78 | 79 | //---------- 80 | 81 | FORCE_INLINE uint64_t fmix64 ( uint64_t k ) 82 | { 83 | k ^= k >> 33; 84 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 85 | k ^= k >> 33; 86 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 87 | k ^= k >> 33; 88 | 89 | return k; 90 | } 91 | 92 | //----------------------------------------------------------------------------- 93 | 94 | void MurmurHash3_x86_32 ( const void * key, int len, 95 | uint32_t seed, void * out ) 96 | { 97 | const uint8_t * data = (const uint8_t*)key; 98 | const int nblocks = len / 4; 99 | 100 | uint32_t h1 = seed; 101 | 102 | const uint32_t c1 = 0xcc9e2d51; 103 | const uint32_t c2 = 0x1b873593; 104 | 105 | //---------- 106 | // body 107 | 108 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); 109 | 110 | for(int i = -nblocks; i; i++) 111 | { 112 | uint32_t k1 = getblock32(blocks,i); 113 | 114 | k1 *= c1; 115 | k1 = ROTL32(k1,15); 116 | k1 *= c2; 117 | 118 | h1 ^= k1; 119 | h1 = ROTL32(h1,13); 120 | h1 = h1*5+0xe6546b64; 121 | } 122 | 123 | //---------- 124 | // tail 125 | 126 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 127 | 128 | uint32_t k1 = 0; 129 | 130 | switch(len & 3) 131 | { 132 | case 3: k1 ^= tail[2] << 16; 133 | case 2: k1 ^= tail[1] << 8; 134 | case 1: k1 ^= tail[0]; 135 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 136 | }; 137 | 138 | //---------- 139 | // finalization 140 | 141 | h1 ^= len; 142 | 143 | h1 = fmix32(h1); 144 | 145 | *(uint32_t*)out = h1; 146 | } 147 | 148 | //----------------------------------------------------------------------------- 149 | 150 | void MurmurHash3_x86_128 ( const void * key, const int len, 151 | uint32_t seed, void * out ) 152 | { 153 | const uint8_t * data = (const uint8_t*)key; 154 | const int nblocks = len / 16; 155 | 156 | uint32_t h1 = seed; 157 | uint32_t h2 = seed; 158 
| uint32_t h3 = seed; 159 | uint32_t h4 = seed; 160 | 161 | const uint32_t c1 = 0x239b961b; 162 | const uint32_t c2 = 0xab0e9789; 163 | const uint32_t c3 = 0x38b34ae5; 164 | const uint32_t c4 = 0xa1e38b93; 165 | 166 | //---------- 167 | // body 168 | 169 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); 170 | 171 | for(int i = -nblocks; i; i++) 172 | { 173 | uint32_t k1 = getblock32(blocks,i*4+0); 174 | uint32_t k2 = getblock32(blocks,i*4+1); 175 | uint32_t k3 = getblock32(blocks,i*4+2); 176 | uint32_t k4 = getblock32(blocks,i*4+3); 177 | 178 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 179 | 180 | h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; 181 | 182 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 183 | 184 | h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; 185 | 186 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 187 | 188 | h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; 189 | 190 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 191 | 192 | h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; 193 | } 194 | 195 | //---------- 196 | // tail 197 | 198 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 199 | 200 | uint32_t k1 = 0; 201 | uint32_t k2 = 0; 202 | uint32_t k3 = 0; 203 | uint32_t k4 = 0; 204 | 205 | switch(len & 15) 206 | { 207 | case 15: k4 ^= tail[14] << 16; 208 | case 14: k4 ^= tail[13] << 8; 209 | case 13: k4 ^= tail[12] << 0; 210 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 211 | 212 | case 12: k3 ^= tail[11] << 24; 213 | case 11: k3 ^= tail[10] << 16; 214 | case 10: k3 ^= tail[ 9] << 8; 215 | case 9: k3 ^= tail[ 8] << 0; 216 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 217 | 218 | case 8: k2 ^= tail[ 7] << 24; 219 | case 7: k2 ^= tail[ 6] << 16; 220 | case 6: k2 ^= tail[ 5] << 8; 221 | case 5: k2 ^= tail[ 4] << 0; 222 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 223 | 224 | case 4: k1 ^= tail[ 3] << 24; 225 | case 3: k1 ^= tail[ 2] << 16; 226 | case 2: k1 ^= tail[ 1] << 8; 227 | case 1: k1 ^= tail[ 0] << 0; 228 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 229 | }; 230 | 231 | //---------- 232 | // finalization 233 | 234 | h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; 235 | 236 | h1 += h2; h1 += h3; h1 += h4; 237 | h2 += h1; h3 += h1; h4 += h1; 238 | 239 | h1 = fmix32(h1); 240 | h2 = fmix32(h2); 241 | h3 = fmix32(h3); 242 | h4 = fmix32(h4); 243 | 244 | h1 += h2; h1 += h3; h1 += h4; 245 | h2 += h1; h3 += h1; h4 += h1; 246 | 247 | ((uint32_t*)out)[0] = h1; 248 | ((uint32_t*)out)[1] = h2; 249 | ((uint32_t*)out)[2] = h3; 250 | ((uint32_t*)out)[3] = h4; 251 | } 252 | 253 | //----------------------------------------------------------------------------- 254 | 255 | void MurmurHash3_x64_128 ( const void * key, const int len, 256 | const uint32_t seed, void * out ) 257 | { 258 | const uint8_t * data = (const uint8_t*)key; 259 | const int nblocks = len / 16; 260 | 261 | uint64_t h1 = seed; 262 | uint64_t h2 = seed; 263 | 264 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 265 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 266 | 267 | //---------- 268 | // body 269 | 270 | const uint64_t * blocks = (const uint64_t *)(data); 271 | 272 | for(int i = 0; i < nblocks; i++) 273 | { 274 | uint64_t k1 = getblock64(blocks,i*2+0); 275 | uint64_t k2 = getblock64(blocks,i*2+1); 276 | 277 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 278 | 279 | h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 280 | 281 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 282 | 283 | h2 = ROTL64(h2,31); h2 
+= h1; h2 = h2*5+0x38495ab5; 284 | } 285 | 286 | //---------- 287 | // tail 288 | 289 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 290 | 291 | uint64_t k1 = 0; 292 | uint64_t k2 = 0; 293 | 294 | switch(len & 15) 295 | { 296 | case 15: k2 ^= ((uint64_t)tail[14]) << 48; 297 | case 14: k2 ^= ((uint64_t)tail[13]) << 40; 298 | case 13: k2 ^= ((uint64_t)tail[12]) << 32; 299 | case 12: k2 ^= ((uint64_t)tail[11]) << 24; 300 | case 11: k2 ^= ((uint64_t)tail[10]) << 16; 301 | case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; 302 | case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; 303 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 304 | 305 | case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; 306 | case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; 307 | case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; 308 | case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; 309 | case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; 310 | case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; 311 | case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; 312 | case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; 313 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 314 | }; 315 | 316 | //---------- 317 | // finalization 318 | 319 | h1 ^= len; h2 ^= len; 320 | 321 | h1 += h2; 322 | h2 += h1; 323 | 324 | h1 = fmix64(h1); 325 | h2 = fmix64(h2); 326 | 327 | h1 += h2; 328 | h2 += h1; 329 | 330 | ((uint64_t*)out)[0] = h1; 331 | ((uint64_t*)out)[1] = h2; 332 | } 333 | 334 | //----------------------------------------------------------------------------- 335 | 336 | -------------------------------------------------------------------------------- /murmurhash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
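// Usage sketch (illustrative comment added here, not part of the upstream
// header): each variant writes its digest into a caller-provided output buffer
// whose size matches the variant -- 4 bytes for MurmurHash3_x86_32 and 16 bytes
// for the two 128-bit variants. Here data and len stand for any byte buffer
// and its length:
//
//   uint32_t h32;
//   MurmurHash3_x86_32(data, len, /*seed=*/0, &h32);
//
//   char h128[16];
//   MurmurHash3_x64_128(data, len, /*seed=*/0, h128);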
4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned long uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // _MURMURHASH3_H_ 38 | -------------------------------------------------------------------------------- /plot.py: -------------------------------------------------------------------------------- 1 | # make the algorithms an argument 2 | # make the metrics an argument 3 | 4 | import os 5 | import sys 6 | import json 7 | import traceback 8 | import random 9 | import math 10 | 11 | import matplotlib 12 | import matplotlib.pyplot as plt 13 | import pprint 14 | 15 | colors = {'red': '#cd7058', 'blue': '#599ad3', 'orange': '#f9a65a', 'green': '#66cc66', 'black': '#000000', 'purple': '#990066'} 16 | numbering_subplots = ['a', 'b', 'c', 'd', 'e', 'f'] 17 | 18 | 19 | def compute_average(datapoints, has_shift): 20 | if len(datapoints) == 0: 21 | return 0, 0, 0 22 | num_freq = 0 23 | sum_metric = 0 24 | if not has_shift: 25 | minimum = 0 26 | else: 27 | minimum = None 28 | 29 | for key, value in datapoints.iteritems(): 30 | occurrence = float(key) 31 | frequency = float(value) 32 | num_freq += frequency 33 | sum_metric += frequency * occurrence 34 | if has_shift: 35 | if minimum is None or occurrence < minimum: 36 | minimum = occurrence 37 | 38 | if num_freq <= 1: 39 | return 0, 0, 0 40 | 41 | mean = float(sum_metric) / float(num_freq) 42 | 43 | sum_metric_squared = 0 44 | for key, value in datapoints.iteritems(): 45 | occurrence = float(key) 46 | frequency = float(value) 47 | sum_metric_squared += frequency * (occurrence - mean) * (occurrence - mean) 48 | 49 | variance = float(sum_metric_squared) / float(num_freq - 1) 50 | standard_deviation = math.sqrt(variance) 51 | return mean - minimum, variance, standard_deviation 52 | 53 | 54 | def compute_median(datapoints, has_shift): 55 | # TODO: very inefficient, could optimize this method 56 | if len(datapoints) == 0: 57 | return 0, 0, 0 58 | values = [] 59 | minimum = None 60 | for key, value in datapoints.iteritems(): 61 | occurrence = float(key) 62 | frequency = float(value) 63 | for i in range(int(frequency)): 64 | values.append(occurrence) 65 | if has_shift: 66 | if minimum is None or occurrence < minimum: 67 | minimum = occurrence 68 | 69 | if not has_shift: minimum = 0 70 | values = sorted(values) 71 | median = values[len(values) / 2] - minimum 72 | perc95 = values[int(float(len(values)) * .95)] - minimum 73 | maximum = values[-1] - minimum 74 | return median, perc95, maximum 75 | 76 | 77 | def aggregate_datapoints(dirpath_data, testcases, algorithms, shifts): 78 | print testcases, algorithms, shifts 79 | aggregate = {} 80 | for dirname, dirnames, filenames in os.walk(dirpath_data): 81 
| for filename in filenames: 82 | basename, ext = os.path.splitext(filename) 83 | if ext.lower() != '.json': continue 84 | if '50000000' in filename: continue 85 | 86 | if testcases != 'all' and not any(filename.startswith(testcase) for testcase in testcases.split(',')): 87 | print 'skipping ' + filename 88 | continue 89 | 90 | if algorithms != 'all' and not any(algorithm in filename for algorithm in algorithms.split(',')): 91 | print 'skipping ' + filename 92 | continue 93 | 94 | try: 95 | filepath = os.path.join(dirname, filename) 96 | print "Reading file [%s]" % (filepath,) 97 | f = open(filepath, 'r') 98 | text = f.read() 99 | data_items = json.loads(text) 100 | f.close() 101 | has_shift = shifts and any(shift in filename for shift in shifts.split(',')) 102 | if not isinstance(data_items, list): 103 | data_items = [data_items] 104 | 105 | for data in data_items: 106 | average, variance, stddev = compute_average(data['datapoints'], has_shift) 107 | median, perc95, maximum = compute_median(data['datapoints'], has_shift) 108 | 109 | ia = data['algorithm'] 110 | im = data['metric'] 111 | ib = data['parameters_hashmap_string'] 112 | ia = '%s-%s' % (ia, ib) 113 | 114 | ii = data['instance'] 115 | ic = data['cycle'] 116 | 117 | it = data['testcase'] 118 | ip = data['parameters_testcase_string'] 119 | if '75' in ip: 120 | print "before", ip 121 | ip = ip.replace('lfm0.75', 'lfm0.80') 122 | print "after", ip 123 | it = '%s-%s' % (it, ip) 124 | if im not in aggregate: 125 | aggregate[im] = {} 126 | if it not in aggregate[im]: 127 | aggregate[im][it] = {} 128 | if ia not in aggregate[im][it]: 129 | aggregate[im][it][ia] = {} 130 | if ic not in aggregate[im][it][ia]: 131 | aggregate[im][it][ia][ic] = {} 132 | 133 | for m in ['mean', 'median', 'perc95', 'standard_deviation', 'variance', 'maximum']: 134 | if m not in aggregate[im][it][ia][ic]: 135 | aggregate[im][it][ia][ic][m] = [] 136 | 137 | aggregate[im][it][ia][ic]['mean'].append(average) 138 | aggregate[im][it][ia][ic]['standard_deviation'].append(stddev) 139 | aggregate[im][it][ia][ic]['variance'].append(variance) 140 | aggregate[im][it][ia][ic]['median'].append(median) 141 | aggregate[im][it][ia][ic]['perc95'].append(perc95) 142 | aggregate[im][it][ia][ic]['maximum'].append(maximum) 143 | except: 144 | print 'Crashed at file: [%s/%s]' % (dirname, filename) 145 | traceback.print_exc() 146 | sys.exit(1) 147 | 148 | return aggregate 149 | 150 | 151 | 152 | 153 | def randomized_paired_sample_t_test(reference, candidate, details): 154 | num_items = len(reference) 155 | random.seed(None) 156 | population = [] 157 | print 'ref cand', reference, candidate 158 | 159 | diff = [] 160 | for i in range(num_items): 161 | diff.append(reference[i] - candidate[i]) 162 | 163 | 164 | num_population = 10240 165 | for k in range(num_population): 166 | diff_new = [] 167 | for i in range(num_items): 168 | sign = -1 if random.random() < 0.5 else 1 169 | diff_new.append(diff[i] * sign) 170 | 171 | mean_new = float(sum(diff_new)) / float(num_items) 172 | population.append(mean_new) 173 | 174 | count_passed = 0 175 | mean = sum(diff) / num_items 176 | population = sorted(population) 177 | 178 | for mean_current in population: 179 | if (mean > 0 and mean <= mean_current) or (mean < 0 and mean < mean_current): 180 | break 181 | count_passed += 1 182 | 183 | if mean > 0: 184 | count_passed = num_population - count_passed 185 | 186 | if False and details: 187 | print "*" * 64 188 | print "*" * 64 189 | print "details" 190 | print "population", population[0],
population[1], population[-2], population[-1] 191 | print "mean", mean 192 | print "count_passed: %f" % (float(count_passed),) 193 | print "num_pop %f" % (float(num_population), ) 194 | 195 | p_value = float(count_passed) / float(num_population) 196 | print "passed: %f" % (p_value,) 197 | return p_value 198 | 199 | 200 | 201 | def add_curve_to_plot(ax, aggregates, im, it, index_testcase, statistic, algorithms_ordering, filters, numbering_subplot, includes): 202 | names = [] 203 | lines = [] 204 | font = {'family' : 'normal', 205 | 'weight' : 'normal', 206 | 'size' : 14} 207 | matplotlib.rc('font', **font) 208 | 209 | algorithms = [None] * 5 210 | for ia in aggregates[im][it].keys(): 211 | for pattern in algorithms_ordering.keys(): 212 | if pattern in ia: 213 | order = algorithms_ordering[pattern]['order'] 214 | algorithms[order] = ia 215 | 216 | for ia in algorithms: 217 | if ia is None: continue 218 | print "Generating curve for: stats:%s | metric:%s | testcase:%s | algorithm:%s" % (statistic, im, it, ia) 219 | 220 | xs = [] 221 | ys = [] 222 | 223 | for cycle, stats in sorted(aggregates[im][it][ia].items()): 224 | if 'loading' in it: 225 | xs.append((cycle * 2.0) / 100.0) 226 | else: 227 | xs.append(cycle) 228 | ys.append(sum(stats[statistic]) / len(stats[statistic])) 229 | 230 | name = '[ERROR: unknown algorithm]' 231 | color = '#000000' 232 | linewidth = 3 233 | zorder = 1 234 | for k, v in filters.iteritems(): 235 | if k in ia: 236 | name = filters[k]['name'] 237 | color = filters[k]['color'] 238 | linewidth = filters[k]['linewidth'] 239 | style = '-' 240 | zorder = filters[k]['zorder'] 241 | break 242 | 243 | if not any(pattern in ia for pattern in includes): 244 | continue 245 | 246 | line_current, = ax.plot(xs, ys, style, color=color, linewidth=linewidth, zorder=zorder) 247 | names.append(name) 248 | lines.append(line_current) 249 | 250 | if 'loading' in it: 251 | ax.set_xlabel('(%s) Load factor' % numbering_subplot) 252 | else: 253 | ax.set_xlabel('(%s) Iterations' % numbering_subplot) 254 | 255 | if statistic == 'mean': 256 | ax.set_ylabel('Mean %s' % im) 257 | if True or 'loading' not in it: 258 | x1,x2,y1,y2 = plt.axis() 259 | plt.axis((x1,x2,0,100)) 260 | elif statistic == 'variance': 261 | ax.set_ylabel('Variance of %s' % im) 262 | if True or 'loading' not in it: 263 | x1,x2,y1,y2 = plt.axis() 264 | plt.axis((x1,x2,0,600)) 265 | elif statistic == 'standard_deviation': 266 | ax.set_ylabel('Standard deviation of %s' % im) 267 | elif statistic == 'median': 268 | ax.set_ylabel('Median of %s' % im) 269 | if True or 'loading' not in it: 270 | x1,x2,y1,y2 = plt.axis() 271 | plt.axis((x1,x2,0,100)) 272 | elif statistic == 'perc95': 273 | ax.set_ylabel('95th percentile of %s' % im) 274 | if True or 'loading' not in it: 275 | x1,x2,y1,y2 = plt.axis() 276 | plt.axis((x1,x2,0,100)) 277 | elif statistic == 'maximum': 278 | ax.set_ylabel('Maximum %s' % im) 279 | if True or 'loading' not in it: 280 | x1,x2,y1,y2 = plt.axis() 281 | plt.axis((x1,x2,0,180)) 282 | plt.title('Test case: %s' % (it.strip('-'))) 283 | ax.grid(True) 284 | 285 | if any(metric in im for metric in ['blocks', 'aligned']) and statistic != 'variance': 286 | labels=['16 B', '32 B', '64 B', '128 B', '256 B', '512 B', '1 KB', '2 KB', '4 KB', '8 KB', '16 KB', '32 KB', '64 KB', '128 KB'] 287 | plt.axis((x1,x2,4,4+len(labels))) 288 | ax.set_yticks(range(4,4+len(labels))) 289 | ax.set_yticklabels(labels) 290 | 291 | plt.legend(lines, names).set_visible(False) 292 | return names, lines 293 | 294 | 295 | 296 | def 
plot_algorithms(aggregates): 297 | 298 | for index_stat, statistic in enumerate(['mean', 'median', 'perc95', 'maximum', 'variance']): 299 | for index_metric, im in enumerate(aggregates.keys()): 300 | fig = plt.figure((index_stat+1) * 10000 + (index_metric+1) * 100 + 1) 301 | legend = None 302 | for index_testcase, it in enumerate(sorted(aggregates[im].keys())): 303 | ax = fig.add_subplot(2, 2, index_testcase+1) 304 | lines = [] 305 | names = [] 306 | 307 | names_temp, lines_temp = add_curve_to_plot( 308 | ax=ax, 309 | aggregates=aggregates, 310 | im=im, 311 | it=it, 312 | index_testcase=index_testcase, 313 | statistic=statistic, 314 | algorithms_ordering = { 315 | 'linear': {'order': 0}, 316 | 'backshift': {'order': 1}, 317 | 'tombstone': {'order': 2}, 318 | 'shadow': {'order': 3}, 319 | 'bitmap': {'order': 4}, 320 | }, 321 | filters = { 322 | 'linear': { 'color': colors['blue'], 'name': 'Linear probing', 'linewidth': 8, 'zorder': 1 }, 323 | 'backshift': { 'color': colors['orange'], 'name': 'Robin Hood (backward shift)', 'linewidth': 6, 'zorder': 2 }, 324 | 'tombstone': { 'color': colors['red'], 'name': 'Robin Hood (tombstone)', 'linewidth': 4.5, 'zorder': 3 }, 325 | 'shadow': { 'color': colors['green'], 'name': 'Hopscotch (shadow)', 'linewidth': 3, 'zorder': 4 }, 326 | 'bitmap': { 'color': colors['black'], 'name': 'Hopscotch (bitmap)', 'linewidth': 1.75, 'zorder': 5 }, 327 | }, 328 | numbering_subplot=numbering_subplots[index_testcase], 329 | includes=['10000-'], 330 | ) 331 | 332 | 333 | names.extend(names_temp) 334 | lines.extend(lines_temp) 335 | 336 | legend = plt.legend(lines, names, prop={'size':12}, bbox_to_anchor=(0.2, -0.3)) 337 | if not os.path.isdir('plots/algorithms'): 338 | os.mkdir('plots/algorithms') 339 | fig.set_size_inches(10, 7.5) 340 | plt.tight_layout() 341 | plt.savefig('plots/algorithms/%s_%s.png' % (im.lower(), statistic), dpi=72, bbox_extra_artists=(legend,), bbox_inches='tight') 342 | 343 | 344 | 345 | 346 | 347 | def plot_robinhood(aggregates): 348 | for index_metric, im in enumerate(aggregates.keys()): 349 | fig = plt.figure((index_metric+1) * 100 + 1) 350 | for index_stat, statistic in enumerate(['mean', 'median', 'perc95', 'maximum', 'variance']): 351 | ax = fig.add_subplot(3, 2, index_stat+1) 352 | lines = [] 353 | names = [] 354 | for index_testcase, it in enumerate(sorted(aggregates[im].keys())): 355 | names_temp, lines_temp = add_curve_to_plot( 356 | ax=ax, 357 | aggregates=aggregates, 358 | im=im, 359 | it=it, 360 | index_testcase=index_testcase, 361 | statistic=statistic, 362 | algorithms_ordering = { 363 | '10000-': {'order': 0}, 364 | '100000-': {'order': 1}, 365 | '1000000-': {'order': 2}, 366 | '10000000-': {'order': 3}, 367 | '50000000-': {'order': 4}, 368 | }, 369 | filters = { 370 | '10000-': { 'color': colors['blue'], 'name': 'Robin Hood (backward shift, 10k)', 'linewidth': 8, 'zorder': 1 }, 371 | '100000-': { 'color': colors['orange'], 'name': 'Robin Hood (backward shift, 100k)', 'linewidth': 6, 'zorder': 2 }, 372 | '1000000-': { 'color': colors['red'], 'name': 'Robin Hood (backward shift, 1M)', 'linewidth': 4.5, 'zorder': 3 }, 373 | '10000000-': { 'color': colors['green'], 'name': 'Robin Hood (backward shift, 10M)', 'linewidth': 3, 'zorder': 4 }, 374 | '50000000-': { 'color': colors['black'], 'name': 'Robin Hood (backward shift, 50M)', 'linewidth': 1.75, 'zorder': 5 }, 375 | '100000000-': { 'color': colors['black'], 'name': 'Robin Hood (backward shift, 100M)', 'linewidth': 1.75, 'zorder': 5 }, 376 | }, 377 | 
numbering_subplot=numbering_subplots[index_stat], 378 | includes=['backshift'], 379 | ) 380 | names.extend(names_temp) 381 | lines.extend(lines_temp) 382 | 383 | legend = plt.legend(lines, names, prop={'size':12}, bbox_to_anchor=(2.10, 0.75)) 384 | fig.set_size_inches(10, 11.25) 385 | plt.tight_layout() 386 | if not os.path.isdir('plots/robinhood-backshift'): 387 | os.mkdir('plots/robinhood-backshift') 388 | plt.savefig('plots/robinhood-backshift/%s.png' % (im.lower()), dpi=72, bbox_extra_artists=(legend,), bbox_inches='tight') 389 | 390 | 391 | 392 | if __name__=="__main__": 393 | shifts = "" 394 | if len(sys.argv) == 5: 395 | shifts = sys.argv[4] 396 | 397 | agg = aggregate_datapoints(dirpath_data=sys.argv[1], 398 | testcases=sys.argv[2], 399 | algorithms=sys.argv[3], 400 | shifts=shifts) 401 | plot_algorithms(agg) 402 | plot_robinhood(agg) 403 | -------------------------------------------------------------------------------- /probing_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "probing_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | 7 | int ProbingHashMap::Open() { 8 | buckets_ = new Bucket[num_buckets_]; 9 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 10 | monitoring_ = new hashmap::Monitoring(num_buckets_, probing_max_, static_cast(this)); 11 | return 0; 12 | } 13 | 14 | int ProbingHashMap::Close() { 15 | if (buckets_ != NULL) { 16 | for (uint32_t i = 0; i < num_buckets_; i++) { 17 | if (buckets_[i].entry != NULL && buckets_[i].entry != DELETED_BUCKET) { 18 | delete[] buckets_[i].entry->data; 19 | delete buckets_[i].entry; 20 | } 21 | } 22 | delete[] buckets_; 23 | } 24 | 25 | if (monitoring_ != NULL) { 26 | delete monitoring_; 27 | } 28 | return 0; 29 | } 30 | 31 | 32 | 33 | int ProbingHashMap::Get(const std::string& key, std::string* value) { 34 | uint64_t hash = hash_function(key); 35 | uint64_t index_init = hash % num_buckets_; 36 | bool found = false; 37 | uint32_t i; 38 | for (i = 0; i < probing_max_; i++) { 39 | uint64_t index_current = (index_init + i) % num_buckets_; 40 | if (buckets_[index_current].entry == DELETED_BUCKET) { 41 | continue; 42 | } else if (buckets_[index_current].entry == NULL) { 43 | break; 44 | } 45 | 46 | if( key.size() == buckets_[index_current].entry->size_key 47 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 48 | *value = std::string(buckets_[index_current].entry->data + key.size(), 49 | buckets_[index_current].entry->size_value); 50 | found = true; 51 | break; 52 | } 53 | } 54 | 55 | if (found) return 0; 56 | 57 | monitoring_->AddDMB(i); 58 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 59 | return 1; 60 | } 61 | 62 | 63 | 64 | uint64_t ProbingHashMap::FindEmptyBucket(uint64_t index_init) { 65 | bool found = false; 66 | uint64_t index_current = index_init; 67 | 68 | for (uint32_t i = 0; i < probing_max_; i++) { 69 | index_current = index_init + i; 70 | if ( buckets_[index_current % num_buckets_].entry == NULL 71 | || buckets_[index_current % num_buckets_].entry == DELETED_BUCKET) { 72 | found = true; 73 | monitoring_->SetDIB(index_current % num_buckets_, i); 74 | monitoring_->AddDFB(i); 75 | monitoring_->AddAlignedDFB(index_init, index_current); 76 | break; 77 | } 78 | } 79 | 80 | if (!found) { 81 | return num_buckets_; 82 | } 83 | 84 | return index_current % num_buckets_; 85 | } 86 | 87 | 88 | 89 | 90 | 91 | int ProbingHashMap::Put(const std::string& key, const std::string& value) { 92 | uint64_t hash = 
hash_function(key); 93 | uint64_t index_init = hash % num_buckets_; 94 | uint64_t index_empty = FindEmptyBucket(index_init); 95 | 96 | if (index_empty == num_buckets_) { 97 | return 1; 98 | } 99 | 100 | char *data = new char[key.size() + value.size()]; 101 | memcpy(data, key.c_str(), key.size()); 102 | memcpy(data + key.size(), value.c_str(), value.size()); 103 | 104 | ProbingHashMap::Entry *entry = new ProbingHashMap::Entry; 105 | entry->size_key = key.size(); 106 | entry->size_value = value.size(); 107 | entry->data = data; 108 | buckets_[index_empty].entry = entry; 109 | buckets_[index_empty].hash = hash; 110 | 111 | return 0; 112 | } 113 | 114 | 115 | int ProbingHashMap::Exists(const std::string& key) { 116 | // TODO: implement 117 | return 0; 118 | } 119 | 120 | 121 | int ProbingHashMap::Remove(const std::string& key) { 122 | uint64_t hash = hash_function(key); 123 | uint64_t index_init = hash % num_buckets_; 124 | 125 | bool found = false; 126 | uint64_t index_current; 127 | 128 | for (uint32_t i = 0; i < probing_max_; i++) { 129 | index_current = (index_init + i) % num_buckets_; 130 | if (buckets_[index_current].entry == DELETED_BUCKET) { 131 | continue; 132 | } else if (buckets_[index_current].entry == NULL) { 133 | break; 134 | } else if ( key.size() == buckets_[index_current].entry->size_key 135 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 136 | found = true; 137 | break; 138 | } 139 | } 140 | 141 | if (found) { 142 | delete[] buckets_[index_current].entry->data; 143 | delete buckets_[index_current].entry; 144 | buckets_[index_current].entry = DELETED_BUCKET; 145 | monitoring_->RemoveDIB(index_current); 146 | //fprintf(stderr, "Remove() OK\n"); 147 | return 0; 148 | } else { 149 | //fprintf(stderr, "Remove() not found - %" PRIu64 " %p\n", buckets_[index_current].hash, buckets_[index_current].entry); 150 | } 151 | 152 | return 1; 153 | } 154 | 155 | 156 | int ProbingHashMap::Resize() { 157 | // TODO: implement 158 | return 0; 159 | } 160 | 161 | 162 | // For debugging 163 | int ProbingHashMap::CheckDensity() { 164 | return 0; 165 | } 166 | 167 | int ProbingHashMap::BucketCounts() { 168 | return 0; 169 | } 170 | 171 | int ProbingHashMap::Dump() { 172 | return 0; 173 | } 174 | 175 | 176 | int ProbingHashMap::GetBucketState(int index) { 177 | //printf("GetBucketState %d\n", index); 178 | if (buckets_[index].entry == NULL) { 179 | return 0; 180 | } 181 | 182 | return 1; 183 | } 184 | 185 | int ProbingHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 186 | if(buckets_[index_stored].entry == NULL) return -1; 187 | *index_init = buckets_[index_stored].hash % num_buckets_; 188 | return 0; 189 | } 190 | 191 | 192 | void ProbingHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 193 | metadata["name"] = "linear"; 194 | char buffer[1024]; 195 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %u}", num_buckets_, probing_max_); 196 | metadata["parameters_hashmap"] = buffer; 197 | sprintf(buffer, "nb%" PRIu64 "-pm%u", num_buckets_, probing_max_); 198 | metadata["parameters_hashmap_string"] = buffer; 199 | } 200 | 201 | 202 | }; // end namespace hashmap 203 | -------------------------------------------------------------------------------- /probing_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_PROBING 2 | #define HASHMAP_PROBING 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | 
#include 10 | 11 | #include 12 | #include 13 | 14 | #include "murmurhash3.h" 15 | #include "hamming.h" 16 | #include "hashmap.h" 17 | 18 | #include "monitoring.h" 19 | 20 | namespace hashmap 21 | { 22 | 23 | 24 | 25 | class ProbingHashMap: public HashMap 26 | { 27 | public: 28 | 29 | ProbingHashMap(uint64_t size, 30 | int probing_max) { 31 | buckets_ = NULL; 32 | num_buckets_ = size; 33 | HASH_DELETED_BUCKET = 1; 34 | DELETED_BUCKET = (Entry*)1; 35 | probing_max_ = probing_max; 36 | } 37 | 38 | virtual ~ProbingHashMap() { 39 | Close(); 40 | } 41 | 42 | int Open(); 43 | int Close(); 44 | 45 | struct Entry 46 | { 47 | uint32_t size_key; 48 | uint32_t size_value; 49 | char *data; 50 | }; 51 | 52 | struct Bucket 53 | { 54 | uint64_t hash; 55 | struct Entry* entry; 56 | }; 57 | 58 | 59 | int Get(const std::string& key, std::string* value); 60 | int Put(const std::string& key, const std::string& value); 61 | int Exists(const std::string& key); 62 | int Remove(const std::string& key); 63 | int Resize(); 64 | int Dump(); 65 | int CheckDensity(); 66 | int BucketCounts(); 67 | int GetBucketState(int index); 68 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 69 | void GetMetadata(std::map< std::string, std::string >& metadata); 70 | 71 | private: 72 | Bucket* buckets_; 73 | uint64_t num_buckets_; 74 | 75 | uint64_t FindEmptyBucket(uint64_t index_init); 76 | 77 | uint64_t hash_function(const std::string& key) { 78 | static char hash[16]; 79 | static uint64_t output; 80 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 81 | memcpy(&output, hash, 8); 82 | return output; 83 | } 84 | 85 | 86 | uint32_t probing_max_; 87 | uint64_t HASH_DELETED_BUCKET; 88 | Entry* DELETED_BUCKET; 89 | 90 | }; 91 | 92 | 93 | }; // end namespace hashmap 94 | 95 | #endif // HASHMAP_PROBING 96 | -------------------------------------------------------------------------------- /shadow_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "shadow_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | int ShadowHashMap::Open() { 7 | buckets_ = new Bucket[num_buckets_]; 8 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 9 | monitoring_ = new hashmap::Monitoring(num_buckets_, size_neighborhood_max_, static_cast(this)); 10 | return 0; 11 | } 12 | 13 | 14 | 15 | int ShadowHashMap::Close() { 16 | if (buckets_ != NULL) { 17 | for (uint32_t i = 0; i < num_buckets_; i++) { 18 | if (buckets_[i].entry != NULL) { 19 | delete[] buckets_[i].entry->data; 20 | delete buckets_[i].entry; 21 | } 22 | } 23 | delete[] buckets_; 24 | } 25 | 26 | if (monitoring_ != NULL) { 27 | delete monitoring_; 28 | } 29 | return 0; 30 | } 31 | 32 | 33 | 34 | 35 | int ShadowHashMap::Get(const std::string& key, std::string* value) { 36 | uint64_t hash = hash_function(key); 37 | uint64_t index_init = hash % num_buckets_; 38 | bool found = false; 39 | uint32_t i; 40 | for (i = 0; i < size_neighborhood_; i++) { 41 | uint64_t index_current = (index_init + i) % num_buckets_; 42 | if ( buckets_[index_current].entry != NULL 43 | && buckets_[index_current].hash == hash 44 | && key.size() == buckets_[index_current].entry->size_key 45 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 46 | *value = std::string(buckets_[index_current].entry->data + key.size(), 47 | buckets_[index_current].entry->size_value); 48 | found = true; 49 | break; 50 | } 51 | } 52 | 53 | if (found) return 0; 54 | 55 | monitoring_->AddDMB(size_neighborhood_); 56 | 
monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 57 | return 1; 58 | } 59 | 60 | 61 | uint64_t ShadowHashMap::FindEmptyBucketAndDoSwaps(uint64_t index_init) { 62 | // In this function, the modulos function is being applied on indexes at the last moment, 63 | // when they are being used or returned. This allows to handle cases where the 64 | // indexes are cycling back to the beginning of the bucket array. 65 | bool found = false; 66 | uint64_t index_current = index_init; 67 | for (uint32_t i = 0; i < size_probing_; i++) { 68 | index_current = index_init + i; 69 | if (buckets_[index_current % num_buckets_].entry == NULL) { 70 | found = true; 71 | monitoring_->AddDFB(i); 72 | monitoring_->AddAlignedDFB(index_init, index_current); 73 | break; 74 | } 75 | } 76 | 77 | if (!found) { 78 | return num_buckets_; 79 | } 80 | 81 | int num_swaps = 0; 82 | 83 | uint64_t index_empty = index_current; 84 | while (index_empty - index_init >= size_neighborhood_) { 85 | uint64_t index_base_min = index_empty - (size_neighborhood_ - 1); 86 | bool found_swap = false; 87 | for (uint32_t i = size_neighborhood_ - 1; i > 0; i--) { 88 | uint64_t index_candidate = index_empty - i; 89 | if (index_candidate < index_init) continue; 90 | if (buckets_[index_candidate % num_buckets_].hash % num_buckets_ >= index_base_min) { 91 | // the candidate has its base bucket within the right scope, so we swap! 92 | buckets_[index_empty % num_buckets_].entry = buckets_[index_candidate % num_buckets_].entry; 93 | buckets_[index_empty % num_buckets_].hash = buckets_[index_candidate % num_buckets_].hash; 94 | 95 | buckets_[index_candidate % num_buckets_].entry = NULL; 96 | buckets_[index_candidate % num_buckets_].hash = 0; 97 | 98 | uint64_t dib = monitoring_->GetDIB(index_candidate % num_buckets_); 99 | monitoring_->RemoveDIB(index_candidate % num_buckets_); 100 | monitoring_->SetDIB(index_empty % num_buckets_, dib); 101 | 102 | index_empty = index_candidate; 103 | found_swap = true; 104 | num_swaps += 1; 105 | break; 106 | } 107 | } 108 | 109 | if (!found_swap) { 110 | if (size_neighborhood_ < size_neighborhood_max_) { 111 | size_neighborhood_ *= 2; 112 | //std::cerr << "Increasing neighborhood, now " << size_neighborhood_ << std::endl; 113 | } else { 114 | // For debugging only, dump of the area around the neighborhood 115 | if (false) { 116 | //fprintf(stderr, "index [%" PRIu64 "] empty [%" PRIu64 "]\n", index_init, index_empty); 117 | uint32_t index_temp = index_empty - size_neighborhood_ + 1; 118 | if (index_temp > index_init) index_temp = index_init; 119 | if (index_temp < 20) { 120 | index_temp = 0; 121 | } else { 122 | index_temp -= 20; 123 | } 124 | for (; index_temp <= index_empty + 20; index_temp++) { 125 | if (index_temp == index_empty - size_neighborhood_ + 1) { 126 | fprintf(stderr, "neigh "); 127 | } else if (index_temp == index_init) { 128 | fprintf(stderr, "index "); 129 | } else if (index_temp == index_empty) { 130 | fprintf(stderr, "empty "); 131 | } else { 132 | fprintf(stderr, " "); 133 | } 134 | 135 | fprintf(stderr, " %7du ", index_temp); 136 | 137 | if (buckets_[index_temp % num_buckets_].entry == NULL) { 138 | fprintf(stderr, " EMP"); 139 | } else { 140 | fprintf(stderr, "%7" PRIu64 " ", buckets_[index_temp % num_buckets_].hash % num_buckets_); 141 | } 142 | fprintf(stderr, "\n"); 143 | } 144 | fprintf(stderr, "\n"); 145 | } 146 | return num_buckets_; 147 | } 148 | } 149 | } 150 | 151 | monitoring_->SetDIB(index_empty % num_buckets_, 152 | index_empty - index_init); 153 | 
monitoring_->AddNumberOfSwaps(num_swaps); 154 | 155 | return index_empty % num_buckets_; 156 | } 157 | 158 | int ShadowHashMap::Put(const std::string& key, const std::string& value) { 159 | uint64_t hash = hash_function(key); 160 | uint64_t index_init = hash % num_buckets_; 161 | uint64_t index_empty = FindEmptyBucketAndDoSwaps(index_init); 162 | // TODO: Put() should use Exists() and perform a replacement if needed. 163 | if (index_empty == num_buckets_) { 164 | return 1; 165 | } 166 | 167 | char *data = new char[key.size() + value.size()]; 168 | memcpy(data, key.c_str(), key.size()); 169 | memcpy(data + key.size(), value.c_str(), value.size()); 170 | 171 | ShadowHashMap::Entry *entry = new ShadowHashMap::Entry; 172 | entry->size_key = key.size(); 173 | entry->size_value = value.size(); 174 | entry->data = data; 175 | buckets_[index_empty].entry = entry; 176 | buckets_[index_empty].hash = hash; 177 | 178 | return 0; 179 | } 180 | 181 | int ShadowHashMap::Exists(const std::string& key) { 182 | // TODO: implement 183 | return 0; 184 | } 185 | 186 | int ShadowHashMap::Remove(const std::string& key) { 187 | uint64_t hash = hash_function(key); 188 | uint64_t index_init = hash % num_buckets_; 189 | bool found = false; 190 | uint64_t index_current; 191 | for (uint32_t i = 0; i < size_neighborhood_; i++) { 192 | index_current = (index_init + i) % num_buckets_; 193 | if ( buckets_[index_current].entry != NULL 194 | && buckets_[index_current].hash == hash 195 | && key.size() == buckets_[index_current].entry->size_key 196 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 197 | found = true; 198 | break; 199 | } 200 | } 201 | 202 | if (found) { 203 | delete[] buckets_[index_current].entry->data; 204 | delete buckets_[index_current].entry; 205 | buckets_[index_current].entry = NULL; 206 | monitoring_->RemoveDIB(index_current); 207 | return 0; 208 | } 209 | 210 | return 0; 211 | } 212 | 213 | int ShadowHashMap::Resize() { 214 | // TODO: implement 215 | return 0; 216 | } 217 | 218 | 219 | // For debugging 220 | int ShadowHashMap::CheckDensity() { 221 | return 0; 222 | } 223 | 224 | 225 | int ShadowHashMap::BucketCounts() { 226 | std::cout << "current neighborhood: " << size_neighborhood_ << std::endl; 227 | return 0; 228 | } 229 | 230 | 231 | int ShadowHashMap::Dump() { 232 | return 0; 233 | } 234 | 235 | 236 | int ShadowHashMap::GetBucketState(int index) { 237 | if (buckets_[index].entry == NULL) { 238 | return 0; 239 | } 240 | 241 | return 1; 242 | 243 | } 244 | 245 | int ShadowHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 246 | if(buckets_[index_stored].entry == NULL) return -1; 247 | *index_init = buckets_[index_stored].hash % num_buckets_; 248 | return 0; 249 | } 250 | 251 | 252 | void ShadowHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 253 | metadata["name"] = "shadow"; 254 | char buffer[1024]; 255 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"size_probing\": %u, \"size_neighborhood_start\": %u, \"size_neighborhood_end\": %u}", num_buckets_, size_probing_, size_neighborhood_start_, size_neighborhood_max_); 256 | metadata["parameters_hashmap"] = buffer; 257 | sprintf(buffer, "nb%" PRIu64 "-sp%u-sns%u-sne%u", num_buckets_, size_probing_, size_neighborhood_start_, size_neighborhood_max_); 258 | metadata["parameters_hashmap_string"] = buffer; 259 | } 260 | 261 | 262 | 263 | }; 264 | -------------------------------------------------------------------------------- /shadow_hashmap.h: 
-------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_SHADOW 2 | #define HASHMAP_SHADOW 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "murmurhash3.h" 15 | #include "hamming.h" 16 | #include "hashmap.h" 17 | #include "monitoring.h" 18 | 19 | namespace hashmap 20 | { 21 | 22 | 23 | 24 | class ShadowHashMap: public HashMap 25 | { 26 | public: 27 | 28 | ShadowHashMap(uint64_t size, 29 | uint64_t size_probing, 30 | uint64_t size_neighborhood_start, 31 | uint64_t size_neighborhood_end 32 | ) { 33 | buckets_ = NULL; 34 | num_buckets_ = size; 35 | size_neighborhood_ = size_neighborhood_start; 36 | size_neighborhood_start_ = size_neighborhood_start; 37 | size_neighborhood_max_ = size_neighborhood_end; 38 | size_probing_ = size_probing; 39 | } 40 | 41 | virtual ~ShadowHashMap() { 42 | Close(); 43 | } 44 | 45 | int Open(); 46 | int Close(); 47 | 48 | 49 | struct Entry 50 | { 51 | uint32_t size_key; 52 | uint32_t size_value; 53 | char *data; 54 | }; 55 | 56 | struct Bucket 57 | { 58 | uint64_t hash; 59 | struct Entry* entry; 60 | }; 61 | 62 | 63 | int Get(const std::string& key, std::string* value); 64 | int Put(const std::string& key, const std::string& value); 65 | int Exists(const std::string& key); 66 | int Remove(const std::string& key); 67 | int Resize(); 68 | int Dump(); 69 | int CheckDensity(); 70 | int BucketCounts(); 71 | int GetBucketState(int index); 72 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 73 | void GetMetadata(std::map< std::string, std::string >& metadata); 74 | 75 | 76 | private: 77 | Bucket* buckets_; 78 | uint64_t num_buckets_; 79 | 80 | uint64_t FindEmptyBucketAndDoSwaps(uint64_t index_init); 81 | 82 | uint64_t hash_function(const std::string& key) { 83 | static char hash[16]; 84 | static uint64_t output; 85 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 86 | memcpy(&output, hash, 8); 87 | return output; 88 | } 89 | 90 | uint32_t size_neighborhood_; 91 | uint32_t size_neighborhood_start_; 92 | uint32_t size_neighborhood_max_; 93 | uint32_t size_probing_; 94 | 95 | }; 96 | 97 | 98 | }; // end namespace hashmap 99 | 100 | #endif // HASHMAP_SHADOW 101 | -------------------------------------------------------------------------------- /testcase.cc: -------------------------------------------------------------------------------- 1 | #include "testcase.h" 2 | 3 | 4 | // TODO: Factorize as much as possible across the test cases. 5 | 6 | namespace hashmap 7 | { 8 | 9 | int exists_or_mkdir(const char *path) { 10 | struct stat sb; 11 | 12 | if (stat(path, &sb) == 0) { 13 | if (!S_ISDIR(sb.st_mode)) { 14 | return 1; 15 | } 16 | } else if (mkdir(path, 0777) != 0) { 17 | return 1; 18 | } 19 | 20 | return 0; 21 | } 22 | 23 | std::string concatenate(std::string const& str, int i) 24 | { 25 | std::stringstream s; 26 | s << str << i; 27 | return s.str(); 28 | } 29 | 30 | void TestCase::InsertEntries(uint32_t num_items, std::set& keys) { 31 | std::string key; 32 | std::set::iterator it_find; 33 | std::string value_dummy; 34 | static uint64_t key_id_current = 0; 35 | // NOTE: If ever using this method, remember to reset key_id_current between 36 | // instances. 
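// [Descriptive comment added for clarity.] key_id_current advances by a random
// stride of 1..32 and each key is the string "key<id>", so the ids are strictly
// increasing and every generated key is new within a run. Get() is therefore
// expected to return 1 (key not found) before the Put(); any other return value
// is reported as an error.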
37 | 38 | for (uint32_t j = 0; j < num_items; j++) { 39 | key_id_current += 1 + rand() % 32; 40 | key = concatenate( "key", key_id_current ); 41 | it_find = keys.find(key); 42 | if (it_find != keys.end()) { 43 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 44 | } 45 | 46 | keys.insert(key); 47 | int ret_get = hm_->Get(key, &value_dummy); 48 | if (ret_get != 1) { 49 | fprintf(stderr, "Get() error\n"); 50 | } 51 | int ret_put = hm_->Put(key, key); 52 | if (ret_put != 0) { 53 | fprintf(stderr, "Put() error\n"); 54 | } 55 | } 56 | } 57 | 58 | 59 | void TestCase::RemoveEntries(uint32_t num_items, std::set& keys) { 60 | for (uint32_t index_del = 0; index_del < num_items; index_del++) { 61 | uint64_t r = rand(); 62 | uint64_t offset = r % keys.size(); 63 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 64 | std::set::iterator it(keys.begin()); 65 | std::advance(it, offset); 66 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 67 | //key = buffer; 68 | int ret_remove = hm_->Remove(*it); 69 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 70 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 71 | keys.erase(it); 72 | } 73 | } 74 | 75 | 76 | 77 | 78 | void BatchTestCase::run() { 79 | std::set keys; 80 | std::string key; 81 | char filename[1024]; 82 | uint32_t num_items; 83 | uint32_t num_items_big = (uint32_t)((double)num_buckets_ * load_factor_max_); 84 | uint32_t num_items_small = (uint32_t)((double)num_buckets_ * load_factor_remove_); 85 | fprintf(stdout, "num_items %u %u\n", num_items_big, num_items_small); 86 | 87 | std::string testcase = "batch"; 88 | std::string directory = "results"; 89 | if (exists_or_mkdir(directory.c_str()) != 0) { 90 | fprintf(stderr, "Could not create directory [%s]\n", directory.c_str()); 91 | exit(1); 92 | } 93 | 94 | char pt_string[1024]; 95 | sprintf(pt_string, "lfm%.2f-lfr%.2f", load_factor_max_, load_factor_remove_); 96 | 97 | char pt_json[1024]; 98 | sprintf(pt_json, "{\"load_factor_max\": %.2f, \"load_factor_remove\": %.2f}", load_factor_max_, load_factor_remove_); 99 | 100 | 101 | std::set::iterator it_find; 102 | for (int i = 0; i < 50; i++) { 103 | 104 | num_items = num_items_big; 105 | srand(i); 106 | keys.clear(); 107 | hm_->Open(); 108 | 109 | 110 | std::map metadata; 111 | hm_->GetMetadata(metadata); 112 | 113 | char directory_sub_buffer[2048]; 114 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 115 | std::string directory_sub(directory_sub_buffer); 116 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 117 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 118 | exit(1); 119 | } 120 | 121 | std::string value_dummy; 122 | uint64_t key_id_current = 0; 123 | 124 | for (int cycle = 0; cycle < 50; cycle++) { 125 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 126 | bool has_error_on_put = false; 127 | for (uint32_t j = 0; j < num_items; j++) { 128 | key_id_current += 1 + rand() % 32; 129 | key = concatenate( "key", key_id_current ); 130 | it_find = keys.find(key); 131 | if (it_find != keys.end()) { 132 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 133 | } 134 | 135 | keys.insert(key); 136 | int ret_get = hm_->Get(key, &value_dummy); 137 | if (ret_get != 1) { 138 | fprintf(stderr, "Get() error\n"); 139 | } 140 | int ret_put = hm_->Put(key, key); 141 | //fprintf(stderr, "Put() 
[%s]\n", key.c_str()); 142 | if (ret_put != 0) { 143 | fprintf(stderr, "Put() error\n"); 144 | // break on error 145 | has_error_on_put = true; 146 | break; 147 | } 148 | } 149 | printf("keys insert %zu\n", keys.size()); 150 | if (has_error_on_put) { 151 | hm_->monitoring_->ResetDFB(); 152 | hm_->monitoring_->ResetAlignedDFB(); 153 | hm_->monitoring_->ResetNumberOfSwaps(); 154 | hm_->monitoring_->ResetDMB(); 155 | hm_->monitoring_->ResetAlignedDMB(); 156 | hm_->monitoring_->ResetDSB(); 157 | hm_->monitoring_->ResetAlignedDSB(); 158 | num_items = num_items_small; 159 | break; 160 | } 161 | 162 | hm_->monitoring_->SetTestcase(testcase); 163 | hm_->monitoring_->SetInstance(i); 164 | hm_->monitoring_->SetCycle(cycle); 165 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 166 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 167 | 168 | sprintf(filename, "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", directory_sub.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str(), pt_string, i, cycle); 169 | fprintf(stderr, "filename dib %s\n", filename); 170 | hm_->monitoring_->PrintDIB(filename); 171 | 172 | 173 | sprintf(filename, 174 | "%s/%s-%s-%s--%s-adib--instance%05d-cycle%04d.json", 175 | directory_sub.c_str(), 176 | testcase.c_str(), 177 | metadata["name"].c_str(), 178 | metadata["parameters_hashmap_string"].c_str(), 179 | pt_string, 180 | i, 181 | cycle); 182 | hm_->monitoring_->PrintNumScannedBlocks(filename); 183 | 184 | sprintf(filename, 185 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 186 | directory_sub.c_str(), 187 | testcase.c_str(), 188 | metadata["name"].c_str(), 189 | metadata["parameters_hashmap_string"].c_str(), 190 | pt_string, 191 | i, 192 | cycle); 193 | hm_->monitoring_->PrintDFB(filename); 194 | hm_->monitoring_->ResetDFB(); 195 | 196 | sprintf(filename, 197 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 198 | directory_sub.c_str(), 199 | testcase.c_str(), 200 | metadata["name"].c_str(), 201 | metadata["parameters_hashmap_string"].c_str(), 202 | pt_string, 203 | i, 204 | cycle); 205 | hm_->monitoring_->PrintAlignedDFB(filename); 206 | hm_->monitoring_->ResetAlignedDFB(); 207 | 208 | sprintf(filename, 209 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 210 | directory_sub.c_str(), 211 | testcase.c_str(), 212 | metadata["name"].c_str(), 213 | metadata["parameters_hashmap_string"].c_str(), 214 | pt_string, 215 | i, 216 | cycle); 217 | hm_->monitoring_->PrintNumberOfSwaps(filename); 218 | hm_->monitoring_->ResetNumberOfSwaps(); 219 | 220 | sprintf(filename, 221 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 222 | directory_sub.c_str(), 223 | testcase.c_str(), 224 | metadata["name"].c_str(), 225 | metadata["parameters_hashmap_string"].c_str(), 226 | pt_string, 227 | i, 228 | cycle); 229 | hm_->monitoring_->PrintDMB(filename); 230 | hm_->monitoring_->ResetDMB(); 231 | 232 | sprintf(filename, 233 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 234 | directory_sub.c_str(), 235 | testcase.c_str(), 236 | metadata["name"].c_str(), 237 | metadata["parameters_hashmap_string"].c_str(), 238 | pt_string, 239 | i, 240 | cycle); 241 | hm_->monitoring_->PrintAlignedDMB(filename); 242 | hm_->monitoring_->ResetAlignedDMB(); 243 | 244 | sprintf(filename, 245 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 246 | directory_sub.c_str(), 247 | testcase.c_str(), 248 | metadata["name"].c_str(), 249 | metadata["parameters_hashmap_string"].c_str(), 250 | pt_string, 251 | i, 252 | cycle); 253 | 
hm_->monitoring_->PrintDSB(filename); 254 | hm_->monitoring_->ResetDSB(); 255 | 256 | sprintf(filename, 257 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 258 | directory_sub.c_str(), 259 | testcase.c_str(), 260 | metadata["name"].c_str(), 261 | metadata["parameters_hashmap_string"].c_str(), 262 | pt_string, 263 | i, 264 | cycle); 265 | hm_->monitoring_->PrintAlignedDSB(filename); 266 | hm_->monitoring_->ResetAlignedDSB(); 267 | 268 | 269 | 270 | for (uint32_t index_del = 0; index_del < num_items_small; index_del++) { 271 | uint64_t r = rand(); 272 | uint64_t offset = r % keys.size(); 273 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 274 | std::set::iterator it(keys.begin()); 275 | std::advance(it, offset); 276 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 277 | //key = buffer; 278 | int ret_remove = hm_->Remove(*it); 279 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 280 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 281 | keys.erase(it); 282 | } 283 | printf("keys erase %zu\n", keys.size()); 284 | num_items = num_items_small; 285 | } 286 | 287 | fprintf(stderr, "close\n"); 288 | hm_->Close(); 289 | fprintf(stderr, "ok\n"); 290 | } 291 | } 292 | 293 | 294 | 295 | 296 | void RippleTestCase::run() { 297 | std::set keys; 298 | std::string key; 299 | char filename[1024]; 300 | uint32_t num_items; 301 | uint32_t num_items_big = (uint32_t)((double)num_buckets_ * load_factor_max_); 302 | uint32_t num_items_small = (uint32_t)((double)num_buckets_ * load_factor_remove_); 303 | fprintf(stdout, "num_items %u %u\n", num_items_big, num_items_small); 304 | 305 | std::string testcase = "ripple"; 306 | std::string directory = "results"; 307 | if (exists_or_mkdir(directory.c_str()) != 0) { 308 | fprintf(stderr, "Could not create directory [%s]\n", testcase.c_str()); 309 | exit(1); 310 | } 311 | 312 | char pt_string[1024]; 313 | sprintf(pt_string, "lfm%.2f-lfr%.2f", load_factor_max_, load_factor_remove_); 314 | 315 | char pt_json[1024]; 316 | sprintf(pt_json, "{\"load_factor_max\": %.2f, \"load_factor_remove\": %.2f}", load_factor_max_, load_factor_remove_); 317 | 318 | 319 | std::set::iterator it_find; 320 | for (int i = 0; i < 50; i++) { 321 | num_items = num_items_big; 322 | srand(i); 323 | keys.clear(); 324 | hm_->Open(); 325 | 326 | std::map metadata; 327 | hm_->GetMetadata(metadata); 328 | 329 | char directory_sub_buffer[2048]; 330 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 331 | std::string directory_sub(directory_sub_buffer); 332 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 333 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 334 | exit(1); 335 | } 336 | 337 | std::string value_dummy; 338 | uint64_t key_id_current = 0; 339 | 340 | for (int cycle = 0; cycle < 50; cycle++) { 341 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 342 | bool has_error_on_put = false; 343 | for (uint32_t j = 0; j < num_items; j++) { 344 | key_id_current += 1 + rand() % 32; 345 | key = concatenate( "key", key_id_current ); 346 | it_find = keys.find(key); 347 | if (it_find != keys.end()) { 348 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 349 | } 350 | 351 | keys.insert(key); 352 | int ret_get = hm_->Get(key, &value_dummy); 353 | if (ret_get != 1) { 354 | fprintf(stderr, "Get() error\n"); 355 | } 356 | int ret_put = hm_->Put(key, key); 357 | if 
(ret_put != 0) { 358 | fprintf(stderr, "Put() error\n"); 359 | has_error_on_put = true; 360 | } 361 | 362 | if (cycle > 0) { 363 | uint64_t r = rand(); 364 | uint64_t offset = r % keys.size(); 365 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 366 | std::set::iterator it(keys.begin()); 367 | std::advance(it, offset); 368 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 369 | //key = buffer; 370 | int ret_remove = hm_->Remove(*it); 371 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 372 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 373 | keys.erase(it); 374 | } 375 | } 376 | printf("keys insert %zu\n", keys.size()); 377 | if (has_error_on_put) { 378 | hm_->monitoring_->ResetDFB(); 379 | hm_->monitoring_->ResetAlignedDFB(); 380 | hm_->monitoring_->ResetNumberOfSwaps(); 381 | hm_->monitoring_->ResetDMB(); 382 | hm_->monitoring_->ResetAlignedDMB(); 383 | hm_->monitoring_->ResetDSB(); 384 | hm_->monitoring_->ResetAlignedDSB(); 385 | num_items = num_items_small; 386 | break; 387 | } 388 | 389 | hm_->monitoring_->SetTestcase(testcase); 390 | hm_->monitoring_->SetInstance(i); 391 | hm_->monitoring_->SetCycle(cycle); 392 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 393 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 394 | 395 | sprintf(filename, "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", directory_sub.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str(), pt_string, i, cycle); 396 | fprintf(stderr, "filename dib %s\n", filename); 397 | hm_->monitoring_->PrintDIB(filename); 398 | 399 | sprintf(filename, 400 | "%s/%s-%s-%s--%s-blocks--instance%05d-cycle%04d.json", 401 | directory_sub.c_str(), 402 | testcase.c_str(), 403 | metadata["name"].c_str(), 404 | metadata["parameters_hashmap_string"].c_str(), 405 | pt_string, 406 | i, 407 | cycle); 408 | hm_->monitoring_->PrintNumScannedBlocks(filename); 409 | 410 | sprintf(filename, 411 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 412 | directory_sub.c_str(), 413 | testcase.c_str(), 414 | metadata["name"].c_str(), 415 | metadata["parameters_hashmap_string"].c_str(), 416 | pt_string, 417 | i, 418 | cycle); 419 | hm_->monitoring_->PrintDFB(filename); 420 | hm_->monitoring_->ResetDFB(); 421 | 422 | sprintf(filename, 423 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 424 | directory_sub.c_str(), 425 | testcase.c_str(), 426 | metadata["name"].c_str(), 427 | metadata["parameters_hashmap_string"].c_str(), 428 | pt_string, 429 | i, 430 | cycle); 431 | hm_->monitoring_->PrintAlignedDFB(filename); 432 | hm_->monitoring_->ResetAlignedDFB(); 433 | 434 | sprintf(filename, 435 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 436 | directory_sub.c_str(), 437 | testcase.c_str(), 438 | metadata["name"].c_str(), 439 | metadata["parameters_hashmap_string"].c_str(), 440 | pt_string, 441 | i, 442 | cycle); 443 | hm_->monitoring_->PrintNumberOfSwaps(filename); 444 | hm_->monitoring_->ResetNumberOfSwaps(); 445 | 446 | sprintf(filename, 447 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 448 | directory_sub.c_str(), 449 | testcase.c_str(), 450 | metadata["name"].c_str(), 451 | metadata["parameters_hashmap_string"].c_str(), 452 | pt_string, 453 | i, 454 | cycle); 455 | hm_->monitoring_->PrintDMB(filename); 456 | hm_->monitoring_->ResetDMB(); 457 | 458 | sprintf(filename, 459 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 460 | directory_sub.c_str(), 461 | testcase.c_str(), 462 | 
metadata["name"].c_str(), 463 | metadata["parameters_hashmap_string"].c_str(), 464 | pt_string, 465 | i, 466 | cycle); 467 | hm_->monitoring_->PrintAlignedDMB(filename); 468 | hm_->monitoring_->ResetAlignedDMB(); 469 | 470 | sprintf(filename, 471 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 472 | directory_sub.c_str(), 473 | testcase.c_str(), 474 | metadata["name"].c_str(), 475 | metadata["parameters_hashmap_string"].c_str(), 476 | pt_string, 477 | i, 478 | cycle); 479 | hm_->monitoring_->PrintDSB(filename); 480 | hm_->monitoring_->ResetDSB(); 481 | 482 | sprintf(filename, 483 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 484 | directory_sub.c_str(), 485 | testcase.c_str(), 486 | metadata["name"].c_str(), 487 | metadata["parameters_hashmap_string"].c_str(), 488 | pt_string, 489 | i, 490 | cycle); 491 | hm_->monitoring_->PrintAlignedDSB(filename); 492 | hm_->monitoring_->ResetAlignedDSB(); 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | num_items = num_items_small; 501 | } 502 | 503 | fprintf(stderr, "close\n"); 504 | hm_->Close(); 505 | fprintf(stderr, "ok\n"); 506 | } 507 | } 508 | 509 | 510 | 511 | 512 | void LoadingTestCase::run() { 513 | std::set keys; 514 | std::string key; 515 | char filename[1024]; 516 | uint32_t num_items; 517 | uint32_t num_items_big = num_buckets_; 518 | 519 | std::string testcase = "loading"; 520 | std::string directory = "results"; 521 | if (exists_or_mkdir(directory.c_str()) != 0) { 522 | fprintf(stderr, "Could not create directory [%s]\n", testcase.c_str()); 523 | exit(1); 524 | } 525 | 526 | char pt_string[1024]; 527 | sprintf(pt_string, "%s", ""); 528 | 529 | char pt_json[1024]; 530 | sprintf(pt_json, "{}"); 531 | 532 | num_items = num_items_big / 50; 533 | std::set::iterator it_find; 534 | for (int i = 0; i < 50; i++) { 535 | srand(i); 536 | keys.clear(); 537 | hm_->Open(); 538 | 539 | std::map metadata; 540 | hm_->GetMetadata(metadata); 541 | 542 | char directory_sub_buffer[2048]; 543 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 544 | std::string directory_sub(directory_sub_buffer); 545 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 546 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 547 | exit(1); 548 | } 549 | 550 | std::string value_dummy; 551 | uint64_t key_id_current = 0; 552 | 553 | for (int cycle = 0; cycle < 50; cycle++) { 554 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 555 | bool has_error_on_put = false; 556 | for (uint32_t j = 0; j < num_items; j++) { 557 | key_id_current += 1 + rand() % 32; 558 | key = concatenate( "key", key_id_current ); 559 | it_find = keys.find(key); 560 | if (it_find != keys.end()) { 561 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 562 | } 563 | 564 | int ret_get = hm_->Get(key, &value_dummy); 565 | if (ret_get != 1) { 566 | fprintf(stderr, "Get() error\n"); 567 | } 568 | int ret_put = hm_->Put(key, key); 569 | //fprintf(stderr, "Put() [%s]\n", key.c_str()); 570 | if (ret_put != 0) { 571 | fprintf(stderr, "Put() error\n"); 572 | // break on error 573 | has_error_on_put = true; 574 | break; 575 | } 576 | keys.insert(key); 577 | } 578 | printf("keys insert %zu\n", keys.size()); 579 | if (has_error_on_put) { 580 | hm_->monitoring_->ResetDFB(); 581 | hm_->monitoring_->ResetAlignedDFB(); 582 | hm_->monitoring_->ResetNumberOfSwaps(); 583 | hm_->monitoring_->ResetDMB(); 584 | hm_->monitoring_->ResetAlignedDMB(); 
585 | hm_->monitoring_->ResetDSB(); 586 | hm_->monitoring_->ResetAlignedDSB(); 587 | break; 588 | } 589 | 590 | hm_->monitoring_->SetTestcase(testcase); 591 | hm_->monitoring_->SetInstance(i); 592 | hm_->monitoring_->SetCycle(cycle); 593 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 594 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 595 | 596 | sprintf(filename, 597 | "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", 598 | directory_sub.c_str(), 599 | testcase.c_str(), 600 | metadata["name"].c_str(), 601 | metadata["parameters_hashmap_string"].c_str(), 602 | pt_string, 603 | i, 604 | cycle); 605 | fprintf(stderr, "filename dib %s\n", filename); 606 | hm_->monitoring_->PrintDIB(filename); 607 | 608 | sprintf(filename, 609 | "%s/%s-%s-%s--%s-blocks--instance%05d-cycle%04d.json", 610 | directory_sub.c_str(), 611 | testcase.c_str(), 612 | metadata["name"].c_str(), 613 | metadata["parameters_hashmap_string"].c_str(), 614 | pt_string, 615 | i, 616 | cycle); 617 | hm_->monitoring_->PrintNumScannedBlocks(filename); 618 | 619 | sprintf(filename, 620 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 621 | directory_sub.c_str(), 622 | testcase.c_str(), 623 | metadata["name"].c_str(), 624 | metadata["parameters_hashmap_string"].c_str(), 625 | pt_string, 626 | i, 627 | cycle); 628 | hm_->monitoring_->PrintDFB(filename); 629 | hm_->monitoring_->ResetDFB(); 630 | 631 | sprintf(filename, 632 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 633 | directory_sub.c_str(), 634 | testcase.c_str(), 635 | metadata["name"].c_str(), 636 | metadata["parameters_hashmap_string"].c_str(), 637 | pt_string, 638 | i, 639 | cycle); 640 | hm_->monitoring_->PrintAlignedDFB(filename); 641 | hm_->monitoring_->ResetAlignedDFB(); 642 | 643 | 644 | sprintf(filename, 645 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 646 | directory_sub.c_str(), 647 | testcase.c_str(), 648 | metadata["name"].c_str(), 649 | metadata["parameters_hashmap_string"].c_str(), 650 | pt_string, 651 | i, 652 | cycle); 653 | hm_->monitoring_->PrintNumberOfSwaps(filename); 654 | hm_->monitoring_->ResetNumberOfSwaps(); 655 | 656 | sprintf(filename, 657 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 658 | directory_sub.c_str(), 659 | testcase.c_str(), 660 | metadata["name"].c_str(), 661 | metadata["parameters_hashmap_string"].c_str(), 662 | pt_string, 663 | i, 664 | cycle); 665 | hm_->monitoring_->PrintDMB(filename); 666 | hm_->monitoring_->ResetDMB(); 667 | 668 | sprintf(filename, 669 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 670 | directory_sub.c_str(), 671 | testcase.c_str(), 672 | metadata["name"].c_str(), 673 | metadata["parameters_hashmap_string"].c_str(), 674 | pt_string, 675 | i, 676 | cycle); 677 | hm_->monitoring_->PrintAlignedDMB(filename); 678 | hm_->monitoring_->ResetAlignedDMB(); 679 | 680 | sprintf(filename, 681 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 682 | directory_sub.c_str(), 683 | testcase.c_str(), 684 | metadata["name"].c_str(), 685 | metadata["parameters_hashmap_string"].c_str(), 686 | pt_string, 687 | i, 688 | cycle); 689 | hm_->monitoring_->PrintDSB(filename); 690 | hm_->monitoring_->ResetDSB(); 691 | 692 | sprintf(filename, 693 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 694 | directory_sub.c_str(), 695 | testcase.c_str(), 696 | metadata["name"].c_str(), 697 | metadata["parameters_hashmap_string"].c_str(), 698 | pt_string, 699 | i, 700 | cycle); 701 | hm_->monitoring_->PrintAlignedDSB(filename); 702 | hm_->monitoring_->ResetAlignedDSB(); 703 | 704 | 
705 |
706 |
707 |
708 | }
709 |
710 | fprintf(stderr, "close\n");
711 | hm_->Close();
712 | fprintf(stderr, "ok\n");
713 | }
714 | }
715 |
716 |
717 |
718 |
719 |
720 | };
721 |
--------------------------------------------------------------------------------
/testcase.h:
--------------------------------------------------------------------------------
1 | #ifndef HASHMAP_TESTCASE
2 | #define HASHMAP_TESTCASE
3 |
4 | #ifndef __STDC_FORMAT_MACROS
5 | #define __STDC_FORMAT_MACROS
6 | #endif
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include
14 | //#include
15 | //#include
16 | #include
17 | #include
18 | #include
19 |
20 |
21 | #include "hashmap.h"
22 | #include "monitoring.h"
23 |
24 |
25 | namespace hashmap
26 | {
27 |
28 | class TestCase {
29 | public:
30 | TestCase() {}
31 | virtual ~TestCase() {}
32 | virtual void run() = 0;
33 | void InsertEntries(uint32_t nb_items, std::set<std::string>& keys);
34 | void RemoveEntries(uint32_t nb_items, std::set<std::string>& keys);
35 | HashMap *hm_;
36 | };
37 |
38 |
39 | class BatchTestCase: public TestCase {
40 |
41 | public:
42 | BatchTestCase(HashMap *hm, uint64_t num_buckets, double load_factor_max, double load_factor_remove) {
43 | hm_ = hm;
44 | num_buckets_ = num_buckets;
45 | load_factor_max_ = load_factor_max;
46 | load_factor_remove_ = load_factor_remove;
47 | }
48 | virtual void run();
49 |
50 | private:
51 | uint64_t num_buckets_;
52 | double load_factor_max_;
53 | double load_factor_remove_;
54 | };
55 |
56 |
57 |
58 | class RippleTestCase: public TestCase {
59 |
60 | public:
61 | RippleTestCase(HashMap *hm, uint64_t num_buckets, double load_factor_max, double load_factor_remove) {
62 | hm_ = hm;
63 | num_buckets_ = num_buckets;
64 | load_factor_max_ = load_factor_max;
65 | load_factor_remove_ = load_factor_remove;
66 | }
67 | virtual void run();
68 |
69 | private:
70 | uint64_t num_buckets_;
71 | double load_factor_max_;
72 | double load_factor_remove_;
73 | };
74 |
75 |
76 | class LoadingTestCase: public TestCase {
77 |
78 | public:
79 | LoadingTestCase(HashMap *hm, uint64_t num_buckets) {
80 | hm_ = hm;
81 | num_buckets_ = num_buckets;
82 | }
83 | virtual void run();
84 |
85 | private:
86 | uint64_t num_buckets_;
87 |
88 |
89 | };
90 |
91 | }; // namespace
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/tombstone_hashmap.cc:
--------------------------------------------------------------------------------
1 | #include "tombstone_hashmap.h"
2 |
3 | namespace hashmap {
4 |
5 | int TombstoneHashMap::Open() {
6 | buckets_ = new Bucket[num_buckets_];
7 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_));
8 | monitoring_ = new hashmap::Monitoring(num_buckets_, num_buckets_, static_cast<HashMap*>(this));
9 | num_buckets_used_ = 0;
10 | init_distance_min_ = 0;
11 | init_distance_max_ = 0;
12 | return 0;
13 | }
14 |
15 | int TombstoneHashMap::Close() {
16 | if (buckets_ != NULL) {
17 | for (uint32_t i = 0; i < num_buckets_; i++) {
18 | if (buckets_[i].entry != NULL && buckets_[i].entry != DELETED_BUCKET) {
19 | delete[] buckets_[i].entry->data;
20 | delete buckets_[i].entry;
21 | }
22 | }
23 | delete[] buckets_;
24 | }
25 |
26 | distances_.clear();
27 |
28 | if (monitoring_ != NULL) {
29 | delete monitoring_;
30 | }
31 | return 0;
32 | }
33 |
34 |
35 |
36 | int TombstoneHashMap::Get(const std::string& key, std::string* value) {
37 | uint64_t hash = hash_function(key);
38 | uint64_t index_init = hash % num_buckets_;
39 | uint64_t probe_distance = 0;
40 | bool found = false;
41 | uint32_t
i; 42 | for (i = 0; i < probing_max_; i++) { 43 | uint64_t index_current = (index_init + i) % num_buckets_; 44 | FillDistanceToInitIndex(index_current, &probe_distance); 45 | if ( buckets_[index_current].entry == NULL 46 | || i > probe_distance) { 47 | break; 48 | } 49 | 50 | if (buckets_[index_current].entry == DELETED_BUCKET) { 51 | continue; 52 | } 53 | 54 | if ( key.size() == buckets_[index_current].entry->size_key 55 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 56 | *value = std::string(buckets_[index_current].entry->data + key.size(), 57 | buckets_[index_current].entry->size_value); 58 | found = true; 59 | break; 60 | } 61 | } 62 | 63 | if (found) return 0; 64 | 65 | monitoring_->AddDMB(i); 66 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 67 | return 1; 68 | } 69 | 70 | 71 | 72 | 73 | int TombstoneHashMap::Put(const std::string& key, const std::string& value) { 74 | if (num_buckets_used_ == num_buckets_) { 75 | return 1; 76 | } 77 | num_buckets_used_ += 1; 78 | 79 | uint64_t hash = hash_function(key); 80 | uint64_t index_init = hash % num_buckets_; 81 | 82 | char *data = new char[key.size() + value.size()]; 83 | memcpy(data, key.c_str(), key.size()); 84 | memcpy(data + key.size(), value.c_str(), value.size()); 85 | 86 | TombstoneHashMap::Entry *entry = new TombstoneHashMap::Entry; 87 | entry->size_key = key.size(); 88 | entry->size_value = value.size(); 89 | entry->data = data; 90 | 91 | uint64_t index_current = index_init; 92 | uint64_t probe_distance = 0; 93 | uint64_t probe_current = GetMinInitDistance(); 94 | TombstoneHashMap::Entry *entry_temp = NULL; 95 | uint64_t hash_temp = 0; 96 | uint64_t i; 97 | int num_swaps = 0; 98 | 99 | for (i = probe_current; i < probing_max_; i++) { 100 | index_current = (index_init + i) % num_buckets_; 101 | if (buckets_[index_current].entry == NULL) { 102 | monitoring_->SetDIB(index_current, probe_current); 103 | UpdateInitDistance(probe_current, 1); 104 | buckets_[index_current].entry = entry; 105 | buckets_[index_current].hash = hash; 106 | break; 107 | } else { 108 | FillDistanceToInitIndex(index_current, &probe_distance); 109 | if (probe_current > probe_distance) { 110 | // Swapping the current bucket with the one to insert 111 | entry_temp = buckets_[index_current].entry; 112 | hash_temp = buckets_[index_current].hash; 113 | buckets_[index_current].entry = entry; 114 | buckets_[index_current].hash = hash; 115 | entry = entry_temp; 116 | hash = hash_temp; 117 | monitoring_->SetDIB(index_current, probe_current); 118 | UpdateInitDistance(probe_current, 1); 119 | num_swaps += 1; 120 | if (entry != DELETED_BUCKET) { 121 | UpdateInitDistance(probe_distance, -1); 122 | probe_current = probe_distance; 123 | } else { 124 | // The bucket we just swapped was a deleted bucket, 125 | // so the insertion process can stop here 126 | break; 127 | } 128 | } 129 | } 130 | probe_current++; 131 | } 132 | 133 | monitoring_->AddDFB(i); 134 | monitoring_->AddAlignedDFB(index_init, index_current); 135 | monitoring_->AddNumberOfSwaps(num_swaps); 136 | 137 | return 0; 138 | } 139 | 140 | 141 | int TombstoneHashMap::Exists(const std::string& key) { 142 | // TODO: implement 143 | return 0; 144 | } 145 | 146 | 147 | int TombstoneHashMap::Remove(const std::string& key) { 148 | uint64_t hash = hash_function(key); 149 | uint64_t index_init = hash % num_buckets_; 150 | uint64_t probe_distance = 0; 151 | bool found = false; 152 | uint64_t index_current; 153 | uint64_t distance_max = GetMaxInitDistance(); 154 | 
155 | //for (uint64_t i = 0; i < num_buckets_; i++) {
156 | for (uint64_t i = GetMinInitDistance(); i <= distance_max; i++) {
157 | index_current = (index_init + i) % num_buckets_;
158 |
159 | if (buckets_[index_current].entry == DELETED_BUCKET) {
160 | continue;
161 | }
162 |
163 | FillDistanceToInitIndex(index_current, &probe_distance);
164 | if ( buckets_[index_current].entry == NULL) {
165 | // || i > probe_distance) {
166 | //fprintf(stderr, "Remove() found NULL\n");
167 | continue;
168 | }
169 |
170 | if ( key.size() == buckets_[index_current].entry->size_key
171 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) {
172 | found = true;
173 | uint64_t mind = GetMinInitDistance();
174 | if (i < mind) {
175 | fprintf(stderr, "Found at distance %" PRIu64 " and min at %" PRIu64 "\n", i, mind);
176 | }
177 | break;
178 | }
179 | }
180 |
181 | if (found) {
182 | FillDistanceToInitIndex(index_current, &probe_distance);
183 | UpdateInitDistance(probe_distance, -1);
184 |
185 | delete[] buckets_[index_current].entry->data;
186 | delete buckets_[index_current].entry;
187 | buckets_[index_current].entry = DELETED_BUCKET;
188 | monitoring_->RemoveDIB(index_current);
189 | num_buckets_used_ -= 1;
190 |
191 | return 0;
192 | }
193 |
194 | return 1;
195 | }
196 |
197 |
198 |
199 | int TombstoneHashMap::Resize() {
200 | // TODO: implement
201 | return 0;
202 | }
203 |
204 |
205 | // For debugging
206 | int TombstoneHashMap::CheckDensity() {
207 | return 0;
208 | }
209 |
210 | int TombstoneHashMap::BucketCounts() {
211 | return 0;
212 | }
213 |
214 | int TombstoneHashMap::Dump() {
215 | return 0;
216 | }
217 |
218 |
219 | int TombstoneHashMap::GetBucketState(int index) {
220 | //printf("GetBucketState %d\n", index);
221 | if (buckets_[index].entry == NULL) {
222 | return 0;
223 | }
224 |
225 | return 1;
226 | }
227 |
228 | int TombstoneHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) {
229 | if(buckets_[index_stored].entry == NULL) return -1;
230 | *index_init = buckets_[index_stored].hash % num_buckets_;
231 | return 0;
232 | }
233 |
234 | int TombstoneHashMap::FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance) {
235 | if(buckets_[index_stored].entry == NULL) return -1;
236 | uint64_t index_init = buckets_[index_stored].hash % num_buckets_;
237 | if (index_init <= index_stored) {
238 | *distance = index_stored - index_init;
239 | } else {
240 | *distance = index_stored + (num_buckets_ - index_init);
241 | }
242 | return 0;
243 | }
244 |
245 |
246 | void TombstoneHashMap::GetMetadata(std::map< std::string, std::string >& metadata) {
247 | metadata["name"] = "tombstone";
248 | char buffer[1024];
249 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %" PRIu64 "}", num_buckets_, probing_max_);
250 | metadata["parameters_hashmap"] = buffer;
251 | sprintf(buffer, "nb%" PRIu64 "-pm%" PRIu64 "", num_buckets_, probing_max_);
252 | metadata["parameters_hashmap_string"] = buffer;
253 | }
254 |
255 | uint64_t TombstoneHashMap::GetMinInitDistance() {
256 | return init_distance_min_;
257 | }
258 |
259 | uint64_t TombstoneHashMap::GetMaxInitDistance() {
260 | return init_distance_max_;
261 | }
262 |
263 |
264 |
265 | void TombstoneHashMap::UpdateMinMaxInitDistance() {
266 | init_distance_min_ = 0;
267 | init_distance_max_ = 0;
268 | if (distances_.size() == 0) return;
269 |
270 | std::map<uint64_t, uint64_t>::iterator it;
271 | //fprintf(stderr, "GetMinInitDistance() ----------------------\n");
272 |
273 | init_distance_min_ = std::numeric_limits<uint64_t>::max();
274 | init_distance_max_ = 0;
275 | for (it = distances_.begin(); it != distances_.end(); ++it) {
276 | //fprintf(stderr, "GetMinInitDistance() %" PRIu64 " %" PRIu64 "\n", it->first, it->second);
277 | if (it->first < init_distance_min_) {
278 | init_distance_min_ = it->first;
279 | }
280 |
281 | if (it->first > init_distance_max_) {
282 | init_distance_max_ = it->first;
283 | }
284 | }
285 |
286 | //fprintf(stderr, "GetMaxInitDistance() %" PRIu64 "\n", distances_max);
287 | }
288 |
289 |
290 | void TombstoneHashMap::UpdateInitDistance(uint64_t distance, int32_t increment) {
291 | std::map<uint64_t, uint64_t>::iterator it;
292 | it = distances_.find(distance);
293 | if (it == distances_.end()) {
294 | if (increment > 0) {
295 | distances_[distance] = increment;
296 | UpdateMinMaxInitDistance();
297 | } else {
298 | fprintf(stderr, "UpdateInitDistance() neg on not exist %" PRIu64 " %d\n", distance, increment);
299 | }
300 | } else {
301 | distances_[distance] += increment;
302 | if (distances_[distance] <= 0) {
303 | distances_.erase(it);
304 | UpdateMinMaxInitDistance();
305 | }
306 | }
307 | }
308 |
309 |
310 |
311 |
312 | }; // end namespace hashmap
313 |
--------------------------------------------------------------------------------
/tombstone_hashmap.h:
--------------------------------------------------------------------------------
1 | #ifndef HASHMAP_TOMBSTONE
2 | #define HASHMAP_TOMBSTONE
3 |
4 | #ifndef __STDC_FORMAT_MACROS
5 | #define __STDC_FORMAT_MACROS
6 | #endif
7 | #include
8 | #include
9 | #include
10 |
11 | #include
12 | #include
13 | #include
14 |
15 | #include "murmurhash3.h"
16 | #include "hamming.h"
17 | #include "hashmap.h"
18 |
19 | #include "monitoring.h"
20 |
21 | namespace hashmap
22 | {
23 |
24 |
25 |
26 | class TombstoneHashMap: public HashMap
27 | {
28 | public:
29 |
30 | TombstoneHashMap(uint64_t size) {
31 | buckets_ = NULL;
32 | num_buckets_ = size;
33 | probing_max_ = size;
34 | DELETED_BUCKET = (Entry*)1;
35 | }
36 |
37 | virtual ~TombstoneHashMap() {
38 | Close();
39 | }
40 |
41 | int Open();
42 | int Close();
43 |
44 | struct Entry
45 | {
46 | uint32_t size_key;
47 | uint32_t size_value;
48 | char *data;
49 | };
50 |
51 | struct Bucket
52 | {
53 | uint64_t hash;
54 | struct Entry* entry;
55 | };
56 |
57 | int Get(const std::string& key, std::string* value);
58 | int Put(const std::string& key, const std::string& value);
59 | int Exists(const std::string& key);
60 | int Remove(const std::string& key);
61 | int Resize();
62 | int Dump();
63 | int CheckDensity();
64 | int BucketCounts();
65 | int GetBucketState(int index);
66 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init);
67 | int FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance);
68 | void GetMetadata(std::map< std::string, std::string >& metadata);
69 | uint64_t GetMinInitDistance();
70 | uint64_t GetMaxInitDistance();
71 |
72 | private:
73 | Bucket* buckets_;
74 | uint64_t num_buckets_;
75 | uint64_t num_buckets_used_;
76 |
77 | uint64_t hash_function(const std::string& key) {
78 | static char hash[16];
79 | static uint64_t output;
80 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash);
81 | memcpy(&output, hash, 8);
82 | return output;
83 | }
84 |
85 | Entry* DELETED_BUCKET;
86 | uint64_t probing_max_;
87 |
88 | void UpdateInitDistance(uint64_t distance, int32_t increment);
89 | void UpdateMinMaxInitDistance();
90 | std::map<uint64_t, uint64_t> distances_;
91 | uint64_t init_distance_min_;
92 | uint64_t init_distance_max_;
93 |
94 |
95 |
96 | };
97 |
98 |
99 | }; // end
namespace hashmap 100 | 101 | #endif // HASHMAP_TOMBSTONE 102 | --------------------------------------------------------------------------------