├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── backshift_hashmap.cc ├── backshift_hashmap.h ├── bitmap_hashmap.cc ├── bitmap_hashmap.h ├── hamming.cc ├── hamming.h ├── hashmap.h ├── main.cc ├── monitoring.cc ├── monitoring.h ├── murmurhash3.cc ├── murmurhash3.h ├── plot.py ├── probing_hashmap.cc ├── probing_hashmap.h ├── shadow_hashmap.cc ├── shadow_hashmap.h ├── testcase.cc ├── testcase.h ├── tombstone_hashmap.cc └── tombstone_hashmap.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | 6 | # Compiled Dynamic libraries 7 | *.so 8 | *.dylib 9 | 10 | # Compiled Static libraries 11 | *.lai 12 | *.la 13 | *.a 14 | 15 | *~ 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Emmanuel Goossaert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -c -Wall -g 3 | LDFLAGS=-g 4 | SOURCES=bitmap_hashmap.cc shadow_hashmap.cc probing_hashmap.cc tombstone_hashmap.cc backshift_hashmap.cc testcase.cc monitoring.cc murmurhash3.cc hamming.cc 5 | SOURCES_MAIN=main.cc 6 | OBJECTS=$(SOURCES:.cc=.o) 7 | OBJECTS_MAIN=$(SOURCES_MAIN:.cc=.o) 8 | EXECUTABLE=hashmap 9 | 10 | all: $(SOURCES) $(EXECUTABLE) 11 | 12 | $(EXECUTABLE): $(OBJECTS) $(OBJECTS_MAIN) 13 | $(CC) $(LDFLAGS) $(OBJECTS) $(OBJECTS_MAIN) -o $@ 14 | 15 | .cc.o: 16 | $(CC) $(CFLAGS) $< -o $@ 17 | 18 | clean: 19 | rm -f *~ *.o $(EXECUTABLE) 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hashmap 2 | ======= 3 | 4 | Implementation of open addressing hash table algorithms in C++. 5 | 6 | Most of the code is under development, but the main() function in main.cc 7 | should provide enough information as to how to use the algorithms. 
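For quick reference, the snippet below is a minimal usage sketch distilled from the calls made in main.cc; it is not a file in the repository, and the bucket count of 10000 simply matches the default used there:

```cpp
#include <iostream>
#include <string>

#include "backshift_hashmap.h"

int main() {
  hashmap::BackshiftHashMap hm(10000);  // 10000 buckets
  hm.Open();                            // allocates the bucket array

  hm.Put("key", "value");               // returns 0 on success, 1 if the table is full

  std::string value_out;
  if (hm.Get("key", &value_out) == 0) {
    std::cout << value_out << std::endl;  // prints "value"
  }

  hm.Remove("key");                     // returns 0 on success, 1 if the key is absent
  return 0;
}
```

The other tables (ProbingHashMap, TombstoneHashMap, BitmapHashMap, ShadowHashMap) expose the same Open/Put/Get/Remove interface declared in hashmap.h.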
8 | -------------------------------------------------------------------------------- /backshift_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "backshift_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | int BackshiftHashMap::Open() { 6 | buckets_ = new Bucket[num_buckets_]; 7 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 8 | monitoring_ = new hashmap::Monitoring(num_buckets_, num_buckets_, static_cast(this)); 9 | num_buckets_used_ = 0; 10 | return 0; 11 | } 12 | 13 | int BackshiftHashMap::Close() { 14 | if (buckets_ != NULL) { 15 | for (uint32_t i = 0; i < num_buckets_; i++) { 16 | if (buckets_[i].entry != NULL) { 17 | delete[] buckets_[i].entry->data; 18 | delete buckets_[i].entry; 19 | } 20 | } 21 | delete[] buckets_; 22 | } 23 | 24 | if (monitoring_ != NULL) { 25 | delete monitoring_; 26 | } 27 | return 0; 28 | } 29 | 30 | 31 | 32 | int BackshiftHashMap::Get(const std::string& key, std::string* value) { 33 | uint64_t hash = hash_function(key); 34 | uint64_t index_init = hash % num_buckets_; 35 | uint64_t probe_distance = 0; 36 | bool found = false; 37 | uint32_t i; 38 | for (i = 0; i < probing_max_; i++) { 39 | uint64_t index_current = (index_init + i) % num_buckets_; 40 | FillDistanceToInitIndex(index_current, &probe_distance); 41 | if ( buckets_[index_current].entry == NULL 42 | || i > probe_distance) { 43 | break; 44 | } 45 | 46 | if ( key.size() == buckets_[index_current].entry->size_key 47 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 48 | *value = std::string(buckets_[index_current].entry->data + key.size(), 49 | buckets_[index_current].entry->size_value); 50 | found = true; 51 | break; 52 | } 53 | } 54 | 55 | if (found) return 0; 56 | 57 | monitoring_->AddDMB(i); 58 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 59 | return 1; 60 | } 61 | 62 | 63 | 64 | 65 | int BackshiftHashMap::Put(const std::string& key, const std::string& value) { 66 | if (num_buckets_used_ == num_buckets_) { 67 | return 1; 68 | } 69 | num_buckets_used_ += 1; 70 | 71 | uint64_t hash = hash_function(key); 72 | uint64_t index_init = hash % num_buckets_; 73 | 74 | char *data = new char[key.size() + value.size()]; 75 | memcpy(data, key.c_str(), key.size()); 76 | memcpy(data + key.size(), value.c_str(), value.size()); 77 | 78 | BackshiftHashMap::Entry *entry = new BackshiftHashMap::Entry; 79 | entry->size_key = key.size(); 80 | entry->size_value = value.size(); 81 | entry->data = data; 82 | 83 | uint64_t index_current = index_init; 84 | uint64_t probe_distance = 0; 85 | uint64_t probe_current = 0; 86 | BackshiftHashMap::Entry *entry_temp = NULL; 87 | uint64_t hash_temp = 0; 88 | uint64_t i; 89 | int num_swaps = 0; 90 | 91 | for (i = 0; i < probing_max_; i++) { 92 | index_current = (index_init + i) % num_buckets_; 93 | if (buckets_[index_current].entry == NULL) { 94 | monitoring_->SetDIB(index_current, probe_current); 95 | buckets_[index_current].entry = entry; 96 | buckets_[index_current].hash = hash; 97 | break; 98 | } else { 99 | FillDistanceToInitIndex(index_current, &probe_distance); 100 | if (probe_current > probe_distance) { 101 | // Swapping the current bucket with the one to insert 102 | entry_temp = buckets_[index_current].entry; 103 | hash_temp = buckets_[index_current].hash; 104 | buckets_[index_current].entry = entry; 105 | buckets_[index_current].hash = hash; 106 | entry = entry_temp; 107 | hash = hash_temp; 108 | monitoring_->SetDIB(index_current, probe_current); 109 | 
probe_current = probe_distance; 110 | num_swaps += 1; 111 | } 112 | } 113 | probe_current++; 114 | } 115 | 116 | monitoring_->AddDFB(i); 117 | monitoring_->AddAlignedDFB(index_init, index_current); 118 | monitoring_->AddNumberOfSwaps(num_swaps); 119 | 120 | return 0; 121 | } 122 | 123 | 124 | int BackshiftHashMap::Exists(const std::string& key) { 125 | // TODO: implement 126 | return 0; 127 | } 128 | 129 | 130 | int BackshiftHashMap::Remove(const std::string& key) { 131 | uint64_t hash = hash_function(key); 132 | uint64_t index_init = hash % num_buckets_; 133 | bool found = false; 134 | uint64_t index_current = 0; 135 | uint64_t probe_distance = 0; 136 | 137 | for (uint64_t i = 0; i < num_buckets_; i++) { 138 | index_current = (index_init + i) % num_buckets_; 139 | FillDistanceToInitIndex(index_current, &probe_distance); 140 | if ( buckets_[index_current].entry == NULL 141 | || i > probe_distance) { 142 | break; 143 | } 144 | 145 | if ( key.size() == buckets_[index_current].entry->size_key 146 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 147 | found = true; 148 | break; 149 | } 150 | } 151 | 152 | if (found) { 153 | delete[] buckets_[index_current].entry->data; 154 | delete buckets_[index_current].entry; 155 | monitoring_->RemoveDIB(index_current); 156 | uint64_t i = 1; 157 | uint64_t index_previous = 0, index_swap = 0; 158 | for (i = 1; i < num_buckets_; i++) { 159 | index_previous = (index_current + i - 1) % num_buckets_; 160 | index_swap = (index_current + i) % num_buckets_; 161 | if (buckets_[index_swap].entry == NULL) { 162 | buckets_[index_previous].entry = NULL; 163 | monitoring_->RemoveDIB(index_previous); 164 | break; 165 | } 166 | uint64_t distance; 167 | if (FillDistanceToInitIndex(index_swap, &distance) != 0) { 168 | fprintf(stderr, "Error in FillDistanceToInitIndex()"); 169 | } 170 | if (distance == 0) { 171 | buckets_[index_previous].entry = NULL; 172 | monitoring_->RemoveDIB(index_previous); 173 | break; 174 | } 175 | buckets_[index_previous].entry = buckets_[index_swap].entry; 176 | buckets_[index_previous].hash = buckets_[index_swap].hash; 177 | monitoring_->SetDIB(index_previous, distance-1); 178 | } 179 | monitoring_->AddDSB(i); 180 | monitoring_->AddAlignedDSB(index_current, index_swap); 181 | num_buckets_used_ -= 1; 182 | return 0; 183 | } 184 | 185 | return 1; 186 | } 187 | 188 | 189 | 190 | int BackshiftHashMap::Resize() { 191 | // TODO: implement 192 | return 0; 193 | } 194 | 195 | 196 | // For debugging 197 | int BackshiftHashMap::CheckDensity() { 198 | return 0; 199 | } 200 | 201 | int BackshiftHashMap::BucketCounts() { 202 | return 0; 203 | } 204 | 205 | int BackshiftHashMap::Dump() { 206 | return 0; 207 | } 208 | 209 | 210 | int BackshiftHashMap::GetBucketState(int index) { 211 | //printf("GetBucketState %d\n", index); 212 | if (buckets_[index].entry == NULL) { 213 | return 0; 214 | } 215 | 216 | return 1; 217 | } 218 | 219 | int BackshiftHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 220 | if(buckets_[index_stored].entry == NULL) return -1; 221 | *index_init = buckets_[index_stored].hash % num_buckets_; 222 | return 0; 223 | } 224 | 225 | int BackshiftHashMap::FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance) { 226 | if(buckets_[index_stored].entry == NULL) return -1; 227 | uint64_t index_init = buckets_[index_stored].hash % num_buckets_; 228 | if (index_init <= index_stored) { 229 | *distance = index_stored - index_init; 230 | } else { 231 | *distance = index_stored + (num_buckets_ - 
index_init); 232 | } 233 | return 0; 234 | } 235 | 236 | 237 | void BackshiftHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 238 | metadata["name"] = "backshift"; 239 | char buffer[1024]; 240 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %" PRIu64 "}", num_buckets_, probing_max_); 241 | metadata["parameters_hashmap"] = buffer; 242 | sprintf(buffer, "nb%" PRIu64 "-pm%" PRIu64 "", num_buckets_, probing_max_); 243 | metadata["parameters_hashmap_string"] = buffer; 244 | } 245 | 246 | }; // end namespace hashmap 247 | -------------------------------------------------------------------------------- /backshift_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_BACKSHIFT 2 | #define HASHMAP_BACKSHIFT 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "murmurhash3.h" 16 | #include "hamming.h" 17 | #include "hashmap.h" 18 | 19 | #include "monitoring.h" 20 | 21 | namespace hashmap 22 | { 23 | 24 | 25 | 26 | class BackshiftHashMap: public HashMap 27 | { 28 | public: 29 | 30 | BackshiftHashMap(uint64_t size) { 31 | buckets_ = NULL; 32 | num_buckets_ = size; 33 | probing_max_ = size; 34 | } 35 | 36 | virtual ~BackshiftHashMap() { 37 | Close(); 38 | } 39 | 40 | int Open(); 41 | int Close(); 42 | 43 | struct Entry 44 | { 45 | uint32_t size_key; 46 | uint32_t size_value; 47 | char *data; 48 | }; 49 | 50 | struct Bucket 51 | { 52 | uint64_t hash; 53 | struct Entry* entry; 54 | }; 55 | 56 | int Get(const std::string& key, std::string* value); 57 | int Put(const std::string& key, const std::string& value); 58 | int Exists(const std::string& key); 59 | int Remove(const std::string& key); 60 | int Resize(); 61 | int Dump(); 62 | int CheckDensity(); 63 | int BucketCounts(); 64 | int GetBucketState(int index); 65 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 66 | int FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance); 67 | void GetMetadata(std::map< std::string, std::string >& metadata); 68 | uint64_t GetMinInitDistance(); 69 | uint64_t GetMaxInitDistance(); 70 | 71 | private: 72 | Bucket* buckets_; 73 | uint64_t num_buckets_; 74 | uint64_t num_buckets_used_; 75 | 76 | uint64_t hash_function(const std::string& key) { 77 | static char hash[16]; 78 | static uint64_t output; 79 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 80 | memcpy(&output, hash, 8); 81 | return output; 82 | } 83 | 84 | uint64_t probing_max_; 85 | }; 86 | 87 | 88 | }; // end namespace hashmap 89 | 90 | #endif // HASHMAP_BACKSHIFT 91 | -------------------------------------------------------------------------------- /bitmap_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "bitmap_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | 7 | int BitmapHashMap::Open() { 8 | buckets_ = new Bucket[num_buckets_ + size_neighborhood_]; 9 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_ + size_neighborhood_)); 10 | monitoring_ = new hashmap::Monitoring(num_buckets_, size_neighborhood_, static_cast(this)); 11 | return 0; 12 | } 13 | 14 | 15 | int BitmapHashMap::Close() { 16 | if (buckets_ != NULL) { 17 | for (uint32_t i = 0; i < num_buckets_; i++) { 18 | if (buckets_[i].entry != NULL) { 19 | delete[] buckets_[i].entry->data; 20 | delete buckets_[i].entry; 21 | } 22 | } 23 | delete[] buckets_; 24 | } 25 | 26 | if 
(monitoring_ != NULL) { 27 | delete monitoring_; 28 | } 29 | return 0; 30 | } 31 | 32 | 33 | 34 | 35 | int BitmapHashMap::Get(const std::string& key, std::string* value) { 36 | uint64_t hash = hash_function(key); 37 | uint64_t index_init = hash % num_buckets_; 38 | uint32_t mask = 1 << (size_neighborhood_-1); 39 | bool found = false; 40 | uint32_t i; 41 | uint32_t dmb = 0; 42 | for (i = 0; i < size_neighborhood_; i++) { 43 | if (buckets_[index_init].bitmap & mask) { 44 | dmb = i; 45 | uint64_t index_current = (index_init + i) % num_buckets_; 46 | if ( buckets_[index_current].entry != NULL 47 | && key.size() == buckets_[index_current].entry->size_key 48 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 49 | *value = std::string(buckets_[index_current].entry->data + key.size(), 50 | buckets_[index_current].entry->size_value); 51 | found = true; 52 | break; 53 | } 54 | } 55 | mask = mask >> 1; 56 | } 57 | 58 | if (found) return 0; 59 | 60 | monitoring_->AddDMB(dmb); 61 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 62 | return 1; 63 | } 64 | 65 | uint64_t BitmapHashMap::FindEmptyBucketAndDoSwaps(uint64_t index_init) { 66 | bool found = false; 67 | uint64_t index_current = index_init; 68 | for (uint32_t i = 0; i < size_probing_; i++) { 69 | index_current = (index_init + i) % num_buckets_; 70 | if (buckets_[index_current].entry == NULL) { 71 | found = true; 72 | monitoring_->AddDFB(i); 73 | monitoring_->AddAlignedDFB(index_init, index_current); 74 | break; 75 | } 76 | } 77 | 78 | if (!found) { 79 | return num_buckets_; 80 | } 81 | 82 | int num_swaps = 0; 83 | uint32_t index_base = 0; 84 | 85 | uint64_t index_empty = index_current; 86 | while ( (index_empty >= index_init && (index_empty - index_init) >= size_neighborhood_) 87 | || (index_empty < index_init && (index_empty + num_buckets_ - index_init) >= size_neighborhood_)) { 88 | uint64_t index_base_init = (num_buckets_ + index_empty - (size_neighborhood_ - 1)) % num_buckets_; 89 | // For each candidate base bucket 90 | bool found_swap = false; 91 | for (uint32_t i = 0; i < size_neighborhood_ - 1; i++) { 92 | // -1 because no need to test the bucket at index_empty 93 | // For each mask position 94 | index_base = (index_base_init + i) % num_buckets_; 95 | uint32_t mask = 1 << (size_neighborhood_-1); 96 | for (uint32_t j = 0; j < size_neighborhood_ - i - 1; j++) { 97 | if (buckets_[index_base].bitmap & mask) { 98 | // Found, so now we swap buckets and update the bitmap 99 | uint32_t index_candidate = (index_base + j) % num_buckets_; 100 | buckets_[index_empty].entry = buckets_[index_candidate].entry; 101 | buckets_[index_base].bitmap &= ~mask; 102 | uint32_t mask_new = 1 << i; 103 | buckets_[index_base].bitmap |= mask_new; 104 | 105 | // Move PSL monitoring 106 | uint64_t dib = monitoring_->GetDIB(index_candidate); 107 | monitoring_->RemoveDIB(index_candidate); 108 | monitoring_->SetDIB(index_empty, dib); 109 | 110 | // Prepare for next iteration 111 | index_empty = index_candidate; 112 | found_swap = true; 113 | 114 | num_swaps += 1; 115 | break; 116 | } 117 | mask = mask >> 1; 118 | } 119 | if (found_swap) break; 120 | } 121 | if (!found_swap) { 122 | // This is a dirty hack in case no reordering worked but we already had a 123 | // few swaps, we want to avoid having the same entry pointer in two 124 | // different buckets, which would make the program crash when freeing 125 | // the memory in Close(). 126 | // This should be changed whenever the Resize() method is implemented. 
127 | buckets_[index_empty].entry = NULL; 128 | return num_buckets_; 129 | } 130 | } 131 | 132 | // Monitoring 133 | uint64_t dib; 134 | if (index_empty >= index_init) { 135 | dib = index_empty - index_init; 136 | } else { 137 | dib = index_empty + num_buckets_ - index_init; 138 | } 139 | monitoring_->SetDIB(index_empty, dib); 140 | monitoring_->AddNumberOfSwaps(num_swaps); 141 | 142 | return index_empty; 143 | } 144 | 145 | int BitmapHashMap::Put(const std::string& key, const std::string& value) { 146 | uint64_t hash = hash_function(key); 147 | uint64_t index_init = hash % num_buckets_; 148 | uint64_t index_empty = FindEmptyBucketAndDoSwaps(index_init); 149 | 150 | if (index_empty == num_buckets_) { 151 | return 1; 152 | } 153 | 154 | char *data = new char[key.size() + value.size()]; 155 | memcpy(data, key.c_str(), key.size()); 156 | memcpy(data + key.size(), value.c_str(), value.size()); 157 | 158 | BitmapHashMap::Entry *entry = new BitmapHashMap::Entry; 159 | entry->size_key = key.size(); 160 | entry->size_value = value.size(); 161 | entry->data = data; 162 | buckets_[index_empty].entry = entry; 163 | 164 | uint32_t mask; 165 | if (index_empty >= index_init) { 166 | mask = 1 << (size_neighborhood_ - ((index_empty - index_init) + 1)); 167 | } else { 168 | mask = 1 << (size_neighborhood_ - ((index_empty + num_buckets_ - index_init) + 1)); 169 | } 170 | buckets_[index_init].bitmap |= mask; 171 | return 0; 172 | } 173 | 174 | 175 | int BitmapHashMap::Exists(const std::string& key) { 176 | // TODO: implement 177 | return 0; 178 | } 179 | 180 | 181 | int BitmapHashMap::Remove(const std::string& key) { 182 | uint64_t hash = hash_function(key); 183 | uint64_t index_init = hash % num_buckets_; 184 | uint32_t mask = 1 << (size_neighborhood_-1); 185 | bool found = false; 186 | uint64_t index_current; 187 | for (uint32_t i = 0; i < size_neighborhood_; i++) { 188 | if (buckets_[index_init].bitmap & mask) { 189 | index_current = (index_init + i) % num_buckets_; 190 | if ( key.size() == buckets_[index_current].entry->size_key 191 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 192 | found = true; 193 | break; 194 | } 195 | } 196 | mask = mask >> 1; 197 | } 198 | 199 | if (found) { 200 | //fprintf(stderr, "Remove() [%s] %" PRIu64 " %" PRIu64 "\n", key.c_str(), index_init, index_current); 201 | delete[] buckets_[index_current].entry->data; 202 | delete buckets_[index_current].entry; 203 | buckets_[index_current].entry = NULL; 204 | buckets_[index_init].bitmap = buckets_[index_init].bitmap & (~mask); 205 | monitoring_->RemoveDIB(index_current); 206 | return 0; 207 | } 208 | 209 | return 1; 210 | } 211 | 212 | 213 | int BitmapHashMap::Resize() { 214 | // TODO: implement 215 | // If the resize is called when FindEmptyBucketAndDoSwaps() cannot perform 216 | // the necessary swaps, then make sure that the item being inserted 217 | // or swapped is not nullified and that it is correctly inserted 218 | // after the resize. 
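  // One possible approach (sketch only, not implemented): allocate a larger
  // bucket array, walk the current one, and re-insert every non-NULL entry's
  // key/value with Put() against the new array before freeing the old one.
  // The bitmaps cannot be copied as-is, because neighborhood offsets depend
  // on hash % num_buckets_ and therefore change with the new table size.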
219 | return 0; 220 | } 221 | 222 | 223 | // For debugging 224 | int BitmapHashMap::CheckDensity() { 225 | int num_pages = 0; 226 | int count_empty = 0; 227 | int count_probe = 0; 228 | 229 | int level = 32; 230 | 231 | for (uint32_t i = 0; i < num_buckets_; i++) { 232 | if (buckets_[i].entry == NULL) { 233 | count_empty += 1; 234 | } else { 235 | count_probe += 1; 236 | } 237 | 238 | if (i > 0 && i % level == 0) { 239 | if (count_probe < 0.25 * level) { 240 | std::cout << "."; 241 | } else if (count_probe < 0.5 * level) { 242 | std::cout << ":"; 243 | } else if (count_probe < 0.75 * level) { 244 | std::cout << "|"; 245 | } else if (count_probe < 0.85 * level) { 246 | std::cout << "o"; 247 | } else if (count_probe < 0.95 * level) { 248 | std::cout << "U"; 249 | } else if (count_probe < level) { 250 | std::cout << "O"; 251 | } else { 252 | std::cout << "0"; 253 | } 254 | count_probe = 0; 255 | num_pages += 1; 256 | } 257 | } 258 | std::cout << std::endl; 259 | 260 | std::cout << "Count empty: " << count_empty << "/" << num_buckets_ << std::endl; 261 | std::cout << "Pages: " << num_pages << " | " << num_pages * level << std::endl; 262 | return 0; 263 | } 264 | 265 | 266 | int BitmapHashMap::BucketCounts() { 267 | int counts[33]; 268 | for (int i = 0; i <= 32; i++) { 269 | counts[i] = 0; 270 | } 271 | 272 | int total = 0; 273 | 274 | for (uint32_t i = 0; i < num_buckets_; i++) { 275 | counts[hamming2(buckets_[i].bitmap)] += 1; 276 | } 277 | 278 | for (int i = 0; i <= 32; i++) { 279 | std::cout << "size " << i << ": " << counts[i] << std::endl; 280 | total += counts[i]; 281 | } 282 | 283 | std::cout << "total: " << total << std::endl; 284 | 285 | return 0; 286 | } 287 | 288 | 289 | 290 | int BitmapHashMap::Dump() { 291 | for (uint32_t i = 0; i < num_buckets_ + size_neighborhood_; i++) { 292 | 293 | std::cout << "bitmap: "; 294 | for (uint32_t j = 0; j < size_neighborhood_; j++) { 295 | uint32_t mask = 1 << (size_neighborhood_-1-j); 296 | if (buckets_[i].bitmap & mask) { 297 | std::cout << "1"; 298 | } else { 299 | std::cout << "0"; 300 | } 301 | } 302 | 303 | if (buckets_[i].entry != NULL) { 304 | std::string key(buckets_[i].entry->data, 305 | buckets_[i].entry->size_key); 306 | std::string value(buckets_[i].entry->data + buckets_[i].entry->size_key, 307 | buckets_[i].entry->size_value); 308 | std::cout << " | index: " << i << " - " << key << " " << value; 309 | } 310 | std::cout << std::endl; 311 | } 312 | return 0; 313 | } 314 | 315 | 316 | 317 | 318 | int BitmapHashMap::GetBucketState(int index) { 319 | if (buckets_[index].entry == NULL) { 320 | return 0; 321 | } 322 | 323 | return 1; 324 | } 325 | 326 | 327 | int BitmapHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 328 | if(buckets_[index_stored].entry == NULL) return -1; 329 | std::string key(buckets_[index_stored].entry->data, 330 | buckets_[index_stored].entry->size_key); 331 | *index_init = hash_function(key) % num_buckets_; 332 | return 0; 333 | } 334 | 335 | 336 | void BitmapHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 337 | metadata["name"] = "bitmap"; 338 | char buffer[1024]; 339 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"size_probing\": %u}", num_buckets_, size_probing_); 340 | metadata["parameters_hashmap"] = buffer; 341 | sprintf(buffer, "nb%" PRIu64 "-sp%u", num_buckets_, size_probing_); 342 | metadata["parameters_hashmap_string"] = buffer; 343 | } 344 | 345 | 346 | 347 | }; 348 | -------------------------------------------------------------------------------- 
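The bitmap encoding used by BitmapHashMap can be hard to follow from the code alone. The standalone sketch below (not part of the repository) decodes a neighborhood bitmap using the same bit layout as Put(), Get() and Dump(): bit (size_neighborhood - 1 - j) of bucket[index_init].bitmap is set when the entry that hashed to index_init is stored j slots away.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t size_neighborhood = 32;  // same default as BitmapHashMap
  uint32_t bitmap = 0;

  // Mark offsets 0 and 3 as occupied, using the same formula as Put():
  // mask = 1 << (size_neighborhood - (offset + 1))
  bitmap |= 1u << (size_neighborhood - (0 + 1));
  bitmap |= 1u << (size_neighborhood - (3 + 1));

  // Scan the neighborhood the same way Get() and Dump() do: the mask starts
  // at the highest bit (offset 0) and shifts right by one for each offset.
  uint32_t mask = 1u << (size_neighborhood - 1);
  for (uint32_t j = 0; j < size_neighborhood; j++) {
    if (bitmap & mask) {
      printf("offset %u from the initial bucket is occupied\n", j);
    }
    mask >>= 1;
  }
  return 0;
}
```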
/bitmap_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_BITMAP 2 | #define HASHMAP_BITMAP 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include "murmurhash3.h" 14 | #include "hamming.h" 15 | #include "hashmap.h" 16 | #include "monitoring.h" 17 | 18 | namespace hashmap 19 | { 20 | 21 | 22 | 23 | class BitmapHashMap: public HashMap 24 | { 25 | public: 26 | 27 | BitmapHashMap(uint64_t size, 28 | uint64_t size_probing 29 | ) { 30 | buckets_ = NULL; 31 | num_buckets_ = size; 32 | size_neighborhood_ = 32; 33 | size_probing_ = size_probing; 34 | } 35 | 36 | virtual ~BitmapHashMap() { 37 | Close(); 38 | } 39 | 40 | int Open(); 41 | int Close(); 42 | 43 | struct Entry 44 | { 45 | uint32_t size_key; 46 | uint32_t size_value; 47 | char *data; 48 | }; 49 | 50 | struct Bucket 51 | { 52 | uint32_t bitmap; 53 | struct Entry* entry; 54 | }; 55 | 56 | 57 | int Get(const std::string& key, std::string* value); 58 | int Put(const std::string& key, const std::string& value); 59 | int Exists(const std::string& key); 60 | int Remove(const std::string& key); 61 | int Resize(); 62 | int Dump(); 63 | int CheckDensity(); 64 | int BucketCounts(); 65 | int GetBucketState(int index); 66 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 67 | void GetMetadata(std::map< std::string, std::string >& metadata); 68 | 69 | 70 | private: 71 | Bucket* buckets_; 72 | uint64_t num_buckets_; 73 | uint32_t size_neighborhood_; 74 | uint32_t size_probing_; 75 | 76 | uint64_t FindEmptyBucketAndDoSwaps(uint64_t index_init); 77 | 78 | uint64_t hash_function(const std::string& key) { 79 | static char hash[16]; 80 | static uint64_t output; 81 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 82 | memcpy(&output, hash, 8); 83 | //std::cout << output << std::endl; 84 | return output; 85 | } 86 | 87 | }; 88 | 89 | 90 | }; // end namespace hashmap 91 | 92 | #endif // HASHMAP_BITMAP 93 | -------------------------------------------------------------------------------- /hamming.cc: -------------------------------------------------------------------------------- 1 | // Code is from: http://en.wikipedia.org/wiki/Hamming_weight 2 | // 3 | //types and constants used in the functions below 4 | 5 | #include "hamming.h" 6 | 7 | const uint64_t m1 = 0x5555555555555555; //binary: 0101... 8 | const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. 9 | const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... 10 | const uint64_t m8 = 0x00ff00ff00ff00ff; //binary: 8 zeros, 8 ones ... 11 | const uint64_t m16 = 0x0000ffff0000ffff; //binary: 16 zeros, 16 ones ... 12 | const uint64_t m32 = 0x00000000ffffffff; //binary: 32 zeros, 32 ones 13 | const uint64_t hff = 0xffffffffffffffff; //binary: all ones 14 | const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... 15 | 16 | //This is a naive implementation, shown for comparison, 17 | //and to help in understanding the better functions. 18 | //It uses 24 arithmetic operations (shift, add, and). 
19 | int hamming1(uint64_t x) { 20 | x = (x & m1 ) + ((x >> 1) & m1 ); //put count of each 2 bits into those 2 bits 21 | x = (x & m2 ) + ((x >> 2) & m2 ); //put count of each 4 bits into those 4 bits 22 | x = (x & m4 ) + ((x >> 4) & m4 ); //put count of each 8 bits into those 8 bits 23 | x = (x & m8 ) + ((x >> 8) & m8 ); //put count of each 16 bits into those 16 bits 24 | x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits 25 | x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits 26 | return x; 27 | } 28 | 29 | //This uses fewer arithmetic operations than any other known 30 | //implementation on machines with slow multiplication. 31 | //It uses 17 arithmetic operations. 32 | int hamming2(uint64_t x) { 33 | x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits 34 | x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 35 | x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits 36 | x += x >> 8; //put count of each 16 bits into their lowest 8 bits 37 | x += x >> 16; //put count of each 32 bits into their lowest 8 bits 38 | x += x >> 32; //put count of each 64 bits into their lowest 8 bits 39 | return x & 0x7f; 40 | } 41 | 42 | //This uses fewer arithmetic operations than any other known 43 | //implementation on machines with fast multiplication. 44 | //It uses 12 arithmetic operations, one of which is a multiply. 45 | int hamming3(uint64_t x) { 46 | x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits 47 | x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 48 | x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits 49 | return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 
50 | } 51 | -------------------------------------------------------------------------------- /hamming.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_HAMMING 2 | #define HASHMAP_HAMMING 3 | #include 4 | #include 5 | 6 | int hamming1(uint64_t x); 7 | int hamming2(uint64_t x); 8 | int hamming3(uint64_t x); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP 2 | #define HASHMAP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "murmurhash3.h" 9 | #include "hamming.h" 10 | 11 | 12 | namespace hashmap 13 | { 14 | 15 | class Monitoring; 16 | 17 | class HashMap 18 | { 19 | public: 20 | 21 | HashMap() { 22 | monitoring_ = NULL; 23 | } 24 | 25 | virtual ~HashMap() { 26 | } 27 | 28 | virtual int Open() = 0; 29 | virtual int Close() = 0; 30 | virtual int Get(const std::string& key, std::string* value) = 0; 31 | virtual int Put(const std::string& key, const std::string& value) = 0; 32 | virtual int Exists(const std::string& key) = 0; 33 | virtual int Remove(const std::string& key) = 0; 34 | virtual int Dump() = 0; 35 | virtual int CheckDensity() = 0; 36 | virtual int BucketCounts() = 0; 37 | virtual int GetBucketState(int index) = 0; 38 | virtual int FillInitIndex(uint64_t index_stored, uint64_t *index_init) = 0; 39 | virtual void GetMetadata(std::map< std::string, std::string >& metadata) = 0; 40 | 41 | Monitoring *monitoring_; 42 | }; 43 | 44 | }; // end namespace hashmap 45 | 46 | #endif // HASHMAP 47 | -------------------------------------------------------------------------------- /main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "hashmap.h" 10 | #include "probing_hashmap.h" 11 | #include "tombstone_hashmap.h" 12 | #include "backshift_hashmap.h" 13 | #include "bitmap_hashmap.h" 14 | #include "shadow_hashmap.h" 15 | 16 | #include "testcase.h" 17 | 18 | 19 | 20 | std::string concatenate(std::string const& str, int i) 21 | { 22 | std::stringstream s; 23 | s << str << i; 24 | return s.str(); 25 | } 26 | 27 | 28 | uint32_t NearestPowerOfTwo(const uint32_t number) { 29 | uint32_t power = 1; 30 | while (power < number) { 31 | power <<= 1; 32 | } 33 | return power; 34 | } 35 | 36 | 37 | int exists_or_mkdir(const char *path) { 38 | struct stat sb; 39 | 40 | if (stat(path, &sb) == 0) { 41 | if (!S_ISDIR(sb.st_mode)) { 42 | return 1; 43 | } 44 | } else if (mkdir(path, 0777) != 0) { 45 | return 1; 46 | } 47 | 48 | return 0; 49 | } 50 | 51 | 52 | void show_usage() { 53 | fprintf(stdout, "Test program for implementations of open addressing hash table algorithms.\n"); 54 | fprintf(stdout, "\n"); 55 | 56 | fprintf(stdout, "General parameters (mandatory):\n"); 57 | fprintf(stdout, " --algo algorithm to use for the hash table. Possible values are:\n"); 58 | fprintf(stdout, " * linear: basic linear probing\n"); 59 | fprintf(stdout, " * tombstone: Robin Hood hashing with tombstone deletion\n"); 60 | fprintf(stdout, " * backshift: Robin Hood hashing with backward shifting deletion\n"); 61 | fprintf(stdout, " * bitmap: hopscotch hashing with bitmap representation\n"); 62 | fprintf(stdout, " * shadow: hopscotch hashing with shadow representation\n"); 63 | fprintf(stdout, " --testcase test case to use. 
Possible values are:\n"); 64 | fprintf(stdout, " * loading: load the table until it is full (does not perform any removals).\n"); 65 | fprintf(stdout, " * batch: load the table, then remove a large batch, and re-insert a large batch.\n"); 66 | fprintf(stdout, " * ripple: load the table, then do a series of removal-insertion operations.\n"); 67 | fprintf(stdout, "\n"); 68 | 69 | fprintf(stdout, "Parameters for linear probing algorithm (optional):\n"); 70 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 71 | fprintf(stdout, "\n"); 72 | 73 | fprintf(stdout, "Parameters for tombstone algorithm (optional):\n"); 74 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 75 | fprintf(stdout, "\n"); 76 | 77 | fprintf(stdout, "Parameters for backshift algorithm (optional):\n"); 78 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 79 | fprintf(stdout, "\n"); 80 | 81 | fprintf(stdout, "Parameters for bitmap algorithm (optional):\n"); 82 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 83 | fprintf(stdout, " --size_probing maximum number of buckets used in the probing (default=4096)\n"); 84 | fprintf(stdout, "\n"); 85 | 86 | fprintf(stdout, "Parameters for shadow algorithm (optional):\n"); 87 | fprintf(stdout, " --num_buckets number of buckets in the hash table (default=10000)\n"); 88 | fprintf(stdout, " --size_probing maximum number of buckets used in the probing (default=4096)\n"); 89 | fprintf(stdout, " --size_nh_start starting size of the neighborhoods (default=32)\n"); 90 | fprintf(stdout, " --size_nh_end ending size of the neighborhoods (default=32)\n"); 91 | fprintf(stdout, "\n"); 92 | 93 | fprintf(stdout, "Parameters for the batch test case (optional):\n"); 94 | fprintf(stdout, " --load_factor_max maxium load factor at which the table should be used (default=.7)\n"); 95 | fprintf(stdout, " --load_factor_step load factor by which items in the table should be removed and inserted (default=.1)\n"); 96 | fprintf(stdout, "\n"); 97 | 98 | fprintf(stdout, "Parameters for the ripple test case (optional):\n"); 99 | fprintf(stdout, " --load_factor_max maxium load factor at which the table should be used (default=.7)\n"); 100 | fprintf(stdout, " --load_factor_step load factor by which items in the table should be removed and inserted (default=.1)\n"); 101 | fprintf(stdout, "\n"); 102 | 103 | fprintf(stdout, "Examples:\n"); 104 | fprintf(stdout, "./hashmap --algo backshift --num_buckets 10000 --testcase batch --load_factor_max 0.8 --load_factor_step 0.1\n"); 105 | fprintf(stdout, "./hashmap --algo shadow --num_buckets 10000 --size_nh_start 4 --size_nh_end 64 --testcase loading\n"); 106 | } 107 | 108 | 109 | 110 | 111 | 112 | int main(int argc, char **argv) { 113 | bool has_error; 114 | 115 | if (argc == 1 || (argc == 2 && strcmp(argv[1], "--help") == 0)) { 116 | show_usage(); 117 | exit(-1); 118 | } 119 | 120 | if (argc % 2 == 0) { 121 | std::cerr << "Error: invalid number of arguments" << std::endl; 122 | exit(-1); 123 | } 124 | 125 | uint32_t size_neighborhood_start = 32; 126 | uint32_t size_neighborhood_end = 32; 127 | uint32_t size_probing = 4096; 128 | uint32_t num_buckets = 10000; 129 | double load_factor_max = 0.7; 130 | double load_factor_step = 0.1; 131 | std::string algorithm = ""; 132 | std::string testcase = ""; 133 | 134 | if (argc > 2) { 135 | for (int i = 1; i < argc; i += 2 ) { 136 | if (strcmp(argv[i], "--algo" ) == 0) { 137 | 
algorithm = std::string(argv[i+1]); 138 | } else if (strcmp(argv[i], "--num_buckets" ) == 0) { 139 | num_buckets = atoi(argv[i+1]); 140 | } else if (strcmp(argv[i], "--size_nh_start" ) == 0) { 141 | size_neighborhood_start = atoi(argv[i+1]); 142 | } else if (strcmp(argv[i], "--size_nh_end" ) == 0) { 143 | size_neighborhood_end = atoi(argv[i+1]); 144 | } else if (strcmp(argv[i], "--size_probing" ) == 0) { 145 | size_probing = atoi(argv[i+1]); 146 | } else if (strcmp(argv[i], "--testcase" ) == 0) { 147 | testcase = std::string(argv[i+1]); 148 | } else if (strcmp(argv[i], "--load_factor_max" ) == 0) { 149 | load_factor_max = atof(argv[i+1]); 150 | } else if (strcmp(argv[i], "--load_factor_step" ) == 0) { 151 | load_factor_step = atof(argv[i+1]); 152 | } else { 153 | fprintf(stderr, "Unknown parameter [%s]\n", argv[i]); 154 | exit(-1); 155 | } 156 | } 157 | } 158 | 159 | int num_items = num_buckets; 160 | //int num_items = NearestPowerOfTwo(num_buckets); 161 | hashmap::HashMap *hm; 162 | if (algorithm == "bitmap") { 163 | hm = new hashmap::BitmapHashMap(num_items, size_probing); 164 | } else if (algorithm == "shadow") { 165 | hm = new hashmap::ShadowHashMap(num_items, size_probing, size_neighborhood_start, size_neighborhood_end); 166 | } else if (algorithm == "linear") { 167 | hm = new hashmap::ProbingHashMap(num_items, size_probing); 168 | } else if (algorithm == "tombstone") { 169 | hm = new hashmap::TombstoneHashMap(num_items); 170 | } else if (algorithm == "backshift") { 171 | hm = new hashmap::BackshiftHashMap(num_items); 172 | } else { 173 | fprintf(stderr, "Unknown algorithm [%s]\n", algorithm.c_str()); 174 | exit(-1); 175 | } 176 | 177 | if (testcase == "loading") { 178 | //run_testcase2(hm, num_items, load_factor_max); 179 | hashmap::LoadingTestCase tc(hm, num_items); 180 | tc.run(); 181 | return 0; 182 | } else if (testcase == "batch") { 183 | //run_testcase2(hm, num_items, load_factor_max); 184 | hashmap::BatchTestCase tc(hm, num_items, load_factor_max, load_factor_step); 185 | tc.run(); 186 | return 0; 187 | } else if (testcase == "ripple") { 188 | hashmap::RippleTestCase tc(hm, num_items, load_factor_max, load_factor_step); 189 | tc.run(); 190 | return 0; 191 | } else if (testcase != "") { 192 | fprintf(stderr, "Error: testcase is unknown [%s]\n", testcase.c_str()); 193 | return 1; 194 | } 195 | 196 | hm->Open(); 197 | std::string value_out("value_out"); 198 | 199 | 200 | 201 | int num_items_reached = 0; 202 | 203 | for (int i = 0; i < num_items; i++) { 204 | value_out = "value_out"; 205 | std::string key = concatenate( "key", i ); 206 | std::string value = concatenate( "value", i ); 207 | int ret_put = hm->Put(key, value); 208 | hm->Get(key, &value_out); 209 | 210 | if (ret_put != 0) { 211 | std::cout << "Insertion stopped due to clustering at step: " << i << std::endl; 212 | std::cout << "Load factor: " << (double)i/num_items << std::endl; 213 | num_items_reached = i; 214 | break; 215 | } 216 | } 217 | 218 | 219 | has_error = false; 220 | for (int i = 0; i < num_items_reached; i++) { 221 | value_out = "value_out"; 222 | std::string key = concatenate( "key", i ); 223 | std::string value = concatenate( "value", i ); 224 | int ret_get = hm->Get(key, &value_out); 225 | if (ret_get != 0 || value != value_out) { 226 | std::cout << "Final check: error at step [" << i << "]" << std::endl; 227 | has_error = true; 228 | break; 229 | } 230 | } 231 | 232 | if (!has_error) { 233 | std::cout << "Final check: OK" << std::endl; 234 | } 235 | 236 | 237 | /* 238 | if (hm->monitoring_ != NULL) { 
239 | std::cout << "Monitoring: OK" << std::endl; 240 | } 241 | 242 | // testcase-algo-metric-runnumber-step.json 243 | // batch50-shadow-density-00001-0001.json 244 | 245 | hm->monitoring_->PrintDensity("density.json"); 246 | std::cout << "Clustering" << std::endl; 247 | hm->monitoring_->PrintClustering(hm); 248 | 249 | hm->monitoring_->PrintDIB("probing_sequence_length_search.json"); 250 | hm->monitoring_->PrintNumScannedBlocks("num_scanned_blocks.json"); 251 | 252 | */ 253 | //hm->CheckDensity(); 254 | //hm->BucketCounts(); 255 | 256 | 257 | has_error = false; 258 | for (int i = 0; i < num_items_reached; i++) { 259 | std::string key = concatenate( "key", i ); 260 | std::string value = concatenate( "value", i ); 261 | int ret_remove = hm->Remove(key); 262 | if (ret_remove != 0) { 263 | std::cout << "Remove: error at step [" << i << "]" << std::endl; 264 | has_error = true; 265 | break; 266 | } 267 | int ret_get = hm->Get(key, &value_out); 268 | if (ret_get == 0) { 269 | std::cout << "Remove: error at step [" << i << "] -- can get after remove" << std::endl; 270 | has_error = true; 271 | break; 272 | } 273 | } 274 | 275 | if (!has_error) { 276 | std::cout << "Removing items: OK" << std::endl; 277 | } 278 | 279 | 280 | return 0; 281 | } 282 | -------------------------------------------------------------------------------- /monitoring.cc: -------------------------------------------------------------------------------- 1 | #include "monitoring.h" 2 | #include "hashmap.h" 3 | 4 | namespace hashmap { 5 | 6 | void Monitoring::PrintInfo(FILE* fd, std::string metric) { 7 | std::map metadata; 8 | hm_->GetMetadata(metadata); 9 | fprintf(fd, " \"algorithm\": \"%s\",\n", metadata["name"].c_str()); 10 | fprintf(fd, " \"testcase\": \"%s\",\n", testcase_.c_str()); 11 | fprintf(fd, " \"metric\": \"%s\",\n", metric.c_str()); 12 | fprintf(fd, " \"parameters_testcase\": %s,\n", parameters_testcase_json_.c_str()); 13 | fprintf(fd, " \"parameters_testcase_string\": \"%s\",\n", parameters_testcase_string_.c_str()); 14 | fprintf(fd, " \"parameters_hashmap\": %s,\n", metadata["parameters_hashmap"].c_str()); 15 | fprintf(fd, " \"parameters_hashmap_string\": \"%s\",\n", metadata["parameters_hashmap_string"].c_str()); 16 | fprintf(fd, " \"instance\": %" PRIu64 ",\n", instance_); 17 | fprintf(fd, " \"cycle\": %" PRIu64 ",\n", cycle_); 18 | } 19 | 20 | 21 | uint64_t** Monitoring::GetClustering(HashMap* hm) { 22 | // This is a O(n^2) solution, but there is a O(n) one. If this gets too slow, 23 | // replace with the O(n) solution. 
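  // Sketch of the O(n) variant (not implemented here): for each window size,
  // keep a running count of occupied buckets; when the window slides from
  // bucket i to bucket i+1, subtract hm->GetBucketState(i) and add
  // hm->GetBucketState(i + size_window), then record the running count in
  // clustering[index_window][count]. Each bucket is then touched a constant
  // number of times per window size instead of size_window times.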
24 | uint64_t sizes_window[8] = { 8, 16, 32, 64, 128, 256, 512, 1024 }; 25 | 26 | uint64_t **clustering = (uint64_t**) new uint64_t*[8]; 27 | for (unsigned int i = 0; i < 8; i++) { 28 | clustering[i] = new uint64_t[ sizes_window[i] + 1 ]; 29 | for (unsigned int j = 0; j < sizes_window[i]; j++) { 30 | clustering[i][j] = 0; 31 | } 32 | } 33 | 34 | for (uint64_t index_bucket = 0; index_bucket < num_buckets_; index_bucket++) { 35 | for (uint64_t index_window = 0; index_window < 8; index_window++) { 36 | if (index_bucket >= num_buckets_ - sizes_window[index_window]) { 37 | continue; 38 | } 39 | 40 | uint64_t count = 0; 41 | for (uint64_t i = 0; i < sizes_window[index_window]; i++) { 42 | uint64_t index_bucket_current = index_bucket + i; 43 | if (hm->GetBucketState(index_bucket_current) == 1) { 44 | count += 1; 45 | } 46 | } 47 | 48 | //if (index_bucket > sizes_window[index_window]) { 49 | //} 50 | clustering[index_window][count] += 1; 51 | } 52 | } 53 | 54 | return clustering; 55 | } 56 | 57 | 58 | void Monitoring::PrintClustering(HashMap *hm) { 59 | int sizes_window[5] = { 8, 16, 32, 64, 128 }; 60 | uint64_t** clustering = hm->monitoring_->GetClustering(hm); 61 | for (int i = 0; i < 5; i++) { 62 | fprintf(stdout, "Cluster for window of size %d:\n", sizes_window[i]); 63 | for (int j = 0; j < sizes_window[i] + 1; j++) { 64 | fprintf(stdout, " %5d: %5" PRIu64 "\n", j, clustering[i][j]); 65 | } 66 | } 67 | 68 | for (int i = 0; i < 8; i++) { 69 | delete[] clustering[i]; 70 | } 71 | delete[] clustering; 72 | } 73 | 74 | 75 | 76 | uint64_t Monitoring::GetDIB(uint64_t index) { 77 | std::map::iterator it; 78 | it = dib_.find(index); 79 | if (it == dib_.end()) { 80 | return num_buckets_; 81 | } 82 | return dib_[index]; 83 | } 84 | 85 | 86 | void Monitoring::SetDIB(uint64_t index, uint64_t dib) { 87 | dib_[index] = dib; 88 | //fprintf(stderr, "SetPSL [%" PRIu64 "]\n", index); 89 | } 90 | 91 | void Monitoring::RemoveDIB(uint64_t index) { 92 | std::map::iterator it; 93 | it = dib_.find(index); 94 | if (it != dib_.end()) { 95 | dib_.erase(it); 96 | } else { 97 | //fprintf(stderr, "RemovePSL error: cannot find index [%" PRIu64 "]\n", index); 98 | } 99 | 100 | } 101 | 102 | 103 | 104 | 105 | 106 | void Monitoring::PrintDIB(std::string filepath) { 107 | std::map counts; 108 | std::map::iterator it_dib, it_count, it_find; 109 | 110 | fprintf(stderr, "dib search size:%zu\n", dib_.size()); 111 | 112 | for (it_dib = dib_.begin(); it_dib != dib_.end(); it_dib++) { 113 | it_find = counts.find(it_dib->second); 114 | if (it_find == counts.end()) { 115 | counts[it_dib->second] = 0; 116 | } 117 | counts[it_dib->second] += 1; 118 | } 119 | 120 | FILE* fd = NULL; 121 | if (filepath == "stdout") { 122 | fd = stdout; 123 | } else { 124 | fd = fopen(filepath.c_str(), "w"); 125 | } 126 | 127 | fprintf(fd, "{\n"); 128 | PrintInfo(fd, "DIB"); 129 | fprintf(fd, " \"datapoints\":\n"); 130 | fprintf(fd, " {\n"); 131 | 132 | bool first_item = true; 133 | for (it_count = counts.begin(); it_count != counts.end(); it_count++) { 134 | if (!first_item) fprintf(fd, ",\n"); 135 | first_item = false; 136 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it_count->first, it_count->second); 137 | } 138 | fprintf(fd, "\n"); 139 | fprintf(fd, " }\n"); 140 | fprintf(fd, "}\n"); 141 | 142 | if (filepath != "stdout") { 143 | fclose(fd); 144 | } 145 | 146 | } 147 | 148 | 149 | 150 | void Monitoring::GetNumScannedBlocks(std::map& out_num_scanned_blocks, HashMap *hm) { 151 | 152 | std::map< uint64_t, uint64_t>::iterator it_find; 153 | for (uint64_t 
index_stored = 0; index_stored < num_buckets_; index_stored++) { 154 | uint64_t index_init; 155 | if (hm->FillInitIndex(index_stored, &index_init) != 0) continue; 156 | 157 | uint64_t index_stored_adjusted; 158 | if (index_init <= index_stored) { 159 | index_stored_adjusted = index_stored; 160 | } else { 161 | index_stored_adjusted = index_stored + num_buckets_; 162 | } 163 | 164 | //for (int i = 10; i > 0; i--) { 165 | int index_selected = 64; 166 | uint64_t chunk_size = 16; 167 | for (int i = 4; i < 64; i++) { 168 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, chunk_size); 169 | uint64_t offset_stored = AlignOffsetToBlock(index_stored_adjusted * size_bucket_, chunk_size); 170 | 171 | if (offset_init == offset_stored) { 172 | index_selected = i; 173 | break; 174 | } 175 | 176 | chunk_size *= 2; 177 | } 178 | 179 | it_find = out_num_scanned_blocks.find(index_selected); 180 | if (it_find == out_num_scanned_blocks.end()) { 181 | out_num_scanned_blocks[index_selected] = 0; 182 | } 183 | out_num_scanned_blocks[index_selected] += 1; 184 | } 185 | } 186 | 187 | 188 | 189 | 190 | void Monitoring::PrintNumScannedBlocks(std::string filepath) { 191 | FILE* fd = NULL; 192 | if (filepath == "stdout") { 193 | fd = stdout; 194 | } else { 195 | fd = fopen(filepath.c_str(), "w"); 196 | } 197 | 198 | char metric[1024]; 199 | std::map num_scanned_blocks; 200 | GetNumScannedBlocks(num_scanned_blocks, hm_); 201 | fprintf(fd, "{\n"); 202 | sprintf(metric, "aligned DIB"); 203 | PrintInfo(fd, metric); 204 | fprintf(fd, " \"datapoints\":\n"); 205 | fprintf(fd, " {"); 206 | std::map::iterator it; 207 | bool first_item = true; 208 | for (it = num_scanned_blocks.begin(); it != num_scanned_blocks.end(); ++it) { 209 | if (!first_item) fprintf(fd, ","); 210 | first_item = false; 211 | fprintf(fd, "\n"); 212 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 213 | } 214 | fprintf(fd, "\n"); 215 | fprintf(fd, " }\n"); 216 | fprintf(fd, "}\n"); 217 | 218 | if (filepath != "stdout") { 219 | fclose(fd); 220 | } 221 | } 222 | 223 | 224 | 225 | void Monitoring::AddDFB(uint64_t distance) { 226 | 227 | std::map::iterator it; 228 | it = dfb_.find(distance); 229 | if (it == dfb_.end()) { 230 | dfb_[distance] = 0; 231 | } 232 | dfb_[distance] += 1; 233 | } 234 | 235 | 236 | void Monitoring::ResetDFB() { 237 | dfb_.clear(); 238 | } 239 | 240 | 241 | void Monitoring::PrintDFB(std::string filepath) { 242 | std::map::iterator it; 243 | 244 | FILE* fd = NULL; 245 | if (filepath == "stdout") { 246 | fd = stdout; 247 | } else { 248 | fd = fopen(filepath.c_str(), "w"); 249 | } 250 | 251 | fprintf(fd, "{\n"); 252 | PrintInfo(fd, "DFB"); 253 | fprintf(fd, " \"datapoints\":\n"); 254 | fprintf(fd, " {\n"); 255 | 256 | bool first_item = true; 257 | for (it = dfb_.begin(); it != dfb_.end(); it++) { 258 | if (!first_item) fprintf(fd, ",\n"); 259 | first_item = false; 260 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 261 | } 262 | fprintf(fd, "\n"); 263 | fprintf(fd, " }\n"); 264 | fprintf(fd, "}\n"); 265 | 266 | if (filepath != "stdout") { 267 | fclose(fd); 268 | } 269 | } 270 | 271 | 272 | void Monitoring::AddAlignedDFB(uint64_t index_init, uint64_t index_free_bucket) { 273 | std::map::iterator it_find; 274 | 275 | if (index_init > index_free_bucket) { 276 | index_free_bucket += num_buckets_; 277 | } 278 | int index_selected = 64; 279 | uint64_t chunk_size = 16; 280 | for (int i = 4; i < 64; i++) { 281 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, 
chunk_size); 282 | uint64_t offset_free_bucket = AlignOffsetToBlock(index_free_bucket * size_bucket_, chunk_size); 283 | if (offset_init == offset_free_bucket) { 284 | index_selected = i; 285 | break; 286 | } 287 | 288 | chunk_size *= 2; 289 | } 290 | 291 | it_find = aligned_dfb_.find(index_selected); 292 | if (it_find == aligned_dfb_.end()) { 293 | aligned_dfb_[index_selected] = 0; 294 | } 295 | aligned_dfb_[index_selected] += 1; 296 | } 297 | 298 | 299 | 300 | void Monitoring::ResetAlignedDFB() { 301 | aligned_dfb_.clear(); 302 | } 303 | 304 | 305 | void Monitoring::PrintAlignedDFB(std::string filepath) { 306 | std::map::iterator it; 307 | 308 | FILE* fd = NULL; 309 | if (filepath == "stdout") { 310 | fd = stdout; 311 | } else { 312 | fd = fopen(filepath.c_str(), "w"); 313 | } 314 | 315 | fprintf(fd, "{\n"); 316 | PrintInfo(fd, "aligned DFB"); 317 | fprintf(fd, " \"datapoints\":\n"); 318 | fprintf(fd, " {\n"); 319 | 320 | bool first_item = true; 321 | for (it = aligned_dfb_.begin(); it != aligned_dfb_.end(); it++) { 322 | if (!first_item) fprintf(fd, ",\n"); 323 | first_item = false; 324 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 325 | } 326 | fprintf(fd, "\n"); 327 | fprintf(fd, " }\n"); 328 | fprintf(fd, "}\n"); 329 | 330 | if (filepath != "stdout") { 331 | fclose(fd); 332 | } 333 | } 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | void Monitoring::AddNumberOfSwaps(uint64_t distance) { 342 | 343 | std::map::iterator it; 344 | it = swaps_.find(distance); 345 | if (it == swaps_.end()) { 346 | swaps_[distance] = 0; 347 | } 348 | swaps_[distance] += 1; 349 | } 350 | 351 | 352 | void Monitoring::ResetNumberOfSwaps() { 353 | swaps_.clear(); 354 | } 355 | 356 | 357 | void Monitoring::PrintNumberOfSwaps(std::string filepath) { 358 | std::map::iterator it; 359 | 360 | FILE* fd = NULL; 361 | if (filepath == "stdout") { 362 | fd = stdout; 363 | } else { 364 | fd = fopen(filepath.c_str(), "w"); 365 | } 366 | 367 | fprintf(fd, "{\n"); 368 | PrintInfo(fd, "swap"); 369 | fprintf(fd, " \"datapoints\":\n"); 370 | fprintf(fd, " {\n"); 371 | 372 | bool first_item = true; 373 | for (it = swaps_.begin(); it != swaps_.end(); it++) { 374 | if (!first_item) fprintf(fd, ",\n"); 375 | first_item = false; 376 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 377 | } 378 | fprintf(fd, "\n"); 379 | fprintf(fd, " }\n"); 380 | fprintf(fd, "}\n"); 381 | 382 | if (filepath != "stdout") { 383 | fclose(fd); 384 | } 385 | } 386 | 387 | 388 | 389 | 390 | void Monitoring::AddDMB(uint64_t distance) { 391 | 392 | std::map::iterator it; 393 | it = dmb_.find(distance); 394 | if (it == dmb_.end()) { 395 | dmb_[distance] = 0; 396 | } 397 | dmb_[distance] += 1; 398 | //printf("Add DMB %" PRIu64 "\n", distance); 399 | } 400 | 401 | 402 | void Monitoring::ResetDMB() { 403 | dmb_.clear(); 404 | } 405 | 406 | 407 | void Monitoring::PrintDMB(std::string filepath) { 408 | std::map::iterator it; 409 | 410 | FILE* fd = NULL; 411 | if (filepath == "stdout") { 412 | fd = stdout; 413 | } else { 414 | fd = fopen(filepath.c_str(), "w"); 415 | } 416 | 417 | fprintf(fd, "{\n"); 418 | PrintInfo(fd, "DMB"); 419 | fprintf(fd, " \"datapoints\":\n"); 420 | fprintf(fd, " {\n"); 421 | 422 | bool first_item = true; 423 | for (it = dmb_.begin(); it != dmb_.end(); it++) { 424 | if (!first_item) fprintf(fd, ",\n"); 425 | first_item = false; 426 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 427 | } 428 | fprintf(fd, "\n"); 429 | fprintf(fd, " }\n"); 430 | fprintf(fd, "}\n"); 431 | 432 | if 
(filepath != "stdout") { 433 | fclose(fd); 434 | } 435 | } 436 | 437 | 438 | 439 | 440 | void Monitoring::AddAlignedDMB(uint64_t index_init, uint64_t index_missing_bucket) { 441 | std::map::iterator it_find; 442 | if (index_init > index_missing_bucket) { 443 | index_missing_bucket += num_buckets_; 444 | } 445 | int index_selected = 64; 446 | uint64_t chunk_size = 16; 447 | for (int i = 4; i < 64; i++) { 448 | uint64_t offset_init = AlignOffsetToBlock(index_init * size_bucket_, chunk_size); 449 | uint64_t offset_missing_bucket = AlignOffsetToBlock(index_missing_bucket * size_bucket_, chunk_size); 450 | if (offset_init == offset_missing_bucket) { 451 | index_selected = i; 452 | break; 453 | } 454 | 455 | chunk_size *= 2; 456 | } 457 | 458 | it_find = aligned_dmb_.find(index_selected); 459 | if (it_find == aligned_dmb_.end()) { 460 | aligned_dmb_[index_selected] = 0; 461 | } 462 | aligned_dmb_[index_selected] += 1; 463 | 464 | } 465 | 466 | 467 | 468 | 469 | void Monitoring::ResetAlignedDMB() { 470 | aligned_dmb_.clear(); 471 | } 472 | 473 | 474 | void Monitoring::PrintAlignedDMB(std::string filepath) { 475 | std::map::iterator it; 476 | 477 | FILE* fd = NULL; 478 | if (filepath == "stdout") { 479 | fd = stdout; 480 | } else { 481 | fd = fopen(filepath.c_str(), "w"); 482 | } 483 | 484 | fprintf(fd, "{\n"); 485 | PrintInfo(fd, "aligned DMB"); 486 | fprintf(fd, " \"datapoints\":\n"); 487 | fprintf(fd, " {\n"); 488 | 489 | bool first_item = true; 490 | for (it = aligned_dmb_.begin(); it != aligned_dmb_.end(); it++) { 491 | if (!first_item) fprintf(fd, ",\n"); 492 | first_item = false; 493 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 494 | } 495 | fprintf(fd, "\n"); 496 | fprintf(fd, " }\n"); 497 | fprintf(fd, "}\n"); 498 | 499 | if (filepath != "stdout") { 500 | fclose(fd); 501 | } 502 | } 503 | 504 | 505 | 506 | void Monitoring::AddDSB(uint64_t distance) { 507 | 508 | std::map::iterator it; 509 | it = dsb_.find(distance); 510 | if (it == dsb_.end()) { 511 | dsb_[distance] = 0; 512 | } 513 | dsb_[distance] += 1; 514 | //printf("Add DSB %" PRIu64 "\n", distance); 515 | } 516 | 517 | 518 | void Monitoring::ResetDSB() { 519 | dsb_.clear(); 520 | } 521 | 522 | 523 | void Monitoring::PrintDSB(std::string filepath) { 524 | std::map::iterator it; 525 | 526 | FILE* fd = NULL; 527 | if (filepath == "stdout") { 528 | fd = stdout; 529 | } else { 530 | fd = fopen(filepath.c_str(), "w"); 531 | } 532 | 533 | fprintf(fd, "{\n"); 534 | PrintInfo(fd, "DSB"); 535 | fprintf(fd, " \"datapoints\":\n"); 536 | fprintf(fd, " {\n"); 537 | 538 | bool first_item = true; 539 | for (it = dsb_.begin(); it != dsb_.end(); it++) { 540 | if (!first_item) fprintf(fd, ",\n"); 541 | first_item = false; 542 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 543 | } 544 | fprintf(fd, "\n"); 545 | fprintf(fd, " }\n"); 546 | fprintf(fd, "}\n"); 547 | 548 | if (filepath != "stdout") { 549 | fclose(fd); 550 | } 551 | } 552 | 553 | 554 | 555 | 556 | void Monitoring::AddAlignedDSB(uint64_t index_stored, uint64_t index_shift_bucket) { 557 | std::map::iterator it_find; 558 | if (index_stored > index_shift_bucket) { 559 | index_shift_bucket += num_buckets_; 560 | } 561 | int index_selected = 64; 562 | uint64_t chunk_size = 16; 563 | for (int i = 4; i < 64; i++) { 564 | uint64_t offset_stored = AlignOffsetToBlock(index_stored * size_bucket_, chunk_size); 565 | uint64_t offset_shift_bucket = AlignOffsetToBlock(index_shift_bucket * size_bucket_, chunk_size); 566 | if (offset_stored == offset_shift_bucket) 
{ 567 | index_selected = i; 568 | break; 569 | } 570 | 571 | chunk_size *= 2; 572 | } 573 | 574 | it_find = aligned_dsb_.find(index_selected); 575 | if (it_find == aligned_dsb_.end()) { 576 | aligned_dsb_[index_selected] = 0; 577 | } 578 | aligned_dsb_[index_selected] += 1; 579 | 580 | } 581 | 582 | 583 | 584 | 585 | void Monitoring::ResetAlignedDSB() { 586 | aligned_dsb_.clear(); 587 | } 588 | 589 | 590 | void Monitoring::PrintAlignedDSB(std::string filepath) { 591 | std::map::iterator it; 592 | 593 | FILE* fd = NULL; 594 | if (filepath == "stdout") { 595 | fd = stdout; 596 | } else { 597 | fd = fopen(filepath.c_str(), "w"); 598 | } 599 | 600 | fprintf(fd, "{\n"); 601 | PrintInfo(fd, "aligned DSB"); 602 | fprintf(fd, " \"datapoints\":\n"); 603 | fprintf(fd, " {\n"); 604 | 605 | bool first_item = true; 606 | for (it = aligned_dsb_.begin(); it != aligned_dsb_.end(); it++) { 607 | if (!first_item) fprintf(fd, ",\n"); 608 | first_item = false; 609 | fprintf(fd, " \"%" PRIu64 "\": %" PRIu64, it->first, it->second); 610 | } 611 | fprintf(fd, "\n"); 612 | fprintf(fd, " }\n"); 613 | fprintf(fd, "}\n"); 614 | 615 | if (filepath != "stdout") { 616 | fclose(fd); 617 | } 618 | } 619 | 620 | }; // end namespace hashmap 621 | -------------------------------------------------------------------------------- /monitoring.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_MONITORING 2 | #define HASHMAP_MONITORING 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | 19 | namespace hashmap 20 | { 21 | class HashMap; 22 | 23 | class Monitoring 24 | { 25 | public: 26 | Monitoring(uint64_t num_buckets, 27 | uint64_t max_num_items_in_bucket, 28 | HashMap *hm) { 29 | num_buckets_ = num_buckets; 30 | max_num_items_in_bucket_ = max_num_items_in_bucket; 31 | size_bucket_ = 4; 32 | hm_ = hm; 33 | fprintf(stderr, "starting\n"); 34 | } 35 | 36 | virtual ~Monitoring() { 37 | } 38 | 39 | uint64_t** GetClustering(HashMap* hm); 40 | void PrintClustering(HashMap *hm); 41 | const std::map& GetClustering(); 42 | 43 | uint64_t GetDIB(uint64_t index); 44 | void SetDIB(uint64_t index, uint64_t dib); 45 | void RemoveDIB(uint64_t index); 46 | void PrintDIB(std::string filepath); 47 | 48 | void AddDFB(uint64_t distance); 49 | void ResetDFB(); 50 | void PrintDFB(std::string filepath); 51 | 52 | void AddAlignedDFB(uint64_t index_init, uint64_t index_free_bucket); 53 | void ResetAlignedDFB(); 54 | void PrintAlignedDFB(std::string filepath); 55 | 56 | void AddDMB(uint64_t distance); 57 | void ResetDMB(); 58 | void PrintDMB(std::string filepath); 59 | 60 | void AddAlignedDMB(uint64_t index_init, uint64_t index_free_bucket); 61 | void ResetAlignedDMB(); 62 | void PrintAlignedDMB(std::string filepath); 63 | 64 | void AddNumberOfSwaps(uint64_t distance); 65 | void ResetNumberOfSwaps(); 66 | void PrintNumberOfSwaps(std::string filepath); 67 | 68 | void GetNumScannedBlocks(std::map& out_num_scanned_blocks, HashMap *hm); 69 | void PrintNumScannedBlocks(std::string filepath); 70 | 71 | void AddDSB(uint64_t distance); 72 | void ResetDSB(); 73 | void PrintDSB(std::string filepath); 74 | 75 | void AddAlignedDSB(uint64_t index_stored, uint64_t index_shift_bucket); 76 | void ResetAlignedDSB(); 77 | void PrintAlignedDSB(std::string filepath); 78 | 79 | void PrintInfo(FILE* fd, std::string metric); 80 | void SetCycle(uint64_t cycle) { cycle_ = 
cycle; } 81 | void SetInstance(uint64_t instance) { instance_ = instance; } 82 | 83 | void SetTestcase(std::string str) { 84 | testcase_ = str; 85 | } 86 | 87 | void SetParametersTestcaseString(std::string str) { 88 | parameters_testcase_string_ = str; 89 | } 90 | 91 | void SetParametersTestcaseJson(std::string str) { 92 | parameters_testcase_json_ = str; 93 | } 94 | 95 | 96 | private: 97 | std::map num_items_in_bucket_; 98 | uint64_t num_buckets_; 99 | uint64_t max_num_items_in_bucket_; 100 | uint64_t size_bucket_; 101 | std::map dib_; 102 | std::map dfb_; 103 | std::map aligned_dfb_; 104 | std::map dmb_; 105 | std::map aligned_dmb_; 106 | std::map dsb_; 107 | std::map aligned_dsb_; 108 | std::map swaps_; 109 | HashMap *hm_; 110 | uint64_t cycle_; 111 | uint64_t instance_; 112 | std::string parameters_testcase_string_; 113 | std::string parameters_testcase_json_; 114 | std::string testcase_; 115 | 116 | uint64_t AlignOffsetToBlock(uint64_t offset, uint64_t size_block) { 117 | return offset - offset % size_block; 118 | } 119 | 120 | }; 121 | 122 | 123 | }; // end namespace hashmap 124 | 125 | #endif // HASHMAP_MONITORING 126 | -------------------------------------------------------------------------------- /murmurhash3.cc: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 4 | 5 | // Note - The x86 and x64 versions do _not_ produce the same results, as the 6 | // algorithms are optimized for their respective platforms. You can still 7 | // compile and run any of them on any platform, but your performance with the 8 | // non-native version will be less than optimal. 
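// [Comment added for clarity; a sketch of how this repository consumes the hash,
//  based on the hash_function() helpers visible in probing_hashmap.h and
//  shadow_hashmap.h -- not part of the upstream MurmurHash3 source.]
// Those helpers call MurmurHash3_x64_128() and keep the first 8 bytes of its
// 16-byte output as the 64-bit bucket hash, roughly:
//
//   char hash[16];
//   uint64_t h;
//   MurmurHash3_x64_128(key.c_str(), key.size(), /*seed=*/0, hash);
//   memcpy(&h, hash, 8);   // bucket index is then h % num_buckets_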
9 | 10 | #include "murmurhash3.h" 11 | 12 | //----------------------------------------------------------------------------- 13 | // Platform-specific functions and macros 14 | 15 | // Microsoft Visual Studio 16 | 17 | #if defined(_MSC_VER) 18 | 19 | #define FORCE_INLINE __forceinline 20 | 21 | #include 22 | 23 | #define ROTL32(x,y) _rotl(x,y) 24 | #define ROTL64(x,y) _rotl64(x,y) 25 | 26 | #define BIG_CONSTANT(x) (x) 27 | 28 | // Other compilers 29 | 30 | #else // defined(_MSC_VER) 31 | 32 | #define FORCE_INLINE inline __attribute__((always_inline)) 33 | 34 | inline uint32_t rotl32 ( uint32_t x, int8_t r ) 35 | { 36 | return (x << r) | (x >> (32 - r)); 37 | } 38 | 39 | inline uint64_t rotl64 ( uint64_t x, int8_t r ) 40 | { 41 | return (x << r) | (x >> (64 - r)); 42 | } 43 | 44 | #define ROTL32(x,y) rotl32(x,y) 45 | #define ROTL64(x,y) rotl64(x,y) 46 | 47 | #define BIG_CONSTANT(x) (x##LLU) 48 | 49 | #endif // !defined(_MSC_VER) 50 | 51 | //----------------------------------------------------------------------------- 52 | // Block read - if your platform needs to do endian-swapping or can only 53 | // handle aligned reads, do the conversion here 54 | 55 | FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) 56 | { 57 | return p[i]; 58 | } 59 | 60 | FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) 61 | { 62 | return p[i]; 63 | } 64 | 65 | //----------------------------------------------------------------------------- 66 | // Finalization mix - force all bits of a hash block to avalanche 67 | 68 | FORCE_INLINE uint32_t fmix32 ( uint32_t h ) 69 | { 70 | h ^= h >> 16; 71 | h *= 0x85ebca6b; 72 | h ^= h >> 13; 73 | h *= 0xc2b2ae35; 74 | h ^= h >> 16; 75 | 76 | return h; 77 | } 78 | 79 | //---------- 80 | 81 | FORCE_INLINE uint64_t fmix64 ( uint64_t k ) 82 | { 83 | k ^= k >> 33; 84 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 85 | k ^= k >> 33; 86 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 87 | k ^= k >> 33; 88 | 89 | return k; 90 | } 91 | 92 | //----------------------------------------------------------------------------- 93 | 94 | void MurmurHash3_x86_32 ( const void * key, int len, 95 | uint32_t seed, void * out ) 96 | { 97 | const uint8_t * data = (const uint8_t*)key; 98 | const int nblocks = len / 4; 99 | 100 | uint32_t h1 = seed; 101 | 102 | const uint32_t c1 = 0xcc9e2d51; 103 | const uint32_t c2 = 0x1b873593; 104 | 105 | //---------- 106 | // body 107 | 108 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); 109 | 110 | for(int i = -nblocks; i; i++) 111 | { 112 | uint32_t k1 = getblock32(blocks,i); 113 | 114 | k1 *= c1; 115 | k1 = ROTL32(k1,15); 116 | k1 *= c2; 117 | 118 | h1 ^= k1; 119 | h1 = ROTL32(h1,13); 120 | h1 = h1*5+0xe6546b64; 121 | } 122 | 123 | //---------- 124 | // tail 125 | 126 | const uint8_t * tail = (const uint8_t*)(data + nblocks*4); 127 | 128 | uint32_t k1 = 0; 129 | 130 | switch(len & 3) 131 | { 132 | case 3: k1 ^= tail[2] << 16; 133 | case 2: k1 ^= tail[1] << 8; 134 | case 1: k1 ^= tail[0]; 135 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 136 | }; 137 | 138 | //---------- 139 | // finalization 140 | 141 | h1 ^= len; 142 | 143 | h1 = fmix32(h1); 144 | 145 | *(uint32_t*)out = h1; 146 | } 147 | 148 | //----------------------------------------------------------------------------- 149 | 150 | void MurmurHash3_x86_128 ( const void * key, const int len, 151 | uint32_t seed, void * out ) 152 | { 153 | const uint8_t * data = (const uint8_t*)key; 154 | const int nblocks = len / 16; 155 | 156 | uint32_t h1 = seed; 157 | uint32_t h2 = seed; 158 
| uint32_t h3 = seed; 159 | uint32_t h4 = seed; 160 | 161 | const uint32_t c1 = 0x239b961b; 162 | const uint32_t c2 = 0xab0e9789; 163 | const uint32_t c3 = 0x38b34ae5; 164 | const uint32_t c4 = 0xa1e38b93; 165 | 166 | //---------- 167 | // body 168 | 169 | const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); 170 | 171 | for(int i = -nblocks; i; i++) 172 | { 173 | uint32_t k1 = getblock32(blocks,i*4+0); 174 | uint32_t k2 = getblock32(blocks,i*4+1); 175 | uint32_t k3 = getblock32(blocks,i*4+2); 176 | uint32_t k4 = getblock32(blocks,i*4+3); 177 | 178 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 179 | 180 | h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; 181 | 182 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 183 | 184 | h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; 185 | 186 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 187 | 188 | h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; 189 | 190 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 191 | 192 | h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; 193 | } 194 | 195 | //---------- 196 | // tail 197 | 198 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 199 | 200 | uint32_t k1 = 0; 201 | uint32_t k2 = 0; 202 | uint32_t k3 = 0; 203 | uint32_t k4 = 0; 204 | 205 | switch(len & 15) 206 | { 207 | case 15: k4 ^= tail[14] << 16; 208 | case 14: k4 ^= tail[13] << 8; 209 | case 13: k4 ^= tail[12] << 0; 210 | k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; 211 | 212 | case 12: k3 ^= tail[11] << 24; 213 | case 11: k3 ^= tail[10] << 16; 214 | case 10: k3 ^= tail[ 9] << 8; 215 | case 9: k3 ^= tail[ 8] << 0; 216 | k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; 217 | 218 | case 8: k2 ^= tail[ 7] << 24; 219 | case 7: k2 ^= tail[ 6] << 16; 220 | case 6: k2 ^= tail[ 5] << 8; 221 | case 5: k2 ^= tail[ 4] << 0; 222 | k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; 223 | 224 | case 4: k1 ^= tail[ 3] << 24; 225 | case 3: k1 ^= tail[ 2] << 16; 226 | case 2: k1 ^= tail[ 1] << 8; 227 | case 1: k1 ^= tail[ 0] << 0; 228 | k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; 229 | }; 230 | 231 | //---------- 232 | // finalization 233 | 234 | h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; 235 | 236 | h1 += h2; h1 += h3; h1 += h4; 237 | h2 += h1; h3 += h1; h4 += h1; 238 | 239 | h1 = fmix32(h1); 240 | h2 = fmix32(h2); 241 | h3 = fmix32(h3); 242 | h4 = fmix32(h4); 243 | 244 | h1 += h2; h1 += h3; h1 += h4; 245 | h2 += h1; h3 += h1; h4 += h1; 246 | 247 | ((uint32_t*)out)[0] = h1; 248 | ((uint32_t*)out)[1] = h2; 249 | ((uint32_t*)out)[2] = h3; 250 | ((uint32_t*)out)[3] = h4; 251 | } 252 | 253 | //----------------------------------------------------------------------------- 254 | 255 | void MurmurHash3_x64_128 ( const void * key, const int len, 256 | const uint32_t seed, void * out ) 257 | { 258 | const uint8_t * data = (const uint8_t*)key; 259 | const int nblocks = len / 16; 260 | 261 | uint64_t h1 = seed; 262 | uint64_t h2 = seed; 263 | 264 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 265 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 266 | 267 | //---------- 268 | // body 269 | 270 | const uint64_t * blocks = (const uint64_t *)(data); 271 | 272 | for(int i = 0; i < nblocks; i++) 273 | { 274 | uint64_t k1 = getblock64(blocks,i*2+0); 275 | uint64_t k2 = getblock64(blocks,i*2+1); 276 | 277 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 278 | 279 | h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 280 | 281 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 282 | 283 | h2 = ROTL64(h2,31); h2 
+= h1; h2 = h2*5+0x38495ab5; 284 | } 285 | 286 | //---------- 287 | // tail 288 | 289 | const uint8_t * tail = (const uint8_t*)(data + nblocks*16); 290 | 291 | uint64_t k1 = 0; 292 | uint64_t k2 = 0; 293 | 294 | switch(len & 15) 295 | { 296 | case 15: k2 ^= ((uint64_t)tail[14]) << 48; 297 | case 14: k2 ^= ((uint64_t)tail[13]) << 40; 298 | case 13: k2 ^= ((uint64_t)tail[12]) << 32; 299 | case 12: k2 ^= ((uint64_t)tail[11]) << 24; 300 | case 11: k2 ^= ((uint64_t)tail[10]) << 16; 301 | case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; 302 | case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; 303 | k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; 304 | 305 | case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; 306 | case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; 307 | case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; 308 | case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; 309 | case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; 310 | case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; 311 | case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; 312 | case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; 313 | k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; 314 | }; 315 | 316 | //---------- 317 | // finalization 318 | 319 | h1 ^= len; h2 ^= len; 320 | 321 | h1 += h2; 322 | h2 += h1; 323 | 324 | h1 = fmix64(h1); 325 | h2 = fmix64(h2); 326 | 327 | h1 += h2; 328 | h2 += h1; 329 | 330 | ((uint64_t*)out)[0] = h1; 331 | ((uint64_t*)out)[1] = h2; 332 | } 333 | 334 | //----------------------------------------------------------------------------- 335 | 336 | -------------------------------------------------------------------------------- /murmurhash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
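// Usage sketch (illustrative comment added here, not part of the upstream
// header): each variant writes its digest into a caller-provided output buffer
// whose size matches the variant -- 4 bytes for MurmurHash3_x86_32 and 16 bytes
// for the two 128-bit variants. Here data and len stand for any byte buffer
// and its length:
//
//   uint32_t h32;
//   MurmurHash3_x86_32(data, len, /*seed=*/0, &h32);
//
//   char h128[16];
//   MurmurHash3_x64_128(data, len, /*seed=*/0, h128);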
4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned long uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // _MURMURHASH3_H_ 38 | -------------------------------------------------------------------------------- /plot.py: -------------------------------------------------------------------------------- 1 | # make the algorithms an argument 2 | # make the metrics an argument 3 | 4 | import os 5 | import sys 6 | import json 7 | import traceback 8 | import random 9 | import math 10 | 11 | import matplotlib 12 | import matplotlib.pyplot as plt 13 | import pprint 14 | 15 | colors = {'red': '#cd7058', 'blue': '#599ad3', 'orange': '#f9a65a', 'green': '#66cc66', 'black': '#000000', 'purple': '#990066'} 16 | numbering_subplots = ['a', 'b', 'c', 'd', 'e', 'f'] 17 | 18 | 19 | def compute_average(datapoints, has_shift): 20 | if len(datapoints) == 0: 21 | return 0, 0, 0 22 | num_freq = 0 23 | sum_metric = 0 24 | if not has_shift: 25 | minimum = 0 26 | else: 27 | minimum = None 28 | 29 | for key, value in datapoints.iteritems(): 30 | occurrence = float(key) 31 | frequency = float(value) 32 | num_freq += frequency 33 | sum_metric += frequency * occurrence 34 | if has_shift: 35 | if minimum is None or occurrence < minimum: 36 | minimum = occurrence 37 | 38 | if num_freq <= 1: 39 | return 0, 0, 0 40 | 41 | mean = float(sum_metric) / float(num_freq) 42 | 43 | sum_metric_squared = 0 44 | for key, value in datapoints.iteritems(): 45 | occurrence = float(key) 46 | frequency = float(value) 47 | sum_metric_squared += frequency * (occurrence - mean) * (occurrence - mean) 48 | 49 | variance = float(sum_metric_squared) / float(num_freq - 1) 50 | standard_deviation = math.sqrt(variance) 51 | return mean - minimum, variance, standard_deviation 52 | 53 | 54 | def compute_median(datapoints, has_shift): 55 | # TODO: very inefficient, could optimize this method 56 | if len(datapoints) == 0: 57 | return 0, 0, 0 58 | values = [] 59 | minimum = None 60 | for key, value in datapoints.iteritems(): 61 | occurrence = float(key) 62 | frequency = float(value) 63 | for i in range(int(frequency)): 64 | values.append(occurrence) 65 | if has_shift: 66 | if minimum is None or occurrence < minimum: 67 | minimum = occurrence 68 | 69 | if not has_shift: minimum = 0 70 | values = sorted(values) 71 | median = values[len(values) / 2] - minimum 72 | perc95 = values[int(float(len(values)) * .95)] - minimum 73 | maximum = values[-1] - minimum 74 | return median, perc95, maximum 75 | 76 | 77 | def aggregate_datapoints(dirpath_data, testcases, algorithms, shifts): 78 | print testcases, algorithms, shifts 79 | aggregate = {} 80 | for dirname, dirnames, filenames in os.walk(dirpath_data): 81 
| for filename in filenames: 82 | basename, ext = os.path.splitext(filename) 83 | if ext.lower() != '.json': continue 84 | if '50000000' in filename: continue 85 | 86 | if testcases != 'all' and not any(filename.startswith(testcase) for testcase in testcases.split(',')): 87 | print 'skipping ' + filename 88 | continue 89 | 90 | if algorithms != 'all' and not any(algorithm in filename for algorithm in algorithms.split(',')): 91 | print 'skipping ' + filename 92 | continue 93 | 94 | try: 95 | filepath = os.path.join(dirname, filename) 96 | print "Reading file [%s]" % (filepath,) 97 | f = open(filepath, 'r') 98 | text = f.read() 99 | data_items = json.loads(text) 100 | f.close() 101 | has_shift = shifts and any(shift in filename for shift in shifts.split(',')) 102 | if not isinstance(data_items, list): 103 | data_items = [data_items] 104 | 105 | for data in data_items: 106 | average, variance, stddev = compute_average(data['datapoints'], has_shift) 107 | median, perc95, maximum = compute_median(data['datapoints'], has_shift) 108 | 109 | ia = data['algorithm'] 110 | im = data['metric'] 111 | ib = data['parameters_hashmap_string'] 112 | ia = '%s-%s' % (ia, ib) 113 | 114 | ii = data['instance'] 115 | ic = data['cycle'] 116 | 117 | it = data['testcase'] 118 | ip = data['parameters_testcase_string'] 119 | if '75' in ip: 120 | print "before", ip 121 | ip = ip.replace('lfm0.75', 'lfm0.80') 122 | print "after", ip 123 | it = '%s-%s' % (it, ip) 124 | if im not in aggregate: 125 | aggregate[im] = {} 126 | if it not in aggregate[im]: 127 | aggregate[im][it] = {} 128 | if ia not in aggregate[im][it]: 129 | aggregate[im][it][ia] = {} 130 | if ic not in aggregate[im][it][ia]: 131 | aggregate[im][it][ia][ic] = {} 132 | 133 | for m in ['mean', 'median', 'perc95', 'standard_deviation', 'variance', 'maximum']: 134 | if m not in aggregate[im][it][ia][ic]: 135 | aggregate[im][it][ia][ic][m] = [] 136 | 137 | aggregate[im][it][ia][ic]['mean'].append(average) 138 | aggregate[im][it][ia][ic]['standard_deviation'].append(stddev) 139 | aggregate[im][it][ia][ic]['variance'].append(variance) 140 | aggregate[im][it][ia][ic]['median'].append(median) 141 | aggregate[im][it][ia][ic]['perc95'].append(perc95) 142 | aggregate[im][it][ia][ic]['maximum'].append(maximum) 143 | except: 144 | print 'Crashed at file: [%s/%s]' % (dirname, filename) 145 | traceback.print_exc() 146 | sys.exit(1) 147 | 148 | return aggregate 149 | 150 | 151 | 152 | 153 | def randomized_paired_sample_t_test(reference, candidate, details): 154 | num_items = len(reference) 155 | random.seed(None) 156 | population = [] 157 | print 'ref cand', reference, candidate 158 | 159 | diff = [] 160 | for i in range(num_items): 161 | diff.append(reference[i] - candidate[i]) 162 | 163 | 164 | num_population = 10240 165 | for k in range(num_population): 166 | diff_new = [] 167 | for i in range(num_items): 168 | sign = -1 if random.random() < 0.5 else 1 169 | diff_new.append(diff[i] * sign) 170 | 171 | mean_new = float(sum(diff_new)) / float(num_items) 172 | population.append(mean_new) 173 | 174 | count_passed = 0 175 | mean = sum(diff) / num_items 176 | population = sorted(population) 177 | 178 | for mean_current in population: 179 | if (mean > 0 and mean <= mean_current) or (mean < 0 and mean < mean_current): 180 | break 181 | count_passed += 1 182 | 183 | if mean > 0: 184 | count_passed = num_population - count_passed 185 | 186 | if False and details: 187 | print "*" * 64 188 | print "*" * 64 189 | print "details" 190 | print "population", population[0],
population[1], population[-2], population[-1] 191 | print "mean", mean 192 | print "count_passed: %f" % (float(count_passed),) 193 | print "num_pop %f" % (float(num_population), ) 194 | 195 | p_value = float(count_passed) / float(num_population) 196 | print "passed: %f" % (p_value,) 197 | return p_value 198 | 199 | 200 | 201 | def add_curve_to_plot(ax, aggregates, im, it, index_testcase, statistic, algorithms_ordering, filters, numbering_subplot, includes): 202 | names = [] 203 | lines = [] 204 | font = {'family' : 'normal', 205 | 'weight' : 'normal', 206 | 'size' : 14} 207 | matplotlib.rc('font', **font) 208 | 209 | algorithms = [None] * 5 210 | for ia in aggregates[im][it].keys(): 211 | for pattern in algorithms_ordering.keys(): 212 | if pattern in ia: 213 | order = algorithms_ordering[pattern]['order'] 214 | algorithms[order] = ia 215 | 216 | for ia in algorithms: 217 | if ia is None: continue 218 | print "Generating curve for: stats:%s | metric:%s | testcase:%s | algorithm:%s" % (statistic, im, it, ia) 219 | 220 | xs = [] 221 | ys = [] 222 | 223 | for cycle, stats in sorted(aggregates[im][it][ia].items()): 224 | if 'loading' in it: 225 | xs.append((cycle * 2.0) / 100.0) 226 | else: 227 | xs.append(cycle) 228 | ys.append(sum(stats[statistic]) / len(stats[statistic])) 229 | 230 | name = '[ERROR: unknown algorithm]' 231 | color = '#000000' 232 | linewidth = 3 233 | zorder = 1 234 | for k, v in filters.iteritems(): 235 | if k in ia: 236 | name = filters[k]['name'] 237 | color = filters[k]['color'] 238 | linewidth = filters[k]['linewidth'] 239 | style = '-' 240 | zorder = filters[k]['zorder'] 241 | break 242 | 243 | if not any(pattern in ia for pattern in includes): 244 | continue 245 | 246 | line_current, = ax.plot(xs, ys, style, color=color, linewidth=linewidth, zorder=zorder) 247 | names.append(name) 248 | lines.append(line_current) 249 | 250 | if 'loading' in it: 251 | ax.set_xlabel('(%s) Load factor' % numbering_subplot) 252 | else: 253 | ax.set_xlabel('(%s) Iterations' % numbering_subplot) 254 | 255 | if statistic == 'mean': 256 | ax.set_ylabel('Mean %s' % im) 257 | if True or 'loading' not in it: 258 | x1,x2,y1,y2 = plt.axis() 259 | plt.axis((x1,x2,0,100)) 260 | elif statistic == 'variance': 261 | ax.set_ylabel('Variance of %s' % im) 262 | if True or 'loading' not in it: 263 | x1,x2,y1,y2 = plt.axis() 264 | plt.axis((x1,x2,0,600)) 265 | elif statistic == 'standard_deviation': 266 | ax.set_ylabel('Standard deviation of %s' % im) 267 | elif statistic == 'median': 268 | ax.set_ylabel('Median of %s' % im) 269 | if True or 'loading' not in it: 270 | x1,x2,y1,y2 = plt.axis() 271 | plt.axis((x1,x2,0,100)) 272 | elif statistic == 'perc95': 273 | ax.set_ylabel('95th percentile of %s' % im) 274 | if True or 'loading' not in it: 275 | x1,x2,y1,y2 = plt.axis() 276 | plt.axis((x1,x2,0,100)) 277 | elif statistic == 'maximum': 278 | ax.set_ylabel('Maximum %s' % im) 279 | if True or 'loading' not in it: 280 | x1,x2,y1,y2 = plt.axis() 281 | plt.axis((x1,x2,0,180)) 282 | plt.title('Test case: %s' % (it.strip('-'))) 283 | ax.grid(True) 284 | 285 | if any(metric in im for metric in ['blocks', 'aligned']) and statistic != 'variance': 286 | labels=['16 B', '32 B', '64 B', '128 B', '256 B', '512 B', '1 KB', '2 KB', '4 KB', '8 KB', '16 KB', '32 KB', '64 KB', '128 KB'] 287 | plt.axis((x1,x2,4,4+len(labels))) 288 | ax.set_yticks(range(4,4+len(labels))) 289 | ax.set_yticklabels(labels) 290 | 291 | plt.legend(lines, names).set_visible(False) 292 | return names, lines 293 | 294 | 295 | 296 | def 
plot_algorithms(aggregates): 297 | 298 | for index_stat, statistic in enumerate(['mean', 'median', 'perc95', 'maximum', 'variance']): 299 | for index_metric, im in enumerate(aggregates.keys()): 300 | fig = plt.figure((index_stat+1) * 10000 + (index_metric+1) * 100 + 1) 301 | legend = None 302 | for index_testcase, it in enumerate(sorted(aggregates[im].keys())): 303 | ax = fig.add_subplot(2, 2, index_testcase+1) 304 | lines = [] 305 | names = [] 306 | 307 | names_temp, lines_temp = add_curve_to_plot( 308 | ax=ax, 309 | aggregates=aggregates, 310 | im=im, 311 | it=it, 312 | index_testcase=index_testcase, 313 | statistic=statistic, 314 | algorithms_ordering = { 315 | 'linear': {'order': 0}, 316 | 'backshift': {'order': 1}, 317 | 'tombstone': {'order': 2}, 318 | 'shadow': {'order': 3}, 319 | 'bitmap': {'order': 4}, 320 | }, 321 | filters = { 322 | 'linear': { 'color': colors['blue'], 'name': 'Linear probing', 'linewidth': 8, 'zorder': 1 }, 323 | 'backshift': { 'color': colors['orange'], 'name': 'Robin Hood (backward shift)', 'linewidth': 6, 'zorder': 2 }, 324 | 'tombstone': { 'color': colors['red'], 'name': 'Robin Hood (tombstone)', 'linewidth': 4.5, 'zorder': 3 }, 325 | 'shadow': { 'color': colors['green'], 'name': 'Hopscotch (shadow)', 'linewidth': 3, 'zorder': 4 }, 326 | 'bitmap': { 'color': colors['black'], 'name': 'Hopscotch (bitmap)', 'linewidth': 1.75, 'zorder': 5 }, 327 | }, 328 | numbering_subplot=numbering_subplots[index_testcase], 329 | includes=['10000-'], 330 | ) 331 | 332 | 333 | names.extend(names_temp) 334 | lines.extend(lines_temp) 335 | 336 | legend = plt.legend(lines, names, prop={'size':12}, bbox_to_anchor=(0.2, -0.3)) 337 | if not os.path.isdir('plots/algorithms'): 338 | os.mkdir('plots/algorithms') 339 | fig.set_size_inches(10, 7.5) 340 | plt.tight_layout() 341 | plt.savefig('plots/algorithms/%s_%s.png' % (im.lower(), statistic), dpi=72, bbox_extra_artists=(legend,), bbox_inches='tight') 342 | 343 | 344 | 345 | 346 | 347 | def plot_robinhood(aggregates): 348 | for index_metric, im in enumerate(aggregates.keys()): 349 | fig = plt.figure((index_metric+1) * 100 + 1) 350 | for index_stat, statistic in enumerate(['mean', 'median', 'perc95', 'maximum', 'variance']): 351 | ax = fig.add_subplot(3, 2, index_stat+1) 352 | lines = [] 353 | names = [] 354 | for index_testcase, it in enumerate(sorted(aggregates[im].keys())): 355 | names_temp, lines_temp = add_curve_to_plot( 356 | ax=ax, 357 | aggregates=aggregates, 358 | im=im, 359 | it=it, 360 | index_testcase=index_testcase, 361 | statistic=statistic, 362 | algorithms_ordering = { 363 | '10000-': {'order': 0}, 364 | '100000-': {'order': 1}, 365 | '1000000-': {'order': 2}, 366 | '10000000-': {'order': 3}, 367 | '50000000-': {'order': 4}, 368 | }, 369 | filters = { 370 | '10000-': { 'color': colors['blue'], 'name': 'Robin Hood (backward shift, 10k)', 'linewidth': 8, 'zorder': 1 }, 371 | '100000-': { 'color': colors['orange'], 'name': 'Robin Hood (backward shift, 100k)', 'linewidth': 6, 'zorder': 2 }, 372 | '1000000-': { 'color': colors['red'], 'name': 'Robin Hood (backward shift, 1M)', 'linewidth': 4.5, 'zorder': 3 }, 373 | '10000000-': { 'color': colors['green'], 'name': 'Robin Hood (backward shift, 10M)', 'linewidth': 3, 'zorder': 4 }, 374 | '50000000-': { 'color': colors['black'], 'name': 'Robin Hood (backward shift, 50M)', 'linewidth': 1.75, 'zorder': 5 }, 375 | '100000000-': { 'color': colors['black'], 'name': 'Robin Hood (backward shift, 100M)', 'linewidth': 1.75, 'zorder': 5 }, 376 | }, 377 | 
numbering_subplot=numbering_subplots[index_stat], 378 | includes=['backshift'], 379 | ) 380 | names.extend(names_temp) 381 | lines.extend(lines_temp) 382 | 383 | legend = plt.legend(lines, names, prop={'size':12}, bbox_to_anchor=(2.10, 0.75)) 384 | fig.set_size_inches(10, 11.25) 385 | plt.tight_layout() 386 | if not os.path.isdir('plots/robinhood-backshift'): 387 | os.mkdir('plots/robinhood-backshift') 388 | plt.savefig('plots/robinhood-backshift/%s.png' % (im.lower()), dpi=72, bbox_extra_artists=(legend,), bbox_inches='tight') 389 | 390 | 391 | 392 | if __name__=="__main__": 393 | shifts = "" 394 | if len(sys.argv) == 5: 395 | shifts = sys.argv[4] 396 | 397 | agg = aggregate_datapoints(dirpath_data=sys.argv[1], 398 | testcases=sys.argv[2], 399 | algorithms=sys.argv[3], 400 | shifts=shifts) 401 | plot_algorithms(agg) 402 | plot_robinhood(agg) 403 | -------------------------------------------------------------------------------- /probing_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "probing_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | 7 | int ProbingHashMap::Open() { 8 | buckets_ = new Bucket[num_buckets_]; 9 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 10 | monitoring_ = new hashmap::Monitoring(num_buckets_, probing_max_, static_cast(this)); 11 | return 0; 12 | } 13 | 14 | int ProbingHashMap::Close() { 15 | if (buckets_ != NULL) { 16 | for (uint32_t i = 0; i < num_buckets_; i++) { 17 | if (buckets_[i].entry != NULL && buckets_[i].entry != DELETED_BUCKET) { 18 | delete[] buckets_[i].entry->data; 19 | delete buckets_[i].entry; 20 | } 21 | } 22 | delete[] buckets_; 23 | } 24 | 25 | if (monitoring_ != NULL) { 26 | delete monitoring_; 27 | } 28 | return 0; 29 | } 30 | 31 | 32 | 33 | int ProbingHashMap::Get(const std::string& key, std::string* value) { 34 | uint64_t hash = hash_function(key); 35 | uint64_t index_init = hash % num_buckets_; 36 | bool found = false; 37 | uint32_t i; 38 | for (i = 0; i < probing_max_; i++) { 39 | uint64_t index_current = (index_init + i) % num_buckets_; 40 | if (buckets_[index_current].entry == DELETED_BUCKET) { 41 | continue; 42 | } else if (buckets_[index_current].entry == NULL) { 43 | break; 44 | } 45 | 46 | if( key.size() == buckets_[index_current].entry->size_key 47 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 48 | *value = std::string(buckets_[index_current].entry->data + key.size(), 49 | buckets_[index_current].entry->size_value); 50 | found = true; 51 | break; 52 | } 53 | } 54 | 55 | if (found) return 0; 56 | 57 | monitoring_->AddDMB(i); 58 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 59 | return 1; 60 | } 61 | 62 | 63 | 64 | uint64_t ProbingHashMap::FindEmptyBucket(uint64_t index_init) { 65 | bool found = false; 66 | uint64_t index_current = index_init; 67 | 68 | for (uint32_t i = 0; i < probing_max_; i++) { 69 | index_current = index_init + i; 70 | if ( buckets_[index_current % num_buckets_].entry == NULL 71 | || buckets_[index_current % num_buckets_].entry == DELETED_BUCKET) { 72 | found = true; 73 | monitoring_->SetDIB(index_current % num_buckets_, i); 74 | monitoring_->AddDFB(i); 75 | monitoring_->AddAlignedDFB(index_init, index_current); 76 | break; 77 | } 78 | } 79 | 80 | if (!found) { 81 | return num_buckets_; 82 | } 83 | 84 | return index_current % num_buckets_; 85 | } 86 | 87 | 88 | 89 | 90 | 91 | int ProbingHashMap::Put(const std::string& key, const std::string& value) { 92 | uint64_t hash = 
hash_function(key); 93 | uint64_t index_init = hash % num_buckets_; 94 | uint64_t index_empty = FindEmptyBucket(index_init); 95 | 96 | if (index_empty == num_buckets_) { 97 | return 1; 98 | } 99 | 100 | char *data = new char[key.size() + value.size()]; 101 | memcpy(data, key.c_str(), key.size()); 102 | memcpy(data + key.size(), value.c_str(), value.size()); 103 | 104 | ProbingHashMap::Entry *entry = new ProbingHashMap::Entry; 105 | entry->size_key = key.size(); 106 | entry->size_value = value.size(); 107 | entry->data = data; 108 | buckets_[index_empty].entry = entry; 109 | buckets_[index_empty].hash = hash; 110 | 111 | return 0; 112 | } 113 | 114 | 115 | int ProbingHashMap::Exists(const std::string& key) { 116 | // TODO: implement 117 | return 0; 118 | } 119 | 120 | 121 | int ProbingHashMap::Remove(const std::string& key) { 122 | uint64_t hash = hash_function(key); 123 | uint64_t index_init = hash % num_buckets_; 124 | 125 | bool found = false; 126 | uint64_t index_current; 127 | 128 | for (uint32_t i = 0; i < probing_max_; i++) { 129 | index_current = (index_init + i) % num_buckets_; 130 | if (buckets_[index_current].entry == DELETED_BUCKET) { 131 | continue; 132 | } else if (buckets_[index_current].entry == NULL) { 133 | break; 134 | } else if ( key.size() == buckets_[index_current].entry->size_key 135 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 136 | found = true; 137 | break; 138 | } 139 | } 140 | 141 | if (found) { 142 | delete[] buckets_[index_current].entry->data; 143 | delete buckets_[index_current].entry; 144 | buckets_[index_current].entry = DELETED_BUCKET; 145 | monitoring_->RemoveDIB(index_current); 146 | //fprintf(stderr, "Remove() OK\n"); 147 | return 0; 148 | } else { 149 | //fprintf(stderr, "Remove() not found - %" PRIu64 " %p\n", buckets_[index_current].hash, buckets_[index_current].entry); 150 | } 151 | 152 | return 1; 153 | } 154 | 155 | 156 | int ProbingHashMap::Resize() { 157 | // TODO: implement 158 | return 0; 159 | } 160 | 161 | 162 | // For debugging 163 | int ProbingHashMap::CheckDensity() { 164 | return 0; 165 | } 166 | 167 | int ProbingHashMap::BucketCounts() { 168 | return 0; 169 | } 170 | 171 | int ProbingHashMap::Dump() { 172 | return 0; 173 | } 174 | 175 | 176 | int ProbingHashMap::GetBucketState(int index) { 177 | //printf("GetBucketState %d\n", index); 178 | if (buckets_[index].entry == NULL) { 179 | return 0; 180 | } 181 | 182 | return 1; 183 | } 184 | 185 | int ProbingHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 186 | if(buckets_[index_stored].entry == NULL) return -1; 187 | *index_init = buckets_[index_stored].hash % num_buckets_; 188 | return 0; 189 | } 190 | 191 | 192 | void ProbingHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 193 | metadata["name"] = "linear"; 194 | char buffer[1024]; 195 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %u}", num_buckets_, probing_max_); 196 | metadata["parameters_hashmap"] = buffer; 197 | sprintf(buffer, "nb%" PRIu64 "-pm%u", num_buckets_, probing_max_); 198 | metadata["parameters_hashmap_string"] = buffer; 199 | } 200 | 201 | 202 | }; // end namespace hashmap 203 | -------------------------------------------------------------------------------- /probing_hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_PROBING 2 | #define HASHMAP_PROBING 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | 
#include 10 | 11 | #include 12 | #include 13 | 14 | #include "murmurhash3.h" 15 | #include "hamming.h" 16 | #include "hashmap.h" 17 | 18 | #include "monitoring.h" 19 | 20 | namespace hashmap 21 | { 22 | 23 | 24 | 25 | class ProbingHashMap: public HashMap 26 | { 27 | public: 28 | 29 | ProbingHashMap(uint64_t size, 30 | int probing_max) { 31 | buckets_ = NULL; 32 | num_buckets_ = size; 33 | HASH_DELETED_BUCKET = 1; 34 | DELETED_BUCKET = (Entry*)1; 35 | probing_max_ = probing_max; 36 | } 37 | 38 | virtual ~ProbingHashMap() { 39 | Close(); 40 | } 41 | 42 | int Open(); 43 | int Close(); 44 | 45 | struct Entry 46 | { 47 | uint32_t size_key; 48 | uint32_t size_value; 49 | char *data; 50 | }; 51 | 52 | struct Bucket 53 | { 54 | uint64_t hash; 55 | struct Entry* entry; 56 | }; 57 | 58 | 59 | int Get(const std::string& key, std::string* value); 60 | int Put(const std::string& key, const std::string& value); 61 | int Exists(const std::string& key); 62 | int Remove(const std::string& key); 63 | int Resize(); 64 | int Dump(); 65 | int CheckDensity(); 66 | int BucketCounts(); 67 | int GetBucketState(int index); 68 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 69 | void GetMetadata(std::map< std::string, std::string >& metadata); 70 | 71 | private: 72 | Bucket* buckets_; 73 | uint64_t num_buckets_; 74 | 75 | uint64_t FindEmptyBucket(uint64_t index_init); 76 | 77 | uint64_t hash_function(const std::string& key) { 78 | static char hash[16]; 79 | static uint64_t output; 80 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 81 | memcpy(&output, hash, 8); 82 | return output; 83 | } 84 | 85 | 86 | uint32_t probing_max_; 87 | uint64_t HASH_DELETED_BUCKET; 88 | Entry* DELETED_BUCKET; 89 | 90 | }; 91 | 92 | 93 | }; // end namespace hashmap 94 | 95 | #endif // HASHMAP_PROBING 96 | -------------------------------------------------------------------------------- /shadow_hashmap.cc: -------------------------------------------------------------------------------- 1 | #include "shadow_hashmap.h" 2 | 3 | namespace hashmap { 4 | 5 | 6 | int ShadowHashMap::Open() { 7 | buckets_ = new Bucket[num_buckets_]; 8 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_)); 9 | monitoring_ = new hashmap::Monitoring(num_buckets_, size_neighborhood_max_, static_cast(this)); 10 | return 0; 11 | } 12 | 13 | 14 | 15 | int ShadowHashMap::Close() { 16 | if (buckets_ != NULL) { 17 | for (uint32_t i = 0; i < num_buckets_; i++) { 18 | if (buckets_[i].entry != NULL) { 19 | delete[] buckets_[i].entry->data; 20 | delete buckets_[i].entry; 21 | } 22 | } 23 | delete[] buckets_; 24 | } 25 | 26 | if (monitoring_ != NULL) { 27 | delete monitoring_; 28 | } 29 | return 0; 30 | } 31 | 32 | 33 | 34 | 35 | int ShadowHashMap::Get(const std::string& key, std::string* value) { 36 | uint64_t hash = hash_function(key); 37 | uint64_t index_init = hash % num_buckets_; 38 | bool found = false; 39 | uint32_t i; 40 | for (i = 0; i < size_neighborhood_; i++) { 41 | uint64_t index_current = (index_init + i) % num_buckets_; 42 | if ( buckets_[index_current].entry != NULL 43 | && buckets_[index_current].hash == hash 44 | && key.size() == buckets_[index_current].entry->size_key 45 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 46 | *value = std::string(buckets_[index_current].entry->data + key.size(), 47 | buckets_[index_current].entry->size_value); 48 | found = true; 49 | break; 50 | } 51 | } 52 | 53 | if (found) return 0; 54 | 55 | monitoring_->AddDMB(size_neighborhood_); 56 | 
monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 57 | return 1; 58 | } 59 | 60 | 61 | uint64_t ShadowHashMap::FindEmptyBucketAndDoSwaps(uint64_t index_init) { 62 | // In this function, the modulos function is being applied on indexes at the last moment, 63 | // when they are being used or returned. This allows to handle cases where the 64 | // indexes are cycling back to the beginning of the bucket array. 65 | bool found = false; 66 | uint64_t index_current = index_init; 67 | for (uint32_t i = 0; i < size_probing_; i++) { 68 | index_current = index_init + i; 69 | if (buckets_[index_current % num_buckets_].entry == NULL) { 70 | found = true; 71 | monitoring_->AddDFB(i); 72 | monitoring_->AddAlignedDFB(index_init, index_current); 73 | break; 74 | } 75 | } 76 | 77 | if (!found) { 78 | return num_buckets_; 79 | } 80 | 81 | int num_swaps = 0; 82 | 83 | uint64_t index_empty = index_current; 84 | while (index_empty - index_init >= size_neighborhood_) { 85 | uint64_t index_base_min = index_empty - (size_neighborhood_ - 1); 86 | bool found_swap = false; 87 | for (uint32_t i = size_neighborhood_ - 1; i > 0; i--) { 88 | uint64_t index_candidate = index_empty - i; 89 | if (index_candidate < index_init) continue; 90 | if (buckets_[index_candidate % num_buckets_].hash % num_buckets_ >= index_base_min) { 91 | // the candidate has its base bucket within the right scope, so we swap! 92 | buckets_[index_empty % num_buckets_].entry = buckets_[index_candidate % num_buckets_].entry; 93 | buckets_[index_empty % num_buckets_].hash = buckets_[index_candidate % num_buckets_].hash; 94 | 95 | buckets_[index_candidate % num_buckets_].entry = NULL; 96 | buckets_[index_candidate % num_buckets_].hash = 0; 97 | 98 | uint64_t dib = monitoring_->GetDIB(index_candidate % num_buckets_); 99 | monitoring_->RemoveDIB(index_candidate % num_buckets_); 100 | monitoring_->SetDIB(index_empty % num_buckets_, dib); 101 | 102 | index_empty = index_candidate; 103 | found_swap = true; 104 | num_swaps += 1; 105 | break; 106 | } 107 | } 108 | 109 | if (!found_swap) { 110 | if (size_neighborhood_ < size_neighborhood_max_) { 111 | size_neighborhood_ *= 2; 112 | //std::cerr << "Increasing neighborhood, now " << size_neighborhood_ << std::endl; 113 | } else { 114 | // For debugging only, dump of the area around the neighborhood 115 | if (false) { 116 | //fprintf(stderr, "index [%" PRIu64 "] empty [%" PRIu64 "]\n", index_init, index_empty); 117 | uint32_t index_temp = index_empty - size_neighborhood_ + 1; 118 | if (index_temp > index_init) index_temp = index_init; 119 | if (index_temp < 20) { 120 | index_temp = 0; 121 | } else { 122 | index_temp -= 20; 123 | } 124 | for (; index_temp <= index_empty + 20; index_temp++) { 125 | if (index_temp == index_empty - size_neighborhood_ + 1) { 126 | fprintf(stderr, "neigh "); 127 | } else if (index_temp == index_init) { 128 | fprintf(stderr, "index "); 129 | } else if (index_temp == index_empty) { 130 | fprintf(stderr, "empty "); 131 | } else { 132 | fprintf(stderr, " "); 133 | } 134 | 135 | fprintf(stderr, " %7du ", index_temp); 136 | 137 | if (buckets_[index_temp % num_buckets_].entry == NULL) { 138 | fprintf(stderr, " EMP"); 139 | } else { 140 | fprintf(stderr, "%7" PRIu64 " ", buckets_[index_temp % num_buckets_].hash % num_buckets_); 141 | } 142 | fprintf(stderr, "\n"); 143 | } 144 | fprintf(stderr, "\n"); 145 | } 146 | return num_buckets_; 147 | } 148 | } 149 | } 150 | 151 | monitoring_->SetDIB(index_empty % num_buckets_, 152 | index_empty - index_init); 153 | 
monitoring_->AddNumberOfSwaps(num_swaps); 154 | 155 | return index_empty % num_buckets_; 156 | } 157 | 158 | int ShadowHashMap::Put(const std::string& key, const std::string& value) { 159 | uint64_t hash = hash_function(key); 160 | uint64_t index_init = hash % num_buckets_; 161 | uint64_t index_empty = FindEmptyBucketAndDoSwaps(index_init); 162 | // TODO: Put() should use Exists() and perform a replacement if needed. 163 | if (index_empty == num_buckets_) { 164 | return 1; 165 | } 166 | 167 | char *data = new char[key.size() + value.size()]; 168 | memcpy(data, key.c_str(), key.size()); 169 | memcpy(data + key.size(), value.c_str(), value.size()); 170 | 171 | ShadowHashMap::Entry *entry = new ShadowHashMap::Entry; 172 | entry->size_key = key.size(); 173 | entry->size_value = value.size(); 174 | entry->data = data; 175 | buckets_[index_empty].entry = entry; 176 | buckets_[index_empty].hash = hash; 177 | 178 | return 0; 179 | } 180 | 181 | int ShadowHashMap::Exists(const std::string& key) { 182 | // TODO: implement 183 | return 0; 184 | } 185 | 186 | int ShadowHashMap::Remove(const std::string& key) { 187 | uint64_t hash = hash_function(key); 188 | uint64_t index_init = hash % num_buckets_; 189 | bool found = false; 190 | uint64_t index_current; 191 | for (uint32_t i = 0; i < size_neighborhood_; i++) { 192 | index_current = (index_init + i) % num_buckets_; 193 | if ( buckets_[index_current].entry != NULL 194 | && buckets_[index_current].hash == hash 195 | && key.size() == buckets_[index_current].entry->size_key 196 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 197 | found = true; 198 | break; 199 | } 200 | } 201 | 202 | if (found) { 203 | delete[] buckets_[index_current].entry->data; 204 | delete buckets_[index_current].entry; 205 | buckets_[index_current].entry = NULL; 206 | monitoring_->RemoveDIB(index_current); 207 | return 0; 208 | } 209 | 210 | return 0; 211 | } 212 | 213 | int ShadowHashMap::Resize() { 214 | // TODO: implement 215 | return 0; 216 | } 217 | 218 | 219 | // For debugging 220 | int ShadowHashMap::CheckDensity() { 221 | return 0; 222 | } 223 | 224 | 225 | int ShadowHashMap::BucketCounts() { 226 | std::cout << "current neighborhood: " << size_neighborhood_ << std::endl; 227 | return 0; 228 | } 229 | 230 | 231 | int ShadowHashMap::Dump() { 232 | return 0; 233 | } 234 | 235 | 236 | int ShadowHashMap::GetBucketState(int index) { 237 | if (buckets_[index].entry == NULL) { 238 | return 0; 239 | } 240 | 241 | return 1; 242 | 243 | } 244 | 245 | int ShadowHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) { 246 | if(buckets_[index_stored].entry == NULL) return -1; 247 | *index_init = buckets_[index_stored].hash % num_buckets_; 248 | return 0; 249 | } 250 | 251 | 252 | void ShadowHashMap::GetMetadata(std::map< std::string, std::string >& metadata) { 253 | metadata["name"] = "shadow"; 254 | char buffer[1024]; 255 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"size_probing\": %u, \"size_neighborhood_start\": %u, \"size_neighborhood_end\": %u}", num_buckets_, size_probing_, size_neighborhood_start_, size_neighborhood_max_); 256 | metadata["parameters_hashmap"] = buffer; 257 | sprintf(buffer, "nb%" PRIu64 "-sp%u-sns%u-sne%u", num_buckets_, size_probing_, size_neighborhood_start_, size_neighborhood_max_); 258 | metadata["parameters_hashmap_string"] = buffer; 259 | } 260 | 261 | 262 | 263 | }; 264 | -------------------------------------------------------------------------------- /shadow_hashmap.h: 
-------------------------------------------------------------------------------- 1 | #ifndef HASHMAP_SHADOW 2 | #define HASHMAP_SHADOW 3 | 4 | #ifndef __STDC_FORMAT_MACROS 5 | #define __STDC_FORMAT_MACROS 6 | #endif 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "murmurhash3.h" 15 | #include "hamming.h" 16 | #include "hashmap.h" 17 | #include "monitoring.h" 18 | 19 | namespace hashmap 20 | { 21 | 22 | 23 | 24 | class ShadowHashMap: public HashMap 25 | { 26 | public: 27 | 28 | ShadowHashMap(uint64_t size, 29 | uint64_t size_probing, 30 | uint64_t size_neighborhood_start, 31 | uint64_t size_neighborhood_end 32 | ) { 33 | buckets_ = NULL; 34 | num_buckets_ = size; 35 | size_neighborhood_ = size_neighborhood_start; 36 | size_neighborhood_start_ = size_neighborhood_start; 37 | size_neighborhood_max_ = size_neighborhood_end; 38 | size_probing_ = size_probing; 39 | } 40 | 41 | virtual ~ShadowHashMap() { 42 | Close(); 43 | } 44 | 45 | int Open(); 46 | int Close(); 47 | 48 | 49 | struct Entry 50 | { 51 | uint32_t size_key; 52 | uint32_t size_value; 53 | char *data; 54 | }; 55 | 56 | struct Bucket 57 | { 58 | uint64_t hash; 59 | struct Entry* entry; 60 | }; 61 | 62 | 63 | int Get(const std::string& key, std::string* value); 64 | int Put(const std::string& key, const std::string& value); 65 | int Exists(const std::string& key); 66 | int Remove(const std::string& key); 67 | int Resize(); 68 | int Dump(); 69 | int CheckDensity(); 70 | int BucketCounts(); 71 | int GetBucketState(int index); 72 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init); 73 | void GetMetadata(std::map< std::string, std::string >& metadata); 74 | 75 | 76 | private: 77 | Bucket* buckets_; 78 | uint64_t num_buckets_; 79 | 80 | uint64_t FindEmptyBucketAndDoSwaps(uint64_t index_init); 81 | 82 | uint64_t hash_function(const std::string& key) { 83 | static char hash[16]; 84 | static uint64_t output; 85 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash); 86 | memcpy(&output, hash, 8); 87 | return output; 88 | } 89 | 90 | uint32_t size_neighborhood_; 91 | uint32_t size_neighborhood_start_; 92 | uint32_t size_neighborhood_max_; 93 | uint32_t size_probing_; 94 | 95 | }; 96 | 97 | 98 | }; // end namespace hashmap 99 | 100 | #endif // HASHMAP_SHADOW 101 | -------------------------------------------------------------------------------- /testcase.cc: -------------------------------------------------------------------------------- 1 | #include "testcase.h" 2 | 3 | 4 | // TODO: Factorize as much as possible across the test cases. 5 | 6 | namespace hashmap 7 | { 8 | 9 | int exists_or_mkdir(const char *path) { 10 | struct stat sb; 11 | 12 | if (stat(path, &sb) == 0) { 13 | if (!S_ISDIR(sb.st_mode)) { 14 | return 1; 15 | } 16 | } else if (mkdir(path, 0777) != 0) { 17 | return 1; 18 | } 19 | 20 | return 0; 21 | } 22 | 23 | std::string concatenate(std::string const& str, int i) 24 | { 25 | std::stringstream s; 26 | s << str << i; 27 | return s.str(); 28 | } 29 | 30 | void TestCase::InsertEntries(uint32_t num_items, std::set& keys) { 31 | std::string key; 32 | std::set::iterator it_find; 33 | std::string value_dummy; 34 | static uint64_t key_id_current = 0; 35 | // NOTE: If ever using this method, remember to reset key_id_current between 36 | // instances. 
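// [Descriptive comment added for clarity.] key_id_current advances by a random
// stride of 1..32 and each key is the string "key<id>", so the ids are strictly
// increasing and every generated key is new within a run. Get() is therefore
// expected to return 1 (key not found) before the Put(); any other return value
// is reported as an error.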
37 | 38 | for (uint32_t j = 0; j < num_items; j++) { 39 | key_id_current += 1 + rand() % 32; 40 | key = concatenate( "key", key_id_current ); 41 | it_find = keys.find(key); 42 | if (it_find != keys.end()) { 43 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 44 | } 45 | 46 | keys.insert(key); 47 | int ret_get = hm_->Get(key, &value_dummy); 48 | if (ret_get != 1) { 49 | fprintf(stderr, "Get() error\n"); 50 | } 51 | int ret_put = hm_->Put(key, key); 52 | if (ret_put != 0) { 53 | fprintf(stderr, "Put() error\n"); 54 | } 55 | } 56 | } 57 | 58 | 59 | void TestCase::RemoveEntries(uint32_t num_items, std::set& keys) { 60 | for (uint32_t index_del = 0; index_del < num_items; index_del++) { 61 | uint64_t r = rand(); 62 | uint64_t offset = r % keys.size(); 63 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 64 | std::set::iterator it(keys.begin()); 65 | std::advance(it, offset); 66 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 67 | //key = buffer; 68 | int ret_remove = hm_->Remove(*it); 69 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 70 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 71 | keys.erase(it); 72 | } 73 | } 74 | 75 | 76 | 77 | 78 | void BatchTestCase::run() { 79 | std::set keys; 80 | std::string key; 81 | char filename[1024]; 82 | uint32_t num_items; 83 | uint32_t num_items_big = (uint32_t)((double)num_buckets_ * load_factor_max_); 84 | uint32_t num_items_small = (uint32_t)((double)num_buckets_ * load_factor_remove_); 85 | fprintf(stdout, "num_items %u %u\n", num_items_big, num_items_small); 86 | 87 | std::string testcase = "batch"; 88 | std::string directory = "results"; 89 | if (exists_or_mkdir(directory.c_str()) != 0) { 90 | fprintf(stderr, "Could not create directory [%s]\n", directory.c_str()); 91 | exit(1); 92 | } 93 | 94 | char pt_string[1024]; 95 | sprintf(pt_string, "lfm%.2f-lfr%.2f", load_factor_max_, load_factor_remove_); 96 | 97 | char pt_json[1024]; 98 | sprintf(pt_json, "{\"load_factor_max\": %.2f, \"load_factor_remove\": %.2f}", load_factor_max_, load_factor_remove_); 99 | 100 | 101 | std::set::iterator it_find; 102 | for (int i = 0; i < 50; i++) { 103 | 104 | num_items = num_items_big; 105 | srand(i); 106 | keys.clear(); 107 | hm_->Open(); 108 | 109 | 110 | std::map metadata; 111 | hm_->GetMetadata(metadata); 112 | 113 | char directory_sub_buffer[2048]; 114 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 115 | std::string directory_sub(directory_sub_buffer); 116 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 117 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 118 | exit(1); 119 | } 120 | 121 | std::string value_dummy; 122 | uint64_t key_id_current = 0; 123 | 124 | for (int cycle = 0; cycle < 50; cycle++) { 125 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 126 | bool has_error_on_put = false; 127 | for (uint32_t j = 0; j < num_items; j++) { 128 | key_id_current += 1 + rand() % 32; 129 | key = concatenate( "key", key_id_current ); 130 | it_find = keys.find(key); 131 | if (it_find != keys.end()) { 132 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 133 | } 134 | 135 | keys.insert(key); 136 | int ret_get = hm_->Get(key, &value_dummy); 137 | if (ret_get != 1) { 138 | fprintf(stderr, "Get() error\n"); 139 | } 140 | int ret_put = hm_->Put(key, key); 141 | //fprintf(stderr, "Put() 
[%s]\n", key.c_str()); 142 | if (ret_put != 0) { 143 | fprintf(stderr, "Put() error\n"); 144 | // break on error 145 | has_error_on_put = true; 146 | break; 147 | } 148 | } 149 | printf("keys insert %zu\n", keys.size()); 150 | if (has_error_on_put) { 151 | hm_->monitoring_->ResetDFB(); 152 | hm_->monitoring_->ResetAlignedDFB(); 153 | hm_->monitoring_->ResetNumberOfSwaps(); 154 | hm_->monitoring_->ResetDMB(); 155 | hm_->monitoring_->ResetAlignedDMB(); 156 | hm_->monitoring_->ResetDSB(); 157 | hm_->monitoring_->ResetAlignedDSB(); 158 | num_items = num_items_small; 159 | break; 160 | } 161 | 162 | hm_->monitoring_->SetTestcase(testcase); 163 | hm_->monitoring_->SetInstance(i); 164 | hm_->monitoring_->SetCycle(cycle); 165 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 166 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 167 | 168 | sprintf(filename, "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", directory_sub.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str(), pt_string, i, cycle); 169 | fprintf(stderr, "filename dib %s\n", filename); 170 | hm_->monitoring_->PrintDIB(filename); 171 | 172 | 173 | sprintf(filename, 174 | "%s/%s-%s-%s--%s-adib--instance%05d-cycle%04d.json", 175 | directory_sub.c_str(), 176 | testcase.c_str(), 177 | metadata["name"].c_str(), 178 | metadata["parameters_hashmap_string"].c_str(), 179 | pt_string, 180 | i, 181 | cycle); 182 | hm_->monitoring_->PrintNumScannedBlocks(filename); 183 | 184 | sprintf(filename, 185 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 186 | directory_sub.c_str(), 187 | testcase.c_str(), 188 | metadata["name"].c_str(), 189 | metadata["parameters_hashmap_string"].c_str(), 190 | pt_string, 191 | i, 192 | cycle); 193 | hm_->monitoring_->PrintDFB(filename); 194 | hm_->monitoring_->ResetDFB(); 195 | 196 | sprintf(filename, 197 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 198 | directory_sub.c_str(), 199 | testcase.c_str(), 200 | metadata["name"].c_str(), 201 | metadata["parameters_hashmap_string"].c_str(), 202 | pt_string, 203 | i, 204 | cycle); 205 | hm_->monitoring_->PrintAlignedDFB(filename); 206 | hm_->monitoring_->ResetAlignedDFB(); 207 | 208 | sprintf(filename, 209 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 210 | directory_sub.c_str(), 211 | testcase.c_str(), 212 | metadata["name"].c_str(), 213 | metadata["parameters_hashmap_string"].c_str(), 214 | pt_string, 215 | i, 216 | cycle); 217 | hm_->monitoring_->PrintNumberOfSwaps(filename); 218 | hm_->monitoring_->ResetNumberOfSwaps(); 219 | 220 | sprintf(filename, 221 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 222 | directory_sub.c_str(), 223 | testcase.c_str(), 224 | metadata["name"].c_str(), 225 | metadata["parameters_hashmap_string"].c_str(), 226 | pt_string, 227 | i, 228 | cycle); 229 | hm_->monitoring_->PrintDMB(filename); 230 | hm_->monitoring_->ResetDMB(); 231 | 232 | sprintf(filename, 233 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 234 | directory_sub.c_str(), 235 | testcase.c_str(), 236 | metadata["name"].c_str(), 237 | metadata["parameters_hashmap_string"].c_str(), 238 | pt_string, 239 | i, 240 | cycle); 241 | hm_->monitoring_->PrintAlignedDMB(filename); 242 | hm_->monitoring_->ResetAlignedDMB(); 243 | 244 | sprintf(filename, 245 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 246 | directory_sub.c_str(), 247 | testcase.c_str(), 248 | metadata["name"].c_str(), 249 | metadata["parameters_hashmap_string"].c_str(), 250 | pt_string, 251 | i, 252 | cycle); 253 | 
hm_->monitoring_->PrintDSB(filename); 254 | hm_->monitoring_->ResetDSB(); 255 | 256 | sprintf(filename, 257 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 258 | directory_sub.c_str(), 259 | testcase.c_str(), 260 | metadata["name"].c_str(), 261 | metadata["parameters_hashmap_string"].c_str(), 262 | pt_string, 263 | i, 264 | cycle); 265 | hm_->monitoring_->PrintAlignedDSB(filename); 266 | hm_->monitoring_->ResetAlignedDSB(); 267 | 268 | 269 | 270 | for (uint32_t index_del = 0; index_del < num_items_small; index_del++) { 271 | uint64_t r = rand(); 272 | uint64_t offset = r % keys.size(); 273 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 274 | std::set::iterator it(keys.begin()); 275 | std::advance(it, offset); 276 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 277 | //key = buffer; 278 | int ret_remove = hm_->Remove(*it); 279 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 280 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 281 | keys.erase(it); 282 | } 283 | printf("keys erase %zu\n", keys.size()); 284 | num_items = num_items_small; 285 | } 286 | 287 | fprintf(stderr, "close\n"); 288 | hm_->Close(); 289 | fprintf(stderr, "ok\n"); 290 | } 291 | } 292 | 293 | 294 | 295 | 296 | void RippleTestCase::run() { 297 | std::set keys; 298 | std::string key; 299 | char filename[1024]; 300 | uint32_t num_items; 301 | uint32_t num_items_big = (uint32_t)((double)num_buckets_ * load_factor_max_); 302 | uint32_t num_items_small = (uint32_t)((double)num_buckets_ * load_factor_remove_); 303 | fprintf(stdout, "num_items %u %u\n", num_items_big, num_items_small); 304 | 305 | std::string testcase = "ripple"; 306 | std::string directory = "results"; 307 | if (exists_or_mkdir(directory.c_str()) != 0) { 308 | fprintf(stderr, "Could not create directory [%s]\n", testcase.c_str()); 309 | exit(1); 310 | } 311 | 312 | char pt_string[1024]; 313 | sprintf(pt_string, "lfm%.2f-lfr%.2f", load_factor_max_, load_factor_remove_); 314 | 315 | char pt_json[1024]; 316 | sprintf(pt_json, "{\"load_factor_max\": %.2f, \"load_factor_remove\": %.2f}", load_factor_max_, load_factor_remove_); 317 | 318 | 319 | std::set::iterator it_find; 320 | for (int i = 0; i < 50; i++) { 321 | num_items = num_items_big; 322 | srand(i); 323 | keys.clear(); 324 | hm_->Open(); 325 | 326 | std::map metadata; 327 | hm_->GetMetadata(metadata); 328 | 329 | char directory_sub_buffer[2048]; 330 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 331 | std::string directory_sub(directory_sub_buffer); 332 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 333 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 334 | exit(1); 335 | } 336 | 337 | std::string value_dummy; 338 | uint64_t key_id_current = 0; 339 | 340 | for (int cycle = 0; cycle < 50; cycle++) { 341 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 342 | bool has_error_on_put = false; 343 | for (uint32_t j = 0; j < num_items; j++) { 344 | key_id_current += 1 + rand() % 32; 345 | key = concatenate( "key", key_id_current ); 346 | it_find = keys.find(key); 347 | if (it_find != keys.end()) { 348 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 349 | } 350 | 351 | keys.insert(key); 352 | int ret_get = hm_->Get(key, &value_dummy); 353 | if (ret_get != 1) { 354 | fprintf(stderr, "Get() error\n"); 355 | } 356 | int ret_put = hm_->Put(key, key); 357 | if 
(ret_put != 0) { 358 | fprintf(stderr, "Put() error\n"); 359 | has_error_on_put = true; 360 | } 361 | 362 | if (cycle > 0) { 363 | uint64_t r = rand(); 364 | uint64_t offset = r % keys.size(); 365 | //printf("delete index %d -- offset %" PRIu64 " -- rand %" PRIu64 "\n", index_del, offset, r); 366 | std::set::iterator it(keys.begin()); 367 | std::advance(it, offset); 368 | //fprintf(stdout, "str: %s\n", (*it).c_str()); 369 | //key = buffer; 370 | int ret_remove = hm_->Remove(*it); 371 | //fprintf(stderr, "Remove() [%s]\n", it->c_str()); 372 | if (ret_remove != 0) fprintf(stderr, "Error while removing\n"); 373 | keys.erase(it); 374 | } 375 | } 376 | printf("keys insert %zu\n", keys.size()); 377 | if (has_error_on_put) { 378 | hm_->monitoring_->ResetDFB(); 379 | hm_->monitoring_->ResetAlignedDFB(); 380 | hm_->monitoring_->ResetNumberOfSwaps(); 381 | hm_->monitoring_->ResetDMB(); 382 | hm_->monitoring_->ResetAlignedDMB(); 383 | hm_->monitoring_->ResetDSB(); 384 | hm_->monitoring_->ResetAlignedDSB(); 385 | num_items = num_items_small; 386 | break; 387 | } 388 | 389 | hm_->monitoring_->SetTestcase(testcase); 390 | hm_->monitoring_->SetInstance(i); 391 | hm_->monitoring_->SetCycle(cycle); 392 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 393 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 394 | 395 | sprintf(filename, "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", directory_sub.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str(), pt_string, i, cycle); 396 | fprintf(stderr, "filename dib %s\n", filename); 397 | hm_->monitoring_->PrintDIB(filename); 398 | 399 | sprintf(filename, 400 | "%s/%s-%s-%s--%s-blocks--instance%05d-cycle%04d.json", 401 | directory_sub.c_str(), 402 | testcase.c_str(), 403 | metadata["name"].c_str(), 404 | metadata["parameters_hashmap_string"].c_str(), 405 | pt_string, 406 | i, 407 | cycle); 408 | hm_->monitoring_->PrintNumScannedBlocks(filename); 409 | 410 | sprintf(filename, 411 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 412 | directory_sub.c_str(), 413 | testcase.c_str(), 414 | metadata["name"].c_str(), 415 | metadata["parameters_hashmap_string"].c_str(), 416 | pt_string, 417 | i, 418 | cycle); 419 | hm_->monitoring_->PrintDFB(filename); 420 | hm_->monitoring_->ResetDFB(); 421 | 422 | sprintf(filename, 423 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 424 | directory_sub.c_str(), 425 | testcase.c_str(), 426 | metadata["name"].c_str(), 427 | metadata["parameters_hashmap_string"].c_str(), 428 | pt_string, 429 | i, 430 | cycle); 431 | hm_->monitoring_->PrintAlignedDFB(filename); 432 | hm_->monitoring_->ResetAlignedDFB(); 433 | 434 | sprintf(filename, 435 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 436 | directory_sub.c_str(), 437 | testcase.c_str(), 438 | metadata["name"].c_str(), 439 | metadata["parameters_hashmap_string"].c_str(), 440 | pt_string, 441 | i, 442 | cycle); 443 | hm_->monitoring_->PrintNumberOfSwaps(filename); 444 | hm_->monitoring_->ResetNumberOfSwaps(); 445 | 446 | sprintf(filename, 447 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 448 | directory_sub.c_str(), 449 | testcase.c_str(), 450 | metadata["name"].c_str(), 451 | metadata["parameters_hashmap_string"].c_str(), 452 | pt_string, 453 | i, 454 | cycle); 455 | hm_->monitoring_->PrintDMB(filename); 456 | hm_->monitoring_->ResetDMB(); 457 | 458 | sprintf(filename, 459 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 460 | directory_sub.c_str(), 461 | testcase.c_str(), 462 | 
metadata["name"].c_str(), 463 | metadata["parameters_hashmap_string"].c_str(), 464 | pt_string, 465 | i, 466 | cycle); 467 | hm_->monitoring_->PrintAlignedDMB(filename); 468 | hm_->monitoring_->ResetAlignedDMB(); 469 | 470 | sprintf(filename, 471 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 472 | directory_sub.c_str(), 473 | testcase.c_str(), 474 | metadata["name"].c_str(), 475 | metadata["parameters_hashmap_string"].c_str(), 476 | pt_string, 477 | i, 478 | cycle); 479 | hm_->monitoring_->PrintDSB(filename); 480 | hm_->monitoring_->ResetDSB(); 481 | 482 | sprintf(filename, 483 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 484 | directory_sub.c_str(), 485 | testcase.c_str(), 486 | metadata["name"].c_str(), 487 | metadata["parameters_hashmap_string"].c_str(), 488 | pt_string, 489 | i, 490 | cycle); 491 | hm_->monitoring_->PrintAlignedDSB(filename); 492 | hm_->monitoring_->ResetAlignedDSB(); 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | num_items = num_items_small; 501 | } 502 | 503 | fprintf(stderr, "close\n"); 504 | hm_->Close(); 505 | fprintf(stderr, "ok\n"); 506 | } 507 | } 508 | 509 | 510 | 511 | 512 | void LoadingTestCase::run() { 513 | std::set keys; 514 | std::string key; 515 | char filename[1024]; 516 | uint32_t num_items; 517 | uint32_t num_items_big = num_buckets_; 518 | 519 | std::string testcase = "loading"; 520 | std::string directory = "results"; 521 | if (exists_or_mkdir(directory.c_str()) != 0) { 522 | fprintf(stderr, "Could not create directory [%s]\n", testcase.c_str()); 523 | exit(1); 524 | } 525 | 526 | char pt_string[1024]; 527 | sprintf(pt_string, "%s", ""); 528 | 529 | char pt_json[1024]; 530 | sprintf(pt_json, "{}"); 531 | 532 | num_items = num_items_big / 50; 533 | std::set::iterator it_find; 534 | for (int i = 0; i < 50; i++) { 535 | srand(i); 536 | keys.clear(); 537 | hm_->Open(); 538 | 539 | std::map metadata; 540 | hm_->GetMetadata(metadata); 541 | 542 | char directory_sub_buffer[2048]; 543 | sprintf(directory_sub_buffer, "%s/%s-%s-%s", directory.c_str(), testcase.c_str(), metadata["name"].c_str(), metadata["parameters_hashmap_string"].c_str()); 544 | std::string directory_sub(directory_sub_buffer); 545 | if (exists_or_mkdir(directory_sub.c_str()) != 0) { 546 | fprintf(stderr, "Could not create directory [%s]\n", directory_sub.c_str()); 547 | exit(1); 548 | } 549 | 550 | std::string value_dummy; 551 | uint64_t key_id_current = 0; 552 | 553 | for (int cycle = 0; cycle < 50; cycle++) { 554 | fprintf(stderr, "instance %d cycle %d\n", i, cycle); 555 | bool has_error_on_put = false; 556 | for (uint32_t j = 0; j < num_items; j++) { 557 | key_id_current += 1 + rand() % 32; 558 | key = concatenate( "key", key_id_current ); 559 | it_find = keys.find(key); 560 | if (it_find != keys.end()) { 561 | fprintf(stderr, "Error: key already in the hash table, this should not happen\n"); 562 | } 563 | 564 | int ret_get = hm_->Get(key, &value_dummy); 565 | if (ret_get != 1) { 566 | fprintf(stderr, "Get() error\n"); 567 | } 568 | int ret_put = hm_->Put(key, key); 569 | //fprintf(stderr, "Put() [%s]\n", key.c_str()); 570 | if (ret_put != 0) { 571 | fprintf(stderr, "Put() error\n"); 572 | // break on error 573 | has_error_on_put = true; 574 | break; 575 | } 576 | keys.insert(key); 577 | } 578 | printf("keys insert %zu\n", keys.size()); 579 | if (has_error_on_put) { 580 | hm_->monitoring_->ResetDFB(); 581 | hm_->monitoring_->ResetAlignedDFB(); 582 | hm_->monitoring_->ResetNumberOfSwaps(); 583 | hm_->monitoring_->ResetDMB(); 584 | hm_->monitoring_->ResetAlignedDMB(); 
585 | hm_->monitoring_->ResetDSB(); 586 | hm_->monitoring_->ResetAlignedDSB(); 587 | break; 588 | } 589 | 590 | hm_->monitoring_->SetTestcase(testcase); 591 | hm_->monitoring_->SetInstance(i); 592 | hm_->monitoring_->SetCycle(cycle); 593 | hm_->monitoring_->SetParametersTestcaseString(pt_string); 594 | hm_->monitoring_->SetParametersTestcaseJson(pt_json); 595 | 596 | sprintf(filename, 597 | "%s/%s-%s-%s--%s-dib--instance%05d-cycle%04d.json", 598 | directory_sub.c_str(), 599 | testcase.c_str(), 600 | metadata["name"].c_str(), 601 | metadata["parameters_hashmap_string"].c_str(), 602 | pt_string, 603 | i, 604 | cycle); 605 | fprintf(stderr, "filename dib %s\n", filename); 606 | hm_->monitoring_->PrintDIB(filename); 607 | 608 | sprintf(filename, 609 | "%s/%s-%s-%s--%s-blocks--instance%05d-cycle%04d.json", 610 | directory_sub.c_str(), 611 | testcase.c_str(), 612 | metadata["name"].c_str(), 613 | metadata["parameters_hashmap_string"].c_str(), 614 | pt_string, 615 | i, 616 | cycle); 617 | hm_->monitoring_->PrintNumScannedBlocks(filename); 618 | 619 | sprintf(filename, 620 | "%s/%s-%s-%s--%s-dfb--instance%05d-cycle%04d.json", 621 | directory_sub.c_str(), 622 | testcase.c_str(), 623 | metadata["name"].c_str(), 624 | metadata["parameters_hashmap_string"].c_str(), 625 | pt_string, 626 | i, 627 | cycle); 628 | hm_->monitoring_->PrintDFB(filename); 629 | hm_->monitoring_->ResetDFB(); 630 | 631 | sprintf(filename, 632 | "%s/%s-%s-%s--%s-adfb--instance%05d-cycle%04d.json", 633 | directory_sub.c_str(), 634 | testcase.c_str(), 635 | metadata["name"].c_str(), 636 | metadata["parameters_hashmap_string"].c_str(), 637 | pt_string, 638 | i, 639 | cycle); 640 | hm_->monitoring_->PrintAlignedDFB(filename); 641 | hm_->monitoring_->ResetAlignedDFB(); 642 | 643 | 644 | sprintf(filename, 645 | "%s/%s-%s-%s--%s-swap--instance%05d-cycle%04d.json", 646 | directory_sub.c_str(), 647 | testcase.c_str(), 648 | metadata["name"].c_str(), 649 | metadata["parameters_hashmap_string"].c_str(), 650 | pt_string, 651 | i, 652 | cycle); 653 | hm_->monitoring_->PrintNumberOfSwaps(filename); 654 | hm_->monitoring_->ResetNumberOfSwaps(); 655 | 656 | sprintf(filename, 657 | "%s/%s-%s-%s--%s-dmb--instance%05d-cycle%04d.json", 658 | directory_sub.c_str(), 659 | testcase.c_str(), 660 | metadata["name"].c_str(), 661 | metadata["parameters_hashmap_string"].c_str(), 662 | pt_string, 663 | i, 664 | cycle); 665 | hm_->monitoring_->PrintDMB(filename); 666 | hm_->monitoring_->ResetDMB(); 667 | 668 | sprintf(filename, 669 | "%s/%s-%s-%s--%s-admb--instance%05d-cycle%04d.json", 670 | directory_sub.c_str(), 671 | testcase.c_str(), 672 | metadata["name"].c_str(), 673 | metadata["parameters_hashmap_string"].c_str(), 674 | pt_string, 675 | i, 676 | cycle); 677 | hm_->monitoring_->PrintAlignedDMB(filename); 678 | hm_->monitoring_->ResetAlignedDMB(); 679 | 680 | sprintf(filename, 681 | "%s/%s-%s-%s--%s-dsb--instance%05d-cycle%04d.json", 682 | directory_sub.c_str(), 683 | testcase.c_str(), 684 | metadata["name"].c_str(), 685 | metadata["parameters_hashmap_string"].c_str(), 686 | pt_string, 687 | i, 688 | cycle); 689 | hm_->monitoring_->PrintDSB(filename); 690 | hm_->monitoring_->ResetDSB(); 691 | 692 | sprintf(filename, 693 | "%s/%s-%s-%s--%s-adsb--instance%05d-cycle%04d.json", 694 | directory_sub.c_str(), 695 | testcase.c_str(), 696 | metadata["name"].c_str(), 697 | metadata["parameters_hashmap_string"].c_str(), 698 | pt_string, 699 | i, 700 | cycle); 701 | hm_->monitoring_->PrintAlignedDSB(filename); 702 | hm_->monitoring_->ResetAlignedDSB(); 703 | 704 | 
705 |
706 |
707 |
708 | }
709 |
710 | fprintf(stderr, "close\n");
711 | hm_->Close();
712 | fprintf(stderr, "ok\n");
713 | }
714 | }
715 |
716 |
717 |
718 |
719 |
720 | };
721 |
--------------------------------------------------------------------------------
/testcase.h:
--------------------------------------------------------------------------------
1 | #ifndef HASHMAP_TESTCASE
2 | #define HASHMAP_TESTCASE
3 |
4 | #ifndef __STDC_FORMAT_MACROS
5 | #define __STDC_FORMAT_MACROS
6 | #endif
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include
14 | //#include
15 | //#include
16 | #include
17 | #include
18 | #include
19 |
20 |
21 | #include "hashmap.h"
22 | #include "monitoring.h"
23 |
24 |
25 | namespace hashmap
26 | {
27 |
28 | class TestCase {
29 | public:
30 | TestCase() {}
31 | virtual ~TestCase() {}
32 | virtual void run() = 0;
33 | void InsertEntries(uint32_t nb_items, std::set<std::string>& keys);
34 | void RemoveEntries(uint32_t nb_items, std::set<std::string>& keys);
35 | HashMap *hm_;
36 | };
37 |
38 |
39 | class BatchTestCase: public TestCase {
40 |
41 | public:
42 | BatchTestCase(HashMap *hm, uint64_t num_buckets, double load_factor_max, double load_factor_remove) {
43 | hm_ = hm;
44 | num_buckets_ = num_buckets;
45 | load_factor_max_ = load_factor_max;
46 | load_factor_remove_ = load_factor_remove;
47 | }
48 | virtual void run();
49 |
50 | private:
51 | uint64_t num_buckets_;
52 | double load_factor_max_;
53 | double load_factor_remove_;
54 | };
55 |
56 |
57 |
58 | class RippleTestCase: public TestCase {
59 |
60 | public:
61 | RippleTestCase(HashMap *hm, uint64_t num_buckets, double load_factor_max, double load_factor_remove) {
62 | hm_ = hm;
63 | num_buckets_ = num_buckets;
64 | load_factor_max_ = load_factor_max;
65 | load_factor_remove_ = load_factor_remove;
66 | }
67 | virtual void run();
68 |
69 | private:
70 | uint64_t num_buckets_;
71 | double load_factor_max_;
72 | double load_factor_remove_;
73 | };
74 |
75 |
76 | class LoadingTestCase: public TestCase {
77 |
78 | public:
79 | LoadingTestCase(HashMap *hm, uint64_t num_buckets) {
80 | hm_ = hm;
81 | num_buckets_ = num_buckets;
82 | }
83 | virtual void run();
84 |
85 | private:
86 | uint64_t num_buckets_;
87 |
88 |
89 | };
90 |
91 | }; // namespace
92 |
93 | #endif
94 |
--------------------------------------------------------------------------------
/tombstone_hashmap.cc:
--------------------------------------------------------------------------------
1 | #include "tombstone_hashmap.h"
2 |
3 | namespace hashmap {
4 |
5 | int TombstoneHashMap::Open() {
6 | buckets_ = new Bucket[num_buckets_];
7 | memset(buckets_, 0, sizeof(Bucket) * (num_buckets_));
8 | monitoring_ = new hashmap::Monitoring(num_buckets_, num_buckets_, static_cast<HashMap*>(this));
9 | num_buckets_used_ = 0;
10 | init_distance_min_ = 0;
11 | init_distance_max_ = 0;
12 | return 0;
13 | }
14 |
15 | int TombstoneHashMap::Close() {
16 | if (buckets_ != NULL) {
17 | for (uint32_t i = 0; i < num_buckets_; i++) {
18 | if (buckets_[i].entry != NULL && buckets_[i].entry != DELETED_BUCKET) {
19 | delete[] buckets_[i].entry->data;
20 | delete buckets_[i].entry;
21 | }
22 | }
23 | delete[] buckets_;
24 | }
25 |
26 | distances_.clear();
27 |
28 | if (monitoring_ != NULL) {
29 | delete monitoring_;
30 | }
31 | return 0;
32 | }
33 |
34 |
35 |
36 | int TombstoneHashMap::Get(const std::string& key, std::string* value) {
37 | uint64_t hash = hash_function(key);
38 | uint64_t index_init = hash % num_buckets_;
39 | uint64_t probe_distance = 0;
40 | bool found = false;
41 | uint32_t
i; 42 | for (i = 0; i < probing_max_; i++) { 43 | uint64_t index_current = (index_init + i) % num_buckets_; 44 | FillDistanceToInitIndex(index_current, &probe_distance); 45 | if ( buckets_[index_current].entry == NULL 46 | || i > probe_distance) { 47 | break; 48 | } 49 | 50 | if (buckets_[index_current].entry == DELETED_BUCKET) { 51 | continue; 52 | } 53 | 54 | if ( key.size() == buckets_[index_current].entry->size_key 55 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) { 56 | *value = std::string(buckets_[index_current].entry->data + key.size(), 57 | buckets_[index_current].entry->size_value); 58 | found = true; 59 | break; 60 | } 61 | } 62 | 63 | if (found) return 0; 64 | 65 | monitoring_->AddDMB(i); 66 | monitoring_->AddAlignedDMB(index_init, (index_init + i) % num_buckets_); 67 | return 1; 68 | } 69 | 70 | 71 | 72 | 73 | int TombstoneHashMap::Put(const std::string& key, const std::string& value) { 74 | if (num_buckets_used_ == num_buckets_) { 75 | return 1; 76 | } 77 | num_buckets_used_ += 1; 78 | 79 | uint64_t hash = hash_function(key); 80 | uint64_t index_init = hash % num_buckets_; 81 | 82 | char *data = new char[key.size() + value.size()]; 83 | memcpy(data, key.c_str(), key.size()); 84 | memcpy(data + key.size(), value.c_str(), value.size()); 85 | 86 | TombstoneHashMap::Entry *entry = new TombstoneHashMap::Entry; 87 | entry->size_key = key.size(); 88 | entry->size_value = value.size(); 89 | entry->data = data; 90 | 91 | uint64_t index_current = index_init; 92 | uint64_t probe_distance = 0; 93 | uint64_t probe_current = GetMinInitDistance(); 94 | TombstoneHashMap::Entry *entry_temp = NULL; 95 | uint64_t hash_temp = 0; 96 | uint64_t i; 97 | int num_swaps = 0; 98 | 99 | for (i = probe_current; i < probing_max_; i++) { 100 | index_current = (index_init + i) % num_buckets_; 101 | if (buckets_[index_current].entry == NULL) { 102 | monitoring_->SetDIB(index_current, probe_current); 103 | UpdateInitDistance(probe_current, 1); 104 | buckets_[index_current].entry = entry; 105 | buckets_[index_current].hash = hash; 106 | break; 107 | } else { 108 | FillDistanceToInitIndex(index_current, &probe_distance); 109 | if (probe_current > probe_distance) { 110 | // Swapping the current bucket with the one to insert 111 | entry_temp = buckets_[index_current].entry; 112 | hash_temp = buckets_[index_current].hash; 113 | buckets_[index_current].entry = entry; 114 | buckets_[index_current].hash = hash; 115 | entry = entry_temp; 116 | hash = hash_temp; 117 | monitoring_->SetDIB(index_current, probe_current); 118 | UpdateInitDistance(probe_current, 1); 119 | num_swaps += 1; 120 | if (entry != DELETED_BUCKET) { 121 | UpdateInitDistance(probe_distance, -1); 122 | probe_current = probe_distance; 123 | } else { 124 | // The bucket we just swapped was a deleted bucket, 125 | // so the insertion process can stop here 126 | break; 127 | } 128 | } 129 | } 130 | probe_current++; 131 | } 132 | 133 | monitoring_->AddDFB(i); 134 | monitoring_->AddAlignedDFB(index_init, index_current); 135 | monitoring_->AddNumberOfSwaps(num_swaps); 136 | 137 | return 0; 138 | } 139 | 140 | 141 | int TombstoneHashMap::Exists(const std::string& key) { 142 | // TODO: implement 143 | return 0; 144 | } 145 | 146 | 147 | int TombstoneHashMap::Remove(const std::string& key) { 148 | uint64_t hash = hash_function(key); 149 | uint64_t index_init = hash % num_buckets_; 150 | uint64_t probe_distance = 0; 151 | bool found = false; 152 | uint64_t index_current; 153 | uint64_t distance_max = GetMaxInitDistance(); 154 | 
155 | //for (uint64_t i = 0; i < num_buckets_; i++) {
156 | for (uint64_t i = GetMinInitDistance(); i <= distance_max; i++) {
157 | index_current = (index_init + i) % num_buckets_;
158 |
159 | if (buckets_[index_current].entry == DELETED_BUCKET) {
160 | continue;
161 | }
162 |
163 | FillDistanceToInitIndex(index_current, &probe_distance);
164 | if ( buckets_[index_current].entry == NULL) {
165 | // || i > probe_distance) {
166 | //fprintf(stderr, "Remove() found NULL\n");
167 | continue;
168 | }
169 |
170 | if ( key.size() == buckets_[index_current].entry->size_key
171 | && memcmp(buckets_[index_current].entry->data, key.c_str(), key.size()) == 0) {
172 | found = true;
173 | uint64_t mind = GetMinInitDistance();
174 | if (i < mind) {
175 | fprintf(stderr, "Found at distance %" PRIu64 " and min at %" PRIu64 "\n", i, mind);
176 | }
177 | break;
178 | }
179 | }
180 |
181 | if (found) {
182 | FillDistanceToInitIndex(index_current, &probe_distance);
183 | UpdateInitDistance(probe_distance, -1);
184 |
185 | delete[] buckets_[index_current].entry->data;
186 | delete buckets_[index_current].entry;
187 | buckets_[index_current].entry = DELETED_BUCKET;
188 | monitoring_->RemoveDIB(index_current);
189 | num_buckets_used_ -= 1;
190 |
191 | return 0;
192 | }
193 |
194 | return 1;
195 | }
196 |
197 |
198 |
199 | int TombstoneHashMap::Resize() {
200 | // TODO: implement
201 | return 0;
202 | }
203 |
204 |
205 | // For debugging
206 | int TombstoneHashMap::CheckDensity() {
207 | return 0;
208 | }
209 |
210 | int TombstoneHashMap::BucketCounts() {
211 | return 0;
212 | }
213 |
214 | int TombstoneHashMap::Dump() {
215 | return 0;
216 | }
217 |
218 |
219 | int TombstoneHashMap::GetBucketState(int index) {
220 | //printf("GetBucketState %d\n", index);
221 | if (buckets_[index].entry == NULL) {
222 | return 0;
223 | }
224 |
225 | return 1;
226 | }
227 |
228 | int TombstoneHashMap::FillInitIndex(uint64_t index_stored, uint64_t *index_init) {
229 | if(buckets_[index_stored].entry == NULL) return -1;
230 | *index_init = buckets_[index_stored].hash % num_buckets_;
231 | return 0;
232 | }
233 |
234 | int TombstoneHashMap::FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance) {
235 | if(buckets_[index_stored].entry == NULL) return -1;
236 | uint64_t index_init = buckets_[index_stored].hash % num_buckets_;
237 | if (index_init <= index_stored) {
238 | *distance = index_stored - index_init;
239 | } else {
240 | *distance = index_stored + (num_buckets_ - index_init);
241 | }
242 | return 0;
243 | }
244 |
245 |
246 | void TombstoneHashMap::GetMetadata(std::map< std::string, std::string >& metadata) {
247 | metadata["name"] = "tombstone";
248 | char buffer[1024];
249 | sprintf(buffer, "{\"num_buckets\": %" PRIu64 ", \"probing_max\": %" PRIu64 "}", num_buckets_, probing_max_);
250 | metadata["parameters_hashmap"] = buffer;
251 | sprintf(buffer, "nb%" PRIu64 "-pm%" PRIu64 "", num_buckets_, probing_max_);
252 | metadata["parameters_hashmap_string"] = buffer;
253 | }
254 |
255 | uint64_t TombstoneHashMap::GetMinInitDistance() {
256 | return init_distance_min_;
257 | }
258 |
259 | uint64_t TombstoneHashMap::GetMaxInitDistance() {
260 | return init_distance_max_;
261 | }
262 |
263 |
264 |
265 | void TombstoneHashMap::UpdateMinMaxInitDistance() {
266 | init_distance_min_ = 0;
267 | init_distance_max_ = 0;
268 | if (distances_.size() == 0) return;
269 |
270 | std::map<uint64_t, uint64_t>::iterator it;
271 | //fprintf(stderr, "GetMinInitDistance() ----------------------\n");
272 |
273 | init_distance_min_ = std::numeric_limits<uint64_t>::max();
274 | init_distance_max_ = 0;
275 | for (it = distances_.begin(); it != distances_.end(); ++it) {
276 | //fprintf(stderr, "GetMinInitDistance() %" PRIu64 " %" PRIu64 "\n", it->first, it->second);
277 | if (it->first < init_distance_min_) {
278 | init_distance_min_ = it->first;
279 | }
280 |
281 | if (it->first > init_distance_max_) {
282 | init_distance_max_ = it->first;
283 | }
284 | }
285 |
286 | //fprintf(stderr, "GetMaxInitDistance() %" PRIu64 "\n", distances_max);
287 | }
288 |
289 |
290 | void TombstoneHashMap::UpdateInitDistance(uint64_t distance, int32_t increment) {
291 | std::map<uint64_t, uint64_t>::iterator it;
292 | it = distances_.find(distance);
293 | if (it == distances_.end()) {
294 | if (increment > 0) {
295 | distances_[distance] = increment;
296 | UpdateMinMaxInitDistance();
297 | } else {
298 | fprintf(stderr, "UpdateInitDistance() neg on not exist %" PRIu64 " %d\n", distance, increment);
299 | }
300 | } else {
301 | distances_[distance] += increment;
302 | if (distances_[distance] <= 0) {
303 | distances_.erase(it);
304 | UpdateMinMaxInitDistance();
305 | }
306 | }
307 | }
308 |
309 |
310 |
311 |
312 | }; // end namespace hashmap
313 |
--------------------------------------------------------------------------------
/tombstone_hashmap.h:
--------------------------------------------------------------------------------
1 | #ifndef HASHMAP_TOMBSTONE
2 | #define HASHMAP_TOMBSTONE
3 |
4 | #ifndef __STDC_FORMAT_MACROS
5 | #define __STDC_FORMAT_MACROS
6 | #endif
7 | #include
8 | #include
9 | #include
10 |
11 | #include
12 | #include
13 | #include
14 |
15 | #include "murmurhash3.h"
16 | #include "hamming.h"
17 | #include "hashmap.h"
18 |
19 | #include "monitoring.h"
20 |
21 | namespace hashmap
22 | {
23 |
24 |
25 |
26 | class TombstoneHashMap: public HashMap
27 | {
28 | public:
29 |
30 | TombstoneHashMap(uint64_t size) {
31 | buckets_ = NULL;
32 | num_buckets_ = size;
33 | probing_max_ = size;
34 | DELETED_BUCKET = (Entry*)1;
35 | }
36 |
37 | virtual ~TombstoneHashMap() {
38 | Close();
39 | }
40 |
41 | int Open();
42 | int Close();
43 |
44 | struct Entry
45 | {
46 | uint32_t size_key;
47 | uint32_t size_value;
48 | char *data;
49 | };
50 |
51 | struct Bucket
52 | {
53 | uint64_t hash;
54 | struct Entry* entry;
55 | };
56 |
57 | int Get(const std::string& key, std::string* value);
58 | int Put(const std::string& key, const std::string& value);
59 | int Exists(const std::string& key);
60 | int Remove(const std::string& key);
61 | int Resize();
62 | int Dump();
63 | int CheckDensity();
64 | int BucketCounts();
65 | int GetBucketState(int index);
66 | int FillInitIndex(uint64_t index_stored, uint64_t *index_init);
67 | int FillDistanceToInitIndex(uint64_t index_stored, uint64_t *distance);
68 | void GetMetadata(std::map< std::string, std::string >& metadata);
69 | uint64_t GetMinInitDistance();
70 | uint64_t GetMaxInitDistance();
71 |
72 | private:
73 | Bucket* buckets_;
74 | uint64_t num_buckets_;
75 | uint64_t num_buckets_used_;
76 |
77 | uint64_t hash_function(const std::string& key) {
78 | static char hash[16];
79 | static uint64_t output;
80 | MurmurHash3_x64_128(key.c_str(), key.size(), 0, hash);
81 | memcpy(&output, hash, 8);
82 | return output;
83 | }
84 |
85 | Entry* DELETED_BUCKET;
86 | uint64_t probing_max_;
87 |
88 | void UpdateInitDistance(uint64_t distance, int32_t increment);
89 | void UpdateMinMaxInitDistance();
90 | std::map<uint64_t, uint64_t> distances_;
91 | uint64_t init_distance_min_;
92 | uint64_t init_distance_max_;
93 |
94 |
95 |
96 | };
97 |
98 |
99 | }; // end
namespace hashmap 100 | 101 | #endif // HASHMAP_TOMBSTONE 102 | --------------------------------------------------------------------------------