├── .clang-format ├── .github └── workflows │ └── ccpp.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── include └── rigtorp │ └── HashMap.h └── src ├── HashMapBenchmark.cpp ├── HashMapExample.cpp └── HashMapTest.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM -------------------------------------------------------------------------------- /.github/workflows/ccpp.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-ubuntu: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Build & Test 13 | run: | 14 | cmake -E remove_directory build 15 | cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Werror -O2 -fsanitize=address,undefined" 16 | cmake --build build 17 | cd build 18 | ctest --output-on-failure 19 | 20 | build-windows: 21 | 22 | runs-on: ${{ matrix.os }} 23 | strategy: 24 | matrix: 25 | os: [windows-latest, windows-2016] 26 | 27 | steps: 28 | - uses: actions/checkout@v1 29 | - name: Build & Test 30 | run: | 31 | cmake -E remove_directory build 32 | cmake -B build -S . 33 | cmake --build build --config Debug 34 | cd build 35 | ctest --output-on-failure 36 | 37 | build-macos: 38 | 39 | runs-on: macOS-latest 40 | 41 | steps: 42 | - uses: actions/checkout@v1 43 | - name: Build & Test 44 | run: | 45 | cmake -E remove_directory build 46 | cmake -B build -S . 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Werror -O2 -fsanitize=address,undefined" 47 | cmake --build build 48 | cd build 49 | ctest --output-on-failure -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .idea/ 3 | build/ 4 | cmake-build-* -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) # the cxx_std_NN meta-features used below require CMake 3.8 2 | 3 | project(HashMap VERSION 1.0 LANGUAGES CXX) 4 | # Header-only library: an INTERFACE target plus a namespaced alias for consumers 5 | add_library(${PROJECT_NAME} INTERFACE) 6 | add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) 7 | 8 | target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_14) 9 | include(GNUInstallDirs) # defines CMAKE_INSTALL_INCLUDEDIR before it is expanded in the expression below 10 | target_include_directories(${PROJECT_NAME} INTERFACE 11 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> 12 | $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) 13 | 14 | # Tests and examples (built only when HashMap is the top-level project) 15 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 16 | if(MSVC) 17 | add_compile_options(/permissive- /W4) 18 | else() 19 | add_compile_options(-Wall -Wextra -Wpedantic) 20 | endif() 21 | 22 | find_package(absl QUIET) # optional: linked into the benchmark only when found 23 | 24 | add_executable(HashMapBenchmark src/HashMapBenchmark.cpp) 25 | target_link_libraries(HashMapBenchmark PRIVATE HashMap::HashMap) 26 | if(absl_FOUND) 27 | target_link_libraries(HashMapBenchmark PRIVATE absl::flat_hash_map) 28 | endif() 29 | target_compile_options(HashMapBenchmark PRIVATE $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-mavx2>) # -mavx2 is a GCC/Clang-style flag 30 | target_compile_features(HashMapBenchmark PRIVATE cxx_std_17) # PRIVATE so the requirement applies to this executable's own sources 31 | 32 | add_executable(HashMapExample src/HashMapExample.cpp) 33 | target_link_libraries(HashMapExample PRIVATE HashMap::HashMap) 34 | 35 | add_executable(HashMapTest src/HashMapTest.cpp) 36 | target_link_libraries(HashMapTest PRIVATE HashMap::HashMap) 37 | 38 | enable_testing() 39 | add_test(NAME HashMapTest COMMAND HashMapTest) 40 | endif() 41 | 42 | # Install 43 | include(CMakePackageConfigHelpers) 44 | 45 | 46 | write_basic_package_version_file( 47 | 
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" 48 | COMPATIBILITY SameMajorVersion 49 | ) 50 | # Export the target from the build tree under the ${PROJECT_NAME}:: namespace 51 | export( 52 | TARGETS ${PROJECT_NAME} 53 | NAMESPACE ${PROJECT_NAME}:: 54 | FILE "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" 55 | ) 56 | # Install rules are registered only when HashMap is the top-level project 57 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) 58 | install( 59 | DIRECTORY "include/" 60 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 61 | ) 62 | 63 | install( 64 | TARGETS ${PROJECT_NAME} 65 | EXPORT "${PROJECT_NAME}Config" 66 | INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 67 | ) 68 | 69 | install( 70 | EXPORT "${PROJECT_NAME}Config" 71 | NAMESPACE ${PROJECT_NAME}:: 72 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" 73 | ) 74 | 75 | install( 76 | FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" 77 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" 78 | ) 79 | endif() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2020 Erik Rigtorp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HashMap.h 2 | 3 | [![C/C++ CI](https://github.com/rigtorp/HashMap/workflows/C/C++%20CI/badge.svg)](https://github.com/rigtorp/HashMap/actions) 4 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/rigtorp/HashMap/master/LICENSE) 5 | 6 | A hash table mostly compatible with the C++11 *std::unordered_map* 7 | interface, but with much higher performance for many workloads. 8 | 9 | ## Implementation 10 | 11 | This hash table uses [open addressing][1] with [linear probing][2] and 12 | backshift deletion. Open addressing and linear probing minimize 13 | memory allocations and achieve high cache efficiency. Backshift deletion 14 | keeps performance high for delete-heavy workloads by not cluttering 15 | the hash table with [tombstones][3]. 16 | 17 | [1]: https://en.wikipedia.org/wiki/Open_addressing "Open addressing" 18 | [2]: https://en.wikipedia.org/wiki/Linear_probing "Linear probing" 19 | [3]: https://en.wikipedia.org/wiki/Lazy_deletion "Lazy deletion" 20 | 21 | ## Usage 22 | 23 | `HashMap` is mostly compatible with the C++11 container interface. The 24 | main differences are: 25 | 26 | - A key value to represent the empty key is required. 27 | - `Key` and `T` need to be default constructible. 28 | - Iterators are invalidated on all modifying operations. 29 | - It's invalid to perform any operations with the empty key. 30 | - Destructors are not called on `erase`. 31 | - Extensions for lookups using related key types. 
32 | 33 | Member functions: 34 | 35 | - `HashMap(size_type bucket_count, key_type empty_key);` 36 | 37 | Construct a `HashMap` with `bucket_count` buckets and `empty_key` as 38 | the empty key. 39 | 40 | The rest of the member functions are implemented as for 41 | [`std::unordered_map`](http://en.cppreference.com/w/cpp/container/unordered_map). 42 | 43 | ## Example 44 | 45 | ```cpp 46 | using namespace rigtorp; 47 | 48 | // Hash for using std::string as lookup key 49 | struct Hash { 50 | size_t operator()(int v) { return v * 7; } 51 | size_t operator()(const std::string &v) { return std::stoi(v) * 7; } 52 | }; 53 | 54 | // Equal comparison for using std::string as lookup key 55 | struct Equal { 56 | bool operator()(int lhs, int rhs) { return lhs == rhs; } 57 | bool operator()(int lhs, const std::string &rhs) { 58 | return lhs == std::stoi(rhs); 59 | } 60 | }; 61 | 62 | // Create a HashMap with 16 buckets and 0 as the empty key 63 | HashMap<int, int, Hash, Equal> hm(16, 0); 64 | hm.emplace(1, 1); 65 | hm[2] = 2; 66 | 67 | // Iterate and print key-value pairs 68 | for (const auto &e : hm) { 69 | std::cout << e.first << " = " << e.second << "\n"; 70 | } 71 | 72 | // Lookup using std::string 73 | std::cout << hm.at("1") << "\n"; 74 | 75 | // Erase entry 76 | hm.erase(1); 77 | ``` 78 | 79 | ## Benchmark 80 | 81 | A benchmark `src/HashMapBenchmark.cpp` is included with the sources. The 82 | benchmark simulates a delete-heavy workload where items are repeatedly inserted 83 | and deleted. 
84 | 85 | I ran this benchmark on the following configuration: 86 | 87 | - AMD Ryzen 9 3900X 88 | - Linux 5.8.4-200.fc32.x86_64 89 | - gcc (GCC) 10.2.1 20200723 (Red Hat 10.2.1-1) 90 | - Isolated a core complex (CCX) using `isolcpus` for running the benchmark 91 | 92 | When working set fits in L3 cache (`HashMapBenchmark -c 100000 -i 100000000`): 93 | 94 | | Implementation | mean ns/iter | max ns/iter | 95 | | ---------------------- | -----------: | ----------: | 96 | | HashMap | 24 | 1082 | 97 | | absl::flat_hash_map | 24 | 2074 | 98 | | google::dense_hash_map | 49 | 689846 | 99 | | std::unordered_map | 67 | 10299 | 100 | 101 | When working set is larger than L3 cache (`HashMapBenchmark -c 10000000 -i 1000000000`): 102 | 103 | | Implementation | mean ns/iter | max ns/iter | 104 | | ---------------------- | -----------: | ----------: | 105 | | HashMap | 75 | 19026 | 106 | | absl::flat_hash_map | 101 | 19848 | 107 | | google::dense_hash_map | 111 | 226083255 | 108 | | std::unordered_map | 408 | 22422 | 109 | 110 | 111 | ## Cited by 112 | 113 | HashMap has been cited by the following papers: 114 | - Koppl, Dominik. “Separate Chaining Meets Compact Hashing.” (2019). 115 | https://arxiv.org/abs/1905.00163 116 | 117 | ## About 118 | 119 | This project was created by [Erik Rigtorp](http://rigtorp.se) 120 | <[erik@rigtorp.se](mailto:erik@rigtorp.se)>. 121 | -------------------------------------------------------------------------------- /include/rigtorp/HashMap.h: -------------------------------------------------------------------------------- 1 | // © 2017-2020 Erik Rigtorp 2 | // SPDX-License-Identifier: MIT 3 | 4 | /* 5 | HashMap 6 | 7 | A high performance hash map. Uses open addressing with linear 8 | probing. 9 | 10 | Advantages: 11 | - Predictable performance. Doesn't use the allocator unless load factor 12 | grows beyond 50%. Linear probing ensures cache efficiency. 
13 | - Deletes items by rearranging items and marking slots as empty instead of 14 | marking items as deleted. This keeps performance high when there 15 | is a high rate of churn (many paired inserts and deletes) since otherwise 16 | most slots would be marked deleted and probing would end up scanning 17 | most of the table. 18 | 19 | Disadvantages: 20 | - Significant performance degradation at high load factors. 21 | - Maximum load factor hard coded to 50%, memory inefficient. 22 | - Memory is not reclaimed on erase. 23 | */ 24 | 25 | #pragma once 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace rigtorp { 36 | 37 | template , 38 | typename KeyEqual = std::equal_to, 39 | typename Allocator = std::allocator>> 40 | class HashMap { 41 | public: 42 | using key_type = Key; 43 | using mapped_type = T; 44 | using value_type = std::pair; 45 | using size_type = std::size_t; 46 | using hasher = Hash; 47 | using key_equal = KeyEqual; 48 | using allocator_type = Allocator; 49 | using reference = value_type &; 50 | using const_reference = const value_type &; 51 | using buckets = std::vector; 52 | 53 | template struct hm_iterator { 54 | using difference_type = std::ptrdiff_t; 55 | using value_type = IterVal; 56 | using pointer = value_type *; 57 | using reference = value_type &; 58 | using iterator_category = std::forward_iterator_tag; 59 | 60 | bool operator==(const hm_iterator &other) const { 61 | return other.hm_ == hm_ && other.idx_ == idx_; 62 | } 63 | bool operator!=(const hm_iterator &other) const { 64 | return !(other == *this); 65 | } 66 | 67 | hm_iterator &operator++() { 68 | ++idx_; 69 | advance_past_empty(); 70 | return *this; 71 | } 72 | 73 | reference operator*() const { return hm_->buckets_[idx_]; } 74 | pointer operator->() const { return &hm_->buckets_[idx_]; } 75 | 76 | private: 77 | explicit hm_iterator(ContT *hm) : hm_(hm) { advance_past_empty(); } 78 | explicit hm_iterator(ContT 
*hm, size_type idx) : hm_(hm), idx_(idx) {} 79 | template 80 | hm_iterator(const hm_iterator &other) 81 | : hm_(other.hm_), idx_(other.idx_) {} 82 | 83 | void advance_past_empty() { 84 | while (idx_ < hm_->buckets_.size() && 85 | key_equal()(hm_->buckets_[idx_].first, hm_->empty_key_)) { 86 | ++idx_; 87 | } 88 | } 89 | 90 | ContT *hm_ = nullptr; 91 | typename ContT::size_type idx_ = 0; 92 | friend ContT; 93 | }; 94 | 95 | using iterator = hm_iterator; 96 | using const_iterator = hm_iterator; 97 | 98 | public: 99 | HashMap(size_type bucket_count, key_type empty_key, 100 | const allocator_type &alloc = allocator_type()) 101 | : empty_key_(empty_key), buckets_(alloc) { 102 | size_t pow2 = 1; 103 | while (pow2 < bucket_count) { 104 | pow2 <<= 1; 105 | } 106 | buckets_.resize(pow2, std::make_pair(empty_key_, T())); 107 | } 108 | 109 | HashMap(const HashMap &other, size_type bucket_count) 110 | : HashMap(bucket_count, other.empty_key_, other.get_allocator()) { 111 | for (auto it = other.begin(); it != other.end(); ++it) { 112 | insert(*it); 113 | } 114 | } 115 | 116 | allocator_type get_allocator() const noexcept { 117 | return buckets_.get_allocator(); 118 | } 119 | 120 | // Iterators 121 | iterator begin() noexcept { return iterator(this); } 122 | 123 | const_iterator begin() const noexcept { return const_iterator(this); } 124 | 125 | const_iterator cbegin() const noexcept { return const_iterator(this); } 126 | 127 | iterator end() noexcept { return iterator(this, buckets_.size()); } 128 | 129 | const_iterator end() const noexcept { 130 | return const_iterator(this, buckets_.size()); 131 | } 132 | 133 | const_iterator cend() const noexcept { 134 | return const_iterator(this, buckets_.size()); 135 | } 136 | 137 | // Capacity 138 | bool empty() const noexcept { return size() == 0; } 139 | 140 | size_type size() const noexcept { return size_; } 141 | 142 | size_type max_size() const noexcept { return buckets_.max_size() / 2; } 143 | 144 | // Modifiers 145 | void clear() 
noexcept { 146 | for (auto &b : buckets_) { 147 | if (b.first != empty_key_) { 148 | b.first = empty_key_; 149 | } 150 | } 151 | size_ = 0; 152 | } 153 | 154 | std::pair insert(const value_type &value) { 155 | return emplace_impl(value.first, value.second); 156 | } 157 | 158 | std::pair insert(value_type &&value) { 159 | return emplace_impl(value.first, std::move(value.second)); 160 | } 161 | 162 | template 163 | std::pair emplace(Args &&... args) { 164 | return emplace_impl(std::forward(args)...); 165 | } 166 | 167 | void erase(iterator it) { erase_impl(it); } 168 | 169 | size_type erase(const key_type &key) { return erase_impl(key); } 170 | 171 | template size_type erase(const K &x) { return erase_impl(x); } 172 | 173 | void swap(HashMap &other) noexcept { 174 | std::swap(buckets_, other.buckets_); 175 | std::swap(size_, other.size_); 176 | std::swap(empty_key_, other.empty_key_); 177 | } 178 | 179 | // Lookup 180 | mapped_type &at(const key_type &key) { return at_impl(key); } 181 | 182 | template mapped_type &at(const K &x) { return at_impl(x); } 183 | 184 | const mapped_type &at(const key_type &key) const { return at_impl(key); } 185 | 186 | template const mapped_type &at(const K &x) const { 187 | return at_impl(x); 188 | } 189 | 190 | mapped_type &operator[](const key_type &key) { 191 | return emplace_impl(key).first->second; 192 | } 193 | 194 | size_type count(const key_type &key) const { return count_impl(key); } 195 | 196 | template size_type count(const K &x) const { 197 | return count_impl(x); 198 | } 199 | 200 | iterator find(const key_type &key) { return find_impl(key); } 201 | 202 | template iterator find(const K &x) { return find_impl(x); } 203 | 204 | const_iterator find(const key_type &key) const { return find_impl(key); } 205 | 206 | template const_iterator find(const K &x) const { 207 | return find_impl(x); 208 | } 209 | 210 | // Bucket interface 211 | size_type bucket_count() const noexcept { return buckets_.size(); } 212 | 213 | size_type 
max_bucket_count() const noexcept { return buckets_.max_size(); } 214 | 215 | // Hash policy 216 | void rehash(size_type count) { 217 | count = std::max(count, size() * 2); 218 | HashMap other(*this, count); 219 | swap(other); 220 | } 221 | 222 | void reserve(size_type count) { 223 | if (count * 2 > buckets_.size()) { 224 | rehash(count * 2); 225 | } 226 | } 227 | 228 | // Observers 229 | hasher hash_function() const { return hasher(); } 230 | 231 | key_equal key_eq() const { return key_equal(); } 232 | 233 | private: 234 | template 235 | std::pair emplace_impl(const K &key, Args &&... args) { 236 | assert(!key_equal()(empty_key_, key) && "empty key shouldn't be used"); 237 | reserve(size_ + 1); 238 | for (size_t idx = key_to_idx(key);; idx = probe_next(idx)) { 239 | if (key_equal()(buckets_[idx].first, empty_key_)) { 240 | buckets_[idx].second = mapped_type(std::forward(args)...); 241 | buckets_[idx].first = key; 242 | size_++; 243 | return {iterator(this, idx), true}; 244 | } else if (key_equal()(buckets_[idx].first, key)) { 245 | return {iterator(this, idx), false}; 246 | } 247 | } 248 | } 249 | 250 | void erase_impl(iterator it) { 251 | size_t bucket = it.idx_; 252 | for (size_t idx = probe_next(bucket);; idx = probe_next(idx)) { 253 | if (key_equal()(buckets_[idx].first, empty_key_)) { 254 | buckets_[bucket].first = empty_key_; 255 | size_--; 256 | return; 257 | } 258 | size_t ideal = key_to_idx(buckets_[idx].first); 259 | if (diff(bucket, ideal) < diff(idx, ideal)) { 260 | // swap, bucket is closer to ideal than idx 261 | buckets_[bucket] = buckets_[idx]; 262 | bucket = idx; 263 | } 264 | } 265 | } 266 | 267 | template size_type erase_impl(const K &key) { 268 | auto it = find_impl(key); 269 | if (it != end()) { 270 | erase_impl(it); 271 | return 1; 272 | } 273 | return 0; 274 | } 275 | 276 | template mapped_type &at_impl(const K &key) { 277 | iterator it = find_impl(key); 278 | if (it != end()) { 279 | return it->second; 280 | } 281 | throw 
std::out_of_range("HashMap::at"); 282 | } 283 | 284 | template const mapped_type &at_impl(const K &key) const { 285 | return const_cast(this)->at_impl(key); 286 | } 287 | 288 | template size_t count_impl(const K &key) const { 289 | return find_impl(key) == end() ? 0 : 1; 290 | } 291 | 292 | template iterator find_impl(const K &key) { 293 | assert(!key_equal()(empty_key_, key) && "empty key shouldn't be used"); 294 | for (size_t idx = key_to_idx(key);; idx = probe_next(idx)) { 295 | if (key_equal()(buckets_[idx].first, key)) { 296 | return iterator(this, idx); 297 | } 298 | if (key_equal()(buckets_[idx].first, empty_key_)) { 299 | return end(); 300 | } 301 | } 302 | } 303 | 304 | template const_iterator find_impl(const K &key) const { 305 | return const_cast(this)->find_impl(key); 306 | } 307 | 308 | template 309 | size_t key_to_idx(const K &key) const noexcept(noexcept(hasher()(key))) { 310 | const size_t mask = buckets_.size() - 1; 311 | return hasher()(key) & mask; 312 | } 313 | 314 | size_t probe_next(size_t idx) const noexcept { 315 | const size_t mask = buckets_.size() - 1; 316 | return (idx + 1) & mask; 317 | } 318 | 319 | size_t diff(size_t a, size_t b) const noexcept { 320 | const size_t mask = buckets_.size() - 1; 321 | return (buckets_.size() + (a - b)) & mask; 322 | } 323 | 324 | private: 325 | key_type empty_key_; 326 | buckets buckets_; 327 | size_t size_ = 0; 328 | }; 329 | } // namespace rigtorp 330 | -------------------------------------------------------------------------------- /src/HashMapBenchmark.cpp: -------------------------------------------------------------------------------- 1 | // © 2017-2020 Erik Rigtorp 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include // _mm_crc32_u64 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #if __has_include() 13 | #include 14 | #endif 15 | 16 | #if __has_include() 17 | #include 18 | #endif 19 | 20 | #include 21 | 22 | #if __has_include() 23 | #include // mmap, munmap 24 | 
#endif 25 | // Allocator backed by anonymous mmap regions requested with MAP_HUGETLB; used only when MAP_POPULATE and MAP_HUGETLB are defined 26 | #if defined(MAP_POPULATE) && defined(MAP_HUGETLB) 27 | template struct huge_page_allocator { 28 | constexpr static std::size_t huge_page_size = 1 << 21; // 2 MiB 29 | using value_type = T; 30 | 31 | huge_page_allocator() = default; 32 | template 33 | constexpr huge_page_allocator(const huge_page_allocator &) noexcept {} 34 | // Rounds a byte count n up to a multiple of huge_page_size 35 | size_t round_to_huge_page_size(size_t n) { 36 | return (((n - 1) / huge_page_size) + 1) * huge_page_size; 37 | } 38 | // Maps room for n objects of type T; throws std::bad_alloc on size overflow or mmap failure 39 | T *allocate(std::size_t n) { 40 | if (n > std::numeric_limits::max() / sizeof(T)) { 41 | throw std::bad_alloc(); 42 | } 43 | auto p = static_cast(mmap( 44 | nullptr, round_to_huge_page_size(n * sizeof(T)), PROT_READ | PROT_WRITE, 45 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB, -1, 0)); 46 | if (p == MAP_FAILED) { 47 | throw std::bad_alloc(); 48 | } 49 | return p; 50 | } 51 | // Unmaps the same rounded byte length that allocate() mapped for n objects 52 | void deallocate(T *p, std::size_t n) { 53 | munmap(p, round_to_huge_page_size(n * sizeof(T))); // n counts elements, not bytes 54 | } 55 | }; 56 | #else 57 | template using huge_page_allocator = std::allocator; 58 | #endif 59 | 60 | using namespace std::chrono; 61 | using namespace rigtorp; 62 | 63 | int main(int argc, char *argv[]) { 64 | (void)argc, (void)argv; 65 | 66 | size_t count = 10000000; 67 | size_t iters = 100000000; 68 | int type = -1; 69 | 70 | int opt; 71 | while ((opt = getopt(argc, argv, "i:c:t:")) != -1) { 72 | switch (opt) { 73 | case 'i': 74 | iters = std::stol(optarg); 75 | break; 76 | case 'c': 77 | count = std::stol(optarg); 78 | break; 79 | case 't': 80 | type = std::stoi(optarg); 81 | break; 82 | default: 83 | goto usage; 84 | } 85 | } 86 | 87 | if (optind != argc) { 88 | usage: 89 | std::cerr << "HashMapBenchmark © 2020 Erik Rigtorp \n" 90 | "usage: HashMapBenchmark [-c count] [-i iters] [-t 1|2|3|4]\n" 91 | << std::endl; 92 | exit(1); 93 | } 94 | 95 | using key = size_t; 96 | struct value { 97 | char buf[24]; 98 | }; 99 | 100 | struct hash { 101 | size_t operator()(size_t h) const noexcept { return _mm_crc32_u64(0, h); } 102 | }; 
103 | 104 | auto b = [&](const char *n, auto &m) { 105 | std::minstd_rand gen(0); 106 | std::uniform_int_distribution ud(2, count); 107 | 108 | for (size_t i = 0; i < count; ++i) { 109 | const int val = ud(gen); 110 | m.insert({val, {}}); 111 | } 112 | 113 | auto start = steady_clock::now(); 114 | for (size_t i = 0; i < iters; ++i) { 115 | const int val = ud(gen); 116 | const auto it = m.find(val); 117 | if (it == m.end()) { 118 | m.insert({val, {}}); 119 | } else { 120 | m.erase(it); 121 | } 122 | } 123 | auto stop = steady_clock::now(); 124 | auto duration = stop - start; 125 | 126 | nanoseconds max = {}; 127 | for (size_t i = 0; i < iters; ++i) { 128 | const int val = ud(gen); 129 | auto start = steady_clock::now(); 130 | const auto it = m.find(val); 131 | if (it == m.end()) { 132 | m.insert({val, {}}); 133 | } else { 134 | m.erase(it); 135 | } 136 | auto stop = steady_clock::now(); 137 | max = std::max(max, stop - start); 138 | } 139 | 140 | std::cout << n << ": mean " 141 | << duration_cast(duration).count() / iters 142 | << " ns/iter, max " << max.count() << " ns/iter" << std::endl; 143 | }; 144 | 145 | if (type == -1 || type == 1) { 146 | HashMap, 147 | huge_page_allocator>> 148 | hm(2 * count, 0); 149 | b("HashMap", hm); 150 | } 151 | 152 | #if __has_include() 153 | if (type == -1 || type == 2) { 154 | // Couldn't get it to work with the huge_page_allocator 155 | google::dense_hash_map hm(count); 156 | hm.set_empty_key(0); 157 | hm.set_deleted_key(1); 158 | b("google::dense_hash_map", hm); 159 | } 160 | #endif 161 | 162 | #if __has_include() 163 | if (type == -1 || type == 3) { 164 | absl::flat_hash_map, 165 | huge_page_allocator>> 166 | hm; 167 | hm.reserve(count); 168 | b("absl::flat_hash_map", hm); 169 | } 170 | #endif 171 | 172 | if (type == -1 || type == 4) { 173 | std::unordered_map hm; 174 | hm.reserve(count); 175 | b("std::unordered_map", hm); 176 | } 177 | 178 | return 0; 179 | } 180 | 
-------------------------------------------------------------------------------- /src/HashMapExample.cpp: -------------------------------------------------------------------------------- 1 | // © 2017-2020 Erik Rigtorp 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | int main(int argc, char *argv[]) { 10 | (void)argc, (void)argv; 11 | 12 | using namespace rigtorp; 13 | 14 | // Hash for using std::string as lookup key 15 | struct Hash { 16 | size_t operator()(int v) { return v * 7; } 17 | size_t operator()(const std::string &v) { return std::stoi(v) * 7; } 18 | }; 19 | 20 | // Equal comparison for using std::string as lookup key 21 | struct Equal { 22 | bool operator()(int lhs, int rhs) { return lhs == rhs; } 23 | bool operator()(int lhs, const std::string &rhs) { 24 | return lhs == std::stoi(rhs); 25 | } 26 | }; 27 | 28 | // Create a HashMap with 16 buckets and 0 as the empty key 29 | HashMap hm(16, 0); 30 | hm.emplace(1, 1); 31 | hm[2] = 2; 32 | 33 | // Iterate and print key-value pairs 34 | for (const auto &e : hm) { 35 | std::cout << e.first << " = " << e.second << "\n"; 36 | } 37 | 38 | // Lookup using std::string 39 | std::cout << hm.at("1") << "\n"; 40 | 41 | // Erase entry 42 | hm.erase(1); 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /src/HashMapTest.cpp: -------------------------------------------------------------------------------- 1 | // © 2017-2020 Erik Rigtorp 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using namespace rigtorp; 11 | 12 | static bool ok = true; 13 | 14 | #define EXPECT(expr) \ 15 | ([](bool res) { \ 16 | if (!res) { \ 17 | fprintf(stdout, "FAILED %s:%i: %s\n", __FILE__, __LINE__, #expr); \ 18 | } \ 19 | ok = ok && res; \ 20 | }(static_cast(expr))) 21 | #define THROWS(expr) \ 22 | ([&]() { \ 23 | try { \ 24 | expr; \ 25 | } catch (...) 
{ \ 26 | return true; \ 27 | } \ 28 | return false; \ 29 | }()) 30 | 31 | struct Hash { 32 | size_t operator()(int v) { return v * 7; } 33 | size_t operator()(const std::string &v) { return std::stoi(v) * 7; } 34 | }; 35 | 36 | struct Equal { 37 | bool operator()(int lhs, int rhs) { return lhs == rhs; } 38 | bool operator()(int lhs, const std::string &rhs) { 39 | return lhs == std::stoi(rhs); 40 | } 41 | }; 42 | 43 | int main(int argc, char *argv[]) { 44 | (void)argc, (void)argv; 45 | 46 | // Correct types 47 | { 48 | HashMap hm(16, 0); 49 | const auto &chm = hm; 50 | 51 | // Iterators 52 | static_assert( 53 | std::is_same::value, ""); 54 | static_assert(std::is_same::value, 56 | ""); 57 | static_assert(std::is_same::value, 59 | ""); 60 | static_assert( 61 | std::is_same::value, ""); 62 | static_assert( 63 | std::is_same::value, 64 | ""); 65 | static_assert( 66 | std::is_same::value, 67 | ""); 68 | 69 | // Capacity 70 | static_assert(std::is_same::value, ""); 71 | static_assert(std::is_same::value, ""); 72 | static_assert(std::is_same::value, ""); 73 | static_assert(std::is_same::value, ""); 74 | static_assert(std::is_same::value, ""); 75 | static_assert(std::is_same::value, ""); 76 | 77 | // Modifiers 78 | const auto &p = std::make_pair(1, 1); 79 | static_assert(std::is_same>::value, 81 | ""); 82 | static_assert(std::is_same>::value, 84 | ""); 85 | static_assert(std::is_same>::value, 87 | ""); 88 | static_assert(std::is_same::value, 89 | ""); 90 | static_assert(std::is_same::value, ""); 91 | static_assert(std::is_same::value, ""); 92 | 93 | // Lookup 94 | 95 | // at() 96 | static_assert(std::is_same::value, ""); 97 | static_assert(std::is_same::value, ""); 98 | static_assert(std::is_same::value, ""); 99 | static_assert(std::is_same::value, ""); 100 | 101 | // operator[]() 102 | static_assert(std::is_same::value, ""); 103 | 104 | // count() 105 | static_assert(std::is_same::value, ""); 106 | static_assert(std::is_same::value, ""); 107 | 
static_assert(std::is_same::value, ""); 108 | static_assert(std::is_same::value, ""); 109 | 110 | // find() 111 | static_assert( 112 | std::is_same::value, ""); 113 | static_assert( 114 | std::is_same::value, 115 | ""); 116 | static_assert(std::is_same::value, 118 | ""); 119 | static_assert(std::is_same::value, 121 | ""); 122 | 123 | // Bucket interface 124 | 125 | // bucket_count() 126 | static_assert(std::is_same::value, ""); 127 | static_assert(std::is_same::value, 128 | ""); 129 | 130 | // max_bucket_count() 131 | static_assert(std::is_same::value, 132 | ""); 133 | static_assert(std::is_same::value, 134 | ""); 135 | 136 | // Hash policy 137 | 138 | // rehash() 139 | static_assert(std::is_same::value, ""); 140 | 141 | // reserve() 142 | static_assert(std::is_same::value, ""); 143 | 144 | // Observers 145 | 146 | // hash_function() 147 | static_assert( 148 | std::is_same::value, 149 | ""); 150 | static_assert(std::is_same::value, 152 | ""); 153 | 154 | // key_eq() 155 | static_assert( 156 | std::is_same::value, 157 | ""); 158 | static_assert( 159 | std::is_same::value, 160 | ""); 161 | } 162 | 163 | // Constructors 164 | { 165 | // HashMap(const HashMap&) 166 | HashMap hm(16, 0); 167 | hm[1] = 1; 168 | HashMap hm2(hm); 169 | EXPECT(!hm2.empty()); 170 | EXPECT(hm2.size() == 1); 171 | EXPECT(hm2[1] == 1); 172 | } 173 | 174 | { 175 | // HashMap(HashMap&&) 176 | HashMap hm(16, 0); 177 | hm[1] = 1; 178 | HashMap hm2(std::move(hm)); 179 | EXPECT(!hm2.empty()); 180 | EXPECT(hm2.size() == 1); 181 | EXPECT(hm2[1] == 1); 182 | } 183 | 184 | { 185 | // operator=(const HashMap&) 186 | HashMap hm(16, 0); 187 | hm[1] = 1; 188 | HashMap hm2(16, 0); 189 | hm2.operator=(hm); 190 | EXPECT(!hm2.empty()); 191 | EXPECT(hm2.size() == 1); 192 | EXPECT(hm2[1] == 1); 193 | } 194 | 195 | { 196 | // operator=(HashMap&&) 197 | HashMap hm(16, 0); 198 | hm[1] = 1; 199 | HashMap hm2(16, 0); 200 | hm2.operator=(std::move(hm)); 201 | EXPECT(!hm2.empty()); 202 | EXPECT(hm2.size() == 1); 203 | 
EXPECT(hm2[1] == 1); 204 | } 205 | 206 | // Iterators 207 | { 208 | HashMap hm(16, 0); 209 | const auto &chm = hm; 210 | 211 | EXPECT(hm.begin() == hm.end()); 212 | EXPECT(chm.begin() == chm.end()); 213 | EXPECT(hm.cbegin() == hm.cend()); 214 | EXPECT(hm.cbegin() == chm.begin()); 215 | EXPECT(hm.cend() == chm.end()); 216 | 217 | EXPECT(!(hm.begin() != hm.end())); 218 | EXPECT(!(chm.begin() != chm.end())); 219 | EXPECT(!(hm.cbegin() != hm.cend())); 220 | EXPECT(!(hm.cbegin() != chm.begin())); 221 | EXPECT(!(hm.cend() != chm.end())); 222 | 223 | const auto cit = hm.begin(); 224 | EXPECT(cit == hm.end()); 225 | EXPECT(!(cit != hm.end())); 226 | 227 | for (int i = 1; i < 100; ++i) { 228 | hm[i] = i; 229 | } 230 | 231 | std::array visited = {}; 232 | for (auto it = hm.begin(); it != hm.end(); ++it) { 233 | visited[it->first] = true; 234 | } 235 | 236 | for (int i = 1; i < 100; ++i) { 237 | EXPECT(visited[i]); 238 | } 239 | 240 | // Test for iterator traits 241 | EXPECT(std::all_of(hm.begin(), hm.end(), 242 | [](const auto &item) { return item.second > 0; })); 243 | } 244 | 245 | // Capacity 246 | { 247 | HashMap hm(16, 0); 248 | const auto &chm = hm; 249 | EXPECT(chm.empty()); 250 | EXPECT(chm.size() == 0); 251 | EXPECT(chm.max_size() > 0); 252 | hm[1] = 1; 253 | EXPECT(!chm.empty()); 254 | EXPECT(chm.size() == 1); 255 | } 256 | 257 | // Modifiers 258 | { 259 | // clear() 260 | HashMap hm(16, 0); 261 | hm[1] = 1; 262 | hm.clear(); 263 | EXPECT(hm.empty()); 264 | EXPECT(hm.size() == 0); 265 | EXPECT(hm.begin() == hm.end()); 266 | EXPECT(hm.cbegin() == hm.cend()); 267 | } 268 | 269 | { 270 | // insert() 271 | HashMap hm(16, 0); 272 | auto res = hm.insert({1, 1}); // xvalue 273 | EXPECT(!hm.empty()); 274 | EXPECT(hm.size() == 1); 275 | EXPECT(hm.begin() != hm.end()); 276 | EXPECT(hm.cbegin() != hm.cend()); 277 | EXPECT(res.first != hm.end()); 278 | EXPECT(res.first->first == 1); 279 | EXPECT(res.first->second == 1); 280 | EXPECT(res.second); 281 | const auto v = 
std::make_pair(1, 2); 282 | auto res2 = hm.insert(v); // rvalue 283 | EXPECT(hm.size() == 1); 284 | EXPECT(res2.first == res.first); 285 | EXPECT(res2.first->first == 1); 286 | EXPECT(res2.first->second == 1); 287 | EXPECT(!res2.second); 288 | } 289 | 290 | { 291 | // emplace() 292 | HashMap hm(16, 0); 293 | auto res = hm.emplace(1, 1); 294 | EXPECT(!hm.empty()); 295 | EXPECT(hm.size() == 1); 296 | EXPECT(hm.begin() != hm.end()); 297 | EXPECT(hm.cbegin() != hm.cend()); 298 | EXPECT(res.first != hm.end()); 299 | EXPECT(res.first->first == 1); 300 | EXPECT(res.first->second == 1); 301 | EXPECT(res.second); 302 | auto res2 = hm.emplace(1, 2); 303 | EXPECT(hm.size() == 1); 304 | EXPECT(res2.first == res.first); 305 | EXPECT(res2.first->first == 1); 306 | EXPECT(res2.first->second == 1); 307 | EXPECT(!res2.second); 308 | } 309 | 310 | { 311 | // erase(iterator) 312 | HashMap hm(16, 0); 313 | auto res = hm.emplace(1, 1); 314 | hm.erase(res.first); 315 | EXPECT(hm.empty()); 316 | EXPECT(hm.size() == 0); 317 | EXPECT(hm.begin() == hm.end()); 318 | EXPECT(hm.cbegin() == hm.cend()); 319 | } 320 | 321 | { 322 | // erase(const key_type&) 323 | HashMap hm(16, 0); 324 | EXPECT(hm.erase(1) == 0); 325 | hm[1] = 1; 326 | EXPECT(hm.erase(1) == 1); 327 | EXPECT(hm.empty()); 328 | EXPECT(hm.size() == 0); 329 | EXPECT(hm.begin() == hm.end()); 330 | EXPECT(hm.cbegin() == hm.cend()); 331 | } 332 | 333 | { 334 | // template erase(const K&) 335 | HashMap hm(16, 0); 336 | EXPECT(hm.erase("1") == 0); 337 | hm[1] = 1; 338 | EXPECT(hm.erase("1") == 1); 339 | EXPECT(hm.empty()); 340 | EXPECT(hm.size() == 0); 341 | EXPECT(hm.begin() == hm.end()); 342 | EXPECT(hm.cbegin() == hm.cend()); 343 | } 344 | 345 | { 346 | // swap() 347 | HashMap hm1(16, 0), hm2(16, 0); 348 | hm1[1] = 1; 349 | hm2.swap(hm1); 350 | EXPECT(hm1.empty()); 351 | EXPECT(hm1.size() == 0); 352 | EXPECT(hm2.size() == 1); 353 | EXPECT(hm2[1] == 1); 354 | std::swap(hm1, hm2); 355 | EXPECT(hm1.size() == 1); 356 | EXPECT(hm1[1] == 1); 
357 | EXPECT(hm2.empty()); 358 | EXPECT(hm2.size() == 0); 359 | } 360 | 361 | // Lookup 362 | { 363 | // at(const key_type&) 364 | HashMap hm(16, 0); 365 | const auto &chm = hm; 366 | hm[1] = 1; 367 | EXPECT(hm.at(1) == 1); 368 | EXPECT(chm.at(1) == 1); 369 | hm.at(1) = 2; 370 | EXPECT(hm.at(1) == 2); 371 | EXPECT(chm.at(1) == 2); 372 | EXPECT(THROWS(hm.at(2))); 373 | EXPECT(THROWS(chm.at(2))); 374 | } 375 | 376 | { 377 | // template at(const K&) 378 | HashMap hm(16, 0); 379 | const auto &chm = hm; 380 | hm[1] = 1; 381 | EXPECT(hm.at("1") == 1); 382 | EXPECT(chm.at("1") == 1); 383 | hm.at("1") = 2; 384 | EXPECT(hm.at("1") == 2); 385 | EXPECT(chm.at("1") == 2); 386 | EXPECT(THROWS(hm.at("2"))); 387 | EXPECT(THROWS(chm.at("2"))); 388 | } 389 | 390 | { 391 | // operator[](const key_type&) 392 | HashMap hm(16, 0); 393 | hm[1] = 1; 394 | EXPECT(!hm.empty()); 395 | EXPECT(hm.size() == 1); 396 | EXPECT(hm.begin() != hm.end()); 397 | EXPECT(hm.cbegin() != hm.cend()); 398 | EXPECT(hm[1] == 1); 399 | } 400 | 401 | { 402 | // count(const key_type&) 403 | HashMap hm(16, 0); 404 | const auto &chm = hm; 405 | hm[1] = 1; 406 | EXPECT(hm.count(1) == 1); 407 | EXPECT(hm.count(2) == 0); 408 | EXPECT(chm.count(1) == 1); 409 | EXPECT(chm.count(2) == 0); 410 | } 411 | 412 | { 413 | // template count(const K&) 414 | HashMap hm(16, 0); 415 | const auto &chm = hm; 416 | hm[1] = 1; 417 | EXPECT(hm.count("1") == 1); 418 | EXPECT(hm.count("2") == 0); 419 | EXPECT(chm.count("1") == 1); 420 | EXPECT(chm.count("2") == 0); 421 | } 422 | 423 | { 424 | // find(const key_type&) 425 | HashMap hm(16, 0); 426 | const auto &chm = hm; 427 | hm[1] = 1; 428 | { 429 | auto it = hm.find(1); 430 | EXPECT(it != hm.end()); 431 | EXPECT(it->first == 1); 432 | EXPECT(it->second == 1); 433 | it = hm.find(2); 434 | EXPECT(it == hm.end()); 435 | } 436 | { 437 | auto it = chm.find(1); 438 | EXPECT(it != chm.end()); 439 | EXPECT(it->first == 1); 440 | EXPECT(it->second == 1); 441 | it = chm.find(2); 442 | EXPECT(it 
== chm.end()); 443 | } 444 | } 445 | 446 | { 447 | // template find(const K&) 448 | HashMap hm(16, 0); 449 | const auto &chm = hm; 450 | hm[1] = 1; 451 | { 452 | auto it = hm.find("1"); 453 | EXPECT(it != hm.end()); 454 | EXPECT(it->first == 1); 455 | EXPECT(it->second == 1); 456 | it = hm.find("2"); 457 | EXPECT(it == hm.end()); 458 | } 459 | { 460 | auto it = chm.find("1"); 461 | EXPECT(it != chm.end()); 462 | EXPECT(it->first == 1); 463 | EXPECT(it->second == 1); 464 | it = chm.find("2"); 465 | EXPECT(it == chm.end()); 466 | } 467 | } 468 | 469 | // Bucket interface 470 | { 471 | // bucket_count() 472 | HashMap hm(16, 0); 473 | const auto &chm = hm; 474 | EXPECT(hm.bucket_count() == 16); 475 | EXPECT(chm.bucket_count() == 16); 476 | } 477 | 478 | { 479 | // max_bucket_count() 480 | HashMap hm(16, 0); 481 | const auto &chm = hm; 482 | EXPECT(hm.max_bucket_count() > 0); 483 | EXPECT(chm.max_bucket_count() > 0); 484 | } 485 | 486 | // Hash policy 487 | { 488 | HashMap hm(2, 0); 489 | const auto &chm = hm; 490 | hm.emplace(1, 1); 491 | hm.emplace(2, 2); 492 | EXPECT(hm.bucket_count() == 4); 493 | EXPECT(chm.bucket_count() == 4); 494 | hm.rehash(2); 495 | EXPECT(hm.bucket_count() == 4); 496 | EXPECT(chm.bucket_count() == 4); 497 | hm.rehash(16); 498 | EXPECT(hm.bucket_count() == 16); 499 | EXPECT(chm.bucket_count() == 16); 500 | hm.reserve(2); 501 | EXPECT(hm.bucket_count() == 16); 502 | EXPECT(chm.bucket_count() == 16); 503 | hm.reserve(16); 504 | EXPECT(hm.bucket_count() == 32); 505 | EXPECT(chm.bucket_count() == 32); 506 | } 507 | 508 | if (!ok) { 509 | fprintf(stderr, "FAILED!\n"); 510 | } 511 | return !ok; 512 | } 513 | --------------------------------------------------------------------------------