├── .gitignore ├── tests ├── main.cpp ├── CMakeLists.txt ├── loghelper.h ├── test_bits_array.cpp ├── sample_fph.cpp └── test_fph_table.cpp ├── show ├── 3990x │ └── figs │ │ └── ,avg_hit_find_with_rehash.png └── m1-max │ └── figs │ └── ,avg_hit_find_with_rehash.png ├── CMakeLists.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | cmake-*/ 3 | build/ 4 | .DS_Store 5 | 6 | -------------------------------------------------------------------------------- /tests/main.cpp: -------------------------------------------------------------------------------- 1 | void TestSet(); 2 | void TestFPH(); 3 | void TestMapPerformance(); 4 | 5 | int main() { 6 | TestSet(); 7 | TestFPH(); 8 | TestMapPerformance(); 9 | return 0; 10 | } -------------------------------------------------------------------------------- /show/3990x/figs/,avg_hit_find_with_rehash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/renzibei/fph-table/master/show/3990x/figs/,avg_hit_find_with_rehash.png -------------------------------------------------------------------------------- /show/m1-max/figs/,avg_hit_find_with_rehash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/renzibei/fph-table/master/show/m1-max/figs/,avg_hit_find_with_rehash.png -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(flash_perfect_hash) 3 | 4 | if(NOT MSVC) 5 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") 6 | endif() 7 | 8 | add_library(fph_table INTERFACE) 9 | 10 | add_library(fph::fph_table ALIAS fph_table) 11 | 12 | 13 | target_include_directories(fph_table INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include") 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(fph_table_tests) 4 | 5 | set(CMAKE_CXX_STANDARD 17) 6 | 7 | set(MEMCHECK_FLAGS "-fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls") 8 | set(MEMCHECK_FLAGS "") 9 | 10 | if(NOT MSVC) 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-format-security ${MEMCHECK_FLAGS}") 12 | endif() 13 | 14 | add_executable(fph_table_tests main.cpp test_fph_table.cpp) 15 | 16 | add_executable(sample_fph sample_fph.cpp) 17 | 18 | add_executable(test_bits_array test_bits_array.cpp) 19 | 20 | add_subdirectory(.. ${CMAKE_CURRENT_BINARY_DIR}/fph-table) 21 | 22 | target_link_libraries(fph_table_tests fph::fph_table) 23 | target_link_libraries(sample_fph fph::fph_table) 24 | target_link_libraries(test_bits_array fph::fph_table) 25 | -------------------------------------------------------------------------------- /tests/loghelper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // #define LOG_HELPER_LEVEL_LOG_TRACE 7 | #define LOG_HELPER_LEVEL_LOG_DEBUG 8 | 9 | 10 | 11 | enum LogType{Error, Warn, Info, Debug, Trace}; 12 | 13 | class LogHelper { 14 | 15 | public: 16 | 17 | /** 18 | * A printf style log function, can used glog as underlying output tool 19 | * @tparam Args 20 | * @param type 21 | * @param format 22 | * @param args 23 | */ 24 | template 25 | static void log(LogType type, const char * format, Args&&... 
args) { 26 | #ifdef LOG_TRACE 27 | if(type <= Trace) 28 | #elif defined LOG_HELPER_LEVEL_LOG_DEBUG 29 | if(type <= Debug) 30 | #elif defined LOG_INFO 31 | if(type <= Info) 32 | #elif defined LOG_WARN 33 | if(type <= Warn) 34 | #endif 35 | { 36 | if (type == Error) { 37 | fprintf(stderr, "\033[40;31m"); 38 | } 39 | else if (type == Warn) { 40 | fprintf(stderr, "\033[40;33m"); 41 | } 42 | fprintf(stderr, format, std::forward(args)...); 43 | if (type == Error || type == Warn) { 44 | fprintf(stderr, "\033[0m"); 45 | } 46 | fprintf(stderr, "\n"); 47 | 48 | } 49 | } 50 | }; 51 | -------------------------------------------------------------------------------- /tests/test_bits_array.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "fph/meta_fph_table.h" 7 | 8 | 9 | uint64_t FastRand(uint64_t x) { 10 | x ^= x >> 33U; 11 | x *= UINT64_C(0xff51afd7ed558ccd); 12 | x ^= x >> 33U; 13 | return x; 14 | } 15 | 16 | int main() { 17 | 18 | constexpr size_t TEST_ITEM_SIZE = 1ULL << 22; 19 | constexpr size_t ITEM_BIT_SIZE = 4UL; 20 | 21 | using UnderlyingEntry = uint8_t; 22 | constexpr size_t UnderlyingEntrySize = TEST_ITEM_SIZE * (sizeof(UnderlyingEntry) * 8UL / ITEM_BIT_SIZE); 23 | static UnderlyingEntry underlying_arr[UnderlyingEntrySize]; 24 | static constexpr UnderlyingEntry ITEM_MASK = 25 | fph::meta::detail::GenBitMask(ITEM_BIT_SIZE); 26 | static uint32_t bench_table[TEST_ITEM_SIZE]; 27 | memset(underlying_arr, 0, sizeof(underlying_arr)); 28 | memset(bench_table, 0, sizeof(bench_table)); 29 | fph::meta::detail::BitArrayView bit_array(underlying_arr); 30 | uint64_t seed = std::random_device{}(); 31 | uint64_t original_seed = FastRand(seed); 32 | seed = original_seed; 33 | uint64_t temp_sum = 0; 34 | for (size_t i = 0; i < TEST_ITEM_SIZE; ++i) { 35 | seed = FastRand(seed); 36 | size_t index = seed % TEST_ITEM_SIZE; 37 | auto value = seed & ITEM_MASK; 38 | 
bit_array.set(index, value); 39 | seed = FastRand(seed); 40 | index = seed % TEST_ITEM_SIZE; 41 | auto temp_value = bit_array.get(index); 42 | temp_sum += temp_value; 43 | } 44 | uint64_t temp_sum1 = temp_sum; 45 | seed = original_seed; 46 | temp_sum = 0; 47 | for (size_t i = 0; i < TEST_ITEM_SIZE; ++i) { 48 | seed = FastRand(seed); 49 | size_t index = seed % TEST_ITEM_SIZE; 50 | auto value = seed & ITEM_MASK; 51 | bench_table[index] = value; 52 | seed = FastRand(seed); 53 | index = seed % TEST_ITEM_SIZE; 54 | auto temp_value = bench_table[index]; 55 | temp_sum += temp_value; 56 | } 57 | if (temp_sum != temp_sum1) { 58 | fprintf(stderr, "Error, temp_sum1: %" PRIu64 ", expected: %" PRIu64 "\n", 59 | temp_sum1, temp_sum); 60 | return -1; 61 | } 62 | fprintf(stdout, "Pass test, temp_sum: %" PRIu64 "\n", temp_sum); 63 | return 0; 64 | } -------------------------------------------------------------------------------- /tests/sample_fph.cpp: -------------------------------------------------------------------------------- 1 | #include "fph/dynamic_fph_table.h" 2 | #include "fph/meta_fph_table.h" 3 | #include 4 | #include 5 | 6 | using namespace std::literals; 7 | 8 | class TestKeyClass { 9 | 10 | public: 11 | explicit TestKeyClass(std::string s): data(std::move(s)) {} 12 | 13 | // The key_type of fph table need to be copy constructible, assignment operators are not needed 14 | TestKeyClass(const TestKeyClass&o) = default; 15 | TestKeyClass(TestKeyClass&&o) = default; 16 | 17 | TestKeyClass& operator=(const TestKeyClass&o) = delete; 18 | TestKeyClass& operator=(TestKeyClass&&o) = delete; 19 | 20 | bool operator==(const TestKeyClass& o) const { 21 | return this->data == o.data; 22 | } 23 | 24 | std::string data; 25 | 26 | }; 27 | 28 | 29 | struct TestKeyEqualTo { 30 | using is_transparent = void; 31 | using eq_type = std::equal_to; 32 | bool operator()(const TestKeyClass& a, const std::string& b) const { 33 | return eq_type{}(a.data, b); 34 | } 35 | bool operator()(const 
TestKeyClass& a, std::string_view b) const { 36 | return eq_type{}(a.data, b); 37 | } 38 | bool operator()(const TestKeyClass& a, const TestKeyClass& b) const { 39 | return eq_type{}(a.data, b.data); 40 | } 41 | bool operator()(const TestKeyClass& a, const char* b) const { 42 | return eq_type{}(a.data, b); 43 | } 44 | }; 45 | 46 | // The hash function of the custom key type need to take both a key and a seed 47 | struct TestKeySeedHash { 48 | using is_transparent = void; 49 | using hash_type = fph::SimpleSeedHash; 50 | 51 | size_t operator()(const TestKeyClass &src, size_t seed) const { 52 | return hash_type{}(src.data, seed); 53 | } 54 | 55 | size_t operator()(const std::string& src, size_t seed) const { 56 | return hash_type{}(src, seed); 57 | } 58 | 59 | size_t operator()(std::string_view src, size_t seed) const { 60 | return hash_type{}(src, seed); 61 | } 62 | 63 | size_t operator()(const char* src, size_t seed) const { 64 | return hash_type{}(src, seed); 65 | } 66 | }; 67 | 68 | // a random key generator is needed for the Fph Hash Table; 69 | // If using a custom class, a random generator of the key should be provided. 
70 | class KeyClassRNG { 71 | public: 72 | KeyClassRNG(): string_gen(std::random_device{}()) {}; 73 | 74 | TestKeyClass operator()() { 75 | return TestKeyClass(string_gen()); 76 | } 77 | 78 | 79 | protected: 80 | fph::dynamic::RandomGenerator string_gen; 81 | }; 82 | 83 | template 84 | void SampleTest() { 85 | TestMap fph_map = {{TestKeyClass("a"), 1}, {TestKeyClass("b"), 2}, {TestKeyClass("c"), 3}, 86 | {TestKeyClass("d"), 4} }; 87 | 88 | std::vector> vec = { {TestKeyClass("A"), 11}, 89 | {TestKeyClass("B"), 12}, {TestKeyClass("C"), 13}, 90 | {TestKeyClass("D"), 4} }; 91 | 92 | 93 | 94 | std::cout << "map has elements: " << std::endl; 95 | for (const auto& [k, v]: fph_map) { 96 | std::cout << "(" << k.data << ", " << v << ") "; 97 | } 98 | std::cout << std::endl; 99 | 100 | fph_map.insert(vec.begin(), vec.end()); 101 | std::cout << "Map elements after insert range: " << std::endl; 102 | for (const auto& [k, v]: fph_map) { 103 | std::cout << "(" << k.data << ", " << v << ") "; 104 | } 105 | std::cout << std::endl; 106 | 107 | fph_map.insert({TestKeyClass("e"), 5}); 108 | auto &e_ref = fph_map.at(TestKeyClass("e")); 109 | std::cout << "value at e is " << e_ref << std::endl; 110 | fph_map.template try_emplace<>(TestKeyClass("f"), 6); 111 | const auto& f_ref = const_cast(&fph_map)->at(TestKeyClass("f")); 112 | (void)0; 113 | std::cout << "value at f is " << f_ref << std::endl; 114 | fph_map[TestKeyClass("g")] = 7; 115 | 116 | fph_map.erase(TestKeyClass("a")); 117 | auto const_find_it = const_cast(&fph_map)->find(TestKeyClass("b")); 118 | std::cout << "find key b value is " << const_find_it->second << std::endl; 119 | fph_map.erase(const_find_it); 120 | { 121 | auto temp_find_it = fph_map.find("c"sv); 122 | if (temp_find_it != fph_map.end()) { 123 | std::cout << "find value at c is " << temp_find_it->second << 124 | std::endl; 125 | } 126 | auto &c_value = fph_map.at("c"sv); 127 | std::cout << "value at c is " << c_value << std::endl; 128 | 129 | auto &c_ref = 
fph_map["c"sv]; 130 | std::cout << "value operator[] at c is " << c_ref << std::endl; 131 | } 132 | if (fph_map.contains("d"s)) { 133 | std::cout << "contains d in table" << std::endl; 134 | } 135 | std::cout << "count elements with key e: " << fph_map.count("e") << std::endl; 136 | 137 | std::cout << "Fph map now has elements: " << std::endl; 138 | for (const auto& [k, v]: fph_map) { 139 | std::cout << "(" << k.data << ", " << v << ") "; 140 | } 141 | std::cout << std::endl; 142 | 143 | if (fph_map.find(TestKeyClass("a")) == fph_map.end()) { 144 | std::cout << "Cannot find \"a\" in map" << std::endl; 145 | } 146 | if (fph_map.contains(TestKeyClass("b"))) { 147 | std::cout << "Found \"f\" in map" << std::endl; 148 | } 149 | std::cout << "Value with key \"g\" is " 150 | << fph_map.GetPointerNoCheck(TestKeyClass("g"))->second 151 | << std::endl; 152 | 153 | fph_map.clear(); 154 | 155 | if (fph_map.empty()) { 156 | std::cout << "table is empty\n"; 157 | } 158 | } 159 | 160 | void TestFphMap() { 161 | using KeyType = TestKeyClass; 162 | using MappedType = uint64_t; 163 | using SeedHash = TestKeySeedHash; 164 | using KeyEqual = TestKeyEqualTo; 165 | using Allocator = std::allocator>; 166 | using BucketParamType = uint32_t; 167 | using KeyRNG = KeyClassRNG; 168 | 169 | 170 | using DyFphMap = fph::DynamicFphMap; 172 | using FphMetaMap = fph::MetaFphMap; 174 | 175 | std::cout << "DynamicFphMap" << std::endl; 176 | SampleTest(); 177 | 178 | std::cout << std::endl << "MetaFphMap" << std::endl; 179 | SampleTest(); 180 | 181 | } 182 | 183 | int main() { 184 | TestFphMap(); 185 | return 0; 186 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Flash Perfect Hash 2 | 3 | The Flash Perfect Hash (FPH) library is a modern C++ implementation of a dynamic [perfect hash](https://en.wikipedia.org/wiki/Perfect_hash_function) 4 | table (no collisions for 
the hash), which makes the hash map/set extremely fast for lookup operations. 5 | 6 | We provide four container classes `fph::DynamicFphSet`,`fph::DynamicFphMap`,`fph::MetaFphSet` and 7 | `fph::MetaFphMap`. The APIs of these four classes are almost the same as those of 8 | `std::unordered_set` and `std::unordered_map`, but there are some minor differences, which we will 9 | explain in detail below. To compile this code, you need to use at least C++17 or newer standards. 10 | 11 | Generally speaking, the containers here are suitable for the situation where the performance of the 12 | lookup is very important, and the number of insertions is small compared to the query, or the keys are fixed. 13 | 14 | ## Performance 15 | 16 | Here we show the timing of the `find`operation when a 64-bit integer is used as the key. The results are from the 17 | [hashtable-bench](https://github.com/renzibei/hashtable-bench) project, which evaluates hash maps on different datasets. 18 | 19 | Figures 1 and 2 show the lookup time of multiple hash tables using different hash functions on the x86-64 platform and 20 | arm64 platform. It can be seen that `fph::DynamicFphMap` has a clear speed advantage in lookup on this dataset. 21 | 22 | For a comparison of more datasets and more operation types, please refer to the [benchmark](https://github.com/renzibei/hashtable-bench). 23 | 24 | ![fig1](./show/3990x/figs/,avg_hit_find_with_rehash.png) 25 | 26 |
Figure 1: Look up keys in the map with 0.9 max_load_factor, tested in AMD 3990x
27 | 28 | ![fig2](./show/m1-max/figs/,avg_hit_find_with_rehash.png) 29 | 30 |
Figure 2: Look up keys in the map with 0.9 max_load_factor, tested in Apple M1 Max
31 | 32 | `fph::MetaFphMap` is faster than `fph::DynamicFphMap` for the `find` operation when looking up 33 | keys that are not in the table and the number of elements is large. `fph::MetaFphMap` uses a metadata 34 | array to save information including the position markers and part of the hash values. So when it 35 | tries to find a key not in the table, the metadata alone is usually enough to tell that 36 | this key does not exist, without fetching the main slots. This can save cache space. 37 | Correspondingly, when trying to find keys that are in the table, or when the number of elements is small, 38 | `fph::DynamicFphMap` will be faster. 39 | 40 | 41 | ## Algorithm 42 | 43 | The time for a hash table to find the key is determined by the cost of calculating the hash, the 44 | number of times the data is read from the memory, and the cost of each memory access. 45 | 46 | Almost every hash table dedicated to optimizing query performance will take some measures to reduce 47 | the number of memory accesses. For example, Google's [absl hash table](https://abseil.io/blog/20180927-swisstables) 48 | uses metadata and SIMD instructions to reduce the number of memory fetches; robin hood hashing is 49 | aimed at reducing the variance of probe distances, which can make the lookup more cache-friendly. 50 | 51 | Perfect hashing, by definition, minimizes the number of hashes and the number of memory accesses. 52 | It only needs to fetch the memory once to get the required data from the slots. Of course, the fly 53 | in the ointment is that the perfect hash function itself requires parameter space that is 54 | proportional to the number of keys. Fortunately, the extra space required by FPH is negligible 55 | compared to the slots for storing data, and this space will not cause a significant increase in the cache miss rate. 
56 | 57 | The idea of FPH originates from the [FCH algorithm](https://dl.acm.org/doi/abs/10.1145/133160.133209), 58 | which is a perfect hash algorithm suitable for implementation. With full awareness of modern computer system 59 | architecture, FPH has improved and optimized the FCH algorithm for query speed. In addition, we let the perfect hash 60 | table support dynamic modification, although the cost of dynamic modification is relatively high at present. 61 | 62 | The FCH algorithm uses a two-step method when choosing the hash method, which may bring branches to 63 | the pipeline. We skip a step to make the hashing process easier. This speeds up the query step 64 | but makes the process of constructing the hash slower. 65 | 66 | To be able to dynamically add key values to the hash table, whenever a new key makes 67 | the hash no longer a perfect hash, we will rebuild the hash table. 68 | 69 | 70 | ## Difference compared to std 71 | 1. The template parameter `SeedHash` is different from the `Hash` in the STL: it has to be a functor 72 | that accepts two arguments, the key and the seed. 73 | (There is also a no-seed version of the hash table; see below.) 74 | 2. For `fph::DynamicFphSet` and `fph::DynamicFphMap`, if the key type is not a common type, you will have to provide a random generator for the key 75 | with the template parameter `RandomKeyGenerator`. There is no such requirement for 76 | `fph::MetaFphSet` and `fph::MetaFphMap`. 77 | 3. The keys have to be CopyConstructible. 78 | 4. The values have to be MoveConstructible. 79 | 5. A rehash may invalidate any references and pointers to elements within the table. 80 | 81 | The second difference is because we use a trick in `fph::DynamicFphSet` and 82 | `fph::DynamicFphMap`. Normally, we need to know whether a slot is empty in a hash 83 | table. Of course, we can use a bit array to indicate this information, but this 84 | will introduce extra memory load operations. 
So, we randomly 85 | generate a default key to fill the 86 | empty slots. How can we know whether this slot is empty when the user inserts 87 | the default key? We place a secondary default key in the position of the 88 | original slot belonging to the default key, and we make sure that the theoretical 89 | slot position of this secondary default key is different from that of the 90 | default key. 91 | 92 | As we use metadata to indicate whether the slots are empty in `fph::MetaFphSet` 93 | and `fph::MetaFphMap`, they don't need a random key generator anymore. 94 | 95 | ## No seed hash version 96 | The normal version of the fph table requires a seed hash function. There exists a no-seed version where 97 | a no-seed hash function like `std::hash` can be used. You can switch to the `noseed` branch to use the 98 | no-seed version of the code. See [no seed version](#No-seed-version) in the 99 | [Instructions for use section](#Instructions-for-use) for more information. 100 | 101 | ## Build 102 | Requirement: C++ standard not older than C++17; currently only tested in GCC/Clang/MSVC (no compile error in MSVC). 103 | 104 | The FPH library is a header-only library. So you can just add the header file to the header search path 105 | of your project to include it. 106 | 107 | Or, you can use FPH with CMake. Put this repo as a subdirectory under your project and then use it as a 108 | submodule of your CMake project. For instance, if you put the `fph-table` directory under the `third-party` 109 | directory of your project, you can add the following code to your `CMakeLists.txt` 110 | ```cmake 111 | add_subdirectory(third-party/fph-table) 112 | target_link_libraries(your_target PRIVATE fph::fph_table) 113 | ``` 114 | 115 | When you have added the `fph-table` directory to your header search path, you can include the fph map/set by adding 116 | 117 | `#include "fph/dynamic_fph_table.h"` to your code. 
118 | 119 | ## Test 120 | To test that fph has no compile and run errors on your system, you can use the test code we 121 | provide using the following commands. 122 | 123 | 124 | ``` 125 | cd fph-table/tests 126 | mkdir build 127 | cd build 128 | cmake .. -DCMAKE_BUILD_TYPE=Release 129 | make -j4 130 | ./fph_table_tests 131 | ``` 132 | 133 | ## Usage 134 | 135 | The APIs are almost the same with the `std::unordered_set` and `std::unordered_map`. 136 | 137 | `fph::DynamicFphSet` and 138 | `fph::MetaFphSet` are the fph set containers. 139 | `fph::DynamicFphMap` 140 | and `fph::MetaFphMap` are the fph map 141 | containers. 142 | 143 | There are also aliases for these containers: `fph::dynamic_fph_set`, 144 | `fph::dynamic_fph_map`,`fph::meta_fph_set` and `fph::meta_fph_map`. 145 | 146 | The following sample shows how to deal with the custom key class, 147 | you can see `tests/sample_fph.cpp` for the more detailed 148 | example. 149 | 150 | ```c++ 151 | #include "fph/dynamic_fph_table.h" 152 | #include 153 | 154 | class TestKeyClass { 155 | 156 | public: 157 | explicit TestKeyClass(std::string s): data(std::move(s)) {} 158 | 159 | // The key_type of fph table need to be copy constructible, assignment operators are not needed 160 | TestKeyClass(const TestKeyClass&o) = default; 161 | TestKeyClass(TestKeyClass&&o) = default; 162 | 163 | TestKeyClass& operator=(const TestKeyClass&o) = delete; 164 | TestKeyClass& operator=(TestKeyClass&&o) = delete; 165 | 166 | bool operator==(const TestKeyClass& o) const { 167 | return this->data == o.data; 168 | } 169 | 170 | std::string data; 171 | 172 | }; 173 | 174 | // The hash function of the custom key type need to take both a key and a seed 175 | struct TestKeySeedHash { 176 | size_t operator()(const TestKeyClass &src, size_t seed) const { 177 | return fph::MixSeedHash{}(src.data, seed); 178 | } 179 | }; 180 | 181 | // a random key generator is needed for the Fph Hash Table; 182 | // If using a custom class, a random generator of 
the key should be provided. 183 | class KeyClassRNG { 184 | public: 185 | KeyClassRNG(): string_gen(std::random_device{}()) {}; 186 | 187 | TestKeyClass operator()() { 188 | return TestKeyClass(string_gen()); 189 | } 190 | 191 | 192 | protected: 193 | fph::dynamic::RandomGenerator string_gen; 194 | }; 195 | 196 | void TestFphMap() { 197 | using KeyType = TestKeyClass; 198 | using MappedType = uint64_t; 199 | using SeedHash = TestKeySeedHash; 200 | using Allocator = std::allocator>; 201 | using BucketParamType = uint32_t; 202 | using KeyRNG = KeyClassRNG; 203 | 204 | using FphMap = fph::DynamicFphMap, Allocator, 205 | BucketParamType , KeyRNG>; 206 | 207 | FphMap fph_map = {{TestKeyClass("a"), 1}, {TestKeyClass("b"), 2}, {TestKeyClass("c"), 3}, 208 | {TestKeyClass("d"), 4} }; 209 | 210 | std::cout << "Fph map has elements: " << std::endl; 211 | for (const auto& [k, v]: fph_map) { 212 | std::cout << "(" << k.data << ", " << v << ") "; 213 | } 214 | std::cout << std::endl; 215 | 216 | fph_map.insert({TestKeyClass("e"), 5}); 217 | fph_map.template try_emplace<>(TestKeyClass("f"), 6); 218 | fph_map[TestKeyClass("g")] = 7; 219 | fph_map.erase(TestKeyClass("a")); 220 | 221 | std::cout << "Fph map now has elements: " << std::endl; 222 | for (const auto& [k, v]: fph_map) { 223 | std::cout << "(" << k.data << ", " << v << ") "; 224 | } 225 | std::cout << std::endl; 226 | 227 | if (fph_map.find(TestKeyClass("a")) == fph_map.end()) { 228 | std::cout << "Cannot find \"a\" in map" << std::endl; 229 | } 230 | if (fph_map.contains(TestKeyClass("f"))) { 231 | std::cout << "Found \"f\" in map" << std::endl; 232 | } 233 | std::cout << "Value with key \"g\" is " 234 | << fph_map.GetPointerNoCheck(TestKeyClass("g"))->second 235 | << std::endl; 236 | } 237 | 238 | int main() { 239 | TestFphMap(); 240 | return 0; 241 | } 242 | ``` 243 | 244 | ## Instructions for use 245 | ### Calculate the distinct slot index only 246 | 247 | You can use the containers we provided to replace 
`std::unordered_set` or `std::unordered_map` if 248 | you care more about lookup performance. Or if all you need is a perfect hash function, i.e. a mapping 249 | from keys to the integers in a limited range, you can use the 250 | `fph::DynamicFphSet::GetSlotPos(const Key &key)` function to get the slot index of one key in 251 | the table, which is unique. The `GetSlotPos` is always faster than the `find` lookup as it does not 252 | fetch data from the slots (which occupy most of the memory of a hash table). 253 | 254 | ### Heterogeneous lookup 255 | 256 | Sometimes users don't want to use the `Key` as the key to do the `find` 257 | operation. For example, when the `Key` is `std::string`, users may want to 258 | use `std::string_view` as the type to do the lookup operations. 259 | 260 | ```c++ 261 | template< class K > iterator find( const K& x ); 262 | template< class K > const_iterator find( const K& x ) const; 263 | ``` 264 | 265 | These two overload functions participate in overload resolution only if 266 | `SeedHash::is_transparent` and `KeyEqual::is_transparent` are valid and each 267 | denotes a type. You can see the `tests/sample_fph.cpp` to learn this usage. 268 | This is basically the same transparent lookup framework used in C++20. 269 | 270 | ### Requirement of the seed hash function 271 | 272 | To avoid calling the hash function twice, we require that there exists a seed such that all 273 | actually inserted elements have different hash values with that seed. This is quite easy for 274 | integers whose type is 64 bits. Identity hash, for example, is an injective (and bijective) function 275 | from 64-bit integers to 64-bit integers. And if the length of the key exceeds 64 bits and the size 276 | of hash value is 64 bits, then there is a possibility of collision. 
When the number of elements to 277 | be inserted is relatively small (for example, less than 10^9), we can find a hash function that 278 | satisfies the condition (injective) with a very high probability by replacing the seed. But if the 279 | number of elements is very large (more than 10^9), then the probability of collision will be too 280 | high. There are two solutions to this problem: 1. Instead of computing the hash once, compute the 281 | hash twice. This no longer requires the existence of a seed to make the hash injective to the 282 | inserted element. 2. Take a 128-bit hash function, so that the probability of collision is small enough. 283 | 284 | At present, we have implemented the first method in another branch, which does not need to change 285 | the code on the user side (provide a 128-bit hash function for custom classes). The disadvantage is 286 | that because the hash value is calculated twice, the speed will be slower than the one-time solution. 287 | 288 | We provide three kinds of SeedHash function for basic types: `fph::SimpleSeedHash`, 289 | `fph::MixSeedHash` and `fph::StrongSeedHash`; 290 | The SimpleSeedHash has the fastest calculation speed and the weakest hash distribution, while the 291 | StrongSeedHash is the slowest of them to calculate but has the best distribution of hash value. 292 | The MixSeedHash is in the mid of them. 293 | 294 | Take integer for an example, if the keys you want to insert are not uniformly distributed in 295 | the integer interval of that type, then the hash value may probably not be uniformly distributed 296 | in the hash type interval as well for a weak hash function. But with a strong hash function, 297 | you can easily produce uniformly distributed hash values regardless of your input distribution. 298 | 299 | The default Seed Hash function is the `fph::SimpleSeedHash` as it is 300 | the fastest, and it is good enough for most of the input data in real life. 
301 | 302 | Tips: Know the patterns of the input keys before choosing the seed hash function. If the keys may 303 | cause a failure in the building of the table (which is rare for the hash functions we provide), 304 | use a stronger seed hash function. Don't write your own seed hash function unless you know it 305 | is a good hash function. 306 | 307 | If the user wants to write a custom seed hash function for the key type, refer to the 308 | `fph::SimpleSeedHash`; the functor needs to take both a key and a seed (size_t) as input arguments and 309 | return a hash value of type size_t. 310 | 311 | ### No seed version 312 | 313 | The no-seed version is provided for situations where a no-seed hash function has to be used. 314 | Compared to the seed version, the no-seed version hash table does not require a `SeedHash`. The 315 | hash table in this version requires the same `Hash` function that the STL unordered containers use. 316 | However, there is a requirement for the no-seed hash function: all the actually inserted elements 317 | have different hash values. Similar to the requirement of the seed hash function, this is easy for 318 | 64-bit keys. Identity hash is good enough as a hash function for this hash table. And if the length 319 | of the key exceeds 64 bits and the size of hash value is 64 bits, then there is a possibility of 320 | collision. So we strongly recommend using the seeded version of the fph table when the key_type 321 | is string. 322 | 323 | You can switch to the no seed version by changing to the `noseed` branch. 324 | ``` 325 | git checkout noseed 326 | ``` 327 | 328 | ### Further optimize lookup 329 | 330 | The classic `find(const key_type&key)` function can be further optimized if the key is guaranteed 331 | to be in the hash table. 
There is one comparison and branch instruction in the `find` function, 332 | while the `pointer GetPointerNoCheck(const key_type &key)` function does not contain any comparison 333 | or branch, as a result of which it's faster. 334 | 335 | A 'slot' is the space reserved for a value (the key for a set, the key-value pair for a map). One slot in fph will at 336 | most contain one value. We use a power of 2 for the number of slots. Say the number 337 | of slots is m and the number of elements is n; then n <= m, and the size of the slots will be 338 | `sizeof(value_type) * m` bytes. 339 | 340 | The speed of insertion is very sensitive to the max_load_factor parameter. If you use the 341 | `insert(const value_type&)` function to construct a table, and you do care a little about the insert time, we suggest 342 | that you use the default max_load_factor, which is around 0.6. But if you don't care about the 343 | insert time, or you use the `InsertNoDuplicated(first, last)` or `Build()` to construct the table, and most importantly, you want 344 | to save the memory size and cache size (which would probably accelerate the querying), you can 345 | set a max_load_factor no larger than max_load_factor_upper_limit(), which should be 0.98. 346 | 347 | If the range of your keys is limited, and they won't change after some point in your program, 348 | you can set a large max_load_factor and then call rehash(element_size) to rehash the elements to 349 | smaller slots if the load_factor can be larger in that case. (Make sure almost no new keys will 350 | be added to the table after this because the insert operation will be very slow when the 351 | load_factor is very large.) 352 | 353 | ### Memory usage 354 | 355 | The extra hot memory space besides slots during querying is the space for buckets (this concept is 356 | not the bucket in the STL unordered_set; it comes from the FCH algorithm). The size of 357 | this extra memory is about `c * n / (log2(n) + 1) * sizeof(BucketParamType)` bytes. 
c is a 358 | parameter that must be larger than 1.5. The larger c is, the faster the 359 | insert operations will be. BucketParamType is an unsigned type, and it must meet the condition that 360 | `2^(number of bits of BucketParamType - 1)` is larger than the number of elements. So you should choose 361 | the BucketParamType that is just large enough but not too large if you don't want to waste the 362 | memory and cache size. The memory size for this extra hot memory space will be slightly 363 | larger than `c * n` bits. 364 | 365 | For the fph meta hash table `fph::MetaFphSet` and `fph::MetaFphMap`, additional space is needed for 366 | the metadata. In the current code, we use 1 byte of metadata for each element: 1 bit for the position 367 | marker and 7 bits for part of the hash. We can also choose 4 bits of metadata for each element, 368 | but it turns out that the extra instructions needed for the 4-bit layout are not worth the cache space 369 | they save in our tests. 370 | 371 | More extra space is required for hash table expansion and reconstruction. In order to optimize the 372 | memory allocation time, we did not actively release all of these spaces, but these additional 373 | spaces can be released. 
374 | 375 | -------------------------------------------------------------------------------- /tests/test_fph_table.cpp: -------------------------------------------------------------------------------- 1 | #include "fph/dynamic_fph_table.h" 2 | #include "fph/meta_fph_table.h" 3 | #include "loghelper.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | #define TEST_TABLE_CORRECT 1 16 | 17 | #ifdef FPH_HAVE_EXCEPTION 18 | # define TEST_TRY try 19 | # define TEST_CATCH(X) catch(X) 20 | #else 21 | # define TEST_TRY if (true) 22 | # define TEST_CATCH(X) if (false) 23 | #endif 24 | 25 | enum TableType { 26 | FCH_TABLE = 0, 27 | DYNAMIC_FPH_TABLE, 28 | META_FPH_TABLE, 29 | STD_HASH_TABLE, 30 | ABSL_FLAT_TABLE, 31 | ROBIN_HOOD_FLAT_TABLE, 32 | SKA_FLAT_TABLE 33 | }; 34 | 35 | std::string GetTableName(TableType table_type) { 36 | switch (table_type) { 37 | case FCH_TABLE: 38 | return "fch_map"; 39 | case DYNAMIC_FPH_TABLE: 40 | return "dynamic_fph_map"; 41 | case META_FPH_TABLE: 42 | return "meta_fph_map"; 43 | case STD_HASH_TABLE: 44 | return "std::unordered_map"; 45 | case ABSL_FLAT_TABLE: 46 | return "absl:flat_hash_map"; 47 | case ROBIN_HOOD_FLAT_TABLE: 48 | return "robin_hood:unordered_flat_map"; 49 | case SKA_FLAT_TABLE: 50 | return "ska::flat_hash_map"; 51 | } 52 | return ""; 53 | } 54 | 55 | enum LookupExpectation { 56 | KEY_IN = 0, 57 | KEY_NOT_IN, 58 | KEY_MAY_IN, 59 | }; 60 | 61 | template 62 | struct is_pair : std::false_type 63 | { }; 64 | 65 | template 66 | struct is_pair> : std::true_type 67 | { }; 68 | 69 | template 70 | struct SimpleGetKey { 71 | const T& operator()(const T& x) const { 72 | return x; 73 | } 74 | 75 | T& operator()(T& x) { 76 | return x; 77 | } 78 | 79 | T operator()(T&& x) { 80 | return std::move(x); 81 | } 82 | 83 | }; 84 | 85 | template 86 | struct SimpleGetKey::value, void>::type> { 87 | 88 | using key_type = typename T::first_type; 89 | 90 | 91 | const key_type& 
operator()(const T& x) const { 92 | return x.first; 93 | } 94 | 95 | key_type& operator()(T& x) { 96 | return x.first; 97 | } 98 | 99 | key_type operator()(T&& x) { 100 | return std::move(x.first); 101 | } 102 | }; 103 | 104 | 105 | 106 | class TestKeyClass { 107 | public: 108 | explicit TestKeyClass(std::string s): data(std::move(s)) {} 109 | TestKeyClass(const TestKeyClass& o): data(o.data) {} 110 | // TestKeyClass(TestKeyClass&& o) = delete; 111 | TestKeyClass(TestKeyClass&& o) noexcept : data(std::move(o.data)) {} 112 | TestKeyClass& operator=(const TestKeyClass&o) = delete; 113 | TestKeyClass& operator=(TestKeyClass&&o) = delete; 114 | 115 | 116 | bool operator==(const TestKeyClass& o) const { 117 | return this->data == o.data; 118 | } 119 | 120 | std::string data; 121 | protected: 122 | 123 | }; 124 | 125 | class TestValueClass { 126 | public: 127 | explicit TestValueClass(uint64_t x): data(1, x) {} 128 | TestValueClass(const TestValueClass& o) = delete; 129 | TestValueClass(TestValueClass&& o) noexcept: data(std::move(o.data)) {} 130 | // TestValueClass(const TestValueClass& o) = default; 131 | 132 | TestValueClass& operator=(const TestValueClass& o) = delete; 133 | TestValueClass& operator=(TestValueClass&& o) = delete; 134 | 135 | bool operator==(const TestValueClass& o) const { 136 | return this->data == o.data; 137 | } 138 | 139 | std::vector data; 140 | }; 141 | 142 | struct TestKeyHash { 143 | size_t operator()(const TestKeyClass &src) const { 144 | return std::hash{}(src.data); 145 | } 146 | }; 147 | 148 | struct TestKeySeedHash { 149 | size_t operator()(const TestKeyClass &src, size_t seed) const { 150 | return fph::MixSeedHash{}(src.data, seed); 151 | } 152 | }; 153 | 154 | class KeyClassRNG { 155 | public: 156 | KeyClassRNG(): init_seed(std::random_device{}()), string_gen(init_seed) {}; 157 | KeyClassRNG(size_t seed): init_seed(seed), string_gen(seed) {} 158 | 159 | TestKeyClass operator()() { 160 | return TestKeyClass(string_gen()); 161 | } 162 | 
163 | void seed(size_t seed) { 164 | init_seed = seed; 165 | string_gen.seed(seed); 166 | } 167 | 168 | size_t init_seed; 169 | 170 | protected: 171 | fph::dynamic::RandomGenerator string_gen; 172 | }; 173 | 174 | class ValueClassRNG { 175 | public: 176 | ValueClassRNG(): init_seed(std::random_device{}()), string_gen(init_seed) {}; 177 | ValueClassRNG(size_t seed): init_seed(seed), string_gen(seed) {} 178 | 179 | TestValueClass operator()() { 180 | return TestValueClass(string_gen()); 181 | } 182 | 183 | void seed(size_t seed) { 184 | init_seed = seed; 185 | string_gen.seed(seed); 186 | } 187 | 188 | size_t init_seed; 189 | 190 | protected: 191 | fph::dynamic::RandomGenerator string_gen; 192 | // fph::dynamic::RandomGenerator string_gen; 193 | }; 194 | 195 | 196 | 197 | 198 | using fph::dynamic::detail::ToString; 199 | 200 | std::string ToString(const TestKeyClass &x) { 201 | return x.data; 202 | } 203 | 204 | template, 205 | class ValueEqual = std::equal_to> 206 | bool IsTableSame(const Table1 &table1, const Table2 &table2) { 207 | if (table1.size() != table2.size()) { 208 | return false; 209 | } 210 | size_t table_size = table2.size(); 211 | size_t element_cnt = 0; 212 | for (const auto& pair :table1) { 213 | ++element_cnt; 214 | auto find_it = table2.find(GetKey{}(pair)); 215 | if FPH_UNLIKELY(find_it == table2.end()) { 216 | LogHelper::log(Error, "Fail to find %s in table2, can find in table1 status: %d", ToString(GetKey{}(pair)).c_str(), table1.find(GetKey{}(pair)) != table1.end()); 217 | return false; 218 | } 219 | if FPH_UNLIKELY(!ValueEqual{}(pair, *find_it)) { 220 | return false; 221 | } 222 | } 223 | if FPH_UNLIKELY(element_cnt != table_size) { 224 | LogHelper::log(Error, "Table 1 iterate num not equals to table size"); 225 | return false; 226 | } 227 | element_cnt = 0; 228 | for (const auto& pair :table2) { 229 | ++element_cnt; 230 | auto find_it = table1.find(GetKey{}(pair)); 231 | if FPH_UNLIKELY(find_it == table1.end()) { 232 | LogHelper::log(Error, 
"Fail to find %s in table1", ToString(GetKey{}(pair)).c_str()); 233 | return false; 234 | } 235 | if FPH_UNLIKELY(!ValueEqual{}(pair, *find_it)) { 236 | return false; 237 | } 238 | } 239 | if FPH_UNLIKELY(element_cnt != table_size) { 240 | LogHelper::log(Error, "Table 2 iterate num not equals to table size"); 241 | return false; 242 | } 243 | return true; 244 | } 245 | 246 | 247 | 248 | template< class Table, class BenchTable, class PairVec, class GetKey = SimpleGetKey, 249 | class ValueEqual = std::equal_to > 250 | bool TestInsertCorrectness(Table &table, BenchTable &bench_table, PairVec &pair_vec1, PairVec &pair_vec2, size_t test_index = 0) { 251 | (void)test_index; 252 | TEST_TRY { 253 | table.clear(); 254 | bench_table.clear(); 255 | if (!IsTableSame(table, bench_table)) { 256 | LogHelper::log(Error, "table and bench not same at beginning of insert"); 257 | return false; 258 | } 259 | 260 | size_t pair_cnt = 0; 261 | if constexpr(std::is_copy_constructible_v) { 262 | for (const auto &pair: pair_vec1) { 263 | 264 | 265 | 266 | auto[bench_insert_it, bench_ok] = bench_table.insert(pair); 267 | auto[insert_it, ok] = table.insert(pair); 268 | if FPH_UNLIKELY(bench_ok != ok) { 269 | LogHelper::log(Error, "insert flag not same, table: %d, bench_table: %d", ok, 270 | bench_ok); 271 | return false; 272 | } 273 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 274 | LogHelper::log(Error, "insert const& iterator not same"); 275 | return false; 276 | } 277 | 278 | // comment out the following comparing when not in debug 279 | // if (!IsTableSame(table, bench_table)) { 280 | // LogHelper::log(Error, "table not same during insert const&, pair_cnt: %lu", pair_cnt); 281 | // return false; 282 | // } 283 | ++pair_cnt; 284 | } 285 | if (!IsTableSame(table, bench_table)) { 286 | LogHelper::log(Error, "table not same after insert const&"); 287 | return false; 288 | } 289 | } 290 | 291 | 292 | pair_cnt = 0; 293 | if constexpr(std::is_move_constructible_v) { 294 | for 
(size_t i = 0; i < pair_vec1.size(); ++i) { 295 | auto[bench_insert_it, bench_ok] = bench_table.insert(std::move(pair_vec1[i])); 296 | auto[insert_it, ok] = table.insert(std::move(pair_vec2[i])); 297 | if (bench_ok != ok) { 298 | return false; 299 | } 300 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 301 | LogHelper::log(Error, "insert && iterator not same"); 302 | return false; 303 | } 304 | ++pair_cnt; 305 | } 306 | if (!IsTableSame(table, bench_table)) { 307 | LogHelper::log(Error, "table not same after insert &&"); 308 | return false; 309 | } 310 | } 311 | } 312 | TEST_CATCH (const std::exception& e) { 313 | LogHelper::log(Error, "Catch exception in test insert"); 314 | return false; 315 | } 316 | return true; 317 | } 318 | 319 | template, 320 | class ValueEqual = std::equal_to > 321 | bool TestEmplaceCorrectness1(Table &table, BenchTable &bench_table, ValueVec &value_vec1, ValueVec &value_vec2) { 322 | TEST_TRY { 323 | table.clear(); 324 | bench_table.clear(); 325 | if (!IsTableSame(table, bench_table)) { 326 | LogHelper::log(Error, "table not same as the bench_table after clear()"); 327 | return false; 328 | } 329 | 330 | size_t value_cnt = 0; 331 | if constexpr(std::is_copy_constructible_v) { 332 | for (const auto &value: value_vec1) { 333 | auto[bench_insert_it, bench_ok] = bench_table.emplace(value); 334 | auto[insert_it, ok] = table.emplace(value); 335 | if (bench_ok != ok) { 336 | LogHelper::log(Error, "emplace ret flag not same, table: %d, bench: %d", 337 | ok, bench_ok); 338 | return false; 339 | } 340 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 341 | LogHelper::log(Error, "emplace1 const& iterator not same"); 342 | return false; 343 | } 344 | ++value_cnt; 345 | } 346 | if (!IsTableSame(table, bench_table)) { 347 | LogHelper::log(Error, "table not same after emplace test1 const&"); 348 | return false; 349 | } 350 | } 351 | 352 | value_cnt = 0; 353 | for (size_t i = 0; i < value_vec1.size(); ++i) { 354 | 
auto[bench_insert_it, bench_ok] = bench_table.emplace(std::move(value_vec1[i])); 355 | auto[insert_it, ok] = table.emplace(std::move(value_vec2[i])); 356 | if (bench_ok != ok) { 357 | return false; 358 | } 359 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 360 | LogHelper::log(Error, "emplace1 && iterator not same"); 361 | return false; 362 | } 363 | ++value_cnt; 364 | } 365 | if (!IsTableSame(table, bench_table)) { 366 | LogHelper::log(Error, "table not same after emplace test1 &&"); 367 | return false; 368 | } 369 | } 370 | TEST_CATCH (const std::exception &e) { 371 | LogHelper::log(Error, "catch error in test emplace1"); 372 | return false; 373 | } 374 | return true; 375 | } 376 | 377 | template, 378 | class ValueEqual = std::equal_to > 379 | bool TestEmplaceCorrectness2(Table &table, BenchTable &bench_table, KeyVec &k_vec1, VVec &v_vec1, KeyVec& k_vec2, VVec &v_vec2) { 380 | table.clear(); 381 | bench_table.clear(); 382 | if (!IsTableSame(table, bench_table)) { 383 | return false; 384 | } 385 | size_t value_cnt = 0; 386 | if constexpr (std::is_copy_constructible_v) { 387 | for (size_t i = 0; i < k_vec1.size(); ++i) { 388 | auto [bench_insert_it, bench_ok] = bench_table.emplace(k_vec1[i], v_vec1[i]); 389 | auto [insert_it, ok] = table.emplace(k_vec1[i], v_vec1[i]); 390 | if (bench_ok != ok) { 391 | return false; 392 | } 393 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 394 | LogHelper::log(Error, "emplace2 const& iterator not same"); 395 | return false; 396 | } 397 | ++value_cnt; 398 | } 399 | if (!IsTableSame(table, bench_table)) { 400 | LogHelper::log(Error, "table not same after emplace2 const&"); 401 | return false; 402 | } 403 | table.clear(); 404 | bench_table.clear(); 405 | } 406 | 407 | for (size_t i = 0; i < k_vec1.size(); ++i) { 408 | auto [bench_insert_it, bench_ok] = bench_table.emplace(std::move(k_vec1[i]), std::move(v_vec1[i])); 409 | auto [insert_it, ok] = table.emplace(std::move(k_vec2[i]), 
std::move(v_vec2[i])); 410 | if (bench_ok != ok) { 411 | return false; 412 | } 413 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 414 | LogHelper::log(Error, "emplace2 && iterator not same"); 415 | return false; 416 | } 417 | ++value_cnt; 418 | } 419 | if (!IsTableSame(table, bench_table)) { 420 | LogHelper::log(Error, "table not same after emplace2 &&"); 421 | return false; 422 | } 423 | 424 | return true; 425 | } 426 | 427 | template, 428 | class ValueEqual = std::equal_to > 429 | bool TestTryEmplaceCorrectness(Table &table, BenchTable &bench_table, KeyVec &k_vec1, VVec &v_vec1, KeyVec& k_vec2, VVec &v_vec2) { 430 | table.clear(); 431 | bench_table.clear(); 432 | if (!IsTableSame(table, bench_table)) { 433 | return false; 434 | } 435 | size_t value_cnt = 0; 436 | if constexpr (std::is_copy_constructible_v) { 437 | for (size_t i = 0; i < k_vec1.size(); ++i) { 438 | auto[bench_insert_it, bench_ok] = bench_table.try_emplace(k_vec1[i], v_vec1[i]); 439 | auto[insert_it, ok] = table.try_emplace(k_vec1[i], v_vec1[i]); 440 | if (bench_ok != ok) { 441 | return false; 442 | } 443 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 444 | LogHelper::log(Error, "try_emplace const& iterator not same"); 445 | return false; 446 | } 447 | ++value_cnt; 448 | } 449 | if (!IsTableSame(table, bench_table)) { 450 | LogHelper::log(Error, "table not same after try_emplace const&"); 451 | return false; 452 | } 453 | table.clear(); 454 | bench_table.clear(); 455 | } 456 | for (size_t i = 0; i < k_vec1.size(); ++i) { 457 | auto [bench_insert_it, bench_ok] = bench_table.try_emplace(std::move(k_vec1[i]), std::move(v_vec1[i])); 458 | auto [insert_it, ok] = table.try_emplace(std::move(k_vec2[i]), std::move(v_vec2[i])); 459 | if (bench_ok != ok) { 460 | return false; 461 | } 462 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 463 | LogHelper::log(Error, "try_emplace && iterator not same"); 464 | return false; 465 | } 466 | ++value_cnt; 467 | } 468 
| if (!IsTableSame(table, bench_table)) { 469 | LogHelper::log(Error, "table not same after try_emplace &&"); 470 | return false; 471 | } 472 | return true; 473 | } 474 | 475 | template, 476 | class ValueEqual = std::equal_to > 477 | bool TestOperatorCorrectness(Table &table, BenchTable &bench_table, KeyVec &k_vec1, VVec &v_vec1, KeyVec& k_vec2, VVec &v_vec2) { 478 | table.clear(); 479 | bench_table.clear(); 480 | if (!IsTableSame(table, bench_table)) { 481 | return false; 482 | } 483 | using value_type = typename Table::value_type; 484 | size_t value_cnt = 0; 485 | if constexpr (std::is_copy_assignable_v) { 486 | for (size_t i = 0; i < k_vec1.size(); ++i) { 487 | auto &bench_ref = bench_table[k_vec1[i]] = v_vec1[i]; 488 | auto &ref = table[k_vec1[i]] = v_vec1[i]; 489 | if FPH_UNLIKELY(!ValueEqual{}(value_type(k_vec1[i], bench_ref), value_type(k_vec1[i], ref))) { 490 | LogHelper::log(Error, "operator[] const& iterator not same"); 491 | return false; 492 | } 493 | ++value_cnt; 494 | } 495 | if (!IsTableSame(table, bench_table)) { 496 | LogHelper::log(Error, "table not same after operator[] const&"); 497 | return false; 498 | } 499 | table.clear(); 500 | bench_table.clear(); 501 | } 502 | if constexpr (std::is_move_assignable_v) { 503 | for (size_t i = 0; i < k_vec1.size(); ++i) { 504 | auto &bench_ref = bench_table[std::move(k_vec1[i])] = std::move(v_vec1[i]); 505 | auto temp_key = k_vec2[i]; 506 | auto &ref = table[std::move(k_vec2[i])] = std::move(v_vec2[i]); 507 | if FPH_UNLIKELY(!ValueEqual{}(value_type(temp_key, bench_ref), value_type(temp_key, ref))) { 508 | LogHelper::log(Error, "operator[] && iterator not same"); 509 | return false; 510 | } 511 | ++value_cnt; 512 | } 513 | if (!IsTableSame(table, bench_table)) { 514 | LogHelper::log(Error, "table not same after operator[] &&"); 515 | return false; 516 | } 517 | } 518 | return true; 519 | } 520 | 521 | 522 | 523 | template> 524 | void PrintTableKeys(const Table &table) { 525 | LogHelper::log(Info, "Table %s 
has %lu keys", GetTableName(table_type).c_str(), table.size()); 526 | for (auto it = table.begin(); it != table.end(); ++it) { 527 | fprintf(stderr, "%s, ", ToString(GetKey{}(*it)).c_str()); 528 | } 529 | fprintf(stderr, "\n"); 530 | } 531 | 532 | template< class Table, class BenchTable, class PairVec, class GetKey = SimpleGetKey, 533 | class ValueEqual = std::equal_to > 534 | bool TestEraseCorrectness(Table &table, BenchTable &bench_table, PairVec &pair_vec1, PairVec &pair_vec2, size_t seed) { 535 | TEST_TRY { 536 | table.clear(); 537 | bench_table.clear(); 538 | if (!IsTableSame(table, bench_table)) { 539 | LogHelper::log(Error, "table not equal after clear at beginning of erase test"); 540 | return false; 541 | } 542 | 543 | std::mt19937_64 random_engine(seed); 544 | std::uniform_int_distribution seed_gen; 545 | 546 | size_t pair_cnt = 0; 547 | std::vector key_seq_vec; 548 | std::vector operation_seq_vec; 549 | key_seq_vec.reserve(pair_vec1.size()); 550 | operation_seq_vec.reserve(pair_vec1.size()); 551 | for (size_t i = 0; i < pair_vec1.size(); ++i) { 552 | // const auto &pair = pair_vec[i]; 553 | auto temp_seed = seed_gen(random_engine); 554 | 555 | 556 | // bool do_erase_flag = false; 557 | 558 | if (i > 0 && temp_seed % 2U == 1U) { 559 | size_t try_erase_pos = seed_gen(random_engine) % i; 560 | const auto &try_erase_pair = pair_vec1[try_erase_pos]; 561 | const auto &temp_key = GetKey{}(try_erase_pair); 562 | 563 | key_seq_vec.push_back(temp_key); 564 | operation_seq_vec.push_back(false); 565 | // do_erase_flag = true; 566 | // last_erase_pair_index = try_erase_pos; 567 | if (bench_table.find(temp_key) != bench_table.end()) { 568 | 569 | 570 | auto seed2 = seed_gen(random_engine); 571 | if (seed2 % 2U == 1U) { 572 | auto bench_erase_size = bench_table.erase(temp_key); 573 | auto table_erase_size = table.erase(temp_key); 574 | if FPH_UNLIKELY(bench_erase_size != table_erase_size) { 575 | LogHelper::log(Error, "Erase by const& key return not same, bench: %lu, 
table_erase: %lu", 576 | bench_erase_size, table_erase_size); 577 | return false; 578 | } 579 | } else { 580 | bench_table.erase(bench_table.find(temp_key)); 581 | table.erase(table.find(temp_key)); 582 | } 583 | } 584 | } 585 | key_seq_vec.push_back(GetKey{}(pair_vec1[i])); 586 | operation_seq_vec.push_back(true); 587 | auto[bench_insert_it, bench_ok] = bench_table.insert(std::move(pair_vec1[i])); 588 | auto[insert_it, ok] = table.insert(std::move(pair_vec2[i])); 589 | if FPH_UNLIKELY(bench_ok != ok) { 590 | LogHelper::log(Error, "insert ret flag not same, table: %d, bench: %d", 591 | ok, bench_ok); 592 | return false; 593 | } 594 | if FPH_UNLIKELY(!ValueEqual{}(*bench_insert_it, *insert_it)) { 595 | LogHelper::log(Error, "insert const& iterator not same in erase test"); 596 | return false; 597 | } 598 | // if (!IsTableSame(table, bench_table)) { 599 | // LogHelper::log(Error, "table not same after one insert in erase, insert key: %s, " 600 | // "pair_cnt: %lu, do_erase: %d, last_erase_key: %s", 601 | // std::to_string(GetKey{}(pair)).c_str(), pair_cnt, do_erase_flag, 602 | // std::to_string(GetKey{}(pair_vec[last_erase_pair_index])).c_str()); 603 | // LogHelper::log(Error, "The key seq are: "); 604 | // for (const auto& key: key_seq_vec) { 605 | // fprintf(stderr, "%s, ", std::to_string(key).c_str()); 606 | // } 607 | // fprintf(stderr, "\n"); 608 | // LogHelper::log(Error, "The operation seq are: "); 609 | // for (auto op: operation_seq_vec) { 610 | // fprintf(stderr, "%d, ", int(op)); 611 | // } 612 | // fprintf(stderr, "\n"); 613 | // return false; 614 | // } 615 | ++pair_cnt; 616 | } 617 | if (!IsTableSame(table, bench_table)) { 618 | LogHelper::log(Error, "table not same after random erase"); 619 | return false; 620 | } 621 | 622 | for (auto it = bench_table.begin(); it != bench_table.end();) { 623 | auto temp_key = GetKey{}(*it); 624 | table.erase(table.find(temp_key)); 625 | it = bench_table.erase(it); 626 | // if (!IsTableSame(table, bench_table)) { 627 | 
// LogHelper::log(Error, "table not same during erase key %s in past half", std::to_string(temp_key).c_str()); 628 | // return false; 629 | // } 630 | } 631 | 632 | if (!IsTableSame(table, bench_table)) { 633 | LogHelper::log(Error, "table not same after erase all"); 634 | return false; 635 | } 636 | } 637 | TEST_CATCH (const std::exception& e) { 638 | LogHelper::log(Error, "Catch exception in test erase"); 639 | return false; 640 | } 641 | return true; 642 | } 643 | 644 | template> 645 | void ConstructTable(Table &table, const PairVec &pair_vec, size_t seed, double c = 2.0, bool do_reserve = true, bool do_rehash = false) { 646 | table.clear(); 647 | if constexpr (table_type == FCH_TABLE) { 648 | table.Init(pair_vec.begin(), pair_vec.end(), seed, true, c); 649 | } 650 | else { 651 | if (do_reserve) { 652 | table.reserve(pair_vec.size()); 653 | } 654 | for (size_t i = 0; i < pair_vec.size(); ++i) { 655 | const auto &pair = pair_vec[i]; 656 | if constexpr (table_type == ROBIN_HOOD_FLAT_TABLE) { 657 | table[GetKey{}(pair)] = pair.second; 658 | } 659 | else { 660 | table.insert(pair); 661 | } 662 | } 663 | if constexpr (table_type == DYNAMIC_FPH_TABLE || table_type == META_FPH_TABLE) { 664 | if (do_rehash) { 665 | if (table.load_factor() < 0.45) { 666 | table.max_load_factor(0.9); 667 | table.rehash(table.size()); 668 | } 669 | } 670 | } 671 | } 672 | } 673 | 674 | template, 675 | class ValueEqual = std::equal_to> 676 | bool TestPartEraseAndInsert(Table &table, BenchTable &bench_table, PairVec &pair_vec) { 677 | size_t ele_num = pair_vec.size(); 678 | if (table.size() != bench_table.size()) { 679 | LogHelper::log(Error, "element size in table not equal to pair_vec in test part erase"); 680 | return false; 681 | } 682 | size_t half_ele_num = ele_num / 2U; 683 | for (size_t i = 0; i < half_ele_num; ++i) { 684 | if (bench_table.find(GetKey{}(pair_vec[i])) != bench_table.end()) { 685 | bench_table.erase(GetKey{}(pair_vec[i])); 686 | table.erase(GetKey{}(pair_vec[i])); 687 
| } 688 | 689 | } 690 | if (!IsTableSame(table, bench_table)) { 691 | LogHelper::log(Error, "Table not same after erase half"); 692 | return false; 693 | } 694 | for (size_t i = 0; i < half_ele_num; ++i) { 695 | bench_table.insert(pair_vec[i]); 696 | table.insert(pair_vec[i]); 697 | } 698 | if (!IsTableSame(table, bench_table)) { 699 | LogHelper::log(Error, "Table not same after insert half"); 700 | } 701 | ConstructTable(table, pair_vec, 0); 702 | ConstructTable(bench_table, pair_vec, 0); 703 | if (!IsTableSame(table, bench_table)) { 704 | LogHelper::log(Error, "Table not same after re build in part Erase"); 705 | return false; 706 | } 707 | return true; 708 | } 709 | 710 | template, 711 | class ValueEqual = std::equal_to> 712 | bool TestCopyAndMoveCorrect(Table &table, BenchTable &bench_table, PairVec &pair_vec) { 713 | TEST_TRY { 714 | ConstructTable(table, pair_vec, 0); 715 | ConstructTable(bench_table, pair_vec, 0); 716 | if (!IsTableSame(table, bench_table)) { 717 | LogHelper::log(Error, "Table not same after construct"); 718 | return false; 719 | } 720 | } 721 | TEST_CATCH (std::exception &e) { 722 | LogHelper::log(Error, "Catch exception in the beginning construct of test copy"); 723 | return false; 724 | } 725 | 726 | 727 | TEST_TRY { 728 | for (size_t t = 0; t < 2; ++t) { 729 | 730 | Table *temp_table_ptr = new Table(table); 731 | table.clear(); 732 | if (!IsTableSame(*temp_table_ptr, bench_table) 733 | || !TestPartEraseAndInsert 734 | (*temp_table_ptr, bench_table, pair_vec)) { 735 | LogHelper::log(Error, "Error in table copy constructor"); 736 | return false; 737 | } 738 | 739 | 740 | table = *temp_table_ptr; 741 | delete temp_table_ptr; 742 | temp_table_ptr = nullptr; 743 | if (!IsTableSame(table, bench_table) 744 | || !TestPartEraseAndInsert 745 | (table, bench_table, pair_vec)) { 746 | LogHelper::log(Error, "Error in table copy assignment"); 747 | return false; 748 | } 749 | 750 | Table *temp_table2_ptr = new Table(std::move(table)); 751 | 
table.clear(); 752 | if (!IsTableSame(*temp_table2_ptr, bench_table) 753 | || !TestPartEraseAndInsert 754 | (*temp_table2_ptr, bench_table, pair_vec)) { 755 | LogHelper::log(Error, "Error in table move constructor"); 756 | return false; 757 | } 758 | 759 | 760 | table = std::move(*temp_table2_ptr); 761 | delete temp_table2_ptr; 762 | temp_table2_ptr = nullptr; 763 | if (!IsTableSame(table, bench_table) 764 | || !TestPartEraseAndInsert 765 | (table, bench_table, pair_vec)) { 766 | LogHelper::log(Error, "Error in table move assignment"); 767 | return false; 768 | } 769 | 770 | // temp_table.clear(); 771 | // ConstructTable(temp_table, pair_vec, 0); 772 | // if (!IsTableSame(temp_table, bench_table) 773 | // || !TestPartEraseAndInsert 774 | // (temp_table, bench_table, pair_vec)) { 775 | // LogHelper::log(Error, "Error in for use with moved table"); 776 | // return false; 777 | // } 778 | } 779 | } 780 | TEST_CATCH (std::exception &e) { 781 | LogHelper::log(Error, "Catch exception in test copy and move"); 782 | return false; 783 | } 784 | return true; 785 | } 786 | 787 | template 788 | void Clear(Table *&table_ptr, size_t seed) { 789 | if (seed % 4 == 1) { 790 | delete table_ptr; 791 | table_ptr = new Table(); 792 | } 793 | else { 794 | table_ptr->clear(); 795 | } 796 | } 797 | 798 | template 799 | using base_type = typename std::remove_cv::type>::type; 800 | 801 | template 802 | struct is_base_same : std::false_type {}; 803 | 804 | template 805 | struct is_base_same, base_type>::value>::type> : std::true_type {}; 807 | 808 | 809 | 810 | template> 811 | void PrintPairVec(const PairVec &pair_vec) { 812 | if (pair_vec.size() < 300ULL) { 813 | LogHelper::log(Info, "pair vec has %lu elements", pair_vec.size()); 814 | for (const auto &pair: pair_vec) { 815 | fprintf(stderr, "%s, ", ToString(GetKey{}(pair)).c_str()); 816 | } 817 | fprintf(stderr, "\n"); 818 | } 819 | } 820 | 821 | void HandleExpPtr(std::exception_ptr eptr) // passing by value is ok 822 | { 823 | TEST_TRY { 
824 | if (eptr) { 825 | std::rethrow_exception(eptr); 826 | } 827 | } TEST_CATCH(const std::exception& e) { 828 | LogHelper::log(Error, "Caught exception"); 829 | } 830 | } 831 | 832 | 833 | 834 | template 835 | bool TestInitList(size_t seed, ValueRandomGen value_gen) { 836 | if constexpr (std::is_copy_constructible_v) { 837 | std::mt19937_64 int_engine(seed); 838 | { 839 | size_t temp_seed = int_engine(); 840 | value_gen.seed(temp_seed); 841 | BenchTable bench_table{value_gen(), value_gen(), value_gen(), value_gen(), value_gen(), 842 | value_gen()}; 843 | value_gen.seed(temp_seed); 844 | Table table{value_gen(), value_gen(), value_gen(), value_gen(), value_gen(), 845 | value_gen()}; 846 | 847 | if (!IsTableSame(table, bench_table)) { 848 | LogHelper::log(Error, "Table not same after using init list construct"); 849 | return false; 850 | } 851 | } 852 | { 853 | size_t temp_seed = int_engine(); 854 | value_gen.seed(temp_seed); 855 | BenchTable bench_table; 856 | bench_table.insert( 857 | {value_gen(), value_gen(), value_gen(), value_gen(), value_gen(), value_gen()}); 858 | value_gen.seed(temp_seed); 859 | Table table; 860 | table.insert( 861 | {value_gen(), value_gen(), value_gen(), value_gen(), value_gen(), value_gen()}); 862 | 863 | if (!IsTableSame(table, bench_table)) { 864 | LogHelper::log(Error, "Table not same after using init list construct"); 865 | return false; 866 | } 867 | } 868 | } 869 | return true; 870 | } 871 | 872 | template 873 | bool TestCorrectness(size_t max_elem_num, size_t test_time) { 874 | std::vector test_elem_num_array = {0, max_elem_num}; 875 | static std::random_device random_device; 876 | auto test_seed = random_device(); 877 | // size_t test_seed = 3499889938ULL; 878 | // LogHelper::log(Debug, "test_seed: %lu", test_seed); 879 | std::mt19937_64 random_engine(test_seed); 880 | std::uniform_int_distribution size_gen; 881 | using value_type = typename Table::value_type; 882 | using key_type = typename Table::key_type; 883 | 884 | 885 | 
std::vector src_vec1, src_vec2; 886 | src_vec1.reserve(max_elem_num); 887 | src_vec2.reserve(max_elem_num); 888 | 889 | for (size_t i = 0; i < test_time; ++i) { 890 | test_elem_num_array.push_back(size_gen(random_engine) % (max_elem_num + 1UL)); 891 | } 892 | // using BenchTable = std::unordered_map; 893 | ValueRandomGen value_gen{}; 894 | BenchTable bench_table; 895 | size_t test_index = 0; 896 | Table *table_ptr = new Table(); 897 | 898 | auto gen_seed = value_gen.init_seed; 899 | 900 | TEST_TRY { 901 | 902 | const size_t start_k = 0; 903 | 904 | if (!TestInitList(gen_seed, value_gen)) { 905 | LogHelper::log(Error, "Error in test initlist"); 906 | return false; 907 | } 908 | 909 | 910 | for (size_t k = start_k; k < test_elem_num_array.size(); ++k) { 911 | auto ele_num = test_elem_num_array[k]; 912 | ++test_index; 913 | 914 | 915 | src_vec1.clear(); 916 | src_vec2.clear(); 917 | auto cur_seed = random_engine(); 918 | value_gen.seed(cur_seed); 919 | for (size_t i = 0; i < ele_num; ++i) { 920 | src_vec1.push_back(value_gen()); 921 | } 922 | value_gen.seed(cur_seed); 923 | for (size_t i = 0; i < ele_num; ++i) { 924 | src_vec2.push_back(value_gen()); 925 | } 926 | 927 | bool do_reserve = size_gen(random_engine) & 0x1U; 928 | if (do_reserve) { 929 | table_ptr->reserve(ele_num); 930 | } 931 | 932 | if (!TestInsertCorrectness(*table_ptr, bench_table, src_vec1, src_vec2, test_index)) { 933 | LogHelper::log(Error, 934 | "Fail to pass insert correctness test, element num: %lu, test_index: %lu, test_seed: %lu, gen_seed: %lu", 935 | ele_num, test_index, test_seed, gen_seed); 936 | #if FPH_DEBUG_ERROR 937 | table_ptr->PrintTableParams(); 938 | // PrintPairVec(src_vec1); 939 | #endif 940 | 941 | return false; 942 | } 943 | Clear(table_ptr, test_index); 944 | 945 | if constexpr (std::is_copy_constructible_v) { 946 | src_vec1.clear(); 947 | src_vec2.clear(); 948 | cur_seed = random_engine(); 949 | value_gen.seed(cur_seed); 950 | for (size_t i = 0; i < ele_num; ++i) { 951 | 
src_vec1.push_back(value_gen()); 952 | } 953 | value_gen.seed(cur_seed); 954 | for (size_t i = 0; i < ele_num; ++i) { 955 | src_vec2.push_back(value_gen()); 956 | } 957 | if (!TestCopyAndMoveCorrect(*table_ptr, bench_table, src_vec1)) { 958 | 959 | LogHelper::log(Error, "Fail to pass copy and move test, element num: %lu, test_seed: %lu, gen_seed: %lu", ele_num, test_seed, gen_seed); 960 | #if FPH_DEBUG_ERROR 961 | table_ptr->PrintTableParams(); 962 | PrintPairVec(src_vec1); 963 | #endif 964 | return false; 965 | } 966 | Clear(table_ptr, test_index); 967 | } 968 | 969 | 970 | src_vec1.clear(); 971 | src_vec2.clear(); 972 | cur_seed = random_engine(); 973 | value_gen.seed(cur_seed); 974 | for (size_t i = 0; i < ele_num; ++i) { 975 | src_vec1.push_back(value_gen()); 976 | } 977 | value_gen.seed(cur_seed); 978 | for (size_t i = 0; i < ele_num; ++i) { 979 | src_vec2.push_back(value_gen()); 980 | } 981 | if (!TestEmplaceCorrectness1(*table_ptr, bench_table, src_vec1, src_vec2)) { 982 | LogHelper::log(Error, "Fail to pass emplace correctness test, element num: %lu, test_seed: %lu, gen_seed: %lu", 983 | ele_num, test_seed, gen_seed); 984 | #if FPH_DEBUG_ERROR 985 | table_ptr->PrintTableParams(); 986 | // PrintPairVec(src_vec); 987 | #endif 988 | return false; 989 | } 990 | Clear(table_ptr, test_index); 991 | 992 | src_vec1.clear(); 993 | src_vec2.clear(); 994 | cur_seed = random_engine(); 995 | value_gen.seed(cur_seed); 996 | for (size_t i = 0; i < ele_num; ++i) { 997 | src_vec1.push_back(value_gen()); 998 | } 999 | value_gen.seed(cur_seed); 1000 | for (size_t i = 0; i < ele_num; ++i) { 1001 | src_vec2.push_back(value_gen()); 1002 | } 1003 | if (!TestEraseCorrectness(*table_ptr, bench_table, src_vec1, src_vec2, size_gen(random_engine))) { 1004 | LogHelper::log(Error, 1005 | "Fail to pass erase correctness test, element num: %lu, test_index: %lu, test_seed: %lu, gen_seed: %lu", 1006 | ele_num, test_index, test_seed, gen_seed); 1007 | #if FPH_DEBUG_ERROR 1008 | 
table_ptr->PrintTableParams(); 1009 | // PrintPairVec(src_vec); 1010 | PrintTableKeys(*table_ptr); 1011 | PrintTableKeys(bench_table); 1012 | #endif 1013 | return false; 1014 | } 1015 | Clear(table_ptr, test_index); 1016 | 1017 | 1018 | if constexpr(is_pair::value) { 1019 | if constexpr (is_base_same::value) { 1020 | std::vector k_vec1, k_vec2; 1021 | std::vector v_vec1, v_vec2; 1022 | k_vec1.reserve(ele_num); 1023 | v_vec1.reserve(ele_num); 1024 | k_vec2.reserve(ele_num); 1025 | v_vec2.reserve(ele_num); 1026 | cur_seed = random_engine(); 1027 | value_gen.seed(cur_seed); 1028 | for (size_t i = 0; i < ele_num; ++i) { 1029 | auto temp_pair = value_gen(); 1030 | k_vec1.push_back(std::move(temp_pair.first)); 1031 | v_vec1.push_back(std::move(temp_pair.second)); 1032 | } 1033 | value_gen.seed(cur_seed); 1034 | for (size_t i = 0; i < ele_num; ++i) { 1035 | auto temp_pair = value_gen(); 1036 | k_vec2.push_back(std::move(temp_pair.first)); 1037 | v_vec2.push_back(std::move(temp_pair.second)); 1038 | } 1039 | if (!TestEmplaceCorrectness2(*table_ptr, bench_table, k_vec1, v_vec1, k_vec2, v_vec2)) { 1040 | LogHelper::log(Error, 1041 | "Fail to pass emplace2 correctness test, element num: %lu", 1042 | ele_num); 1043 | return false; 1044 | } 1045 | Clear(table_ptr, test_index); 1046 | 1047 | k_vec1.clear(); k_vec2.clear(); 1048 | v_vec1.clear(); v_vec2.clear(); 1049 | cur_seed = random_engine(); 1050 | value_gen.seed(cur_seed); 1051 | for (size_t i = 0; i < ele_num; ++i) { 1052 | auto temp_pair = value_gen(); 1053 | k_vec1.push_back(std::move(temp_pair.first)); 1054 | v_vec1.push_back(std::move(temp_pair.second)); 1055 | } 1056 | value_gen.seed(cur_seed); 1057 | for (size_t i = 0; i < ele_num; ++i) { 1058 | auto temp_pair = value_gen(); 1059 | k_vec2.push_back(std::move(temp_pair.first)); 1060 | v_vec2.push_back(std::move(temp_pair.second)); 1061 | } 1062 | if (!TestTryEmplaceCorrectness(*table_ptr, bench_table, k_vec1, v_vec1, k_vec2, v_vec2)) { 1063 | LogHelper::log(Error, 
1064 | "Fail to pass try_emplace correctness test, element num: %lu", 1065 | ele_num); 1066 | return false; 1067 | } 1068 | Clear(table_ptr, test_index); 1069 | 1070 | k_vec1.clear(); k_vec2.clear(); 1071 | v_vec1.clear(); v_vec2.clear(); 1072 | cur_seed = random_engine(); 1073 | value_gen.seed(cur_seed); 1074 | for (size_t i = 0; i < ele_num; ++i) { 1075 | auto temp_pair = value_gen(); 1076 | k_vec1.push_back(std::move(temp_pair.first)); 1077 | v_vec1.push_back(std::move(temp_pair.second)); 1078 | } 1079 | value_gen.seed(cur_seed); 1080 | for (size_t i = 0; i < ele_num; ++i) { 1081 | auto temp_pair = value_gen(); 1082 | k_vec2.push_back(std::move(temp_pair.first)); 1083 | v_vec2.push_back(std::move(temp_pair.second)); 1084 | } 1085 | 1086 | if (!TestOperatorCorrectness(*table_ptr, bench_table, k_vec1, v_vec1, k_vec2, v_vec2)) { 1087 | LogHelper::log(Error, 1088 | "Fail to pass operator[] correctness test, element num: %lu", 1089 | ele_num); 1090 | return false; 1091 | } 1092 | Clear(table_ptr, test_index); 1093 | } 1094 | } 1095 | } 1096 | delete table_ptr; 1097 | 1098 | } TEST_CATCH(...) 
// =============================================================================
// Reader's guide to the two physical lines below. Numbers in brackets are the
// line markers embedded in the text; several definitions share one physical
// line. Template argument lists are not present in this text, so anything said
// about template parameters is inferred from usage and marked as an assumption.
//
// [1099-1107] Tail of a function that begins before this span (its exception
//   handler and the final `return true;`). Not documented here.
//
// [1109-1131] RandomPairGen
//   Generates pairs from two component generators, t1_gen and t2_gen.
//   - The constructor draws init_seed from std::random_device and seeds
//     random_engine, t1_gen and t2_gen with that single value.
//   - operator()() returns {t1_gen(), t2_gen()}.
//   - seed(s) re-seeds random_engine, t1_gen and t2_gen with the same s.
//     Presumably this lets a caller replay an identical sequence of pairs by
//     seeding twice with one value - TODO confirm T1RNG/T2RNG are deterministic
//     for a given seed.
//   - init_seed is public; the three generators are protected. The members
//     visible here only ever seed random_engine; none of them draws from it.
//
// [1135-1192] TestTableLookUp
//   Lookup micro-benchmark. Returns {look_up_ns, useless_sum}.
//   1. Returns {0, 0} immediately when input_vec is empty.
//   2. For DYNAMIC_FPH_TABLE / META_FPH_TABLE, applies max_load_factor to the
//      table. Then builds the table from input_vec through ConstructTable,
//      using a value drawn from a std::mt19937_64 seeded with `seed`.
//   3. Shuffles pair_vec, a private copy of lookup_vec, with the same engine.
//   4. Timed region: lookup_time calls to table.find(). look_up_index is
//      incremented before each use and wrapped by subtracting key_num.
//        KEY_IN      - the iterator is dereferenced without comparing it to
//                      end(), i.e. every probed key is expected to be present.
//        KEY_NOT_IN  - finding a key logs an error and returns {0, 0}.
//        otherwise   - find_it->second is added only when the key is found.
//   5. useless_sum accumulates the values that were read (presumably so the
//      lookups have an observable result). When `verbose`, the per-call
//      average and the sum are logged.
//   NOTE(review): key_num is input_vec.size(), but the index is applied to
//   pair_vec (the copy of lookup_vec). If lookup_vec were shorter than
//   input_vec the index would run past its end - confirm all callers pass
//   vectors of equal length.
//   NOTE(review): the KEY_IN branch reads `second` through a reinterpret_cast
//   while the fallback branch adds find_it->second directly, so the fallback
//   needs a mapped type that can be added to uint64_t.
//   NOTE(review): the duration_cast target is not visible; the result is named
//   look_up_ns and logged as "ns per call" - assumes nanoseconds.
//
// [1194-1213] TestTableConstruct
//   Times one construction of `table` from pair_vec and returns pass_ns.
//   - test_time is accepted but unused; the (void) cast silences the warning.
//   - table.clear() runs before the clock starts. The max_load_factor call
//     (fph table types only) and ConstructTable(..., do_reserve, true) are both
//     inside the timed region.
//   - do_reserve, verbose and table_type are not function parameters;
//     presumably they come from the template header - TODO confirm.
//   - When `verbose`, logs the elapsed time in seconds (pass_ns / 1e9).
// =============================================================================
{ 1099 | 1100 | auto e_ptr = std::current_exception(); 1101 | LogHelper::log(Error, "Got exception"); 1102 | HandleExpPtr(e_ptr); 1103 | return false; 1104 | } 1105 | 1106 | return true; 1107 | } 1108 | 1109 | template, class T2RNG = fph::dynamic::RandomGenerator> 1110 | class RandomPairGen { 1111 | public: 1112 | RandomPairGen(): init_seed(std::random_device{}()), random_engine(init_seed), t1_gen(init_seed), t2_gen(init_seed) {} 1113 | 1114 | std::pair operator()() { 1115 | return {t1_gen(), t2_gen()}; 1116 | } 1117 | 1118 | template 1119 | void seed(SeedType seed) { 1120 | random_engine.seed(seed); 1121 | t1_gen.seed(seed); 1122 | t2_gen.seed(seed); 1123 | } 1124 | 1125 | size_t init_seed; 1126 | 1127 | protected: 1128 | std::mt19937_64 random_engine; 1129 | T1RNG t1_gen; 1130 | T2RNG t2_gen; 1131 | }; 1132 | 1133 | 1134 | 1135 | template > 1137 | std::tuple TestTableLookUp(Table &table, size_t lookup_time, const PairVec &input_vec, 1138 | const PairVec &lookup_vec, size_t seed, 1139 | double max_load_factor = 0.9, double c = 2.0) { 1140 | 1141 | size_t look_up_index = 0; 1142 | size_t key_num = input_vec.size(); 1143 | uint64_t useless_sum = 0; 1144 | if (input_vec.empty()) { 1145 | return {0, 0}; 1146 | } 1147 | std::mt19937_64 random_engine(seed); 1148 | std::uniform_int_distribution random_dis; 1149 | auto pair_vec = lookup_vec; 1150 | 1151 | if constexpr(table_type == DYNAMIC_FPH_TABLE || table_type == META_FPH_TABLE) { 1152 | table.max_load_factor(max_load_factor); 1153 | } 1154 | ConstructTable(table, input_vec, random_dis(random_engine), c, true, false); 1155 | std::shuffle(pair_vec.begin(), pair_vec.end(), random_engine); 1156 | 1157 | auto look_up_t0 = std::chrono::high_resolution_clock::now(); 1158 | for (size_t t = 0; t < lookup_time; ++t) { 1159 | ++look_up_index; 1160 | if FPH_UNLIKELY(look_up_index >= key_num) { 1161 | look_up_index -= key_num; 1162 | } 1163 | if constexpr (LOOKUP_EXP == KEY_IN) { 1164 | auto find_it =
table.find(GetKey{}(pair_vec[look_up_index])); 1165 | useless_sum += *reinterpret_cast(std::addressof(find_it->second)); 1166 | } 1167 | else if constexpr (LOOKUP_EXP == KEY_NOT_IN) { 1168 | auto find_it = table.find(GetKey{}(pair_vec[look_up_index])); 1169 | if FPH_UNLIKELY(find_it != table.end()) { 1170 | LogHelper::log(Error, "Find key %s in table %s", 1171 | ToString(GetKey{}(pair_vec[look_up_index])).c_str(), 1172 | GetTableName(table_type).c_str()); 1173 | return {0, 0}; 1174 | } 1175 | } 1176 | else { 1177 | auto find_it = table.find(GetKey{}(pair_vec[look_up_index])); 1178 | if (find_it != table.end()) { 1179 | useless_sum += find_it->second; 1180 | } 1181 | } 1182 | 1183 | } 1184 | auto look_up_t1 = std::chrono::high_resolution_clock::now(); 1185 | auto look_up_ns = std::chrono::duration_cast(look_up_t1 - look_up_t0).count(); 1186 | if constexpr (verbose) { 1187 | LogHelper::log(Info, "%s look up use %.3f ns per call, use_less sum: %lu", 1188 | GetTableName(table_type).c_str(), look_up_ns * 1.0 / lookup_time, 1189 | useless_sum); 1190 | } 1191 | return {look_up_ns, useless_sum}; 1192 | } 1193 | 1194 | template> 1196 | uint64_t TestTableConstruct(Table &table, const PairVec &pair_vec, size_t seed = 0, double c = 2.0, 1197 | double max_load_factor = 0.9, 1198 | size_t test_time = 0) { 1199 | (void)test_time; 1200 | table.clear(); 1201 | auto begin_time = std::chrono::high_resolution_clock::now(); 1202 | if constexpr (table_type == DYNAMIC_FPH_TABLE || table_type == META_FPH_TABLE) { 1203 | table.max_load_factor(max_load_factor); 1204 | } 1205 | ConstructTable(table, pair_vec, seed, c, do_reserve, true); 1206 | auto end_time = std::chrono::high_resolution_clock::now(); 1207 | auto pass_ns = std::chrono::duration_cast(end_time - begin_time).count(); 1208 | if constexpr (verbose) { 1209 | LogHelper::log(Info, "%s construct use time %.6f seconds", 1210 | GetTableName(table_type).c_str(), pass_ns / (1e+9)); 1211 | } 1212 | return pass_ns; 1213 | } 1214 | 1215 |
// =============================================================================
// Reader's guide to the physical lines below. Numbers in brackets are the line
// markers embedded in the text; several definitions share one physical line.
// Template argument lists are not present in this text, so anything said about
// template parameters is inferred from usage and marked as an assumption.
//
// [1216-1235] TestTableIterate
//   Iteration micro-benchmark. Returns {pass_ns, useless_sum}.
//   - For DYNAMIC_FPH_TABLE / META_FPH_TABLE, applies max_load_factor. Then
//     builds the table with ConstructTable(table, input_vec, seed, c).
//     Construction happens before the clock starts.
//   - Timed region: iterate_time full traversals from begin() to end(). Each
//     element's `second` is read through a reinterpret_cast (target type not
//     visible here) and added to useless_sum.
//   - The existing TODO records that this read is only meaningful when
//     `second` is an integer.
//
// [1237-1245] MutableValue
//   Trait exposing a nested `type`. The primary template yields T unchanged.
//   The partial specialisation is selected through std::enable_if (the
//   condition's arguments are not visible) and yields a std::pair whose second
//   member is T::second_type. Presumably the first member is T::first_type with
//   const removed, so the pair can be stored in a std::vector and assigned -
//   TODO confirm.
//
// [1247-1345] TestTablePerformance
//   End-to-end benchmark for one table type; logs a single Info summary line.
//   1. Seeds a std::mt19937_64 and value_gen with `seed`.
//   2. Draws element_num values into src_vec, skipping any whose key is already
//      in key_set. src_vec can therefore be shorter than element_num.
//   3. Construction: two loops of construct_time runs, each on a fresh Table,
//      accumulate TestTableConstruct results into total_reserve_construct_ns
//      and total_no_reserve_construct_ns. The variable names suggest the two
//      calls differ in whether space is reserved; the distinguishing template
//      arguments are not visible. print_load_factor is taken from the first
//      loop.
//   4. Hit lookup: TestTableLookUp with src_vec as both build and probe set.
//   5. Miss lookup: lookup_vec receives src_vec.size() values, each redrawn
//      until its key is absent from key_set. TestTableLookUp then runs with
//      src_vec as build set and lookup_vec as probe set. Only the time is kept.
//   6. Iteration: iterate_time = ceil(lookup_time / element_num) passes through
//      TestTableIterate.
//   NOTE(review): step 6 divides by element_num; element_num == 0 would divide
//   by zero.
//   NOTE(review): the summary prints element_num and divides iteration time by
//   iterate_time * element_num, although the table holds src_vec.size()
//   entries (step 2 may drop duplicates).
//
// [1349-1473] TestSet
//   Correctness driver for the set containers. The whole body is compiled only
//   when TEST_TABLE_CORRECT is non-zero. Calls TestCorrectness eight times,
//   first for the Meta sets and then for the Dynamic sets, with these
//   arguments:
//     7-bit : (128 * max_load_factor_upper_limit(), 4000)
//     15-bit: (3000, 400) and (65536 / 2 * TEST_CORR_MAX_LOAD_FACTOR, 10)
//     31-bit: (500000ULL, 3)
//   The container named in each log message is taken as the one under test;
//   the template arguments of the calls are not visible. A passing run logs at
//   Info. The first failing run logs at Error and returns, so later
//   configurations are not attempted.
//   NOTE(review): the pass messages use "%d" but pass size_t(...) or 500000ULL.
//   LogHelper::log is described as printf-style, so the conversion does not
//   match the argument type (%zu / %llu would) - confirm and fix.
//
// [1475-1483] FixSizeStruct
//   Fixed-size payload: a char array `data` of `size` bytes, zero-initialised
//   by the constexpr noexcept default constructor. operator== compares all
//   `size` bytes with memcmp.
//
// [1488-1671] TestFPH
//   Correctness driver for the map containers (KeyType uint32_t, ValueType
//   uint64_t). The whole body is compiled only when TEST_TABLE_CORRECT is
//   non-zero. Logs sizeof(DyFphMap15bit), sizeof(MetaFphMap15bit) and
//   sizeof(StdHashTable) at Debug. Then runs two scopes, Meta maps first and
//   Dynamic maps second, each making four TestCorrectness calls:
//     (floor(128.0 * max_load_factor_upper_limit()), 4000)
//     (3000, 400)
//     (floor(65536.0 / 2.0 * TEST_CORR_MAX_LOAD_FACTOR), 10)
//     (500000, 1)
//   Every call logs pass or fail with the element bound, and the function
//   returns on the first failure.
//   NOTE(review): random_device and random_gen are declared but not used by
//   any statement visible in this function.
//
// [1673-1739] TestMapPerformance
//   Benchmark entry point for uint64_t -> uint64_t maps. Fixed settings:
//   KEY_NUM = 1'000'000, LOOKUP_TIME = 100000000, CONSTRUCT_TIME = 2,
//   TEST_MAX_LOAD_FACTOR = 0.6, c = 2.0. Draws one performance_seed from
//   std::random_device and passes it to three TestTablePerformance calls, so
//   every table is measured on the same generated data. Presumably there is
//   one call each for TestMetaFphMap, TestDyFphMap and StdHashTable; the
//   template arguments of the calls are not visible - TODO confirm.
// =============================================================================
template > 1217 | std::tuple TestTableIterate(Table &table, size_t iterate_time, const PairVec &input_vec, 1218 | size_t seed, double max_load_factor = 0.9, double c = 2.0) { 1219 | if constexpr(table_type == DYNAMIC_FPH_TABLE || table_type == META_FPH_TABLE) { 1220 | table.max_load_factor(max_load_factor); 1221 | } 1222 | ConstructTable(table, input_vec, seed, c); 1223 | auto start_time = std::chrono::high_resolution_clock::now(); 1224 | uint64_t useless_sum = 0; 1225 | for (size_t t = 0; t < iterate_time; ++t) { 1226 | for (auto it = table.begin(); it != table.end(); ++it) { 1227 | // TODO: if second is not int 1228 | useless_sum += *reinterpret_cast(std::addressof(it->second)); 1229 | } 1230 | } 1231 | auto end_time = std::chrono::high_resolution_clock::now(); 1232 | uint64_t pass_ns = std::chrono::duration_cast(end_time - start_time).count(); 1233 | return {pass_ns, useless_sum}; 1234 | 1235 | } 1236 | 1237 | template 1238 | struct MutableValue { 1239 | using type = T; 1240 | }; 1241 | 1242 | template 1243 | struct MutableValue::value>::type> { 1244 | using type = std::pair::type, typename T::second_type>; 1245 | }; 1246 | 1247 | template> 1249 | void TestTablePerformance(size_t element_num, size_t construct_time, size_t lookup_time, 1250 | size_t seed = 0, double c = 2.0, double max_load_factor = 0.9) { 1251 | std::mt19937_64 random_engine(seed); 1252 | std::uniform_int_distribution size_gen; 1253 | // using value_type = typename Table::value_type; 1254 | using mutable_value_type = typename MutableValue::type; 1255 | 1256 | using key_type = typename Table::key_type; 1257 | ValueRandomGen value_gen{}; 1258 | value_gen.seed(seed); 1259 | 1260 | std::unordered_set key_set; 1261 | key_set.reserve(element_num); 1262 | 1263 | std::vector src_vec; 1264 | src_vec.reserve(element_num); 1265 | for (size_t i = 0; i < element_num; ++i) { 1266 | auto temp_pair = value_gen(); 1267 | if (key_set.find(GetKey{}(temp_pair)) != key_set.end()) { 1268 | continue; 1269 | } 1270 |
src_vec.push_back(temp_pair); 1271 | key_set.insert(GetKey{}(temp_pair)); 1272 | } 1273 | 1274 | float print_load_factor = 0; 1275 | 1276 | size_t construct_seed = size_gen(random_engine); 1277 | size_t total_reserve_construct_ns = 0; 1278 | 1279 | for (size_t t = 0; t < construct_time; ++t) { 1280 | Table table; 1281 | uint64_t temp_construct_ns = TestTableConstruct 1282 | (table, src_vec, construct_seed, c, max_load_factor); 1283 | total_reserve_construct_ns += temp_construct_ns; 1284 | print_load_factor = table.load_factor(); 1285 | } 1286 | 1287 | size_t total_no_reserve_construct_ns = 0; 1288 | for (size_t t = 0; t < construct_time; ++t) { 1289 | Table table; 1290 | uint64_t temp_construct_ns = TestTableConstruct 1291 | (table, src_vec, construct_seed, c, max_load_factor); 1292 | total_no_reserve_construct_ns += temp_construct_ns; 1293 | } 1294 | 1295 | uint64_t in_lookup_ns = 0, useless_sum = 0; 1296 | { 1297 | Table table; 1298 | std::tie(in_lookup_ns, useless_sum) = TestTableLookUp, GetKey>(table, lookup_time, src_vec, 1300 | src_vec, construct_seed, max_load_factor, c); 1301 | } 1302 | 1303 | 1304 | std::vector lookup_vec; 1305 | lookup_vec.reserve(src_vec.size()); 1306 | for (size_t i = 0; i < src_vec.size(); ++i) { 1307 | auto temp_pair = value_gen(); 1308 | 1309 | while (key_set.find(GetKey{}(temp_pair)) != key_set.end()) { 1310 | 1311 | temp_pair = value_gen(); 1312 | } 1313 | lookup_vec.push_back(temp_pair); 1314 | } 1315 | 1316 | uint64_t out_lookup_ns = 0; 1317 | { 1318 | Table table; 1319 | std::tie(out_lookup_ns, std::ignore) = TestTableLookUp(table, lookup_time, src_vec, 1320 | lookup_vec, construct_seed, max_load_factor, c); 1321 | } 1322 | 1323 | uint64_t iterate_ns = 0, it_useless_sum = 0; 1324 | uint64_t iterate_time = (lookup_time + element_num - 1) / element_num; 1325 | { 1326 | 1327 | Table table; 1328 | std::tie(iterate_ns, it_useless_sum) = TestTableIterate, GetKey>( 1330 | table, iterate_time, src_vec, construct_seed, max_load_factor,
c); 1331 | } 1332 | 1333 | LogHelper::log(Info, "%s %lu elements, sizeof(value_type)=%lu, load_factor: %.3f, construct with reserve avg use %.6f s," 1334 | "construct without reserve avg use %.6f s, look up key in the table use %.3f ns per key," 1335 | "look up key not in the table use %.3f ns per key, " 1336 | "iterate use %.3f ns per value, useless_sum: %lu", 1337 | GetTableName(table_type).c_str(), element_num, sizeof(value_type), print_load_factor, 1338 | total_reserve_construct_ns / (1e+9) / construct_time, total_no_reserve_construct_ns / (1e+9) / construct_time, 1339 | in_lookup_ns * 1.0 / lookup_time, out_lookup_ns * 1.0 / lookup_time, 1340 | iterate_ns * 1.0 / (iterate_time * element_num), useless_sum + it_useless_sum); 1341 | 1342 | 1343 | 1344 | 1345 | } 1346 | 1347 | 1348 | 1349 | void TestSet() { 1350 | #if TEST_TABLE_CORRECT 1351 | // using KeyType = uint64_t; 1352 | // using KeyType = uint64_t*; 1353 | // using KeyType = std::string; 1354 | using KeyType = TestKeyClass; 1355 | // using SeedHash = fph::SimpleSeedHash; 1356 | using SeedHash = TestKeySeedHash; 1357 | // using SeedHash = fph::MixSeedHash; 1358 | // using SeedHash = fph::StrongSeedHash; 1359 | // using BucketParamType = uint32_t; 1360 | 1361 | // using RandomKeyGenerator = fph::dynamic::RandomGenerator; 1362 | using RandomKeyGenerator = KeyClassRNG; 1363 | 1364 | // fph::DynamicFphSet, 1365 | // std::allocator, BucketParamType, RandomKeyGenerator> dy_fph_set; 1366 | 1367 | using DyFphSet7bit = fph::DynamicFphSet, 1368 | std::allocator, uint8_t, RandomKeyGenerator>; 1369 | using DyFphSet15bit = fph::DynamicFphSet, 1370 | std::allocator, uint16_t, RandomKeyGenerator>; 1371 | using DyFphSet31bit = fph::DynamicFphSet, 1372 | std::allocator, uint32_t, RandomKeyGenerator>; 1373 | 1374 | using MetaFphSet7bit = fph::MetaFphSet, 1375 | std::allocator, uint8_t>; 1376 | using MetaFphSet15bit = fph::MetaFphSet, 1377 | std::allocator, uint16_t>; 1378 | using MetaFphSet31bit = fph::MetaFphSet, 1379 |
std::allocator, uint32_t>; 1380 | 1381 | // using HashMethod = robin_hood::hash; 1382 | // using HashMethod = absl::Hash; 1383 | using HashMethod = TestKeyHash; 1384 | // using HashMethod = std::hash; 1385 | 1386 | constexpr double TEST_CORR_MAX_LOAD_FACTOR = 0.7; 1387 | 1388 | 1389 | // using BenchTable = absl::flat_hash_set; 1390 | using BenchTable = std::unordered_set; 1391 | 1392 | 1393 | { 1394 | 1395 | auto meta_max_load_factor_upper_limit = MetaFphSet7bit::max_load_factor_upper_limit(); 1396 | if (TestCorrectness(128 * meta_max_load_factor_upper_limit, 1397 | 4000)) { 1398 | LogHelper::log(Info, "Pass MetaFphSet7Bit test with %d keys", size_t(128 * meta_max_load_factor_upper_limit)); 1399 | } 1400 | else { 1401 | LogHelper::log(Error, "Fail in MetaFphSet7Bit test"); 1402 | return; 1403 | } 1404 | 1405 | if (TestCorrectness(3000, 1406 | 400)) { 1407 | LogHelper::log(Info, "Pass MetaFphSet15Bit test with %d keys", 3000); 1408 | } 1409 | else { 1410 | LogHelper::log(Error, "Fail in MetaFphSet15Bit test"); 1411 | return; 1412 | } 1413 | 1414 | if (TestCorrectness(65536 / 2 * TEST_CORR_MAX_LOAD_FACTOR, 1415 | 10)) { 1416 | LogHelper::log(Info, "Pass MetaFphSet15Bit test with %d keys", size_t(65536 / 2 * TEST_CORR_MAX_LOAD_FACTOR)); 1417 | } 1418 | else { 1419 | LogHelper::log(Error, "Fail in MetaFphSet15Bit test"); 1420 | return; 1421 | } 1422 | if (TestCorrectness(500000ULL, 1423 | 3)) { 1424 | LogHelper::log(Info, "Pass MetaFphSet31Bit test with %d keys", 500000ULL); 1425 | } 1426 | else { 1427 | LogHelper::log(Error, "Fail in MetaFphSet31Bit test"); 1428 | return; 1429 | } 1430 | 1431 | auto max_load_factor_upper_limit = DyFphSet7bit::max_load_factor_upper_limit(); 1432 | if (TestCorrectness(128 * max_load_factor_upper_limit, 1433 | 4000)) { 1434 | LogHelper::log(Info, "Pass DyFphSet7Bit test with %d keys", size_t(128 * max_load_factor_upper_limit)); 1435 | } 1436 | else { 1437 | LogHelper::log(Error, "Fail in DyFphSet7Bit test"); 1438 | return; 1439 | } 1440
| 1441 | if (TestCorrectness(3000, 1442 | 400)) { 1443 | LogHelper::log(Info, "Pass DyFphSet15Bit test with %d keys", 3000); 1444 | } 1445 | else { 1446 | LogHelper::log(Error, "Fail in DyFphSet15Bit test"); 1447 | return; 1448 | } 1449 | 1450 | if (TestCorrectness(65536 / 2 * TEST_CORR_MAX_LOAD_FACTOR, 1451 | 10)) { 1452 | LogHelper::log(Info, "Pass DyFphSet15Bit test with %d keys", size_t(65536 / 2 * TEST_CORR_MAX_LOAD_FACTOR)); 1453 | } 1454 | else { 1455 | LogHelper::log(Error, "Fail in DyFphSet15Bit test"); 1456 | return; 1457 | } 1458 | 1459 | if (TestCorrectness(500000ULL, 1460 | 3)) { 1461 | LogHelper::log(Info, "Pass DyFphSet31Bit test with %d keys", 500000ULL); 1462 | } 1463 | else { 1464 | LogHelper::log(Error, "Fail in DyFphSet31Bit test"); 1465 | return; 1466 | } 1467 | 1468 | 1469 | 1470 | }; 1471 | #endif 1472 | 1473 | } 1474 | 1475 | template 1476 | struct FixSizeStruct { 1477 | constexpr FixSizeStruct()noexcept: data{0} {} 1478 | char data[size]; 1479 | 1480 | friend bool operator==(const FixSizeStruct&a, const FixSizeStruct&b) { 1481 | return memcmp(a.data, b.data, size) == 0; 1482 | } 1483 | }; 1484 | 1485 | 1486 | 1487 | 1488 | void TestFPH() { 1489 | #if TEST_TABLE_CORRECT 1490 | using KeyType = uint32_t; 1491 | // using KeyType = TestKeyClass; 1492 | // using KeyType = std::string; 1493 | // using KeyType = const uint64_t*; 1494 | // using KeyType = enum { 1495 | // Type0, 1496 | // Type1, 1497 | // Type2, 1498 | // Type3, 1499 | // }; 1500 | using ValueType = uint64_t; 1501 | // using ValueType = TestValueClass; 1502 | // using ValueType = std::string; 1503 | // using ValueType = FixSizeStruct<96>; 1504 | // using BucketParamType = uint32_t; 1505 | 1506 | // using KeyRandomGen = KeyClassRNG; 1507 | using KeyRandomGen = fph::dynamic::RandomGenerator; 1508 | 1509 | using ValueRandomGen = fph::dynamic::RandomGenerator; 1510 | // using ValueRandomGen = ValueClassRNG; 1511 | 1512 | using RandomGenerator = RandomPairGen; 1513 | 1514 | 1515 | using
SeedHash = fph::SimpleSeedHash; 1516 | // using SeedHash = fph::StrongSeedHash; 1517 | // using SeedHash = fph::MixSeedHash; 1518 | // using SeedHash = TestKeySeedHash; 1519 | 1520 | using DyFphMap7bit = fph::DynamicFphMap, 1521 | std::allocator>, uint8_t, KeyRandomGen>; 1522 | using DyFphMap15bit = fph::DynamicFphMap, 1523 | std::allocator>, uint16_t, KeyRandomGen>; 1524 | using DyFphMap31bit = fph::DynamicFphMap, 1525 | std::allocator>, uint32_t, KeyRandomGen>; 1526 | // using DyFphMap63bit = fph::DynamicFphMap, 1527 | // std::allocator>, uint64_t, KeyRandomGen>; 1528 | 1529 | using MetaFphMap7bit = fph::MetaFphMap, 1530 | std::allocator>, uint8_t>; 1531 | using MetaFphMap15bit = fph::MetaFphMap, 1532 | std::allocator>, uint16_t>; 1533 | using MetaFphMap31bit = fph::MetaFphMap, 1534 | std::allocator>, uint32_t>; 1535 | 1536 | static_assert(is_pair::value); 1537 | 1538 | // using HashMethod = robin_hood::hash; 1539 | // using HashMethod = absl::Hash; 1540 | using HashMethod = std::hash; 1541 | // using HashMethod = TestKeyHash; 1542 | 1543 | // using BenchTable = absl::flat_hash_map; 1544 | using BenchTable = std::unordered_map; 1545 | 1546 | using StdHashTable = std::unordered_map; 1547 | // using AbslFlatTable = absl::flat_hash_map; 1548 | // using RobinFlatTable = robin_hood::unordered_flat_map; 1549 | // using SkaFlatTable = ska::flat_hash_map; 1550 | 1551 | 1552 | LogHelper::log(Debug, "sizeof DyFphMap15bit is %lu, sizeof MetaFphMap15bit is %lu, " 1553 | "sizeof StdHashTable is %lu", 1554 | sizeof(DyFphMap15bit), sizeof(MetaFphMap15bit), sizeof(StdHashTable)); 1555 | 1556 | // absl::flat_hash_map absl_map; 1557 | // robin_hood::unordered_flat_map robin_hood_map; 1558 | 1559 | // using PairType = std::pair; 1560 | constexpr double TEST_CORR_MAX_LOAD_FACTOR = 0.7; 1561 | 1562 | 1563 | std::random_device random_device; 1564 | std::uniform_int_distribution random_gen; 1565 | 1566 | 1567 | 1568 | 1569 | { 1570 | auto max_load_factor_upper_limit =
MetaFphMap7bit::max_load_factor_upper_limit(); 1571 | bool correct_test_ret; 1572 | 1573 | size_t test_element_up_bound = std::floor(128.0 * max_load_factor_upper_limit); 1574 | correct_test_ret = TestCorrectness 1575 | (test_element_up_bound, 4000); 1576 | if (!correct_test_ret) { 1577 | LogHelper::log(Error, "MetaFphMap7bit Fail to pass correct test with %lu max elements", 1578 | test_element_up_bound); 1579 | return; 1580 | } else { 1581 | LogHelper::log(Info, "MetaFphMap7bit Pass correctness test with %lu max elements", 1582 | test_element_up_bound); 1583 | } 1584 | 1585 | test_element_up_bound = 3000; 1586 | correct_test_ret = TestCorrectness(test_element_up_bound, 400); 1587 | if (!correct_test_ret) { 1588 | LogHelper::log(Error, "MetaFphMap15bit Fail to pass correct test with %lu max elements", 1589 | test_element_up_bound); 1590 | return; 1591 | } else { 1592 | LogHelper::log(Info, "MetaFphMap15bit Pass correctness test with %lu max elements", 1593 | test_element_up_bound); 1594 | } 1595 | 1596 | test_element_up_bound = std::floor(65536.0 / 2.0 * TEST_CORR_MAX_LOAD_FACTOR); 1597 | correct_test_ret = TestCorrectness(test_element_up_bound, 10); 1598 | if (!correct_test_ret) { 1599 | LogHelper::log(Error, "MetaFphMap15bit Fail to pass correct test with %lu max elements", 1600 | test_element_up_bound); 1601 | return; 1602 | } else { 1603 | LogHelper::log(Info, "MetaFphMap15bit Pass correctness test with %lu max elements", 1604 | test_element_up_bound); 1605 | } 1606 | 1607 | test_element_up_bound = 500000ULL; 1608 | correct_test_ret = TestCorrectness(test_element_up_bound, 1); 1609 | if (!correct_test_ret) { 1610 | LogHelper::log(Error, "MetaFphMap31bit Fail to pass correct test with %lu max elements", 1611 | test_element_up_bound); 1612 | return; 1613 | } else { 1614 | LogHelper::log(Info, "MetaFphMap31bit Pass correctness test with %lu max elements", 1615 | test_element_up_bound); 1616 | } 1617 | } 1618 | { 1619 | auto max_load_factor_upper_limit =
DyFphMap7bit::max_load_factor_upper_limit(); 1620 | bool correct_test_ret; 1621 | 1622 | size_t test_element_up_bound = std::floor(128.0 * max_load_factor_upper_limit); 1623 | correct_test_ret = TestCorrectness 1624 | (test_element_up_bound, 4000); 1625 | if (!correct_test_ret) { 1626 | LogHelper::log(Error, "DyFphMap7bit Fail to pass correct test with %lu max elements", 1627 | test_element_up_bound); 1628 | return; 1629 | } else { 1630 | LogHelper::log(Info, "DyFphMap7bit Pass correctness test with %lu max elements", 1631 | test_element_up_bound); 1632 | } 1633 | 1634 | test_element_up_bound = 3000; 1635 | correct_test_ret = TestCorrectness(test_element_up_bound, 400); 1636 | if (!correct_test_ret) { 1637 | LogHelper::log(Error, "DyFphMap15bit Fail to pass correct test with %lu max elements", 1638 | test_element_up_bound); 1639 | return; 1640 | } else { 1641 | LogHelper::log(Info, "DyFphMap15bit Pass correctness test with %lu max elements", 1642 | test_element_up_bound); 1643 | } 1644 | 1645 | test_element_up_bound = std::floor(65536.0 / 2.0 * TEST_CORR_MAX_LOAD_FACTOR); 1646 | correct_test_ret = TestCorrectness(test_element_up_bound, 10); 1647 | if (!correct_test_ret) { 1648 | LogHelper::log(Error, "DyFphMap15bit Fail to pass correct test with %lu max elements", 1649 | test_element_up_bound); 1650 | return; 1651 | } else { 1652 | LogHelper::log(Info, "DyFphMap15bit Pass correctness test with %lu max elements", 1653 | test_element_up_bound); 1654 | } 1655 | 1656 | test_element_up_bound = 500000ULL; 1657 | correct_test_ret = TestCorrectness(test_element_up_bound, 1); 1658 | if (!correct_test_ret) { 1659 | LogHelper::log(Error, "DyFphMap31bit Fail to pass correct test with %lu max elements", 1660 | test_element_up_bound); 1661 | return; 1662 | } else { 1663 | LogHelper::log(Info, "DyFphMap31bit Pass correctness test with %lu max elements", 1664 | test_element_up_bound); 1665 | } 1666 | } 1667 | 1668 | #endif 1669 | 1670 | 1671 | } 1672 | 1673 | void
TestMapPerformance() { 1674 | using KeyType = uint64_t; 1675 | 1676 | // using KeyType = TestKeyClass; 1677 | // using KeyType = std::string; 1678 | using ValueType = uint64_t; 1679 | // using ValueType = TestValueClass; 1680 | // using ValueType = std::string; 1681 | // using ValueType = FixSizeStruct<96>; 1682 | using BucketParamType = uint32_t; 1683 | 1684 | using KeyRandomGen = fph::dynamic::RandomGenerator; 1685 | 1686 | using ValueRandomGen = fph::dynamic::RandomGenerator; 1687 | 1688 | using RandomGenerator = RandomPairGen; 1689 | 1690 | 1691 | using SeedHash = fph::SimpleSeedHash; 1692 | // using SeedHash = fph::StrongSeedHash; 1693 | // using SeedHash = fph::MixSeedHash; 1694 | // using SeedHash = TestKeySeedHash; 1695 | 1696 | using Allocator = std::allocator>; 1697 | 1698 | using PairType = std::pair; 1699 | // constexpr size_t KEY_NUM = 84100ULL; 1700 | constexpr size_t KEY_NUM = 1'000'000ULL; 1701 | constexpr size_t LOOKUP_TIME = 100000000ULL; 1702 | constexpr size_t CONSTRUCT_TIME = 2; 1703 | constexpr double TEST_MAX_LOAD_FACTOR = 0.6; 1704 | 1705 | constexpr double c = 2.0; 1706 | 1707 | using TestMetaFphMap = fph::MetaFphMap, Allocator, 1708 | BucketParamType>; 1709 | 1710 | using TestDyFphMap = fph::DynamicFphMap, 1711 | Allocator, BucketParamType, KeyRandomGen>; 1712 | 1713 | // using TestPerformanceMap = TestMetaFphMap; 1714 | // using TestPerformanceMap = TestDyFphMap; 1715 | 1716 | // static_assert(is_pair::value); 1717 | 1718 | 1719 | 1720 | 1721 | using HashMethod = std::hash; 1722 | // using HashMethod = TestKeyHash; 1723 | 1724 | 1725 | using StdHashTable = std::unordered_map; 1726 | 1727 | size_t performance_seed = std::random_device{}(); 1728 | 1729 | TestTablePerformance(KEY_NUM, CONSTRUCT_TIME, LOOKUP_TIME, 1730 | performance_seed, c, TEST_MAX_LOAD_FACTOR); 1731 | TestTablePerformance(KEY_NUM, CONSTRUCT_TIME, LOOKUP_TIME, 1732 | performance_seed, c, TEST_MAX_LOAD_FACTOR); 1733 | 1734 | 1735 | 1736 | 1737 | TestTablePerformance(KEY_NUM,
CONSTRUCT_TIME, LOOKUP_TIME, 1738 | performance_seed, c, TEST_MAX_LOAD_FACTOR); 1739 | } 1740 | --------------------------------------------------------------------------------