├── .gitignore ├── benchmark ├── build.sh ├── sparsehash │ ├── traits │ ├── internal │ │ ├── libc_allocator_with_realloc.h │ │ └── hashtable-common.h │ ├── sparse_hash_set │ ├── dense_hash_set │ ├── sparse_hash_map │ └── dense_hash_map ├── benchcmp.cc ├── benchnum.cc ├── benchfindint.cc ├── benchfindstr.cc ├── tsl │ ├── hopscotch_growth_policy.h │ ├── robin_growth_policy.h │ ├── robin_set.h │ └── hopscotch_set.h └── data.txt ├── LICENSE ├── README.md ├── StrHash.h └── Str.h /.gitignore: -------------------------------------------------------------------------------- 1 | benchfindstr 2 | benchfindint 3 | benchnum 4 | benchcmp 5 | -------------------------------------------------------------------------------- /benchmark/build.sh: -------------------------------------------------------------------------------- 1 | g++ -std=c++17 -march=native -O3 -I. benchfindstr.cc -o benchfindstr 2 | # run: ./benchfindstr < data.txt 3 | 4 | g++ -std=c++17 -march=native -O3 -I. benchfindint.cc -o benchfindint 5 | # run: ./benchfindint < integers.txt 6 | 7 | g++ -march=native -O3 benchnum.cc -o benchnum 8 | # run: ./benchnum 9 | 10 | g++ -march=native -O3 benchcmp.cc -o benchcmp 11 | # run: ./benchcmp 12 | 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Meng Rao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /benchmark/sparsehash/traits: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2006, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. 
nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #pragma once 31 | #include 32 | #include // For pair 33 | 34 | namespace google { 35 | 36 | // trait which can be added to user types to enable use of memcpy in sparsetable 37 | // Example: 38 | // namespace google{ 39 | // template <> 40 | // struct is_relocatable : std::true_type {}; 41 | // } 42 | 43 | template 44 | struct is_relocatable 45 | : std::integral_constant::value && 47 | std::is_trivially_destructible::value)> {}; 48 | template 49 | struct is_relocatable> 50 | : std::integral_constant::value && 51 | is_relocatable::value)> {}; 52 | 53 | template 54 | struct is_relocatable : is_relocatable {}; 55 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Str 2 | `Str` is a char array wrapper providing some frequently used operations in the most efficient way (supporting AVX512 SIMD optimization), including string comparisons and conversion to/from integers. 3 | 4 | ## StrHash 5 | `StrHash` is an adaptive open addressing hash table template taking `Str` as key and providing a highly efficient find function. It's adaptive in that it extracts features from the keys contained in the table and dynamically trains its hashing parameters to distribute the keys and avoid collisions. 6 | 7 | `StrHash` is actually a subclass of `std::map`, so users can use whatever functions it provides to modify the table, then call `doneModify` to train the table and `fastFind` to look up keys. Note that `doneModify` is pretty slow, so it's not efficient to modify the table frequently between `fastFind` calls. It's recommended to call `clear` immediately after `doneModify` if only `fastFind` is needed afterwards, so some memory can be saved. 8 | 9 | `StrHash` currently supports 7 hash functions, one of which can be selected via the template parameter `HashFunc`: 10 | * 0: djb ver1 (default) 11 | * 1: djb ver2 12 | * 2: sax 13 | * 3: fnv 14 | * 4: oat 15 | * 5: murmur 16 | * 6: int (for integer keys) 17 | 18 | Users can also add their own hash functions. 19 | 20 | `StrHash` is also suitable for integer keys (such as uint32_t or uint64_t). Define `StrHash<8, Value, NullV, 6>` 21 | for uint64_t and `StrHash<4, Value, NullV, 6>` for uint32_t; see `benchfindint.cc` for detailed usage.
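
To make the workflow above concrete, here is a minimal usage sketch. It is not part of the repository: the key length, the example keys and the value type are arbitrary, the header path assumes the repo root, and the template argument order `<StrSZ, Value, NullV, HashFunc>` with `HashFunc` defaulting to 0 follows the `StrHash<8, Value, NullV, 6>` form above (see `StrHash.h` and `benchfindstr.cc` for the authoritative definitions and real usage):

```cpp
#include <cstdint>
#include <iostream>
#include "StrHash.h"  // path relative to the repo root

int main() {
  // 12-byte string keys mapped to uint16_t values. 0 is chosen as NullV here, i.e. the
  // value fastFind returns for a missing key, so stored values should be non-zero.
  // HashFunc is left at its default (0: djb ver1).
  StrHash<12, uint16_t, 0> ht;

  // All modifications go through the inherited std::map interface.
  ht.emplace("AAAABBBBCCCC", 1);  // arbitrary 12-character example keys
  ht.emplace("DDDDEEEEFFFF", 2);

  // Train the hash parameters and build the flat probe table; returns false if the
  // table is too large for the SmallTbl configuration.
  if (!ht.doneModify()) return 1;
  // ht.clear();  // optional: drop the std::map nodes if only fastFind is needed from now on

  Str<12> key("AAAABBBBCCCC");
  std::cout << ht.fastFind(key) << std::endl;  // prints 1; a missing key would print 0 (NullV)
  return 0;
}
```

As noted above, `doneModify` is expensive, so the intended pattern is to batch all modifications through the `std::map` interface and train once before switching to `fastFind`.
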
22 | 23 | ## Benchmark 24 | Tests show that `StrHash` is 7x faster than `std::unordered_map` and 3x faster than other open addressing hash table implementations such as `tsl::hopscotch_map`, `tsl::robin_map`, `robin_hood::unordered_map` and `google::dense_hash_map`. 25 | 26 | `Str`'s `operator==` and `compare` are 2x faster than `strncmp`/`memcmp` or those of `std::string`. 27 | 28 | `Str`'s `fromi` and `toi` are 10x faster than `stoi`/`strtol`/`to_string`/`sprintf`. 29 | 30 | `benchfindstr.cc` tests the performance of multiple string search solutions using the same data set. The data set contains the KRX option issue codes of Feb 2019 that we are interested in, which are inserted into the table, and the first 1000 option issue codes we received from the market data (mostly of Feb 2019, but some from other months), which are searched in the table. 31 | In `benchfindstr.cc`: 32 | * `bench_hash<0~5>` compares the performance of the different hash functions `StrHash` supports. 33 | * `bench_hash` vs the other search solutions shows how much faster `StrHash` is than the others. 34 | * `bench_map` vs `bench_string_map` and `bench_bsearch` vs `bench_string_bsearch` show how much faster `Str` is than `std::string`. 35 | 36 | `benchfindint.cc` tests the performance of multiple integer search solutions in a similar way to `benchfindstr.cc`. The data set contains SHFE instrument numbers of type uint64_t. Here `bench_hash<6>` should be the most suitable method. 37 | 38 | `benchcmp.cc` tests string comparison operations. 39 | 40 | `benchnum.cc` tests conversions to/from integers. 41 | -------------------------------------------------------------------------------- /benchmark/benchcmp.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../Str.h" 3 | 4 | using namespace std; 5 | 6 | inline uint64_t getns() { 7 | return std::chrono::high_resolution_clock::now().time_since_epoch().count(); 8 | } 9 | 10 | template 11 | void fillRand(Str& str) { 12 | for (int i = 0; i < Size; i++) { 13 | str[i] = rand() & 0xff; 14 | if (str[i] == 0) str[i] = 1; // don't gen null char, otherwise strncmp could cheat 15 | } 16 | } 17 | 18 | template 19 | void bench() { 20 | const int datasize = 1000; 21 | const int loop = 1000; 22 | const int eqrate = 50; // set an equal rate of 50% 23 | vector, Str>> strs(datasize); 24 | for (int i = 0; i < datasize; i++) { 25 | fillRand(strs[i].first); 26 | if (rand() % 100 < eqrate) { 27 | strs[i].second = strs[i].first; 28 | } 29 | else { 30 | fillRand(strs[i].second); 31 | } 32 | 33 | int res = strncmp(strs[i].first.s, strs[i].second.s, Size); 34 | res = (res > 0) - (res < 0); 35 | int compare_res = strs[i].first.compare(strs[i].second); 36 | compare_res = (compare_res > 0) - (compare_res < 0); 37 | 38 | int mem_res = memcmp(strs[i].first.s, strs[i].second.s, Size); 39 | mem_res = (mem_res > 0) - (mem_res < 0); 40 | 41 | assert(res == mem_res); 42 | assert(res == compare_res); 43 | assert((strs[i].first == strs[i].second) == (res == 0)); 44 | } 45 | 46 | { 47 | uint64_t sum = 0; 48 | auto before = getns(); 49 | for (int l = 0; l < loop; l++) { 50 | for (auto& pr : strs) { 51 | sum += pr.first == pr.second; 52 | } 53 | } 54 | auto after = getns(); 55 | cout << "bench " << Size << " eq: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 56 | } 57 | 58 | { 59 | uint64_t sum = 0; 60 | auto before = getns(); 61 | for (int l = 0; l < loop; l++) { 62 | for (auto& pr : strs) { 63 | sum += pr.first.compare(pr.second); 64 |
} 65 | } 66 | auto after = getns(); 67 | cout << "bench " << Size << " compare: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 68 | } 69 | 70 | { 71 | uint64_t sum = 0; 72 | auto before = getns(); 73 | for (int l = 0; l < loop; l++) { 74 | for (auto& pr : strs) { 75 | sum += strncmp(pr.first.s, pr.second.s, Size); 76 | } 77 | } 78 | auto after = getns(); 79 | cout << "bench " << Size << " strncmp: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 80 | } 81 | 82 | { 83 | uint64_t sum = 0; 84 | auto before = getns(); 85 | for (int l = 0; l < loop; l++) { 86 | for (auto& pr : strs) { 87 | sum += memcmp(pr.first.s, pr.second.s, Size); 88 | } 89 | } 90 | auto after = getns(); 91 | cout << "bench " << Size << " memcmp: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 92 | } 93 | cout << endl; 94 | } 95 | 96 | 97 | int main() { 98 | srand(time(NULL)); 99 | bench<1>(); 100 | bench<2>(); 101 | bench<3>(); 102 | bench<4>(); 103 | bench<7>(); 104 | bench<8>(); 105 | bench<10>(); 106 | bench<12>(); 107 | bench<13>(); 108 | bench<15>(); 109 | bench<16>(); 110 | bench<22>(); 111 | bench<30>(); 112 | bench<50>(); 113 | bench<100>(); 114 | bench<128>(); 115 | bench<300>(); 116 | bench<500>(); 117 | bench<999>(); 118 | } 119 | 120 | 121 | -------------------------------------------------------------------------------- /benchmark/benchnum.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../Str.h" 3 | using namespace std; 4 | 5 | 6 | inline uint64_t getns() { 7 | return std::chrono::high_resolution_clock::now().time_since_epoch().count(); 8 | } 9 | 10 | uint32_t getRand() { 11 | uint32_t num = rand() & 0xffff; 12 | num <<= 16; 13 | num |= rand() & 0xffff; 14 | return num; 15 | } 16 | 17 | string dest; 18 | char buf[1024]; 19 | 20 | template 21 | void bench() { 22 | using NumStr = Str; 23 | uint64_t mod = 1; 24 | for (int i = 0; i < Size; i++) mod *= 10; 25 | 26 | const int datasize = 1000; 27 | const int loop = 1000; 28 | vector nums(datasize); 29 | vector strs(datasize); 30 | vector strings(datasize); 31 | for (int i = 0; i < datasize; i++) { 32 | uint64_t num = getRand(); 33 | num <<= 32; 34 | num += getRand(); 35 | num %= mod; 36 | string str = to_string(num); 37 | while (str.size() < Size) str = string("0") + str; 38 | NumStr numstr = str.data(); 39 | assert(numstr.toi64() == num); 40 | assert(stoll(str) == num); 41 | assert(strtoll(str.data(), NULL, 10) == num); 42 | NumStr teststr; 43 | teststr.fromi(num); 44 | assert(teststr == numstr); 45 | sprintf(buf, "%0*lld", Size, num); 46 | assert(str == buf); 47 | nums[i] = num; 48 | strs[i] = numstr; 49 | strings[i] = str; 50 | } 51 | 52 | { 53 | uint64_t sum = 0; 54 | auto before = getns(); 55 | for (int l = 0; l < loop; l++) { 56 | for (auto& str : strs) { 57 | sum += str.toi64(); 58 | } 59 | } 60 | auto after = getns(); 61 | cout << "bench " << Size << " toi64: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 62 | } 63 | 64 | { 65 | uint64_t sum = 0; 66 | auto before = getns(); 67 | for (int l = 0; l < loop; l++) { 68 | for (auto& str : strings) { 69 | sum += stoll(str); 70 | } 71 | } 72 | auto after = getns(); 73 | cout << "bench " << Size << " stoll: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 74 | } 75 | 76 | { 77 | uint64_t sum = 0; 78 | auto before = getns(); 79 | for (int l = 0; l < loop; l++) { 80 | for (auto& str : strings) { 81 | sum += 
strtoll(str.data(), NULL, 10); 82 | } 83 | } 84 | auto after = getns(); 85 | cout << "bench " << Size << " strtoll: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 86 | } 87 | 88 | { 89 | union 90 | { 91 | uint64_t num; 92 | char str[Size]; 93 | } res; 94 | res.num = 0; 95 | uint64_t sum = 0; 96 | auto before = getns(); 97 | for (int l = 0; l < loop; l++) { 98 | for (auto num : nums) { 99 | (*(NumStr*)res.str).fromi(num); 100 | sum += res.num; 101 | } 102 | } 103 | auto after = getns(); 104 | cout << "bench " << Size << " fromi: " << (double)(after - before) / (loop * datasize) << " res: " << sum << endl; 105 | } 106 | 107 | { 108 | auto before = getns(); 109 | for (int l = 0; l < loop; l++) { 110 | for (auto num : nums) { 111 | dest = to_string(num); 112 | } 113 | } 114 | auto after = getns(); 115 | cout << "bench " << Size << " to_string: " << (double)(after - before) / (loop * datasize) << " res: " << dest 116 | << endl; 117 | } 118 | 119 | { 120 | auto before = getns(); 121 | for (int l = 0; l < loop; l++) { 122 | for (auto num : nums) { 123 | sprintf(buf, "%0*lld", Size, num); 124 | } 125 | } 126 | auto after = getns(); 127 | cout << "bench " << Size << " sprintf: " << (double)(after - before) / (loop * datasize) << " res: " << dest 128 | << endl; 129 | } 130 | 131 | cout << endl; 132 | } 133 | 134 | 135 | int main() { 136 | srand(time(NULL)); 137 | bench<1>(); 138 | bench<2>(); 139 | bench<3>(); 140 | bench<4>(); 141 | bench<5>(); 142 | bench<6>(); 143 | bench<7>(); 144 | bench<8>(); 145 | bench<9>(); 146 | bench<10>(); 147 | bench<11>(); 148 | bench<12>(); 149 | bench<13>(); 150 | bench<14>(); 151 | bench<15>(); 152 | bench<16>(); 153 | bench<17>(); 154 | bench<18>(); 155 | } 156 | 157 | -------------------------------------------------------------------------------- /benchmark/sparsehash/internal/libc_allocator_with_realloc.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2010, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | 32 | #pragma once 33 | 34 | #include // for malloc/realloc/free 35 | #include // for ptrdiff_t 36 | #include // for placement new 37 | 38 | namespace google { 39 | template 40 | class libc_allocator_with_realloc { 41 | public: 42 | typedef T value_type; 43 | typedef size_t size_type; 44 | typedef ptrdiff_t difference_type; 45 | 46 | typedef T* pointer; 47 | typedef const T* const_pointer; 48 | typedef T& reference; 49 | typedef const T& const_reference; 50 | 51 | libc_allocator_with_realloc() {} 52 | libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} 53 | ~libc_allocator_with_realloc() {} 54 | 55 | pointer address(reference r) const { return &r; } 56 | const_pointer address(const_reference r) const { return &r; } 57 | 58 | pointer allocate(size_type n, const_pointer = 0) { 59 | return static_cast(malloc(n * sizeof(value_type))); 60 | } 61 | void deallocate(pointer p, size_type) { free(p); } 62 | pointer reallocate(pointer p, size_type n) { 63 | // p points to a storage array whose objects have already been destroyed 64 | // cast to void* to prevent compiler warnings about calling realloc() on 65 | // an object which cannot be relocated in memory 66 | return static_cast(realloc(static_cast(p), n * sizeof(value_type))); 67 | } 68 | 69 | size_type max_size() const { 70 | return static_cast(-1) / sizeof(value_type); 71 | } 72 | 73 | void construct(pointer p, const value_type& val) { new (p) value_type(val); } 74 | void destroy(pointer p) { p->~value_type(); } 75 | 76 | template 77 | libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} 78 | 79 | template 80 | struct rebind { 81 | typedef libc_allocator_with_realloc other; 82 | }; 83 | }; 84 | 85 | // libc_allocator_with_realloc specialization. 
86 | template <> 87 | class libc_allocator_with_realloc { 88 | public: 89 | typedef void value_type; 90 | typedef size_t size_type; 91 | typedef ptrdiff_t difference_type; 92 | typedef void* pointer; 93 | typedef const void* const_pointer; 94 | 95 | template 96 | struct rebind { 97 | typedef libc_allocator_with_realloc other; 98 | }; 99 | }; 100 | 101 | template 102 | inline bool operator==(const libc_allocator_with_realloc&, 103 | const libc_allocator_with_realloc&) { 104 | return true; 105 | } 106 | 107 | template 108 | inline bool operator!=(const libc_allocator_with_realloc&, 109 | const libc_allocator_with_realloc&) { 110 | return false; 111 | } 112 | 113 | } // namespace google 114 | -------------------------------------------------------------------------------- /benchmark/benchfindint.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../StrHash.h" 3 | #include "tsl/robin_map.h" 4 | #include "tsl/hopscotch_map.h" 5 | #include "robin_hood.h" 6 | #include "sparsehash/dense_hash_map" 7 | 8 | template 9 | constexpr std::string_view type_name() { 10 | using namespace std; 11 | #ifdef __clang__ 12 | string_view p = __PRETTY_FUNCTION__; 13 | return string_view(p.data() + 34, p.size() - 34 - 1); 14 | #elif defined(__GNUC__) 15 | string_view p = __PRETTY_FUNCTION__; 16 | #if __cplusplus < 201402 17 | return string_view(p.data() + 36, p.size() - 36 - 1); 18 | #else 19 | return string_view(p.data() + 49, p.find(';', 49) - 49); 20 | #endif 21 | #elif defined(_MSC_VER) 22 | string_view p = __FUNCSIG__; 23 | return string_view(p.data() + 84, p.size() - 84 - 7); 24 | #endif 25 | } 26 | 27 | using namespace std; 28 | 29 | inline uint64_t getns() { 30 | return std::chrono::high_resolution_clock::now().time_since_epoch().count(); 31 | } 32 | 33 | using IntT = uint32_t; // change to uint64_t or uint16_t 34 | constexpr int IntLen = sizeof(IntT); 35 | 36 | using Key = Str; 37 | using Value = uint16_t; 38 | const int loop = 1000; 39 | vector tbl_data; 40 | vector find_data; 41 | 42 | template 43 | void bench_hash() { 44 | StrHash ht; 45 | for (int i = 0; i < tbl_data.size(); i++) { 46 | ht.emplace((const char*)&tbl_data[i], i + 1); 47 | } 48 | ht.doneModify(); 49 | // the std::map can be cleared to save memory if only fastFind is called afterwards 50 | // ht.clear(); 51 | 52 | int64_t sum = 0; 53 | auto before = getns(); 54 | for (int l = 0; l < loop; l++) { 55 | for (auto s : find_data) { 56 | sum += ht.fastFind(*(const Key*)&s); 57 | } 58 | } 59 | auto after = getns(); 60 | cout << "bench_hash " << HashFunc << " sum: " << sum 61 | << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 62 | } 63 | 64 | template 65 | void bench_map() { 66 | T ht; 67 | for (int i = 0; i < tbl_data.size(); i++) { 68 | ht.emplace(tbl_data[i], i + 1); 69 | } 70 | 71 | int64_t sum = 0; 72 | auto before = getns(); 73 | for (int l = 0; l < loop; l++) { 74 | for (auto s : find_data) { 75 | auto it = ht.find(s); 76 | if (it != ht.end()) sum += it->second; 77 | } 78 | } 79 | auto after = getns(); 80 | cout << type_name() << ", sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) 81 | << endl; 82 | } 83 | 84 | void bench_dense() { 85 | google::dense_hash_map ht; 86 | ht.set_empty_key(0); 87 | for (int i = 0; i < tbl_data.size(); i++) { 88 | ht.emplace(tbl_data[i], i + 1); 89 | } 90 | 91 | int64_t sum = 0; 92 | auto before = getns(); 93 | for (int l = 0; l < loop; l++) { 94 | for (auto s : find_data) { 95 | auto 
it = ht.find(s); 96 | if (it != ht.end()) sum += it->second; 97 | } 98 | } 99 | auto after = getns(); 100 | cout << "dense_hash_set" 101 | << ", sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 102 | } 103 | 104 | void bench_bsearch() { 105 | int n = tbl_data.size(); 106 | vector> vec(n); 107 | for (int i = 0; i < n; i++) { 108 | vec[i].first = tbl_data[i]; 109 | vec[i].second = i + 1; 110 | } 111 | sort(vec.begin(), vec.end()); 112 | int64_t sum = 0; 113 | auto before = getns(); 114 | for (int l = 0; l < loop; l++) { 115 | for (auto s : find_data) { 116 | int l = 0, r = n - 1; 117 | while (l <= r) { 118 | int m = (l + r) >> 1; 119 | if (vec[m].first == s) { 120 | sum += vec[m].second; 121 | break; 122 | } 123 | if (vec[m].first > s) 124 | r = m - 1; 125 | else 126 | l = m + 1; 127 | } 128 | } 129 | } 130 | auto after = getns(); 131 | cout << "bench_bsearch sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 132 | } 133 | 134 | int main() { 135 | int n; 136 | cin >> n; 137 | tbl_data.resize(n); 138 | for (int i = 0; i < n; i++) { 139 | cin >> tbl_data[i]; 140 | } 141 | 142 | cin >> n; 143 | find_data.resize(n); 144 | for (int i = 0; i < n; i++) { 145 | cin >> find_data[i]; 146 | } 147 | 148 | bench_hash<0>(); 149 | bench_hash<1>(); 150 | bench_hash<2>(); 151 | bench_hash<3>(); 152 | bench_hash<4>(); 153 | bench_hash<5>(); 154 | bench_hash<6>(); // 6 is for integer key 155 | bench_map>(); 156 | bench_map>(); 157 | bench_map< 158 | tsl::robin_map, std::equal_to, std::allocator>, true>>(); 159 | bench_map, std::equal_to, 160 | std::allocator>, 10, true>>(); 161 | bench_map>(); 162 | bench_dense(); 163 | bench_bsearch(); 164 | 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /benchmark/benchfindstr.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../StrHash.h" 3 | #include "tsl/robin_map.h" 4 | #include "tsl/hopscotch_map.h" 5 | #include "robin_hood.h" 6 | #include "sparsehash/dense_hash_map" 7 | 8 | template 9 | constexpr std::string_view type_name() { 10 | using namespace std; 11 | #ifdef __clang__ 12 | string_view p = __PRETTY_FUNCTION__; 13 | return string_view(p.data() + 34, p.size() - 34 - 1); 14 | #elif defined(__GNUC__) 15 | string_view p = __PRETTY_FUNCTION__; 16 | #if __cplusplus < 201402 17 | return string_view(p.data() + 36, p.size() - 36 - 1); 18 | #else 19 | return string_view(p.data() + 49, p.find(';', 49) - 49); 20 | #endif 21 | #elif defined(_MSC_VER) 22 | string_view p = __FUNCSIG__; 23 | return string_view(p.data() + 84, p.size() - 84 - 7); 24 | #endif 25 | } 26 | 27 | using namespace std; 28 | 29 | inline uint64_t getns() { 30 | return std::chrono::high_resolution_clock::now().time_since_epoch().count(); 31 | } 32 | 33 | const int STR_LEN = 12; 34 | 35 | using Key = Str; 36 | using Value = uint16_t; 37 | const int loop = 1000; 38 | std::vector tbl_data; 39 | std::vector find_data; 40 | 41 | template 42 | void bench_hash() { 43 | StrHash ht; 44 | for (int i = 0; i < tbl_data.size(); i++) { 45 | ht.emplace(tbl_data[i].data(), i + 1); 46 | } 47 | if (!ht.doneModify()) { 48 | cout << "table size too large, try using template parameter SmallTbl=false" << endl; 49 | return; 50 | } 51 | // the std::map can be cleared to save memory if only fastFind is called afterwards 52 | // ht.clear(); 53 | 54 | int64_t sum = 0; 55 | auto before = getns(); 56 | for (int l = 0; l < 
loop; l++) { 57 | for (auto& s : find_data) { 58 | sum += ht.fastFind(*(const Key*)s.data()); 59 | } 60 | } 61 | auto after = getns(); 62 | cout << "bench_hash " << HashFunc << " sum: " << sum 63 | << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 64 | } 65 | 66 | void bench_map() { 67 | StrHash ht; // StrHash is itself a std::map 68 | for (int i = 0; i < tbl_data.size(); i++) { 69 | ht.emplace(tbl_data[i].data(), i + 1); 70 | } 71 | 72 | int64_t sum = 0; 73 | auto before = getns(); 74 | for (int l = 0; l < loop; l++) { 75 | for (auto& s : find_data) { 76 | auto it = ht.find(*(const Key*)s.data()); 77 | if (it != ht.end()) sum += it->second; 78 | } 79 | } 80 | auto after = getns(); 81 | cout << "bench_map sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 82 | } 83 | 84 | template 85 | void bench_string_map() { 86 | T ht; 87 | for (int i = 0; i < tbl_data.size(); i++) { 88 | ht.emplace(tbl_data[i], i + 1); 89 | } 90 | int64_t sum = 0; 91 | auto before = getns(); 92 | for (int l = 0; l < loop; l++) { 93 | for (auto& s : find_data) { 94 | auto it = ht.find(s); 95 | if (it != ht.end()) sum += it->second; 96 | } 97 | } 98 | auto after = getns(); 99 | cout << type_name() << ", sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) 100 | << endl; 101 | } 102 | 103 | void bench_dense_map() { 104 | google::dense_hash_map ht; 105 | ht.set_empty_key(""); 106 | for (int i = 0; i < tbl_data.size(); i++) { 107 | ht.emplace(tbl_data[i], i + 1); 108 | } 109 | int64_t sum = 0; 110 | auto before = getns(); 111 | for (int l = 0; l < loop; l++) { 112 | for (auto& s : find_data) { 113 | auto it = ht.find(s); 114 | if (it != ht.end()) sum += it->second; 115 | } 116 | } 117 | auto after = getns(); 118 | cout << "dense_hash_map" 119 | << ", sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 120 | } 121 | 122 | void bench_bsearch() { 123 | int n = tbl_data.size(); 124 | vector> vec(n); 125 | for (int i = 0; i < n; i++) { 126 | vec[i].first = tbl_data[i].data(); 127 | vec[i].second = i + 1; 128 | } 129 | sort(vec.begin(), vec.end()); 130 | int64_t sum = 0; 131 | auto before = getns(); 132 | for (int l = 0; l < loop; l++) { 133 | for (auto& s : find_data) { 134 | const Key& key = *(const Key*)s.data(); 135 | int l = 0, r = n - 1; 136 | while (l <= r) { 137 | int m = (l + r) >> 1; 138 | int cmp = key.compare(vec[m].first); 139 | if (cmp == 0) { 140 | sum += vec[m].second; 141 | break; 142 | } 143 | if (cmp < 0) 144 | r = m - 1; 145 | else 146 | l = m + 1; 147 | } 148 | } 149 | } 150 | auto after = getns(); 151 | cout << "bench_bsearch sum: " << sum << " avg lat: " << (double)(after - before) / (loop * find_data.size()) << endl; 152 | } 153 | 154 | void bench_string_bsearch() { 155 | int n = tbl_data.size(); 156 | vector> vec(n); 157 | for (int i = 0; i < n; i++) { 158 | vec[i].first = tbl_data[i]; 159 | vec[i].second = i + 1; 160 | } 161 | sort(vec.begin(), vec.end()); 162 | int64_t sum = 0; 163 | auto before = getns(); 164 | for (int l = 0; l < loop; l++) { 165 | for (auto& key : find_data) { 166 | int l = 0, r = n - 1; 167 | while (l <= r) { 168 | int m = (l + r) >> 1; 169 | int cmp = key.compare(vec[m].first); 170 | if (cmp == 0) { 171 | sum += vec[m].second; 172 | break; 173 | } 174 | if (cmp < 0) 175 | r = m - 1; 176 | else 177 | l = m + 1; 178 | } 179 | } 180 | } 181 | auto after = getns(); 182 | cout << "bench_string_bsearch sum: " << sum << " avg lat: " << 
(double)(after - before) / (loop * find_data.size()) 183 | << endl; 184 | } 185 | 186 | int main(int argc, char** argv) { 187 | int n; 188 | cin >> n; 189 | tbl_data.resize(n); 190 | for (int i = 0; i < n; i++) { 191 | cin >> tbl_data[i]; 192 | } 193 | cin >> n; 194 | find_data.resize(n); 195 | for (int i = 0; i < n; i++) { 196 | cin >> find_data[i]; 197 | } 198 | 199 | bench_hash<0>(); 200 | bench_hash<1>(); 201 | bench_hash<2>(); 202 | bench_hash<3>(); 203 | bench_hash<4>(); 204 | bench_hash<5>(); 205 | bench_map(); 206 | bench_string_map>(); 207 | bench_string_map>(); 208 | bench_string_map, std::equal_to, 209 | std::allocator>, true>>(); 210 | bench_string_map, std::equal_to, 211 | std::allocator>, 10, true>>(); 212 | bench_string_map>(); 213 | bench_dense_map(); 214 | bench_bsearch(); 215 | bench_string_bsearch(); 216 | 217 | return 0; 218 | } 219 | -------------------------------------------------------------------------------- /StrHash.h: -------------------------------------------------------------------------------- 1 | /* 2 | MIT License 3 | 4 | Copyright (c) 2019 Meng Rao 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | #pragma once 25 | #include "Str.h" 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace strhash_detail { 32 | 33 | template 34 | struct HashType 35 | { using type = uint16_t; }; 36 | 37 | template<> 38 | struct HashType 39 | { using type = uint32_t; }; 40 | 41 | template 42 | inline uint32_t intHash(const T& s) { 43 | return *(uint32_t*)&s; 44 | } 45 | 46 | template<> 47 | inline uint32_t intHash(const Str<2>& s) { 48 | return *(uint16_t*)&s; 49 | } 50 | 51 | template<> 52 | inline uint32_t intHash(const Str<4>& s) { 53 | return *(uint32_t*)&s; 54 | } 55 | 56 | template<> 57 | inline uint32_t intHash(const Str<8>& s) { // simply truncate to lower 4 bytes 58 | return (uint32_t)(*(uint64_t*)&s); 59 | } 60 | 61 | } // namespace 62 | 63 | template 64 | class StrHash : public std::map, ValueT> 65 | { 66 | public: 67 | using KeyT = Str; 68 | using Parent = std::map; 69 | using HashT = typename strhash_detail::HashType::type; 70 | static const uint32_t MaxTblSZ = 1u << (SmallTbl ? 
15 : 31); 71 | struct Bucket 72 | { 73 | alignas(KeyT::AlignSize) KeyT key; 74 | HashT hashv; 75 | ValueT value; 76 | Bucket() = default; 77 | Bucket(const KeyT& k, const ValueT& v) : key(k), value(v) {} 78 | }; 79 | 80 | bool doneModify() { 81 | uint32_t n = Parent::size(); 82 | if (n >= MaxTblSZ) return false; 83 | table_size = n; 84 | std::vector tmp_tbl; 85 | tmp_tbl.reserve(n); 86 | for (auto& pr : *this) { 87 | tmp_tbl.emplace_back(pr.first, pr.second); 88 | } 89 | findBest(tmp_tbl); 90 | for (auto& blk : tmp_tbl) { 91 | blk.hashv = calcHash(blk.key); 92 | } 93 | std::sort(tmp_tbl.begin(), tmp_tbl.end(), [](const Bucket& a, const Bucket& b) { return a.hashv < b.hashv; }); 94 | HashT size = tbl_mask + 1; 95 | tbl.reset(new Bucket[size]); 96 | for (HashT i = 0; i < size; i++) { 97 | tbl[i].hashv = size; 98 | } 99 | for (auto& blk : tmp_tbl) { 100 | for (HashT pos = blk.hashv;; pos = (pos + 1) & tbl_mask) { 101 | if (tbl[pos].hashv == size) { 102 | tbl[pos] = blk; 103 | break; 104 | } 105 | } 106 | } 107 | 108 | return true; 109 | } 110 | 111 | ValueT fastFind(const KeyT& key) const { 112 | HashT hash = calcHash(key); 113 | for (HashT pos = hash;; pos = (pos + 1) & tbl_mask) { 114 | if (tbl[pos].hashv > hash) return NullV; 115 | // it's likely that tbl[pos].hash == hash so we skip checking it 116 | if (/*tbl[pos].hash == hash && */ tbl[pos].key == key) return tbl[pos].value; 117 | } 118 | } 119 | 120 | uint32_t getTableSize() const { return table_size; } 121 | 122 | private: 123 | bool HashFuncUseSalt() const { return HashFunc != 3; } 124 | bool HashFuncUsePos() const { return HashFunc != 5; } 125 | 126 | HashT calcHash(const KeyT& key) const { 127 | static_assert(HashFunc <= 6, "unsupported HashFunc"); 128 | uint32_t hash; 129 | switch (HashFunc) { 130 | case 0: hash = djbHash1(key); break; 131 | case 1: hash = djbHash2(key); break; 132 | case 2: hash = saxHash(key); break; 133 | case 3: hash = fnvHash(key); break; 134 | case 4: hash = oatHash(key); break; 135 | case 5: hash = murmurHash(key); break; 136 | case 6: hash = intHash(key); break; 137 | } 138 | if (SmallTbl) hash ^= (hash >> 16); 139 | return (HashT)hash & tbl_mask; 140 | } 141 | 142 | // 0 143 | uint32_t djbHash1(const KeyT& key) const { 144 | uint32_t h = hash_salt; 145 | for (int i = 0; i < hash_pos_len; i++) { 146 | char ch = key.s[hash_pos[i]]; 147 | h = ((h << 5) + h) + ch; 148 | } 149 | return h; 150 | } 151 | 152 | // 1 153 | uint32_t djbHash2(const KeyT& key) const { 154 | uint32_t h = hash_salt; 155 | for (int i = 0; i < hash_pos_len; i++) { 156 | char ch = key.s[hash_pos[i]]; 157 | h = ((h << 5) + h) ^ ch; 158 | } 159 | return h; 160 | } 161 | 162 | // 2 163 | uint32_t saxHash(const KeyT& key) const { 164 | uint32_t h = hash_salt; 165 | for (int i = 0; i < hash_pos_len; i++) { 166 | char ch = key.s[hash_pos[i]]; 167 | h ^= (h << 5) + (h >> 2) + ch; 168 | } 169 | return h; 170 | } 171 | 172 | // 3, hash_salt is not used 173 | uint32_t fnvHash(const KeyT& key) const { 174 | uint32_t h = 2166136261; 175 | for (int i = 0; i < hash_pos_len; i++) { 176 | char ch = key.s[hash_pos[i]]; 177 | h = (h * 16777619) ^ ch; 178 | } 179 | return h; 180 | } 181 | 182 | // 4 183 | uint32_t oatHash(const KeyT& key) const { 184 | uint32_t h = hash_salt; 185 | for (int i = 0; i < hash_pos_len; i++) { 186 | char ch = key.s[hash_pos[i]]; 187 | h += ch; 188 | h += (h << 10); 189 | h ^= (h >> 6); 190 | } 191 | h += (h << 3); 192 | h ^= (h >> 11); 193 | h += (h << 15); 194 | return h; 195 | } 196 | 197 | // 5, hash_pos is not used 198 | 
uint32_t murmurHash(const KeyT& key) const { 199 | const unsigned int m = 0x5bd1e995; 200 | const int r = 24; 201 | int len = StrSZ; 202 | 203 | // Initialize the hash to a 'random' value 204 | unsigned int h = hash_salt ^ len; 205 | 206 | // Mix 4 bytes at a time into the hash 207 | const unsigned char* data = (const unsigned char*)key.s; 208 | while (len >= 4) { 209 | unsigned int k = *(unsigned int*)data; 210 | k *= m; 211 | k ^= k >> r; 212 | k *= m; 213 | h *= m; 214 | h ^= k; 215 | data += 4; 216 | len -= 4; 217 | } 218 | // Handle the last few bytes of the input array 219 | switch (len) { 220 | case 3: h ^= data[2] << 16; 221 | case 2: h ^= data[1] << 8; 222 | case 1: h ^= data[0]; h *= m; 223 | }; 224 | 225 | // Do a few final mixes of the hash to ensure the last few 226 | // bytes are well-incorporated. 227 | h ^= h >> 13; 228 | h *= m; 229 | h ^= h >> 15; 230 | return h; 231 | } 232 | 233 | // 6: when key is actually an integer(e.g. uint32_t or uint64_t), return itself as hash value 234 | uint32_t intHash(const KeyT& key) const { return strhash_detail::intHash(key); } 235 | 236 | void findBest(std::vector& tmp_tbl) { 237 | uint64_t n = tmp_tbl.size(); 238 | std::map chmap[StrSZ]; 239 | for (auto& bkt : tmp_tbl) { 240 | for (size_t i = 0; i < StrSZ; i++) { 241 | chmap[i][bkt.key.s[i]]++; 242 | } 243 | } 244 | std::pair chcost[StrSZ]; 245 | for (size_t i = 0; i < StrSZ; i++) { 246 | chcost[i].second = i; 247 | chcost[i].first = 0; 248 | for (auto& mppr : chmap[i]) { 249 | chcost[i].first += mppr.second * mppr.second; 250 | } 251 | } 252 | std::sort(chcost, chcost + StrSZ); 253 | for (size_t i = 0; i < StrSZ; i++) { 254 | hash_pos[i] = chcost[i].second; 255 | } 256 | uint64_t max_cost = n * n; 257 | uint64_t min_cost = n; 258 | uint64_t good_cost = n + n / 3; 259 | 260 | uint64_t init_tbl_size = 1; 261 | while (init_tbl_size <= n) init_tbl_size <<= 1; 262 | uint64_t max_tbl_size = std::min(init_tbl_size * 4, (uint64_t)MaxTblSZ); 263 | 264 | uint32_t best_pos_len = 0, best_mask = init_tbl_size - 1, best_salt = 0; 265 | uint64_t best_cost = max_cost + 1; 266 | 267 | for (hash_pos_len = 1; hash_pos_len <= StrSZ && chcost[hash_pos_len - 1].first < max_cost; 268 | hash_pos_len += (HashFuncUsePos() ? 1 : StrSZ)) { 269 | for (uint32_t tbl_size = init_tbl_size; tbl_size <= max_tbl_size; tbl_size <<= 1) { 270 | tbl_mask = tbl_size - 1; 271 | uint32_t max_salt = std::min((uint32_t)tbl_mask, 127U); 272 | for (hash_salt = 0; hash_salt <= max_salt; hash_salt += (HashFuncUseSalt() ? 
1 : tbl_size)) { 273 | std::map pos_mp; 274 | for (auto& blk : tmp_tbl) { 275 | uint32_t hash = calcHash(blk.key); 276 | pos_mp[hash]++; 277 | } 278 | uint64_t cost = 0; 279 | for (auto& mppr : pos_mp) { 280 | cost += mppr.second * mppr.second; 281 | } 282 | if (cost < best_cost) { 283 | best_cost = cost; 284 | best_salt = hash_salt; 285 | best_pos_len = hash_pos_len; 286 | best_mask = tbl_mask; 287 | /* 288 | std::cout << "best_cost: " << best_cost << " best_salt: " << best_salt << " best_pos_len: " << best_pos_len 289 | << " best_mask: " << best_mask << " min_cost: " << min_cost << " good_cost: " << good_cost 290 | << std::endl; 291 | */ 292 | if (best_cost == min_cost) return; 293 | } 294 | } 295 | if (best_cost <= good_cost) goto done; 296 | } 297 | } 298 | done: 299 | hash_salt = best_salt; 300 | hash_pos_len = best_pos_len; 301 | tbl_mask = best_mask; 302 | } 303 | 304 | alignas(64) std::unique_ptr tbl; 305 | uint32_t hash_salt; 306 | HashT tbl_mask; 307 | uint16_t hash_pos_len; 308 | uint16_t hash_pos[StrSZ]; 309 | uint32_t table_size; 310 | }; 311 | -------------------------------------------------------------------------------- /benchmark/tsl/hopscotch_growth_policy.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018 Tessil 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H 25 | #define TSL_HOPSCOTCH_GROWTH_POLICY_H 26 | 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | namespace tsl { 40 | namespace hh { 41 | 42 | /** 43 | * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows 44 | * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. 45 | * 46 | * GrowthFactor must be a power of two >= 2. 47 | */ 48 | template 49 | class power_of_two_growth_policy { 50 | public: 51 | /** 52 | * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. 53 | * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). 54 | * 55 | * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and 56 | * bucket_for_hash must always return 0 in this case. 
57 | */ 58 | explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { 59 | if(min_bucket_count_in_out > max_bucket_count()) { 60 | throw std::length_error("The hash table exceeds its maxmimum size."); 61 | } 62 | 63 | if(min_bucket_count_in_out > 0) { 64 | min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); 65 | m_mask = min_bucket_count_in_out - 1; 66 | } 67 | else { 68 | m_mask = 0; 69 | } 70 | } 71 | 72 | /** 73 | * Return the bucket [0, bucket_count()) to which the hash belongs. 74 | * If bucket_count() is 0, it must always return 0. 75 | */ 76 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 77 | return hash & m_mask; 78 | } 79 | 80 | /** 81 | * Return the bucket count to use when the bucket array grows on rehash. 82 | */ 83 | std::size_t next_bucket_count() const { 84 | if((m_mask + 1) > max_bucket_count() / GrowthFactor) { 85 | throw std::length_error("The hash table exceeds its maxmimum size."); 86 | } 87 | 88 | return (m_mask + 1) * GrowthFactor; 89 | } 90 | 91 | /** 92 | * Return the maximum number of buckets supported by the policy. 93 | */ 94 | std::size_t max_bucket_count() const { 95 | // Largest power of two. 96 | return (std::numeric_limits::max() / 2) + 1; 97 | } 98 | 99 | /** 100 | * Reset the growth policy as if it was created with a bucket count of 0. 101 | * After a clear, the policy must always return 0 when bucket_for_hash is called. 102 | */ 103 | void clear() noexcept { 104 | m_mask = 0; 105 | } 106 | 107 | private: 108 | static std::size_t round_up_to_power_of_two(std::size_t value) { 109 | if(is_power_of_two(value)) { 110 | return value; 111 | } 112 | 113 | if(value == 0) { 114 | return 1; 115 | } 116 | 117 | --value; 118 | for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { 119 | value |= value >> i; 120 | } 121 | 122 | return value + 1; 123 | } 124 | 125 | static constexpr bool is_power_of_two(std::size_t value) { 126 | return value != 0 && (value & (value - 1)) == 0; 127 | } 128 | 129 | private: 130 | static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); 131 | 132 | std::size_t m_mask; 133 | }; 134 | 135 | 136 | /** 137 | * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash 138 | * to a bucket. Slower but it can be useful if you want a slower growth. 
139 | */ 140 | template> 141 | class mod_growth_policy { 142 | public: 143 | explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { 144 | if(min_bucket_count_in_out > max_bucket_count()) { 145 | throw std::length_error("The hash table exceeds its maxmimum size."); 146 | } 147 | 148 | if(min_bucket_count_in_out > 0) { 149 | m_mod = min_bucket_count_in_out; 150 | } 151 | else { 152 | m_mod = 1; 153 | } 154 | } 155 | 156 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 157 | return hash % m_mod; 158 | } 159 | 160 | std::size_t next_bucket_count() const { 161 | if(m_mod == max_bucket_count()) { 162 | throw std::length_error("The hash table exceeds its maxmimum size."); 163 | } 164 | 165 | const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); 166 | if(!std::isnormal(next_bucket_count)) { 167 | throw std::length_error("The hash table exceeds its maxmimum size."); 168 | } 169 | 170 | if(next_bucket_count > double(max_bucket_count())) { 171 | return max_bucket_count(); 172 | } 173 | else { 174 | return std::size_t(next_bucket_count); 175 | } 176 | } 177 | 178 | std::size_t max_bucket_count() const { 179 | return MAX_BUCKET_COUNT; 180 | } 181 | 182 | void clear() noexcept { 183 | m_mod = 1; 184 | } 185 | 186 | private: 187 | static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; 188 | static const std::size_t MAX_BUCKET_COUNT = 189 | std::size_t(double( 190 | std::numeric_limits::max() / REHASH_SIZE_MULTIPLICATION_FACTOR 191 | )); 192 | 193 | static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); 194 | 195 | std::size_t m_mod; 196 | }; 197 | 198 | 199 | 200 | namespace detail { 201 | 202 | static constexpr const std::array PRIMES = {{ 203 | 1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, 204 | 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 205 | 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 206 | 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul 207 | }}; 208 | 209 | template 210 | static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } 211 | 212 | // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the 213 | // compiler can optimize the modulo code better with a constant known at the compilation. 214 | static constexpr const std::array MOD_PRIME = {{ 215 | &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, 216 | &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, 217 | &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, 218 | &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39> 219 | }}; 220 | 221 | } 222 | 223 | /** 224 | * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in 225 | * general but will probably distribute the values around better in the buckets with a poor hash function. 226 | * 227 | * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. 
228 | * 229 | * With a switch the code would look like: 230 | * \code 231 | * switch(iprime) { // iprime is the current prime of the hash table 232 | * case 0: hash % 5ul; 233 | * break; 234 | * case 1: hash % 17ul; 235 | * break; 236 | * case 2: hash % 29ul; 237 | * break; 238 | * ... 239 | * } 240 | * \endcode 241 | * 242 | * Due to the constant variable in the modulo the compiler is able to optimize the operation 243 | * by a series of multiplications, substractions and shifts. 244 | * 245 | * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. 246 | */ 247 | class prime_growth_policy { 248 | public: 249 | explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { 250 | auto it_prime = std::lower_bound(detail::PRIMES.begin(), 251 | detail::PRIMES.end(), min_bucket_count_in_out); 252 | if(it_prime == detail::PRIMES.end()) { 253 | throw std::length_error("The hash table exceeds its maxmimum size."); 254 | } 255 | 256 | m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); 257 | if(min_bucket_count_in_out > 0) { 258 | min_bucket_count_in_out = *it_prime; 259 | } 260 | else { 261 | min_bucket_count_in_out = 0; 262 | } 263 | } 264 | 265 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 266 | return detail::MOD_PRIME[m_iprime](hash); 267 | } 268 | 269 | std::size_t next_bucket_count() const { 270 | if(m_iprime + 1 >= detail::PRIMES.size()) { 271 | throw std::length_error("The hash table exceeds its maxmimum size."); 272 | } 273 | 274 | return detail::PRIMES[m_iprime + 1]; 275 | } 276 | 277 | std::size_t max_bucket_count() const { 278 | return detail::PRIMES.back(); 279 | } 280 | 281 | void clear() noexcept { 282 | m_iprime = 0; 283 | } 284 | 285 | private: 286 | unsigned int m_iprime; 287 | 288 | static_assert(std::numeric_limits::max() >= detail::PRIMES.size(), 289 | "The type of m_iprime is not big enough."); 290 | }; 291 | 292 | } 293 | } 294 | 295 | #endif 296 | -------------------------------------------------------------------------------- /Str.h: -------------------------------------------------------------------------------- 1 | /* 2 | MIT License 3 | 4 | Copyright (c) 2019 Meng Rao 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | #pragma once 25 | #include 26 | #include 27 | 28 | #if __cplusplus >= 201703L 29 | #include 30 | #endif 31 | 32 | template 33 | class Str 34 | { 35 | public: 36 | static const int Size = SIZE; 37 | // Str is not required to align with AlignSize, in order to provide flexibility as a pure char array wrapper 38 | // but aligning Str could speed up comparison operations 39 | static const int AlignSize = SIZE >= 7 ? 8 : 4; 40 | char s[SIZE]; 41 | 42 | Str() {} 43 | Str(const char* p) { *this = *(const Str*)p; } 44 | 45 | char& operator[](int i) { return s[i]; } 46 | char operator[](int i) const { return s[i]; } 47 | 48 | bool operator==(const char* p2) const { 49 | #if defined(__AVX512VL__) && defined(__AVX512BW__) 50 | return simdEQ(s, p2, SIZE); 51 | #else 52 | return genericEQ(s, p2, SIZE); 53 | #endif 54 | } 55 | 56 | bool operator==(const Str& rhs) const { return operator==(rhs.s); } 57 | bool operator!=(const char* p) const { return !operator==(p); } 58 | bool operator!=(const Str& rhs) const { return !operator==(rhs.s); } 59 | 60 | static bool genericEQ(const char* p1, const char* p2, size_t len) { 61 | while (len >= 8) { 62 | if (*(uint64_t*)p1 != *(uint64_t*)p2) return false; 63 | p1 += 8; 64 | p2 += 8; 65 | len -= 8; 66 | } 67 | if (len >= 4) { 68 | if (*(uint32_t*)(p1) != *(uint32_t*)(p2)) return false; 69 | p1 += 4; 70 | p2 += 4; 71 | len -= 4; 72 | } 73 | switch (len) { 74 | case 1: return *p1 == *p2; 75 | case 2: return *(uint16_t*)p1 == *(uint16_t*)p2; 76 | case 3: return *(uint16_t*)p1 == *(uint16_t*)p2 && p1[2] == p2[2]; 77 | }; 78 | return true; 79 | } 80 | 81 | int compare(const char* p2) const { 82 | #if defined(__AVX512VL__) && defined(__AVX512BW__) 83 | return simdCompare(s, p2, SIZE); 84 | #else 85 | return genericCompare(s, p2, SIZE); 86 | #endif 87 | } 88 | int compare(const Str& rhs) const { return compare(rhs.s); } 89 | bool operator<(const char* p2) const { return compare(p2) < 0; } 90 | bool operator<(const Str& rhs) const { return compare(rhs.s) < 0; } 91 | 92 | static int genericCompare(const char* p1, const char* p2, size_t len) { 93 | while (len >= 8) { 94 | uint64_t mask = *(uint64_t*)p1 ^ *(uint64_t*)p2; 95 | if (mask) { 96 | int i = __builtin_ctzll(mask) >> 3; 97 | return (int)(uint8_t)p1[i] - (int)(uint8_t)p2[i]; 98 | } 99 | p1 += 8; 100 | p2 += 8; 101 | len -= 8; 102 | } 103 | for (size_t i = 0; i < len; i++) { 104 | int res = (int)(uint8_t)p1[i] - (int)(uint8_t)p2[i]; 105 | if (res) return res; 106 | } 107 | return 0; 108 | } 109 | 110 | #if __cplusplus >= 201703L 111 | std::string_view tosv() const { return std::string_view(s, SIZE); } 112 | #endif 113 | 114 | #if defined(__AVX512VL__) && defined(__AVX512BW__) 115 | static bool simdEQ(const char* p1, const char* p2, size_t len) { 116 | while (len >= 64) { 117 | uint64_t mask = _mm512_cmpneq_epu8_mask(_mm512_loadu_si512(p1), _mm512_loadu_si512(p2)); 118 | if (mask) return false; 119 | p1 += 64; 120 | p2 += 64; 121 | len -= 64; 122 | } 123 | 124 | if (len >= 32) { 125 | uint32_t mask = _mm256_cmpneq_epu8_mask(_mm256_loadu_si256((__m256i*)p1), _mm256_loadu_si256((__m256i*)p2)); 126 | if (mask) return false; 127 | p1 += 32; 128 | p2 += 32; 129 | len -= 32; 130 | } 131 | if (len >= 16) { 132 | uint16_t mask = _mm_cmpneq_epu8_mask(_mm_loadu_si128((__m128i*)p1), _mm_loadu_si128((__m128i*)p2)); 133 | if (mask) return false; 134 | p1 += 16; 135 | p2 += 16; 136 | len -= 16; 137 | } 138 | return genericEQ(p1, p2, len); 139 | } 140 | 141 | static int simdCompare(const char* p1, const char* p2, size_t 
len) { 142 | while (len >= 64) { 143 | uint64_t mask = _mm512_cmpneq_epu8_mask(_mm512_loadu_si512(p1), _mm512_loadu_si512(p2)); 144 | if (mask) { 145 | int i = __builtin_ctzll(mask); 146 | return (int)(uint8_t)p1[i] - (int)(uint8_t)p2[i]; 147 | } 148 | p1 += 64; 149 | p2 += 64; 150 | len -= 64; 151 | } 152 | if (len >= 32) { 153 | uint32_t mask = _mm256_cmpneq_epu8_mask(_mm256_loadu_si256((__m256i*)p1), _mm256_loadu_si256((__m256i*)p2)); 154 | if (mask) { 155 | int i = __builtin_ctz(mask); 156 | return (int)(uint8_t)p1[i] - (int)(uint8_t)p2[i]; 157 | } 158 | p1 += 32; 159 | p2 += 32; 160 | len -= 32; 161 | } 162 | if (len >= 16) { 163 | uint16_t mask = _mm_cmpneq_epu8_mask(_mm_loadu_si128((__m128i*)p1), _mm_loadu_si128((__m128i*)p2)); 164 | if (mask) { 165 | int i = __builtin_ctz(mask); 166 | return (int)(uint8_t)p1[i] - (int)(uint8_t)p2[i]; 167 | } 168 | p1 += 16; 169 | p2 += 16; 170 | len -= 16; 171 | } 172 | return genericCompare(p1, p2, len); 173 | } 174 | #endif 175 | 176 | uint32_t toi() const { 177 | uint32_t ret = 0; 178 | switch (SIZE) { 179 | case 10: ret += (s[SIZE - 10] - '0') * 1000000000; 180 | case 9: ret += (s[SIZE - 9] - '0') * 100000000; 181 | #ifdef __SSE4_1__ 182 | case 8: ret += simdtoi(s + SIZE - 8); return ret; 183 | #else 184 | case 8: ret += (s[SIZE - 8] - '0') * 10000000; 185 | #endif 186 | case 7: ret += (s[SIZE - 7] - '0') * 1000000; 187 | case 6: ret += (s[SIZE - 6] - '0') * 100000; 188 | case 5: ret += (s[SIZE - 5] - '0') * 10000; 189 | case 4: ret += (s[SIZE - 4] - '0') * 1000; 190 | case 3: ret += (s[SIZE - 3] - '0') * 100; 191 | case 2: ret += (s[SIZE - 2] - '0') * 10; 192 | case 1: ret += (s[SIZE - 1] - '0'); 193 | }; 194 | return ret; 195 | } 196 | 197 | uint64_t toi64() const { 198 | uint64_t ret = 0; 199 | switch (SIZE) { 200 | case 19: ret += (s[SIZE - 19] - '0') * 1000000000000000000LL; 201 | case 18: ret += (s[SIZE - 18] - '0') * 100000000000000000LL; 202 | case 17: ret += (s[SIZE - 17] - '0') * 10000000000000000LL; 203 | #ifdef __SSE4_1__ 204 | case 16: ret += simdtoi64(s + SIZE - 16); return ret; 205 | #else 206 | case 16: ret += (s[SIZE - 16] - '0') * 1000000000000000LL; 207 | #endif 208 | case 15: ret += (s[SIZE - 15] - '0') * 100000000000000LL; 209 | case 14: ret += (s[SIZE - 14] - '0') * 10000000000000LL; 210 | case 13: ret += (s[SIZE - 13] - '0') * 1000000000000LL; 211 | case 12: ret += (s[SIZE - 12] - '0') * 100000000000LL; 212 | case 11: ret += (s[SIZE - 11] - '0') * 10000000000LL; 213 | case 10: ret += (s[SIZE - 10] - '0') * 1000000000LL; 214 | case 9: ret += (s[SIZE - 9] - '0') * 100000000LL; 215 | #ifdef __SSE4_1__ 216 | case 8: ret += simdtoi(s + SIZE - 8); return ret; 217 | #else 218 | case 8: ret += (s[SIZE - 8] - '0') * 10000000LL; 219 | #endif 220 | case 7: ret += (s[SIZE - 7] - '0') * 1000000LL; 221 | case 6: ret += (s[SIZE - 6] - '0') * 100000LL; 222 | case 5: ret += (s[SIZE - 5] - '0') * 10000LL; 223 | case 4: ret += (s[SIZE - 4] - '0') * 1000LL; 224 | case 3: ret += (s[SIZE - 3] - '0') * 100LL; 225 | case 2: ret += (s[SIZE - 2] - '0') * 10LL; 226 | case 1: ret += (s[SIZE - 1] - '0'); 227 | }; 228 | return ret; 229 | } 230 | 231 | #ifdef __SSE4_1__ 232 | // covert 8 digits into int 233 | // https://arxiv.org/pdf/1902.08318.pdf, Fig.7 234 | static uint32_t simdtoi(const char* p) { 235 | __m128i ascii0 = _mm_set1_epi8('0'); 236 | __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); 237 | __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); 238 | __m128i mul_1_10000 = 
_mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); 239 | // we should've used _mm_loadu_si64 here, but seems _mm_loadu_si128 is faster 240 | __m128i in = _mm_sub_epi8(_mm_loadu_si128((__m128i*)p), ascii0); 241 | __m128i t1 = _mm_maddubs_epi16(in, mul_1_10); 242 | __m128i t2 = _mm_madd_epi16(t1, mul_1_100); 243 | __m128i t3 = _mm_packus_epi32(t2, t2); 244 | __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); 245 | return _mm_cvtsi128_si32(t4); 246 | } 247 | 248 | // covert 16 digits into int64 249 | static uint64_t simdtoi64(const char* p) { 250 | __m128i ascii0 = _mm_set1_epi8('0'); 251 | __m128i mul_1_10 = _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); 252 | __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); 253 | __m128i mul_1_10000 = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); 254 | __m128i in = _mm_sub_epi8(_mm_loadu_si128((__m128i*)p), ascii0); 255 | __m128i t1 = _mm_maddubs_epi16(in, mul_1_10); 256 | __m128i t2 = _mm_madd_epi16(t1, mul_1_100); 257 | __m128i t3 = _mm_packus_epi32(t2, t2); 258 | __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); 259 | // the above code is exactly the same as simdtoi 260 | uint64_t t5 = _mm_cvtsi128_si64(t4); 261 | return (t5 >> 32) + (t5 & 0xffffffff) * 100000000LL; 262 | } 263 | #endif 264 | 265 | template 266 | void fromi(T num) { 267 | if (Size & 1) { 268 | s[Size - 1] = '0' + (num % 10); 269 | num /= 10; 270 | } 271 | switch (Size & -2) { 272 | case 18: *(uint16_t*)(s + 16) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 273 | case 16: *(uint16_t*)(s + 14) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 274 | case 14: *(uint16_t*)(s + 12) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 275 | case 12: *(uint16_t*)(s + 10) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 276 | case 10: *(uint16_t*)(s + 8) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 277 | case 8: *(uint16_t*)(s + 6) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 278 | case 6: *(uint16_t*)(s + 4) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 279 | case 4: *(uint16_t*)(s + 2) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 280 | case 2: *(uint16_t*)(s + 0) = *(uint16_t*)(digit_pairs + ((num % 100) << 1)); num /= 100; 281 | } 282 | } 283 | 284 | static constexpr const char* digit_pairs = "00010203040506070809" 285 | "10111213141516171819" 286 | "20212223242526272829" 287 | "30313233343536373839" 288 | "40414243444546474849" 289 | "50515253545556575859" 290 | "60616263646566676869" 291 | "70717273747576777879" 292 | "80818283848586878889" 293 | "90919293949596979899"; 294 | }; 295 | 296 | template 297 | std::ostream& operator<<(std::ostream& os, const Str& str) { 298 | os.write(str.s, SIZE); 299 | return os; 300 | } 301 | 302 | -------------------------------------------------------------------------------- /benchmark/tsl/robin_growth_policy.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Tessil 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject 
to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef TSL_ROBIN_GROWTH_POLICY_H 25 | #define TSL_ROBIN_GROWTH_POLICY_H 26 | 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | #ifdef TSL_DEBUG 40 | # define tsl_rh_assert(expr) assert(expr) 41 | #else 42 | # define tsl_rh_assert(expr) (static_cast(0)) 43 | #endif 44 | 45 | 46 | /** 47 | * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate. 48 | */ 49 | #if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS) 50 | # define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg) 51 | #else 52 | # ifdef NDEBUG 53 | # define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate() 54 | # else 55 | # include 56 | # define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::fprintf(stderr, msg); std::terminate(); } while(0) 57 | # endif 58 | #endif 59 | 60 | 61 | #if defined(__GNUC__) || defined(__clang__) 62 | # define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true)) 63 | #else 64 | # define TSL_RH_LIKELY(exp) (exp) 65 | #endif 66 | 67 | 68 | namespace tsl { 69 | namespace rh { 70 | 71 | /** 72 | * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows 73 | * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. 74 | * 75 | * GrowthFactor must be a power of two >= 2. 76 | */ 77 | template 78 | class power_of_two_growth_policy { 79 | public: 80 | /** 81 | * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. 82 | * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). 83 | * 84 | * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and 85 | * bucket_for_hash must always return 0 in this case. 86 | */ 87 | explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { 88 | if(min_bucket_count_in_out > max_bucket_count()) { 89 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 90 | } 91 | 92 | if(min_bucket_count_in_out > 0) { 93 | min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); 94 | m_mask = min_bucket_count_in_out - 1; 95 | } 96 | else { 97 | m_mask = 0; 98 | } 99 | } 100 | 101 | /** 102 | * Return the bucket [0, bucket_count()) to which the hash belongs. 103 | * If bucket_count() is 0, it must always return 0. 104 | */ 105 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 106 | return hash & m_mask; 107 | } 108 | 109 | /** 110 | * Return the number of buckets that should be used on next growth. 
111 | */ 112 | std::size_t next_bucket_count() const { 113 | if((m_mask + 1) > max_bucket_count() / GrowthFactor) { 114 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 115 | } 116 | 117 | return (m_mask + 1) * GrowthFactor; 118 | } 119 | 120 | /** 121 | * Return the maximum number of buckets supported by the policy. 122 | */ 123 | std::size_t max_bucket_count() const { 124 | // Largest power of two. 125 | return (std::numeric_limits::max() / 2) + 1; 126 | } 127 | 128 | /** 129 | * Reset the growth policy as if it was created with a bucket count of 0. 130 | * After a clear, the policy must always return 0 when bucket_for_hash is called. 131 | */ 132 | void clear() noexcept { 133 | m_mask = 0; 134 | } 135 | 136 | private: 137 | static std::size_t round_up_to_power_of_two(std::size_t value) { 138 | if(is_power_of_two(value)) { 139 | return value; 140 | } 141 | 142 | if(value == 0) { 143 | return 1; 144 | } 145 | 146 | --value; 147 | for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { 148 | value |= value >> i; 149 | } 150 | 151 | return value + 1; 152 | } 153 | 154 | static constexpr bool is_power_of_two(std::size_t value) { 155 | return value != 0 && (value & (value - 1)) == 0; 156 | } 157 | 158 | protected: 159 | static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); 160 | 161 | std::size_t m_mask; 162 | }; 163 | 164 | 165 | /** 166 | * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash 167 | * to a bucket. Slower but it can be useful if you want a slower growth. 168 | */ 169 | template> 170 | class mod_growth_policy { 171 | public: 172 | explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { 173 | if(min_bucket_count_in_out > max_bucket_count()) { 174 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 175 | } 176 | 177 | if(min_bucket_count_in_out > 0) { 178 | m_mod = min_bucket_count_in_out; 179 | } 180 | else { 181 | m_mod = 1; 182 | } 183 | } 184 | 185 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 186 | return hash % m_mod; 187 | } 188 | 189 | std::size_t next_bucket_count() const { 190 | if(m_mod == max_bucket_count()) { 191 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 192 | } 193 | 194 | const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); 195 | if(!std::isnormal(next_bucket_count)) { 196 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 197 | } 198 | 199 | if(next_bucket_count > double(max_bucket_count())) { 200 | return max_bucket_count(); 201 | } 202 | else { 203 | return std::size_t(next_bucket_count); 204 | } 205 | } 206 | 207 | std::size_t max_bucket_count() const { 208 | return MAX_BUCKET_COUNT; 209 | } 210 | 211 | void clear() noexcept { 212 | m_mod = 1; 213 | } 214 | 215 | private: 216 | static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; 217 | static const std::size_t MAX_BUCKET_COUNT = 218 | std::size_t(double( 219 | std::numeric_limits::max() / REHASH_SIZE_MULTIPLICATION_FACTOR 220 | )); 221 | 222 | static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); 223 | 224 | std::size_t m_mod; 225 | }; 226 | 227 | 228 | 229 | namespace detail { 230 | 231 | static constexpr const std::array PRIMES = {{ 232 | 1ul, 5ul, 17ul, 29ul, 37ul, 
53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, 233 | 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 234 | 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 235 | 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul 236 | }}; 237 | 238 | template 239 | static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } 240 | 241 | // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the 242 | // compiler can optimize the modulo code better with a constant known at the compilation. 243 | static constexpr const std::array MOD_PRIME = {{ 244 | &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, 245 | &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, 246 | &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, 247 | &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39> 248 | }}; 249 | 250 | } 251 | 252 | /** 253 | * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in 254 | * general but will probably distribute the values around better in the buckets with a poor hash function. 255 | * 256 | * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. 257 | * 258 | * With a switch the code would look like: 259 | * \code 260 | * switch(iprime) { // iprime is the current prime of the hash table 261 | * case 0: hash % 5ul; 262 | * break; 263 | * case 1: hash % 17ul; 264 | * break; 265 | * case 2: hash % 29ul; 266 | * break; 267 | * ... 268 | * } 269 | * \endcode 270 | * 271 | * Due to the constant variable in the modulo the compiler is able to optimize the operation 272 | * by a series of multiplications, substractions and shifts. 273 | * 274 | * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. 
275 | */ 276 | class prime_growth_policy { 277 | public: 278 | explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { 279 | auto it_prime = std::lower_bound(detail::PRIMES.begin(), 280 | detail::PRIMES.end(), min_bucket_count_in_out); 281 | if(it_prime == detail::PRIMES.end()) { 282 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 283 | } 284 | 285 | m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); 286 | if(min_bucket_count_in_out > 0) { 287 | min_bucket_count_in_out = *it_prime; 288 | } 289 | else { 290 | min_bucket_count_in_out = 0; 291 | } 292 | } 293 | 294 | std::size_t bucket_for_hash(std::size_t hash) const noexcept { 295 | return detail::MOD_PRIME[m_iprime](hash); 296 | } 297 | 298 | std::size_t next_bucket_count() const { 299 | if(m_iprime + 1 >= detail::PRIMES.size()) { 300 | TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size."); 301 | } 302 | 303 | return detail::PRIMES[m_iprime + 1]; 304 | } 305 | 306 | std::size_t max_bucket_count() const { 307 | return detail::PRIMES.back(); 308 | } 309 | 310 | void clear() noexcept { 311 | m_iprime = 0; 312 | } 313 | 314 | private: 315 | unsigned int m_iprime; 316 | 317 | static_assert(std::numeric_limits::max() >= detail::PRIMES.size(), 318 | "The type of m_iprime is not big enough."); 319 | }; 320 | 321 | } 322 | } 323 | 324 | #endif 325 | -------------------------------------------------------------------------------- /benchmark/sparsehash/sparse_hash_set: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2005, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // This is just a very thin wrapper over sparsehashtable.h, just 33 | // like sgi stl's stl_hash_set is a very thin wrapper over 34 | // stl_hashtable. 
The major thing we define is operator[], because 35 | // we have a concept of a data_type which stl_hashtable doesn't 36 | // (it only has a key and a value). 37 | // 38 | // This is more different from sparse_hash_map than you might think, 39 | // because all iterators for sets are const (you obviously can't 40 | // change the key, and for sets there is no value). 41 | // 42 | // We adhere mostly to the STL semantics for hash-map. One important 43 | // exception is that insert() may invalidate iterators entirely -- STL 44 | // semantics are that insert() may reorder iterators, but they all 45 | // still refer to something valid in the hashtable. Not so for us. 46 | // Likewise, insert() may invalidate pointers into the hashtable. 47 | // (Whether insert invalidates iterators and pointers depends on 48 | // whether it results in a hashtable resize). On the plus side, 49 | // delete() doesn't invalidate iterators or pointers at all, or even 50 | // change the ordering of elements. 51 | // 52 | // Here are a few "power user" tips: 53 | // 54 | // 1) set_deleted_key(): 55 | // Unlike STL's hash_map, if you want to use erase() you 56 | // *must* call set_deleted_key() after construction. 57 | // 58 | // 2) resize(0): 59 | // When an item is deleted, its memory isn't freed right 60 | // away. This allows you to iterate over a hashtable, 61 | // and call erase(), without invalidating the iterator. 62 | // To force the memory to be freed, call resize(0). 63 | // For tr1 compatibility, this can also be called as rehash(0). 64 | // 65 | // 3) min_load_factor(0.0) 66 | // Setting the minimum load factor to 0.0 guarantees that 67 | // the hash table will never shrink. 68 | // 69 | // Roughly speaking: 70 | // (1) dense_hash_set: fastest, uses the most memory unless entries are small 71 | // (2) sparse_hash_set: slowest, uses the least memory 72 | // (3) hash_set / unordered_set (STL): in the middle 73 | // 74 | // Typically I use sparse_hash_set when I care about space and/or when 75 | // I need to save the hashtable on disk. I use hash_set otherwise. I 76 | // don't personally use dense_hash_set ever; some people use it for 77 | // small sets with lots of lookups. 78 | // 79 | // - dense_hash_set has, typically, about 78% memory overhead (if your 80 | // data takes up X bytes, the hash_set uses .78X more bytes in overhead). 81 | // - sparse_hash_set has about 4 bits overhead per entry. 82 | // - sparse_hash_set can be 3-7 times slower than the others for lookup and, 83 | // especially, inserts. See time_hash_map.cc for details. 84 | // 85 | // See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html 86 | // for information about how to use this class. 
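The power-user tips above translate into a short, hypothetical usage sketch (not part of this header): call set_deleted_key() before any erase(), use resize(0) to reclaim the memory of erased entries, and min_load_factor(0.0) if the table should never shrink. The include path and the sample keys are assumptions based on this repository's benchmark layout (compiled with -I., see build.sh).

```cpp
#include <cstdio>
#include <string>
#include <sparsehash/sparse_hash_set>  // assumed path, per the benchmark/ layout (-I.)

int main() {
  google::sparse_hash_set<std::string> words;
  words.set_deleted_key("");       // mandatory before any erase(); "" must never be inserted
  words.min_load_factor(0.0f);     // optional: the table will never shrink

  words.insert("alpha");
  words.insert("beta");
  words.erase("alpha");            // slot memory is not freed yet
  words.resize(0);                 // force freeing of deleted entries

  std::printf("beta present: %zu, size: %zu\n", words.count("beta"), words.size());
  return 0;
}
```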
87 | 88 | #pragma once 89 | 90 | #include // needed by stl_alloc 91 | #include // for equal_to<> 92 | #include // for alloc (which we don't use) 93 | #include // for pair<> 94 | #include 95 | #include // IWYU pragma: export 96 | 97 | namespace google { 98 | 99 | template , 100 | class EqualKey = std::equal_to, 101 | class Alloc = libc_allocator_with_realloc> 102 | class sparse_hash_set { 103 | private: 104 | // Apparently identity is not stl-standard, so we define our own 105 | struct Identity { 106 | typedef const Value& result_type; 107 | const Value& operator()(const Value& v) const { return v; } 108 | }; 109 | struct SetKey { 110 | void operator()(Value* value, const Value& new_key) const { 111 | *value = new_key; 112 | } 113 | }; 114 | 115 | typedef typename sparsehash_internal::key_equal_chosen::type EqualKeyChosen; 116 | typedef sparse_hashtable ht; 118 | ht rep; 119 | 120 | static_assert(!sparsehash_internal::has_transparent_key_equal::value 121 | || std::is_same>::value 122 | || std::is_same::value, 123 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash"); 124 | 125 | public: 126 | typedef typename ht::key_type key_type; 127 | typedef typename ht::value_type value_type; 128 | typedef typename ht::hasher hasher; 129 | typedef typename ht::key_equal key_equal; 130 | typedef Alloc allocator_type; 131 | 132 | typedef typename ht::size_type size_type; 133 | typedef typename ht::difference_type difference_type; 134 | typedef typename ht::const_pointer pointer; 135 | typedef typename ht::const_pointer const_pointer; 136 | typedef typename ht::const_reference reference; 137 | typedef typename ht::const_reference const_reference; 138 | 139 | typedef typename ht::const_iterator iterator; 140 | typedef typename ht::const_iterator const_iterator; 141 | typedef typename ht::const_local_iterator local_iterator; 142 | typedef typename ht::const_local_iterator const_local_iterator; 143 | 144 | // Iterator functions -- recall all iterators are const 145 | iterator begin() const { return rep.begin(); } 146 | iterator end() const { return rep.end(); } 147 | 148 | // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
149 | local_iterator begin(size_type i) const { return rep.begin(i); } 150 | local_iterator end(size_type i) const { return rep.end(i); } 151 | 152 | // Accessor functions 153 | allocator_type get_allocator() const { return rep.get_allocator(); } 154 | hasher hash_funct() const { return rep.hash_funct(); } 155 | hasher hash_function() const { return hash_funct(); } // tr1 name 156 | key_equal key_eq() const { return rep.key_eq(); } 157 | 158 | // Constructors 159 | explicit sparse_hash_set(size_type expected_max_items_in_table = 0, 160 | const hasher& hf = hasher(), 161 | const key_equal& eql = key_equal(), 162 | const allocator_type& alloc = allocator_type()) 163 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { 164 | } 165 | 166 | template 167 | sparse_hash_set(InputIterator f, InputIterator l, 168 | size_type expected_max_items_in_table = 0, 169 | const hasher& hf = hasher(), 170 | const key_equal& eql = key_equal(), 171 | const allocator_type& alloc = allocator_type()) 172 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { 173 | rep.insert(f, l); 174 | } 175 | // We use the default copy constructor 176 | // We use the default operator=() 177 | // We use the default destructor 178 | 179 | void clear() { rep.clear(); } 180 | void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } 181 | 182 | // Functions concerning size 183 | size_type size() const { return rep.size(); } 184 | size_type max_size() const { return rep.max_size(); } 185 | bool empty() const { return rep.empty(); } 186 | size_type bucket_count() const { return rep.bucket_count(); } 187 | size_type max_bucket_count() const { return rep.max_bucket_count(); } 188 | 189 | // These are tr1 methods. bucket() is the bucket the key is or would be in. 190 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } 191 | size_type bucket(const key_type& key) const { return rep.bucket(key); } 192 | float load_factor() const { return size() * 1.0f / bucket_count(); } 193 | float max_load_factor() const { 194 | float shrink, grow; 195 | rep.get_resizing_parameters(&shrink, &grow); 196 | return grow; 197 | } 198 | void max_load_factor(float new_grow) { 199 | float shrink, grow; 200 | rep.get_resizing_parameters(&shrink, &grow); 201 | rep.set_resizing_parameters(shrink, new_grow); 202 | } 203 | // These aren't tr1 methods but perhaps ought to be. 204 | float min_load_factor() const { 205 | float shrink, grow; 206 | rep.get_resizing_parameters(&shrink, &grow); 207 | return shrink; 208 | } 209 | void min_load_factor(float new_shrink) { 210 | float shrink, grow; 211 | rep.get_resizing_parameters(&shrink, &grow); 212 | rep.set_resizing_parameters(new_shrink, grow); 213 | } 214 | // Deprecated; use min_load_factor() or max_load_factor() instead. 
215 | void set_resizing_parameters(float shrink, float grow) { 216 | rep.set_resizing_parameters(shrink, grow); 217 | } 218 | 219 | void reserve(size_type size) { rehash(size); } // note: rehash internally treats hint/size as number of elements 220 | void resize(size_type hint) { rep.resize(hint); } 221 | void rehash(size_type hint) { resize(hint); } // the tr1 name 222 | 223 | // Lookup routines 224 | iterator find(const key_type& key) const { return rep.find(key); } 225 | 226 | template 227 | typename std::enable_if::value, iterator>::type 228 | find(const K& key) const { return rep.find(key); } 229 | 230 | size_type count(const key_type& key) const { return rep.count(key); } 231 | 232 | template 233 | typename std::enable_if::value, size_type>::type 234 | count(const K& key) const { return rep.count(key); } 235 | 236 | std::pair equal_range(const key_type& key) const { 237 | return rep.equal_range(key); 238 | } 239 | 240 | template 241 | typename std::enable_if::value, std::pair>::type 242 | equal_range(const K& key) const { 243 | return rep.equal_range(key); 244 | } 245 | 246 | // Insertion routines 247 | std::pair insert(const value_type& obj) { 248 | std::pair p = rep.insert(obj); 249 | return std::pair(p.first, p.second); // const to non-const 250 | } 251 | template 252 | void insert(InputIterator f, InputIterator l) { 253 | rep.insert(f, l); 254 | } 255 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } 256 | // Required for std::insert_iterator; the passed-in iterator is ignored. 257 | iterator insert(iterator, const value_type& obj) { return insert(obj).first; } 258 | 259 | // Deletion routines 260 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key 261 | // value to identify deleted buckets. You can change the key as 262 | // time goes on, or get rid of it entirely to be insert-only. 263 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } 264 | void clear_deleted_key() { rep.clear_deleted_key(); } 265 | key_type deleted_key() const { return rep.deleted_key(); } 266 | 267 | // These are standard 268 | size_type erase(const key_type& key) { return rep.erase(key); } 269 | void erase(iterator it) { rep.erase(it); } 270 | void erase(iterator f, iterator l) { rep.erase(f, l); } 271 | 272 | // Comparison 273 | bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } 274 | bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } 275 | 276 | // I/O -- this is an add-on for writing metainformation to disk 277 | // 278 | // For maximum flexibility, this does not assume a particular 279 | // file type (though it will probably be a FILE *). We just pass 280 | // the fp through to rep. 281 | 282 | // If your keys and values are simple enough, you can pass this 283 | // serializer to serialize()/unserialize(). "Simple enough" means 284 | // value_type is a POD type that contains no pointers. Note, 285 | // however, we don't try to normalize endianness. 286 | typedef typename ht::NopointerSerializer NopointerSerializer; 287 | 288 | // serializer: a class providing operator()(OUTPUT*, const value_type&) 289 | // (writing value_type to OUTPUT). You can specify a 290 | // NopointerSerializer object if appropriate (see above). 291 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a 292 | // pointer to a class providing size_t Write(const void*, size_t), 293 | // which writes a buffer into a stream (which fp presumably 294 | // owns) and returns the number of bytes successfully written. 
295 | // Note basic_ostream is not currently supported. 296 | template 297 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { 298 | return rep.serialize(serializer, fp); 299 | } 300 | 301 | // serializer: a functor providing operator()(INPUT*, value_type*) 302 | // (reading from INPUT and into value_type). You can specify a 303 | // NopointerSerializer object if appropriate (see above). 304 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a 305 | // pointer to a class providing size_t Read(void*, size_t), 306 | // which reads into a buffer from a stream (which fp presumably 307 | // owns) and returns the number of bytes successfully read. 308 | // Note basic_istream is not currently supported. 309 | // NOTE: Since value_type is const Key, ValueSerializer 310 | // may need to do a const cast in order to fill in the key. 311 | // NOTE: if Key is not a POD type, the serializer MUST use 312 | // placement-new to initialize its value, rather than a normal 313 | // equals-assignment or similar. (The value_type* passed into 314 | // the serializer points to garbage memory.) 315 | template 316 | bool unserialize(ValueSerializer serializer, INPUT* fp) { 317 | return rep.unserialize(serializer, fp); 318 | } 319 | 320 | // The four methods below are DEPRECATED. 321 | // Use serialize() and unserialize() for new code. 322 | template 323 | bool write_metadata(OUTPUT* fp) { 324 | return rep.write_metadata(fp); 325 | } 326 | 327 | template 328 | bool read_metadata(INPUT* fp) { 329 | return rep.read_metadata(fp); 330 | } 331 | 332 | template 333 | bool write_nopointer_data(OUTPUT* fp) { 334 | return rep.write_nopointer_data(fp); 335 | } 336 | 337 | template 338 | bool read_nopointer_data(INPUT* fp) { 339 | return rep.read_nopointer_data(fp); 340 | } 341 | }; 342 | 343 | template 344 | inline void swap(sparse_hash_set& hs1, 345 | sparse_hash_set& hs2) { 346 | hs1.swap(hs2); 347 | } 348 | 349 | } // namespace google 350 | -------------------------------------------------------------------------------- /benchmark/sparsehash/internal/hashtable-common.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2010, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // Provides classes shared by both sparse and dense hashtable. 33 | // 34 | // sh_hashtable_settings has parameters for growing and shrinking 35 | // a hashtable. It also packages zero-size functor (ie. hasher). 36 | // 37 | // Other functions and classes provide common code for serializing 38 | // and deserializing hashtables to a stream (such as a FILE*). 39 | 40 | #pragma once 41 | 42 | #include 43 | #include 44 | #include // for size_t 45 | #include 46 | #include // For length_error 47 | 48 | namespace google { 49 | namespace sparsehash_internal { 50 | 51 | template struct make_void { typedef void type;}; 52 | template using void_t = typename make_void::type; 53 | 54 | template 55 | struct has_is_transparent : std::false_type {}; 56 | 57 | template 58 | struct has_is_transparent> : std::true_type {}; 59 | 60 | template 61 | struct has_transparent_key_equal : std::false_type {}; 62 | 63 | template 64 | struct has_transparent_key_equal> : std::true_type {}; 65 | 66 | template ::value> 67 | struct key_equal_chosen { 68 | using type = EqualKey; 69 | }; 70 | 71 | template 72 | struct key_equal_chosen { 73 | using type = typename HashFcn::transparent_key_equal; 74 | }; 75 | 76 | // Adaptor methods for reading/writing data from an INPUT or OUPTUT 77 | // variable passed to serialize() or unserialize(). For now we 78 | // have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note 79 | // they are pointers, unlike typical use), or else a pointer to 80 | // something that supports a Read()/Write() method. 81 | // 82 | // For technical reasons, we implement read_data/write_data in two 83 | // stages. The actual work is done in *_data_internal, which takes 84 | // the stream argument twice: once as a template type, and once with 85 | // normal type information. (We only use the second version.) We do 86 | // this because of how C++ picks what function overload to use. If we 87 | // implemented this the naive way: 88 | // bool read_data(istream* is, const void* data, size_t length); 89 | // template read_data(T* fp, const void* data, size_t length); 90 | // C++ would prefer the second version for every stream type except 91 | // istream. However, we want C++ to prefer the first version for 92 | // streams that are *subclasses* of istream, such as istringstream. 93 | // This is not possible given the way template types are resolved. So 94 | // we split the stream argument in two, one of which is templated and 95 | // one of which is not. The specialized functions (like the istream 96 | // version above) ignore the template arg and use the second, 'type' 97 | // arg, getting subclass matching as normal. The 'catch-all' 98 | // functions (the second version above) use the template arg to deduce 99 | // the type, and use a second, void* arg to achieve the desired 100 | // 'catch-all' semantics. 
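As a sanity check on the dispatch scheme described above, here is a standalone sketch that restates the trick with made-up names (pick and classify are illustrative only, not part of sparsehash): the first argument merely deduces the template type, while the second, non-template argument goes through normal overload resolution, so subclasses of std::istream such as std::istringstream bind to the istream* overload rather than the void* catch-all.

```cpp
#include <iostream>
#include <sstream>

// Catch-all: matches anything; the second parameter converts to void*.
template <typename T>
const char* pick(T*, void*) { return "custom stream"; }

// Specific: the non-template second parameter is matched normally, so any
// subclass of std::istream (istringstream, ifstream, ...) prefers this one,
// because derived-to-base pointer conversion beats conversion to void*.
template <typename T>
const char* pick(T*, std::istream*) { return "istream"; }

template <typename INPUT>
const char* classify(INPUT* fp) {
  return pick(fp, fp);  // pass the stream twice, as read_data() does below
}

int main() {
  std::istringstream iss("payload");
  int custom = 0;  // stands in for a user type with its own Read() method
  std::cout << classify(&iss) << "\n";    // prints "istream"
  std::cout << classify(&custom) << "\n"; // prints "custom stream"
  return 0;
}
```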
101 | 102 | // ----- low-level I/O for FILE* ---- 103 | 104 | template 105 | inline bool read_data_internal(Ignored*, FILE* fp, void* data, size_t length) { 106 | return fread(data, length, 1, fp) == 1; 107 | } 108 | 109 | template 110 | inline bool write_data_internal(Ignored*, FILE* fp, const void* data, 111 | size_t length) { 112 | return fwrite(data, length, 1, fp) == 1; 113 | } 114 | 115 | // ----- low-level I/O for iostream ---- 116 | 117 | // We want the caller to be responsible for #including , not 118 | // us, because iostream is a big header! According to the standard, 119 | // it's only legal to delay the instantiation the way we want to if 120 | // the istream/ostream is a template type. So we jump through hoops. 121 | template 122 | inline bool read_data_internal_for_istream(ISTREAM* fp, void* data, 123 | size_t length) { 124 | return fp->read(reinterpret_cast(data), length).good(); 125 | } 126 | template 127 | inline bool read_data_internal(Ignored*, std::istream* fp, void* data, 128 | size_t length) { 129 | return read_data_internal_for_istream(fp, data, length); 130 | } 131 | 132 | template 133 | inline bool write_data_internal_for_ostream(OSTREAM* fp, const void* data, 134 | size_t length) { 135 | return fp->write(reinterpret_cast(data), length).good(); 136 | } 137 | template 138 | inline bool write_data_internal(Ignored*, std::ostream* fp, const void* data, 139 | size_t length) { 140 | return write_data_internal_for_ostream(fp, data, length); 141 | } 142 | 143 | // ----- low-level I/O for custom streams ---- 144 | 145 | // The INPUT type needs to support a Read() method that takes a 146 | // buffer and a length and returns the number of bytes read. 147 | template 148 | inline bool read_data_internal(INPUT* fp, void*, void* data, size_t length) { 149 | return static_cast(fp->Read(data, length)) == length; 150 | } 151 | 152 | // The OUTPUT type needs to support a Write() operation that takes 153 | // a buffer and a length and returns the number of bytes written. 154 | template 155 | inline bool write_data_internal(OUTPUT* fp, void*, const void* data, 156 | size_t length) { 157 | return static_cast(fp->Write(data, length)) == length; 158 | } 159 | 160 | // ----- low-level I/O: the public API ---- 161 | 162 | template 163 | inline bool read_data(INPUT* fp, void* data, size_t length) { 164 | return read_data_internal(fp, fp, data, length); 165 | } 166 | 167 | template 168 | inline bool write_data(OUTPUT* fp, const void* data, size_t length) { 169 | return write_data_internal(fp, fp, data, length); 170 | } 171 | 172 | // Uses read_data() and write_data() to read/write an integer. 173 | // length is the number of bytes to read/write (which may differ 174 | // from sizeof(IntType), allowing us to save on a 32-bit system 175 | // and load on a 64-bit system). Excess bytes are taken to be 0. 176 | // INPUT and OUTPUT must match legal inputs to read/write_data (above). 177 | template 178 | bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) { 179 | *value = 0; 180 | unsigned char byte; 181 | // We require IntType to be unsigned or else the shifting gets all screwy. 
182 | static_assert(static_cast(-1) > static_cast(0), 183 | "serializing int requires an unsigned type"); 184 | for (size_t i = 0; i < length; ++i) { 185 | if (!read_data(fp, &byte, sizeof(byte))) return false; 186 | *value |= static_cast(byte) << ((length - 1 - i) * 8); 187 | } 188 | return true; 189 | } 190 | 191 | template 192 | bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) { 193 | unsigned char byte; 194 | // We require IntType to be unsigned or else the shifting gets all screwy. 195 | static_assert(static_cast(-1) > static_cast(0), 196 | "serializing int requires an unsigned type"); 197 | for (size_t i = 0; i < length; ++i) { 198 | byte = (sizeof(value) <= length - 1 - i) 199 | ? 0 200 | : static_cast((value >> ((length - 1 - i) * 8)) & 201 | 255); 202 | if (!write_data(fp, &byte, sizeof(byte))) return false; 203 | } 204 | return true; 205 | } 206 | 207 | // If your keys and values are simple enough, you can pass this 208 | // serializer to serialize()/unserialize(). "Simple enough" means 209 | // value_type is a POD type that contains no pointers. Note, 210 | // however, we don't try to normalize endianness. 211 | // This is the type used for NopointerSerializer. 212 | template 213 | struct pod_serializer { 214 | template 215 | bool operator()(INPUT* fp, value_type* value) const { 216 | return read_data(fp, value, sizeof(*value)); 217 | } 218 | 219 | template 220 | bool operator()(OUTPUT* fp, const value_type& value) const { 221 | return write_data(fp, &value, sizeof(value)); 222 | } 223 | }; 224 | 225 | // Settings contains parameters for growing and shrinking the table. 226 | // It also packages zero-size functor (ie. hasher). 227 | // 228 | // It does some munging of the hash value in cases where we think 229 | // (fear) the original hash function might not be very good. In 230 | // particular, the default hash of pointers is the identity hash, 231 | // so probably all the low bits are 0. We identify when we think 232 | // we're hashing a pointer, and chop off the low bits. Note this 233 | // isn't perfect: even when the key is a pointer, we can't tell 234 | // for sure that the hash is the identity hash. If it's not, this 235 | // is needless work (and possibly, though not likely, harmful). 236 | 237 | template 239 | class sh_hashtable_settings : public HashFunc { 240 | public: 241 | typedef Key key_type; 242 | typedef HashFunc hasher; 243 | typedef SizeType size_type; 244 | static_assert(!has_transparent_key_equal::value || has_is_transparent::value, 245 | "hash provided non-transparent key_equal"); 246 | 247 | public: 248 | sh_hashtable_settings(const hasher& hf, const float ht_occupancy_flt, 249 | const float ht_empty_flt) 250 | : hasher(hf), 251 | enlarge_threshold_(0), 252 | shrink_threshold_(0), 253 | consider_shrink_(false), 254 | use_empty_(false), 255 | use_deleted_(false), 256 | num_ht_copies_(0) { 257 | set_enlarge_factor(ht_occupancy_flt); 258 | set_shrink_factor(ht_empty_flt); 259 | } 260 | 261 | template 262 | size_type hash(const K& v) const { 263 | // We munge the hash value when we don't trust hasher::operator(). 
264 | return hash_munger::MungedHash(hasher::operator()(v)); 265 | } 266 | 267 | float enlarge_factor() const { return enlarge_factor_; } 268 | void set_enlarge_factor(float f) { enlarge_factor_ = f; } 269 | float shrink_factor() const { return shrink_factor_; } 270 | void set_shrink_factor(float f) { shrink_factor_ = f; } 271 | 272 | size_type enlarge_threshold() const { return enlarge_threshold_; } 273 | void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; } 274 | size_type shrink_threshold() const { return shrink_threshold_; } 275 | void set_shrink_threshold(size_type t) { shrink_threshold_ = t; } 276 | 277 | size_type enlarge_size(size_type x) const { 278 | return static_cast(x * enlarge_factor_); 279 | } 280 | size_type shrink_size(size_type x) const { 281 | return static_cast(x * shrink_factor_); 282 | } 283 | 284 | bool consider_shrink() const { return consider_shrink_; } 285 | void set_consider_shrink(bool t) { consider_shrink_ = t; } 286 | 287 | bool use_empty() const { return use_empty_; } 288 | void set_use_empty(bool t) { use_empty_ = t; } 289 | 290 | bool use_deleted() const { return use_deleted_; } 291 | void set_use_deleted(bool t) { use_deleted_ = t; } 292 | 293 | size_type num_ht_copies() const { 294 | return static_cast(num_ht_copies_); 295 | } 296 | void inc_num_ht_copies() { ++num_ht_copies_; } 297 | 298 | // Reset the enlarge and shrink thresholds 299 | void reset_thresholds(size_type num_buckets) { 300 | set_enlarge_threshold(enlarge_size(num_buckets)); 301 | set_shrink_threshold(shrink_size(num_buckets)); 302 | // whatever caused us to reset already considered 303 | set_consider_shrink(false); 304 | } 305 | 306 | // Caller is resposible for calling reset_threshold right after 307 | // set_resizing_parameters. 308 | void set_resizing_parameters(float shrink, float grow) { 309 | assert(shrink >= 0.0); 310 | assert(grow <= 1.0); 311 | if (shrink > grow / 2.0f) 312 | shrink = grow / 2.0f; // otherwise we thrash hashtable size 313 | set_shrink_factor(shrink); 314 | set_enlarge_factor(grow); 315 | } 316 | 317 | // This is the smallest size a hashtable can be without being too crowded 318 | // If you like, you can give a min #buckets as well as a min #elts 319 | size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) { 320 | float enlarge = enlarge_factor(); 321 | size_type sz = HT_MIN_BUCKETS; // min buckets allowed 322 | while (sz < min_buckets_wanted || 323 | num_elts >= static_cast(sz * enlarge)) { 324 | // This just prevents overflowing size_type, since sz can exceed 325 | // max_size() here. 326 | if (static_cast(sz * 2) < sz) { 327 | throw std::length_error("resize overflow"); // protect against overflow 328 | } 329 | sz *= 2; 330 | } 331 | return sz; 332 | } 333 | 334 | private: 335 | template 336 | class hash_munger { 337 | public: 338 | static size_t MungedHash(size_t hash) { return hash; } 339 | }; 340 | // This matches when the hashtable key is a pointer. 341 | template 342 | class hash_munger { 343 | public: 344 | static size_t MungedHash(size_t hash) { 345 | // TODO(csilvers): consider rotating instead: 346 | // static const int shift = (sizeof(void *) == 4) ? 2 : 3; 347 | // return (hash << (sizeof(hash) * 8) - shift)) | (hash >> 348 | // shift); 349 | // This matters if we ever change sparse/dense_hash_* to compare 350 | // hashes before comparing actual values. It's speedy on x86. 
351 | return hash / sizeof(void*); // get rid of known-0 bits 352 | } 353 | }; 354 | 355 | size_type enlarge_threshold_; // table.size() * enlarge_factor 356 | size_type shrink_threshold_; // table.size() * shrink_factor 357 | float enlarge_factor_; // how full before resize 358 | float shrink_factor_; // how empty before resize 359 | // consider_shrink=true if we should try to shrink before next insert 360 | bool consider_shrink_; 361 | bool use_empty_; // used only by densehashtable, not sparsehashtable 362 | bool use_deleted_; // false until delkey has been set 363 | // num_ht_copies is a counter incremented every Copy/Move 364 | unsigned int num_ht_copies_; 365 | }; 366 | 367 | } // namespace sparsehash_internal 368 | } // namespace google 369 | -------------------------------------------------------------------------------- /benchmark/sparsehash/dense_hash_set: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2005, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // This is just a very thin wrapper over densehashtable.h, just 33 | // like sgi stl's stl_hash_set is a very thin wrapper over 34 | // stl_hashtable. The major thing we define is operator[], because 35 | // we have a concept of a data_type which stl_hashtable doesn't 36 | // (it only has a key and a value). 37 | // 38 | // This is more different from dense_hash_map than you might think, 39 | // because all iterators for sets are const (you obviously can't 40 | // change the key, and for sets there is no value). 41 | // 42 | // NOTE: this is exactly like sparse_hash_set.h, with the word 43 | // "sparse" replaced by "dense", except for the addition of 44 | // set_empty_key(). 45 | // 46 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. 47 | // 48 | // Otherwise your program will die in mysterious ways. 
(Note if you 49 | // use the constructor that takes an InputIterator range, you pass in 50 | // the empty key in the constructor, rather than after. As a result, 51 | // this constructor differs from the standard STL version.) 52 | // 53 | // In other respects, we adhere mostly to the STL semantics for 54 | // hash-map. One important exception is that insert() may invalidate 55 | // iterators entirely -- STL semantics are that insert() may reorder 56 | // iterators, but they all still refer to something valid in the 57 | // hashtable. Not so for us. Likewise, insert() may invalidate 58 | // pointers into the hashtable. (Whether insert invalidates iterators 59 | // and pointers depends on whether it results in a hashtable resize). 60 | // On the plus side, delete() doesn't invalidate iterators or pointers 61 | // at all, or even change the ordering of elements. 62 | // 63 | // Here are a few "power user" tips: 64 | // 65 | // 1) set_deleted_key(): 66 | // If you want to use erase() you must call set_deleted_key(), 67 | // in addition to set_empty_key(), after construction. 68 | // The deleted and empty keys must differ. 69 | // 70 | // 2) resize(0): 71 | // When an item is deleted, its memory isn't freed right 72 | // away. This allows you to iterate over a hashtable, 73 | // and call erase(), without invalidating the iterator. 74 | // To force the memory to be freed, call resize(0). 75 | // For tr1 compatibility, this can also be called as rehash(0). 76 | // 77 | // 3) min_load_factor(0.0) 78 | // Setting the minimum load factor to 0.0 guarantees that 79 | // the hash table will never shrink. 80 | // 81 | // Roughly speaking: 82 | // (1) dense_hash_set: fastest, uses the most memory unless entries are small 83 | // (2) sparse_hash_set: slowest, uses the least memory 84 | // (3) hash_set / unordered_set (STL): in the middle 85 | // 86 | // Typically I use sparse_hash_set when I care about space and/or when 87 | // I need to save the hashtable on disk. I use hash_set otherwise. I 88 | // don't personally use dense_hash_set ever; some people use it for 89 | // small sets with lots of lookups. 90 | // 91 | // - dense_hash_set has, typically, about 78% memory overhead (if your 92 | // data takes up X bytes, the hash_set uses .78X more bytes in overhead). 93 | // - sparse_hash_set has about 4 bits overhead per entry. 94 | // - sparse_hash_set can be 3-7 times slower than the others for lookup and, 95 | // especially, inserts. See time_hash_map.cc for details. 96 | // 97 | // See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html 98 | // for information about how to use this class. 
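The construction rules above are easy to get wrong, so here is a minimal, hypothetical sketch (not part of this header): set_empty_key() comes immediately after construction, set_deleted_key() is additionally required before erase(), and the two keys must differ and must never be inserted as real elements. The include path and sample values are assumptions based on this repository's benchmark layout.

```cpp
#include <cstdio>
#include <string>
#include <sparsehash/dense_hash_set>  // assumed path, per the benchmark/ layout (-I.)

int main() {
  google::dense_hash_set<std::string> seen;
  seen.set_empty_key("<empty>");      // mandatory, immediately after construction
  seen.set_deleted_key("<deleted>");  // required before erase(); must differ from the empty key

  seen.insert("alpha");
  seen.insert("beta");
  seen.erase("alpha");

  std::printf("beta present: %zu, size: %zu\n", seen.count("beta"), seen.size());
  return 0;
}
```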
99 | 100 | #pragma once 101 | 102 | #include // needed by stl_alloc 103 | #include // for equal_to<>, select1st<>, etc 104 | #include // for initializer_list 105 | #include // for alloc 106 | #include // for pair<> 107 | #include // IWYU pragma: export 108 | #include 109 | 110 | namespace google { 111 | 112 | template , 113 | class EqualKey = std::equal_to, 114 | class Alloc = libc_allocator_with_realloc> 115 | class dense_hash_set { 116 | private: 117 | // Apparently identity is not stl-standard, so we define our own 118 | struct Identity { 119 | typedef const Value& result_type; 120 | template 121 | const Value& operator()(V&& v) const { return v; } 122 | }; 123 | struct SetKey { 124 | void operator()(Value* value, const Value& new_key) const { 125 | *value = new_key; 126 | } 127 | void operator()(Value* value, const Value& new_key, bool) const { 128 | new(value) Value(new_key); 129 | } 130 | }; 131 | 132 | // The actual data 133 | typedef typename sparsehash_internal::key_equal_chosen::type EqualKeyChosen; 134 | typedef dense_hashtable ht; 136 | ht rep; 137 | 138 | static_assert(!sparsehash_internal::has_transparent_key_equal::value 139 | || std::is_same>::value 140 | || std::is_same::value, 141 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash"); 142 | 143 | public: 144 | typedef typename ht::key_type key_type; 145 | typedef typename ht::value_type value_type; 146 | typedef typename ht::hasher hasher; 147 | typedef typename ht::key_equal key_equal; 148 | typedef Alloc allocator_type; 149 | 150 | typedef typename ht::size_type size_type; 151 | typedef typename ht::difference_type difference_type; 152 | typedef typename ht::const_pointer pointer; 153 | typedef typename ht::const_pointer const_pointer; 154 | typedef typename ht::const_reference reference; 155 | typedef typename ht::const_reference const_reference; 156 | 157 | typedef typename ht::const_iterator iterator; 158 | typedef typename ht::const_iterator const_iterator; 159 | typedef typename ht::const_local_iterator local_iterator; 160 | typedef typename ht::const_local_iterator const_local_iterator; 161 | 162 | // Iterator functions -- recall all iterators are const 163 | iterator begin() const { return rep.begin(); } 164 | iterator end() const { return rep.end(); } 165 | const_iterator cbegin() const { return rep.begin(); } 166 | const_iterator cend() const { return rep.end(); } 167 | 168 | // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
169 | local_iterator begin(size_type i) const { return rep.begin(i); } 170 | local_iterator end(size_type i) const { return rep.end(i); } 171 | local_iterator cbegin(size_type i) const { return rep.begin(i); } 172 | local_iterator cend(size_type i) const { return rep.end(i); } 173 | 174 | // Accessor functions 175 | allocator_type get_allocator() const { return rep.get_allocator(); } 176 | hasher hash_funct() const { return rep.hash_funct(); } 177 | hasher hash_function() const { return hash_funct(); } // tr1 name 178 | key_equal key_eq() const { return rep.key_eq(); } 179 | 180 | // Constructors 181 | explicit dense_hash_set(size_type expected_max_items_in_table = 0, 182 | const hasher& hf = hasher(), 183 | const key_equal& eql = key_equal(), 184 | const allocator_type& alloc = allocator_type()) 185 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { 186 | } 187 | 188 | template 189 | dense_hash_set(InputIterator f, InputIterator l, 190 | const key_type& empty_key_val, 191 | size_type expected_max_items_in_table = 0, 192 | const hasher& hf = hasher(), 193 | const key_equal& eql = key_equal(), 194 | const allocator_type& alloc = allocator_type()) 195 | : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { 196 | set_empty_key(empty_key_val); 197 | rep.insert(f, l); 198 | } 199 | // We use the default copy constructor 200 | // We use the default operator=() 201 | // We use the default destructor 202 | 203 | void clear() { rep.clear(); } 204 | // This clears the hash set without resizing it down to the minimum 205 | // bucket count, but rather keeps the number of buckets constant 206 | void clear_no_resize() { rep.clear_no_resize(); } 207 | void swap(dense_hash_set& hs) { rep.swap(hs.rep); } 208 | 209 | // Functions concerning size 210 | size_type size() const { return rep.size(); } 211 | size_type max_size() const { return rep.max_size(); } 212 | bool empty() const { return rep.empty(); } 213 | size_type bucket_count() const { return rep.bucket_count(); } 214 | size_type max_bucket_count() const { return rep.max_bucket_count(); } 215 | 216 | // These are tr1 methods. bucket() is the bucket the key is or would be in. 217 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } 218 | size_type bucket(const key_type& key) const { return rep.bucket(key); } 219 | float load_factor() const { return size() * 1.0f / bucket_count(); } 220 | float max_load_factor() const { 221 | float shrink, grow; 222 | rep.get_resizing_parameters(&shrink, &grow); 223 | return grow; 224 | } 225 | void max_load_factor(float new_grow) { 226 | float shrink, grow; 227 | rep.get_resizing_parameters(&shrink, &grow); 228 | rep.set_resizing_parameters(shrink, new_grow); 229 | } 230 | // These aren't tr1 methods but perhaps ought to be. 231 | float min_load_factor() const { 232 | float shrink, grow; 233 | rep.get_resizing_parameters(&shrink, &grow); 234 | return shrink; 235 | } 236 | void min_load_factor(float new_shrink) { 237 | float shrink, grow; 238 | rep.get_resizing_parameters(&shrink, &grow); 239 | rep.set_resizing_parameters(new_shrink, grow); 240 | } 241 | // Deprecated; use min_load_factor() or max_load_factor() instead. 
242 | void set_resizing_parameters(float shrink, float grow) { 243 | rep.set_resizing_parameters(shrink, grow); 244 | } 245 | 246 | void reserve(size_type size) { rehash(size); } // note: rehash internally treats hint/size as number of elements 247 | void resize(size_type hint) { rep.resize(hint); } 248 | void rehash(size_type hint) { resize(hint); } // the tr1 name 249 | 250 | // Lookup routines 251 | iterator find(const key_type& key) const { return rep.find(key); } 252 | 253 | template 254 | typename std::enable_if::value, iterator>::type 255 | find(const K& key) const { return rep.find(key); } 256 | 257 | size_type count(const key_type& key) const { return rep.count(key); } 258 | 259 | template 260 | typename std::enable_if::value, size_type>::type 261 | count(const K& key) const { return rep.count(key); } 262 | 263 | std::pair equal_range(const key_type& key) const { 264 | return rep.equal_range(key); 265 | } 266 | 267 | template 268 | typename std::enable_if::value, std::pair>::type 269 | equal_range(const K& key) const { 270 | return rep.equal_range(key); 271 | } 272 | 273 | // Insertion routines 274 | std::pair insert(const value_type& obj) { 275 | std::pair p = rep.insert(obj); 276 | return std::pair(p.first, p.second); // const to non-const 277 | } 278 | 279 | std::pair insert(value_type&& obj) { 280 | std::pair p = rep.insert(std::move(obj)); 281 | return std::pair(p.first, p.second); // const to non-const 282 | } 283 | 284 | template 285 | std::pair emplace(Args&&... args) { 286 | return rep.emplace(std::forward(args)...); 287 | } 288 | 289 | template 290 | std::pair emplace_hint(const_iterator hint, Args&&... args) { 291 | return rep.emplace_hint(hint, std::forward(args)...); 292 | } 293 | 294 | template 295 | void insert(InputIterator f, InputIterator l) { 296 | rep.insert(f, l); 297 | } 298 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } 299 | void insert(std::initializer_list ilist) { rep.insert(ilist.begin(), ilist.end()); } 300 | // Required for std::insert_iterator; the passed-in iterator is ignored. 301 | iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; } 302 | iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; } 303 | 304 | // Deletion and empty routines 305 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key 306 | // value to identify deleted and empty buckets. You can change the 307 | // deleted key as time goes on, or get rid of it entirely to be insert-only. 308 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); } 309 | key_type empty_key() const { return rep.empty_key(); } 310 | 311 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } 312 | void clear_deleted_key() { rep.clear_deleted_key(); } 313 | key_type deleted_key() const { return rep.deleted_key(); } 314 | 315 | // These are standard 316 | size_type erase(const key_type& key) { return rep.erase(key); } 317 | iterator erase(const_iterator it) { return rep.erase(it); } 318 | iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); } 319 | 320 | // Comparison 321 | bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; } 322 | bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; } 323 | 324 | // I/O -- this is an add-on for writing metainformation to disk 325 | // 326 | // For maximum flexibility, this does not assume a particular 327 | // file type (though it will probably be a FILE *). 
We just pass 328 | // the fp through to rep. 329 | 330 | // If your keys and values are simple enough, you can pass this 331 | // serializer to serialize()/unserialize(). "Simple enough" means 332 | // value_type is a POD type that contains no pointers. Note, 333 | // however, we don't try to normalize endianness. 334 | typedef typename ht::NopointerSerializer NopointerSerializer; 335 | 336 | // serializer: a class providing operator()(OUTPUT*, const value_type&) 337 | // (writing value_type to OUTPUT). You can specify a 338 | // NopointerSerializer object if appropriate (see above). 339 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a 340 | // pointer to a class providing size_t Write(const void*, size_t), 341 | // which writes a buffer into a stream (which fp presumably 342 | // owns) and returns the number of bytes successfully written. 343 | // Note basic_ostream is not currently supported. 344 | template 345 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { 346 | return rep.serialize(serializer, fp); 347 | } 348 | 349 | // serializer: a functor providing operator()(INPUT*, value_type*) 350 | // (reading from INPUT and into value_type). You can specify a 351 | // NopointerSerializer object if appropriate (see above). 352 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a 353 | // pointer to a class providing size_t Read(void*, size_t), 354 | // which reads into a buffer from a stream (which fp presumably 355 | // owns) and returns the number of bytes successfully read. 356 | // Note basic_istream is not currently supported. 357 | template 358 | bool unserialize(ValueSerializer serializer, INPUT* fp) { 359 | return rep.unserialize(serializer, fp); 360 | } 361 | }; 362 | 363 | template 364 | inline void swap(dense_hash_set& hs1, 365 | dense_hash_set& hs2) { 366 | hs1.swap(hs2); 367 | } 368 | 369 | } // namespace google 370 | -------------------------------------------------------------------------------- /benchmark/sparsehash/sparse_hash_map: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2005, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | // 32 | // This is just a very thin wrapper over sparsehashtable.h, just 33 | // like sgi stl's stl_hash_map is a very thin wrapper over 34 | // stl_hashtable. The major thing we define is operator[], because 35 | // we have a concept of a data_type which stl_hashtable doesn't 36 | // (it only has a key and a value). 37 | // 38 | // We adhere mostly to the STL semantics for hash-map. One important 39 | // exception is that insert() may invalidate iterators entirely -- STL 40 | // semantics are that insert() may reorder iterators, but they all 41 | // still refer to something valid in the hashtable. Not so for us. 42 | // Likewise, insert() may invalidate pointers into the hashtable. 43 | // (Whether insert invalidates iterators and pointers depends on 44 | // whether it results in a hashtable resize). On the plus side, 45 | // delete() doesn't invalidate iterators or pointers at all, or even 46 | // change the ordering of elements. 47 | // 48 | // Here are a few "power user" tips: 49 | // 50 | // 1) set_deleted_key(): 51 | // Unlike STL's hash_map, if you want to use erase() you 52 | // *must* call set_deleted_key() after construction. 53 | // 54 | // 2) resize(0): 55 | // When an item is deleted, its memory isn't freed right 56 | // away. This is what allows you to iterate over a hashtable 57 | // and call erase() without invalidating the iterator. 58 | // To force the memory to be freed, call resize(0). 59 | // For tr1 compatibility, this can also be called as rehash(0). 60 | // 61 | // 3) min_load_factor(0.0) 62 | // Setting the minimum load factor to 0.0 guarantees that 63 | // the hash table will never shrink. 64 | // 65 | // Roughly speaking: 66 | // (1) dense_hash_map: fastest, uses the most memory unless entries are small 67 | // (2) sparse_hash_map: slowest, uses the least memory 68 | // (3) hash_map / unordered_map (STL): in the middle 69 | // 70 | // Typically I use sparse_hash_map when I care about space and/or when 71 | // I need to save the hashtable on disk. I use hash_map otherwise. I 72 | // don't personally use dense_hash_map ever; some people use it for 73 | // small maps with lots of lookups. 74 | // 75 | // - dense_hash_map has, typically, about 78% memory overhead (if your 76 | // data takes up X bytes, the hash_map uses .78X more bytes in overhead). 77 | // - sparse_hash_map has about 4 bits overhead per entry. 78 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and, 79 | // especially, inserts. See time_hash_map.cc for details. 80 | // 81 | // See /usr/(local/)?doc/sparsehash-*/sparse_hash_map.html 82 | // for information about how to use this class. 
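// A minimal usage sketch of the class documented above, following the power-user
// tips (set_deleted_key(), rehash(0), min_load_factor(0.0)). The key/value types,
// variable name and include path are assumptions for illustration only; every call
// used here is declared further down in this header.
//
//   #include <string>
//   #include <sparsehash/sparse_hash_map>   // adjust to wherever this header is installed
//
//   google::sparse_hash_map<std::string, int> counts;
//   counts.set_deleted_key("");     // required before erase(); "" must never be inserted as a real key
//   counts["apple"] += 1;           // operator[] default-constructs the mapped value on a miss
//   counts.erase("apple");          // deleted entries are not freed immediately...
//   counts.rehash(0);               // ...force the memory to be released (tr1 name for resize(0))
//   counts.min_load_factor(0.0f);   // the table will then never shrink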
83 | 84 | #pragma once 85 | 86 | #include // needed by stl_alloc 87 | #include // for equal_to<>, select1st<>, etc 88 | #include // for alloc 89 | #include // for pair<> 90 | #include 91 | #include // IWYU pragma: export 92 | 93 | namespace google { 94 | 95 | template , 96 | class EqualKey = std::equal_to, 97 | class Alloc = libc_allocator_with_realloc>> 98 | class sparse_hash_map { 99 | private: 100 | // Apparently select1st is not stl-standard, so we define our own 101 | struct SelectKey { 102 | typedef const Key& result_type; 103 | const Key& operator()(const std::pair& p) const { 104 | return p.first; 105 | } 106 | }; 107 | struct SetKey { 108 | void operator()(std::pair* value, const Key& new_key) const { 109 | *const_cast(&value->first) = new_key; 110 | // It would be nice to clear the rest of value here as well, in 111 | // case it's taking up a lot of memory. We do this by clearing 112 | // the value. This assumes T has a zero-arg constructor! 113 | value->second = T(); 114 | } 115 | }; 116 | // For operator[]. 117 | struct DefaultValue { 118 | std::pair operator()(const Key& key) { 119 | return std::make_pair(key, T()); 120 | } 121 | }; 122 | 123 | // The actual data 124 | typedef typename sparsehash_internal::key_equal_chosen::type EqualKeyChosen; 125 | typedef sparse_hashtable, Key, HashFcn, SelectKey, 126 | SetKey, EqualKeyChosen, Alloc> ht; 127 | ht rep; 128 | 129 | static_assert(!sparsehash_internal::has_transparent_key_equal::value 130 | || std::is_same>::value 131 | || std::is_same::value, 132 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash"); 133 | 134 | public: 135 | typedef typename ht::key_type key_type; 136 | typedef T data_type; 137 | typedef T mapped_type; 138 | typedef typename ht::value_type value_type; 139 | typedef typename ht::hasher hasher; 140 | typedef typename ht::key_equal key_equal; 141 | typedef Alloc allocator_type; 142 | 143 | typedef typename ht::size_type size_type; 144 | typedef typename ht::difference_type difference_type; 145 | typedef typename ht::pointer pointer; 146 | typedef typename ht::const_pointer const_pointer; 147 | typedef typename ht::reference reference; 148 | typedef typename ht::const_reference const_reference; 149 | 150 | typedef typename ht::iterator iterator; 151 | typedef typename ht::const_iterator const_iterator; 152 | typedef typename ht::local_iterator local_iterator; 153 | typedef typename ht::const_local_iterator const_local_iterator; 154 | 155 | // Iterator functions 156 | iterator begin() { return rep.begin(); } 157 | iterator end() { return rep.end(); } 158 | const_iterator begin() const { return rep.begin(); } 159 | const_iterator end() const { return rep.end(); } 160 | 161 | // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. 
162 | local_iterator begin(size_type i) { return rep.begin(i); } 163 | local_iterator end(size_type i) { return rep.end(i); } 164 | const_local_iterator begin(size_type i) const { return rep.begin(i); } 165 | const_local_iterator end(size_type i) const { return rep.end(i); } 166 | 167 | // Accessor functions 168 | allocator_type get_allocator() const { return rep.get_allocator(); } 169 | hasher hash_funct() const { return rep.hash_funct(); } 170 | hasher hash_function() const { return hash_funct(); } 171 | key_equal key_eq() const { return rep.key_eq(); } 172 | 173 | // Constructors 174 | explicit sparse_hash_map(size_type expected_max_items_in_table = 0, 175 | const hasher& hf = hasher(), 176 | const key_equal& eql = key_equal(), 177 | const allocator_type& alloc = allocator_type()) 178 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), 179 | alloc) {} 180 | 181 | template 182 | sparse_hash_map(InputIterator f, InputIterator l, 183 | size_type expected_max_items_in_table = 0, 184 | const hasher& hf = hasher(), 185 | const key_equal& eql = key_equal(), 186 | const allocator_type& alloc = allocator_type()) 187 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), 188 | alloc) { 189 | rep.insert(f, l); 190 | } 191 | // We use the default copy constructor 192 | // We use the default operator=() 193 | // We use the default destructor 194 | 195 | void clear() { rep.clear(); } 196 | void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } 197 | 198 | // Functions concerning size 199 | size_type size() const { return rep.size(); } 200 | size_type max_size() const { return rep.max_size(); } 201 | bool empty() const { return rep.empty(); } 202 | size_type bucket_count() const { return rep.bucket_count(); } 203 | size_type max_bucket_count() const { return rep.max_bucket_count(); } 204 | 205 | // These are tr1 methods. bucket() is the bucket the key is or would be in. 206 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } 207 | size_type bucket(const key_type& key) const { return rep.bucket(key); } 208 | float load_factor() const { return size() * 1.0f / bucket_count(); } 209 | float max_load_factor() const { 210 | float shrink, grow; 211 | rep.get_resizing_parameters(&shrink, &grow); 212 | return grow; 213 | } 214 | void max_load_factor(float new_grow) { 215 | float shrink, grow; 216 | rep.get_resizing_parameters(&shrink, &grow); 217 | rep.set_resizing_parameters(shrink, new_grow); 218 | } 219 | // These aren't tr1 methods but perhaps ought to be. 220 | float min_load_factor() const { 221 | float shrink, grow; 222 | rep.get_resizing_parameters(&shrink, &grow); 223 | return shrink; 224 | } 225 | void min_load_factor(float new_shrink) { 226 | float shrink, grow; 227 | rep.get_resizing_parameters(&shrink, &grow); 228 | rep.set_resizing_parameters(new_shrink, grow); 229 | } 230 | // Deprecated; use min_load_factor() or max_load_factor() instead. 
231 | void set_resizing_parameters(float shrink, float grow) { 232 | rep.set_resizing_parameters(shrink, grow); 233 | } 234 | 235 | void reserve(size_type size) { rehash(size); } // note: rehash internally treats hint/size as number of elements 236 | void resize(size_type hint) { rep.resize(hint); } 237 | void rehash(size_type hint) { resize(hint); } // the tr1 name 238 | 239 | // Lookup routines 240 | iterator find(const key_type& key) { return rep.find(key); } 241 | const_iterator find(const key_type& key) const { return rep.find(key); } 242 | 243 | template 244 | typename std::enable_if::value, iterator>::type 245 | find(const K& key) { return rep.find(key); } 246 | template 247 | typename std::enable_if::value, const_iterator>::type 248 | find(const K& key) const { return rep.find(key); } 249 | 250 | data_type& operator[](const key_type& key) { // This is our value-add! 251 | // If key is in the hashtable, returns find(key)->second, 252 | // otherwise returns insert(value_type(key, T()).first->second. 253 | // Note it does not create an empty T unless the find fails. 254 | return rep.template find_or_insert(key).second; 255 | } 256 | 257 | size_type count(const key_type& key) const { return rep.count(key); } 258 | 259 | template 260 | typename std::enable_if::value, size_type>::type 261 | count(const K& key) const { return rep.count(key); } 262 | 263 | std::pair equal_range(const key_type& key) { 264 | return rep.equal_range(key); 265 | } 266 | std::pair equal_range( 267 | const key_type& key) const { 268 | return rep.equal_range(key); 269 | } 270 | 271 | template 272 | typename std::enable_if::value, std::pair>::type 273 | equal_range(const K& key) { 274 | return rep.equal_range(key); 275 | } 276 | template 277 | typename std::enable_if::value, std::pair>::type 278 | equal_range(const K& key) const { 279 | return rep.equal_range(key); 280 | } 281 | 282 | // Insertion routines 283 | std::pair insert(const value_type& obj) { 284 | return rep.insert(obj); 285 | } 286 | template 287 | void insert(InputIterator f, InputIterator l) { 288 | rep.insert(f, l); 289 | } 290 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } 291 | // Required for std::insert_iterator; the passed-in iterator is ignored. 292 | iterator insert(iterator, const value_type& obj) { return insert(obj).first; } 293 | 294 | // Deletion routines 295 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key 296 | // value to identify deleted buckets. You can change the key as 297 | // time goes on, or get rid of it entirely to be insert-only. 298 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } 299 | void clear_deleted_key() { rep.clear_deleted_key(); } 300 | key_type deleted_key() const { return rep.deleted_key(); } 301 | 302 | // These are standard 303 | size_type erase(const key_type& key) { return rep.erase(key); } 304 | void erase(iterator it) { rep.erase(it); } 305 | void erase(iterator f, iterator l) { rep.erase(f, l); } 306 | 307 | // Comparison 308 | bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } 309 | bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } 310 | 311 | // I/O -- this is an add-on for writing metainformation to disk 312 | // 313 | // For maximum flexibility, this does not assume a particular 314 | // file type (though it will probably be a FILE *). We just pass 315 | // the fp through to rep. 
316 | 317 | // If your keys and values are simple enough, you can pass this 318 | // serializer to serialize()/unserialize(). "Simple enough" means 319 | // value_type is a POD type that contains no pointers. Note, 320 | // however, we don't try to normalize endianness. 321 | typedef typename ht::NopointerSerializer NopointerSerializer; 322 | 323 | // serializer: a class providing operator()(OUTPUT*, const value_type&) 324 | // (writing value_type to OUTPUT). You can specify a 325 | // NopointerSerializer object if appropriate (see above). 326 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a 327 | // pointer to a class providing size_t Write(const void*, size_t), 328 | // which writes a buffer into a stream (which fp presumably 329 | // owns) and returns the number of bytes successfully written. 330 | // Note basic_ostream is not currently supported. 331 | template 332 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { 333 | return rep.serialize(serializer, fp); 334 | } 335 | 336 | // serializer: a functor providing operator()(INPUT*, value_type*) 337 | // (reading from INPUT and into value_type). You can specify a 338 | // NopointerSerializer object if appropriate (see above). 339 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a 340 | // pointer to a class providing size_t Read(void*, size_t), 341 | // which reads into a buffer from a stream (which fp presumably 342 | // owns) and returns the number of bytes successfully read. 343 | // Note basic_istream is not currently supported. 344 | // NOTE: Since value_type is std::pair, ValueSerializer 345 | // may need to do a const cast in order to fill in the key. 346 | // NOTE: if Key or T are not POD types, the serializer MUST use 347 | // placement-new to initialize their values, rather than a normal 348 | // equals-assignment or similar. (The value_type* passed into the 349 | // serializer points to garbage memory.) 350 | template 351 | bool unserialize(ValueSerializer serializer, INPUT* fp) { 352 | return rep.unserialize(serializer, fp); 353 | } 354 | 355 | // The four methods below are DEPRECATED. 356 | // Use serialize() and unserialize() for new code. 357 | template 358 | bool write_metadata(OUTPUT* fp) { 359 | return rep.write_metadata(fp); 360 | } 361 | 362 | template 363 | bool read_metadata(INPUT* fp) { 364 | return rep.read_metadata(fp); 365 | } 366 | 367 | template 368 | bool write_nopointer_data(OUTPUT* fp) { 369 | return rep.write_nopointer_data(fp); 370 | } 371 | 372 | template 373 | bool read_nopointer_data(INPUT* fp) { 374 | return rep.read_nopointer_data(fp); 375 | } 376 | }; 377 | 378 | // We need a global swap as well 379 | template 380 | inline void swap(sparse_hash_map& hm1, 381 | sparse_hash_map& hm2) { 382 | hm1.swap(hm2); 383 | } 384 | 385 | } // namespace google 386 | -------------------------------------------------------------------------------- /benchmark/sparsehash/dense_hash_map: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2005, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 
10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // ---- 31 | // 32 | // This is just a very thin wrapper over densehashtable.h, just 33 | // like sgi stl's stl_hash_map is a very thin wrapper over 34 | // stl_hashtable. The major thing we define is operator[], because 35 | // we have a concept of a data_type which stl_hashtable doesn't 36 | // (it only has a key and a value). 37 | // 38 | // NOTE: this is exactly like sparse_hash_map.h, with the word 39 | // "sparse" replaced by "dense", except for the addition of 40 | // set_empty_key(). 41 | // 42 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. 43 | // 44 | // Otherwise your program will die in mysterious ways. (Note if you 45 | // use the constructor that takes an InputIterator range, you pass in 46 | // the empty key in the constructor, rather than after. As a result, 47 | // this constructor differs from the standard STL version.) 48 | // 49 | // In other respects, we adhere mostly to the STL semantics for 50 | // hash-map. One important exception is that insert() may invalidate 51 | // iterators entirely -- STL semantics are that insert() may reorder 52 | // iterators, but they all still refer to something valid in the 53 | // hashtable. Not so for us. Likewise, insert() may invalidate 54 | // pointers into the hashtable. (Whether insert invalidates iterators 55 | // and pointers depends on whether it results in a hashtable resize). 56 | // On the plus side, delete() doesn't invalidate iterators or pointers 57 | // at all, or even change the ordering of elements. 58 | // 59 | // Here are a few "power user" tips: 60 | // 61 | // 1) set_deleted_key(): 62 | // If you want to use erase() you *must* call set_deleted_key(), 63 | // in addition to set_empty_key(), after construction. 64 | // The deleted and empty keys must differ. 65 | // 66 | // 2) resize(0): 67 | // When an item is deleted, its memory isn't freed right 68 | // away. This allows you to iterate over a hashtable, 69 | // and call erase(), without invalidating the iterator. 70 | // To force the memory to be freed, call resize(0). 71 | // For tr1 compatibility, this can also be called as rehash(0). 
72 | // 73 | // 3) min_load_factor(0.0) 74 | // Setting the minimum load factor to 0.0 guarantees that 75 | // the hash table will never shrink. 76 | // 77 | // Roughly speaking: 78 | // (1) dense_hash_map: fastest, uses the most memory unless entries are small 79 | // (2) sparse_hash_map: slowest, uses the least memory 80 | // (3) hash_map / unordered_map (STL): in the middle 81 | // 82 | // Typically I use sparse_hash_map when I care about space and/or when 83 | // I need to save the hashtable on disk. I use hash_map otherwise. I 84 | // don't personally use dense_hash_set ever; some people use it for 85 | // small sets with lots of lookups. 86 | // 87 | // - dense_hash_map has, typically, about 78% memory overhead (if your 88 | // data takes up X bytes, the hash_map uses .78X more bytes in overhead). 89 | // - sparse_hash_map has about 4 bits overhead per entry. 90 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and, 91 | // especially, inserts. See time_hash_map.cc for details. 92 | // 93 | // See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html 94 | // for information about how to use this class. 95 | 96 | #pragma once 97 | 98 | #include // needed by stl_alloc 99 | #include // for equal_to<>, select1st<>, etc 100 | #include // for initializer_list 101 | #include // for alloc 102 | #include // for pair<> 103 | #include // forward_as_tuple 104 | #include // for enable_if, is_constructible, etc 105 | #include // IWYU pragma: export 106 | #include 107 | 108 | namespace google { 109 | 110 | template , 111 | class EqualKey = std::equal_to, 112 | class Alloc = libc_allocator_with_realloc>> 113 | class dense_hash_map { 114 | private: 115 | // Apparently select1st is not stl-standard, so we define our own 116 | struct SelectKey { 117 | typedef const Key& result_type; 118 | 119 | template 120 | using decay_t = typename std::decay::type; 121 | 122 | template ::first_type>, 125 | decay_t 126 | >::value>::type 127 | > 128 | result_type operator()(Pair&& p) const { 129 | return p.first; 130 | } 131 | }; 132 | struct SetKey { 133 | void operator()(std::pair* value, const Key& new_key) const { 134 | using NCKey = typename std::remove_cv::type; 135 | *const_cast(&value->first) = new_key; 136 | 137 | // It would be nice to clear the rest of value here as well, in 138 | // case it's taking up a lot of memory. We do this by clearing 139 | // the value. This assumes T has a zero-arg constructor! 
140 | value->second = T(); 141 | } 142 | void operator()(std::pair* value, const Key& new_key, bool) const { 143 | new(value) std::pair(std::piecewise_construct, std::forward_as_tuple(new_key), std::forward_as_tuple()); 144 | } 145 | }; 146 | 147 | // The actual data 148 | typedef typename sparsehash_internal::key_equal_chosen::type EqualKeyChosen; 149 | typedef dense_hashtable, Key, HashFcn, SelectKey, 150 | SetKey, EqualKeyChosen, Alloc> ht; 151 | ht rep; 152 | 153 | static_assert(!sparsehash_internal::has_transparent_key_equal::value 154 | || std::is_same>::value 155 | || std::is_same::value, 156 | "Heterogeneous lookup requires key_equal to either be the default container value or the same as the type provided by hash"); 157 | 158 | public: 159 | typedef typename ht::key_type key_type; 160 | typedef T data_type; 161 | typedef T mapped_type; 162 | typedef typename ht::value_type value_type; 163 | typedef typename ht::hasher hasher; 164 | typedef typename ht::key_equal key_equal; 165 | typedef Alloc allocator_type; 166 | 167 | typedef typename ht::size_type size_type; 168 | typedef typename ht::difference_type difference_type; 169 | typedef typename ht::pointer pointer; 170 | typedef typename ht::const_pointer const_pointer; 171 | typedef typename ht::reference reference; 172 | typedef typename ht::const_reference const_reference; 173 | 174 | typedef typename ht::iterator iterator; 175 | typedef typename ht::const_iterator const_iterator; 176 | typedef typename ht::local_iterator local_iterator; 177 | typedef typename ht::const_local_iterator const_local_iterator; 178 | 179 | // Iterator functions 180 | iterator begin() { return rep.begin(); } 181 | iterator end() { return rep.end(); } 182 | const_iterator begin() const { return rep.begin(); } 183 | const_iterator end() const { return rep.end(); } 184 | const_iterator cbegin() const { return rep.begin(); } 185 | const_iterator cend() const { return rep.end(); } 186 | 187 | // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. 
188 | local_iterator begin(size_type i) { return rep.begin(i); } 189 | local_iterator end(size_type i) { return rep.end(i); } 190 | const_local_iterator begin(size_type i) const { return rep.begin(i); } 191 | const_local_iterator end(size_type i) const { return rep.end(i); } 192 | const_local_iterator cbegin(size_type i) const { return rep.begin(i); } 193 | const_local_iterator cend(size_type i) const { return rep.end(i); } 194 | 195 | // Accessor functions 196 | allocator_type get_allocator() const { return rep.get_allocator(); } 197 | hasher hash_funct() const { return rep.hash_funct(); } 198 | hasher hash_function() const { return hash_funct(); } 199 | key_equal key_eq() const { return rep.key_eq(); } 200 | 201 | // Constructors 202 | explicit dense_hash_map(size_type expected_max_items_in_table = 0, 203 | const hasher& hf = hasher(), 204 | const key_equal& eql = key_equal(), 205 | const allocator_type& alloc = allocator_type()) 206 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), 207 | alloc) {} 208 | 209 | template 210 | dense_hash_map(InputIterator f, InputIterator l, 211 | const key_type& empty_key_val, 212 | size_type expected_max_items_in_table = 0, 213 | const hasher& hf = hasher(), 214 | const key_equal& eql = key_equal(), 215 | const allocator_type& alloc = allocator_type()) 216 | : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), 217 | alloc) { 218 | set_empty_key(empty_key_val); 219 | rep.insert(f, l); 220 | } 221 | // We use the default copy constructor 222 | // We use the default operator=() 223 | // We use the default destructor 224 | 225 | void clear() { rep.clear(); } 226 | // This clears the hash map without resizing it down to the minimum 227 | // bucket count, but rather keeps the number of buckets constant 228 | void clear_no_resize() { rep.clear_no_resize(); } 229 | void swap(dense_hash_map& hs) { rep.swap(hs.rep); } 230 | 231 | // Functions concerning size 232 | size_type size() const { return rep.size(); } 233 | size_type max_size() const { return rep.max_size(); } 234 | bool empty() const { return rep.empty(); } 235 | size_type bucket_count() const { return rep.bucket_count(); } 236 | size_type max_bucket_count() const { return rep.max_bucket_count(); } 237 | 238 | // These are tr1 methods. bucket() is the bucket the key is or would be in. 239 | size_type bucket_size(size_type i) const { return rep.bucket_size(i); } 240 | size_type bucket(const key_type& key) const { return rep.bucket(key); } 241 | float load_factor() const { return size() * 1.0f / bucket_count(); } 242 | float max_load_factor() const { 243 | float shrink, grow; 244 | rep.get_resizing_parameters(&shrink, &grow); 245 | return grow; 246 | } 247 | void max_load_factor(float new_grow) { 248 | float shrink, grow; 249 | rep.get_resizing_parameters(&shrink, &grow); 250 | rep.set_resizing_parameters(shrink, new_grow); 251 | } 252 | // These aren't tr1 methods but perhaps ought to be. 253 | float min_load_factor() const { 254 | float shrink, grow; 255 | rep.get_resizing_parameters(&shrink, &grow); 256 | return shrink; 257 | } 258 | void min_load_factor(float new_shrink) { 259 | float shrink, grow; 260 | rep.get_resizing_parameters(&shrink, &grow); 261 | rep.set_resizing_parameters(new_shrink, grow); 262 | } 263 | // Deprecated; use min_load_factor() or max_load_factor() instead. 
264 | void set_resizing_parameters(float shrink, float grow) { 265 | rep.set_resizing_parameters(shrink, grow); 266 | } 267 | 268 | void reserve(size_type size) { rehash(size); } // note: rehash internally treats hint/size as number of elements 269 | void resize(size_type hint) { rep.resize(hint); } 270 | void rehash(size_type hint) { resize(hint); } // the tr1 name 271 | 272 | // Lookup routines 273 | iterator find(const key_type& key) { return rep.find(key); } 274 | const_iterator find(const key_type& key) const { return rep.find(key); } 275 | 276 | template 277 | typename std::enable_if::value, iterator>::type 278 | find(const K& key) { return rep.find(key); } 279 | template 280 | typename std::enable_if::value, const_iterator>::type 281 | find(const K& key) const { return rep.find(key); } 282 | 283 | data_type& operator[](const key_type& key) { // This is our value-add! 284 | // If key is in the hashtable, returns find(key)->second, 285 | // otherwise returns insert(value_type(key, T()).first->second. 286 | // Note it does not create an empty T unless the find fails. 287 | return rep.template find_or_insert(key).second; 288 | } 289 | 290 | data_type& operator[](key_type&& key) { 291 | return rep.template find_or_insert(std::move(key)).second; 292 | } 293 | 294 | size_type count(const key_type& key) const { return rep.count(key); } 295 | 296 | template 297 | typename std::enable_if::value, size_type>::type 298 | count(const K& key) const { return rep.count(key); } 299 | 300 | std::pair equal_range(const key_type& key) { 301 | return rep.equal_range(key); 302 | } 303 | std::pair equal_range( 304 | const key_type& key) const { 305 | return rep.equal_range(key); 306 | } 307 | 308 | template 309 | typename std::enable_if::value, std::pair>::type 310 | equal_range(const K& key) { 311 | return rep.equal_range(key); 312 | } 313 | template 314 | typename std::enable_if::value, std::pair>::type 315 | equal_range(const K& key) const { 316 | return rep.equal_range(key); 317 | } 318 | 319 | // Insertion routines 320 | std::pair insert(const value_type& obj) { 321 | return rep.insert(obj); 322 | } 323 | 324 | template ::value>::type> 325 | std::pair insert(Pair&& obj) { 326 | return rep.insert(std::forward(obj)); 327 | } 328 | 329 | // overload to allow {} syntax: .insert( { {key}, {args} } ) 330 | std::pair insert(value_type&& obj) { 331 | return rep.insert(std::move(obj)); 332 | } 333 | 334 | template 335 | std::pair emplace(Args&&... args) { 336 | return rep.emplace(std::forward(args)...); 337 | } 338 | 339 | template 340 | std::pair emplace_hint(const_iterator hint, Args&&... args) { 341 | return rep.emplace_hint(hint, std::forward(args)...); 342 | } 343 | 344 | 345 | template 346 | void insert(InputIterator f, InputIterator l) { 347 | rep.insert(f, l); 348 | } 349 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } 350 | void insert(std::initializer_list ilist) { rep.insert(ilist.begin(), ilist.end()); } 351 | // Required for std::insert_iterator; the passed-in iterator is ignored. 352 | iterator insert(const_iterator, const value_type& obj) { return insert(obj).first; } 353 | iterator insert(const_iterator, value_type&& obj) { return insert(std::move(obj)).first; } 354 | template ::value && 356 | !std::is_same::value 357 | >::type> 358 | iterator insert(const_iterator, P&& obj) { return insert(std::forward

(obj)).first; } 359 | 360 | // Deletion and empty routines 361 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key 362 | // value to identify deleted and empty buckets. You can change the 363 | // deleted key as time goes on, or get rid of it entirely to be insert-only. 364 | // YOU MUST CALL THIS! 365 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); } 366 | key_type empty_key() const { return rep.empty_key(); } 367 | 368 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } 369 | void clear_deleted_key() { rep.clear_deleted_key(); } 370 | key_type deleted_key() const { return rep.deleted_key(); } 371 | 372 | // These are standard 373 | size_type erase(const key_type& key) { return rep.erase(key); } 374 | iterator erase(const_iterator it) { return rep.erase(it); } 375 | iterator erase(const_iterator f, const_iterator l) { return rep.erase(f, l); } 376 | 377 | // Comparison 378 | bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; } 379 | bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; } 380 | 381 | // I/O -- this is an add-on for writing hash map to disk 382 | // 383 | // For maximum flexibility, this does not assume a particular 384 | // file type (though it will probably be a FILE *). We just pass 385 | // the fp through to rep. 386 | 387 | // If your keys and values are simple enough, you can pass this 388 | // serializer to serialize()/unserialize(). "Simple enough" means 389 | // value_type is a POD type that contains no pointers. Note, 390 | // however, we don't try to normalize endianness. 391 | typedef typename ht::NopointerSerializer NopointerSerializer; 392 | 393 | // serializer: a class providing operator()(OUTPUT*, const value_type&) 394 | // (writing value_type to OUTPUT). You can specify a 395 | // NopointerSerializer object if appropriate (see above). 396 | // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a 397 | // pointer to a class providing size_t Write(const void*, size_t), 398 | // which writes a buffer into a stream (which fp presumably 399 | // owns) and returns the number of bytes successfully written. 400 | // Note basic_ostream is not currently supported. 401 | template 402 | bool serialize(ValueSerializer serializer, OUTPUT* fp) { 403 | return rep.serialize(serializer, fp); 404 | } 405 | 406 | // serializer: a functor providing operator()(INPUT*, value_type*) 407 | // (reading from INPUT and into value_type). You can specify a 408 | // NopointerSerializer object if appropriate (see above). 409 | // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a 410 | // pointer to a class providing size_t Read(void*, size_t), 411 | // which reads into a buffer from a stream (which fp presumably 412 | // owns) and returns the number of bytes successfully read. 413 | // Note basic_istream is not currently supported. 414 | // NOTE: Since value_type is std::pair, ValueSerializer 415 | // may need to do a const cast in order to fill in the key. 
416 | template 417 | bool unserialize(ValueSerializer serializer, INPUT* fp) { 418 | return rep.unserialize(serializer, fp); 419 | } 420 | }; 421 | 422 | // We need a global swap as well 423 | template 424 | inline void swap(dense_hash_map& hm1, 425 | dense_hash_map& hm2) { 426 | hm1.swap(hm2); 427 | } 428 | 429 | } // namespace google 430 | -------------------------------------------------------------------------------- /benchmark/data.txt: -------------------------------------------------------------------------------- 1 | 104 2 | KR4201P22150 3 | KR4201P22176 4 | KR4201P22200 5 | KR4201P22226 6 | KR4201P22259 7 | KR4201P22275 8 | KR4201P22309 9 | KR4201P22325 10 | KR4201P22358 11 | KR4201P22374 12 | KR4201P22408 13 | KR4201P22424 14 | KR4201P22457 15 | KR4201P22473 16 | KR4201P22507 17 | KR4201P22523 18 | KR4201P22556 19 | KR4201P22572 20 | KR4201P22606 21 | KR4201P22622 22 | KR4201P22655 23 | KR4201P22671 24 | KR4201P22705 25 | KR4201P22721 26 | KR4201P22754 27 | KR4201P22770 28 | KR4201P22804 29 | KR4201P22820 30 | KR4201P22853 31 | KR4201P22879 32 | KR4201P22903 33 | KR4201P22929 34 | KR4201P22952 35 | KR4201P22978 36 | KR4201P23000 37 | KR4201P23026 38 | KR4201P23059 39 | KR4201P23075 40 | KR4201P23109 41 | KR4201P23125 42 | KR4201P23158 43 | KR4201P23174 44 | KR4201P23208 45 | KR4201P23224 46 | KR4201P23257 47 | KR4201P23273 48 | KR4201P23307 49 | KR4201P23323 50 | KR4201P23356 51 | KR4201P23372 52 | KR4201P23406 53 | KR4201P23422 54 | KR4301P22158 55 | KR4301P22174 56 | KR4301P22208 57 | KR4301P22224 58 | KR4301P22257 59 | KR4301P22273 60 | KR4301P22307 61 | KR4301P22323 62 | KR4301P22356 63 | KR4301P22372 64 | KR4301P22406 65 | KR4301P22422 66 | KR4301P22455 67 | KR4301P22471 68 | KR4301P22505 69 | KR4301P22521 70 | KR4301P22554 71 | KR4301P22570 72 | KR4301P22604 73 | KR4301P22620 74 | KR4301P22653 75 | KR4301P22679 76 | KR4301P22703 77 | KR4301P22729 78 | KR4301P22752 79 | KR4301P22778 80 | KR4301P22802 81 | KR4301P22828 82 | KR4301P22851 83 | KR4301P22877 84 | KR4301P22901 85 | KR4301P22927 86 | KR4301P22950 87 | KR4301P22976 88 | KR4301P23008 89 | KR4301P23024 90 | KR4301P23057 91 | KR4301P23073 92 | KR4301P23107 93 | KR4301P23123 94 | KR4301P23156 95 | KR4301P23172 96 | KR4301P23206 97 | KR4301P23222 98 | KR4301P23255 99 | KR4301P23271 100 | KR4301P23305 101 | KR4301P23321 102 | KR4301P23354 103 | KR4301P23370 104 | KR4301P23404 105 | KR4301P23420 106 | 1000 107 | KR4201P22929 108 | KR4201P22929 109 | KR4201P22929 110 | KR4201P22929 111 | KR4201P22929 112 | KR4201P22903 113 | KR4201P22879 114 | KR4201P22879 115 | KR4201P22879 116 | KR4201P22879 117 | KR4201P22879 118 | KR4201P22903 119 | KR4201P22903 120 | KR4201P22978 121 | KR4201P22853 122 | KR4201P22903 123 | KR4201P22903 124 | KR4201P22853 125 | KR4201P22929 126 | KR4301P22422 127 | KR4201P22903 128 | KR4201P22903 129 | KR4301P22422 130 | KR4201P22879 131 | KR4301P22372 132 | KR4201P22952 133 | KR4201P22853 134 | KR4301P22372 135 | KR4201P22903 136 | KR4301P22372 137 | KR4201P22952 138 | KR4201P22879 139 | KR4201P22879 140 | KR4201P22903 141 | KR4201P22929 142 | KR4301P22307 143 | KR4201P22879 144 | KR4301P22323 145 | KR4201P22952 146 | KR4301P22356 147 | KR4201P22978 148 | KR4301P22372 149 | KR4201P22903 150 | KR4301P22471 151 | KR4201P22879 152 | KR4301P22356 153 | KR4301P22323 154 | KR4201P22952 155 | KR4301P22455 156 | KR4201P22929 157 | KR4301P22455 158 | KR4201P22903 159 | KR4301P22422 160 | KR4301P22356 161 | KR4201P22929 162 | KR4301P22406 163 | KR4201P22952 164 | KR4301P22372 165 | KR4201P22903 166 | 
KR4301P22422 167 | KR4201P22952 168 | KR4301P22406 169 | KR4201P22853 170 | KR4301P22372 171 | KR4201P22952 172 | KR4301P22422 173 | KR4301P22406 174 | KR4201P22903 175 | KR4301P22406 176 | KR4201P22929 177 | KR4301P22422 178 | KR4201P22952 179 | KR4301P22406 180 | KR4201P22903 181 | KR4301P22422 182 | KR4201P22853 183 | KR4301P22372 184 | KR4201P22952 185 | KR4301P22406 186 | KR4201P22903 187 | KR4301P22422 188 | KR4201P22929 189 | KR4201P22952 190 | KR4301P22406 191 | KR4201P22879 192 | KR4301P22372 193 | KR4201P22903 194 | KR4301P22356 195 | KR4201P22929 196 | KR4301P22604 197 | KR4201P22952 198 | KR4301P22372 199 | KR4201P22879 200 | KR4301P22422 201 | KR4301P22422 202 | KR4201P22903 203 | KR4301P22455 204 | KR4201P22929 205 | KR4301P22471 206 | KR4201P22952 207 | KR4301P22356 208 | KR4201P22879 209 | KR4301P22372 210 | KR4201P22903 211 | KR4301P22455 212 | KR4201P22929 213 | KR4301P22455 214 | KR4201P22879 215 | KR4301P22455 216 | KR4201P22952 217 | KR4301P22356 218 | KR4201P22929 219 | KR4301P22455 220 | KR4201P22903 221 | KR4301P22356 222 | KR4201P22879 223 | KR4301P22372 224 | KR4201P22952 225 | KR4301P22455 226 | KR4201P22978 227 | KR4301P22521 228 | KR4201P22903 229 | KR4301P22604 230 | KR4201P22879 231 | KR4301P22455 232 | KR4201P22929 233 | KR4301P22570 234 | KR4201P22903 235 | KR4301P22604 236 | KR4201P22952 237 | KR4301P22455 238 | KR4201P22879 239 | KR4301P22521 240 | KR4201P22903 241 | KR4301P22455 242 | KR4201P22853 243 | KR4301P22604 244 | KR4201P22952 245 | KR4301P22521 246 | KR4201P22879 247 | KR4301P22406 248 | KR4201P22903 249 | KR4301P22422 250 | KR4201P22929 251 | KR4301P22521 252 | KR4201P22952 253 | KR4301P22455 254 | KR4201P22879 255 | KR4201P22903 256 | KR4301P22604 257 | KR4301P22455 258 | KR4201P22853 259 | KR4301P22356 260 | KR4201P22952 261 | KR4301P22422 262 | KR4201P22879 263 | KR4201P22903 264 | KR4301P22356 265 | KR4201P22853 266 | KR4301P22455 267 | KR4301P22372 268 | KR4201P22952 269 | KR4301P22372 270 | KR4201P22879 271 | KR4301P22323 272 | KR4201P22978 273 | KR4301P22471 274 | KR4201P22952 275 | KR4201P22879 276 | KR4301P22422 277 | KR4301P22323 278 | KR4201P22853 279 | KR4301P22372 280 | KR4201P22952 281 | KR4301P22455 282 | KR4201P22879 283 | KR4201P22853 284 | KR4301P22422 285 | KR4201P22879 286 | KR4301P22455 287 | KR4301P22422 288 | KR4201P22853 289 | KR4301P22406 290 | KR4201P22879 291 | KR4301P22521 292 | KR4201P22978 293 | KR4301P22422 294 | KR4201P22879 295 | KR4201P22879 296 | KR4301P22471 297 | KR4301P22422 298 | KR4201P22929 299 | KR4301P22323 300 | KR4201P22853 301 | KR4301P22554 302 | KR4201P22879 303 | KR4301P22455 304 | KR4201P22853 305 | KR4301P22422 306 | KR4201P22879 307 | KR4301P22521 308 | KR4201P22978 309 | KR4301P22554 310 | KR4201P22929 311 | KR4301P22455 312 | KR4201P22929 313 | KR4301P22455 314 | KR4201P22929 315 | KR4301P22455 316 | KR4301P22406 317 | KR4201P22853 318 | KR4301P22406 319 | KR4201P22978 320 | KR4201P22978 321 | KR4301P22422 322 | KR4201P22853 323 | KR4301P22455 324 | KR4301P22422 325 | KR4301P22554 326 | KR4201P22853 327 | KR4301P22406 328 | KR4201P22929 329 | KR4301P22554 330 | KR4201P22879 331 | KR4301P22422 332 | KR4201P22879 333 | KR4301P22273 334 | KR4201P22879 335 | KR4301P22521 336 | KR4201P22853 337 | KR4301P22554 338 | KR4201P22853 339 | KR4201P22853 340 | KR4301P22422 341 | KR4201P22853 342 | KR4201P22879 343 | KR4301P22521 344 | KR4201P22853 345 | KR4301P22471 346 | KR4201P22879 347 | KR4301P22554 348 | KR4201P22879 349 | KR4301P22307 350 | KR4201P22853 351 | KR4301P22521 352 | KR4201P22879 353 | 
KR4301P22471 354 | KR4201P22853 355 | KR4301P22372 356 | KR4201P22853 357 | KR4201P33850 358 | KR4301P22422 359 | KR4201P22879 360 | KR4301P22455 361 | KR4201P22853 362 | KR4301P32157 363 | KR4201P22879 364 | KR4301P22554 365 | KR4201P22853 366 | KR4301P22422 367 | KR4201P22879 368 | KR4301P22554 369 | KR4301P22471 370 | KR4201P22879 371 | KR4301P22422 372 | KR4201P22903 373 | KR4301P22554 374 | KR4201P22853 375 | KR4301P22471 376 | KR4201P22879 377 | KR4301P22422 378 | KR4201P22903 379 | KR4201P22853 380 | KR4301P22554 381 | KR4201P22879 382 | KR4301P22406 383 | KR4201P22903 384 | KR4301P22422 385 | KR4301P22471 386 | KR4201P22820 387 | KR4301P22604 388 | KR4201P22853 389 | KR4301P22406 390 | KR4201P33850 391 | KR4301P22554 392 | KR4201P22879 393 | KR4301P22604 394 | KR4201P22820 395 | KR4301P22521 396 | KR4201P22879 397 | KR4301P22406 398 | KR4201P22820 399 | KR4301P22554 400 | KR4201P22853 401 | KR4301P22604 402 | KR4201P22820 403 | KR4301P22406 404 | KR4201P22853 405 | KR4201P22820 406 | KR4301P22604 407 | KR4201P22929 408 | KR4301P22471 409 | KR4301P22604 410 | KR4201P33850 411 | KR4301P22521 412 | KR4201P22853 413 | KR4301P22604 414 | KR4201P22820 415 | KR4301P22521 416 | KR4201P22853 417 | KR4301P22471 418 | KR4201P22879 419 | KR4301P22604 420 | KR4201P22820 421 | KR4301P22471 422 | KR4301P22521 423 | KR4301P22471 424 | KR4201P22820 425 | KR4301P22422 426 | KR4201P22879 427 | KR4301P22273 428 | KR4201P22853 429 | KR4201P22820 430 | KR4201P22820 431 | KR4301P22521 432 | KR4201P22853 433 | KR4301P22307 434 | KR4201P22879 435 | KR4301P22455 436 | KR4201P22820 437 | KR4301P22554 438 | KR4201P22853 439 | KR4301P22521 440 | KR4201P22879 441 | KR4301P22505 442 | KR4201P22820 443 | KR4301P22455 444 | KR4301P22521 445 | KR4201P22879 446 | KR4201P22853 447 | KR4301P32173 448 | KR4201P22820 449 | KR4301P22505 450 | KR4201P22853 451 | KR4301P22455 452 | KR4201P22820 453 | KR4301P22554 454 | KR4201P23000 455 | KR4301P22505 456 | KR4201P22853 457 | KR4301P22521 458 | KR4201P22820 459 | KR4201P22853 460 | KR4301P22455 461 | KR4201P22820 462 | KR4301P22307 463 | KR4201P22820 464 | KR4301P22521 465 | KR4201P22853 466 | KR4301P22455 467 | KR4301P22505 468 | KR4201P22820 469 | KR4301P22505 470 | KR4201P22853 471 | KR4301P22471 472 | KR4301P22521 473 | KR4201P22903 474 | KR4201P22820 475 | KR4301P22505 476 | KR4201P22853 477 | KR4301P22471 478 | KR4201P22952 479 | KR4301P22554 480 | KR4201P22853 481 | KR4201P22903 482 | KR4301P22471 483 | KR4201P22820 484 | KR4301P22505 485 | KR4301P22356 486 | KR4201P22853 487 | KR4301P22554 488 | KR4201P22952 489 | KR4301P22505 490 | KR4201P22820 491 | KR4301P22471 492 | KR4201P22820 493 | KR4301P22521 494 | KR4201P22853 495 | KR4301P22554 496 | KR4201P22820 497 | KR4301P22505 498 | KR4201P22820 499 | KR4301P22471 500 | KR4201P22903 501 | KR4301P32157 502 | KR4201P22820 503 | KR4301P22505 504 | KR4201P22853 505 | KR4301P22554 506 | KR4201P22820 507 | KR4201P22853 508 | KR4301P22471 509 | KR4301P22521 510 | KR4201P22820 511 | KR4301P22505 512 | KR4201P22804 513 | KR4301P22257 514 | KR4201P22820 515 | KR4301P32207 516 | KR4201P22879 517 | KR4301P22521 518 | KR4201P22820 519 | KR4301P22505 520 | KR4201P22853 521 | KR4201P22879 522 | KR4301P22521 523 | KR4201P22804 524 | KR4301P22273 525 | KR4301P22257 526 | KR4201P22820 527 | KR4301P22505 528 | KR4201P22978 529 | KR4301P22521 530 | KR4201P22820 531 | KR4301P32306 532 | KR4201P22853 533 | KR4301P22554 534 | KR4201P23158 535 | KR4301P22505 536 | KR4201P22804 537 | KR4301P22224 538 | KR4201P22853 539 | KR4201P22820 540 | 
KR4301P22323 541 | KR4201P22903 542 | KR4301P22505 543 | KR4201P22853 544 | KR4301P22323 545 | KR4201P22820 546 | KR4301P22406 547 | KR4301P22257 548 | KR4201P22804 549 | KR4301P22505 550 | KR4201P22820 551 | KR4301P32157 552 | KR4201P22978 553 | KR4301P22406 554 | KR4201P22820 555 | KR4201P22853 556 | KR4301P22554 557 | KR4301P22307 558 | KR4201P23273 559 | KR4301P22356 560 | KR4201P22820 561 | KR4301P22273 562 | KR4201P22804 563 | KR4201P22820 564 | KR4301P32272 565 | KR4301P22679 566 | KR4201P22978 567 | KR4301P22372 568 | KR4201P22820 569 | KR4301P22307 570 | KR4201P22853 571 | KR4301P22679 572 | KR4201P23422 573 | KR4301P22273 574 | KR4201P22952 575 | KR4301P22307 576 | KR4201P22853 577 | KR4301P22554 578 | KR4201P33256 579 | KR4301P22679 580 | KR4201P33025 581 | KR4201P22820 582 | KR4301P22406 583 | KR4201P22804 584 | KR4301P22307 585 | KR4301P22323 586 | KR4201P23000 587 | KR4301P22604 588 | KR4201P22978 589 | KR4301P22406 590 | KR4201P22820 591 | KR4201P22929 592 | KR4301P22521 593 | KR4301P22307 594 | KR4201P22952 595 | KR4201P22978 596 | KR4301P32207 597 | KR4201P22978 598 | KR4301P22554 599 | KR4201P22952 600 | KR4301P22521 601 | KR4301P22505 602 | KR4201P22820 603 | KR4301P22679 604 | KR4201P22820 605 | KR4301P22307 606 | KR4201P22952 607 | KR4301P22356 608 | KR4201P22903 609 | KR4301P22455 610 | KR4201P22952 611 | KR4301P22422 612 | KR4201P22952 613 | KR4301P22554 614 | KR4201P22820 615 | KR4201P22820 616 | KR4301P22570 617 | KR4301P22422 618 | KR4201P22853 619 | KR4201P22820 620 | KR4301P22455 621 | KR4301P22372 622 | KR4201P22804 623 | KR4301P22570 624 | KR4201P22770 625 | KR4301P22422 626 | KR4201P22820 627 | KR4301P22455 628 | KR4201P22879 629 | KR4301P22323 630 | KR4201P22853 631 | KR4301P22505 632 | KR4201P22952 633 | KR4201P22804 634 | KR4301P22356 635 | KR4301P22471 636 | KR4201P33108 637 | KR4301P22422 638 | KR4201P22879 639 | KR4201P22820 640 | KR4301P22570 641 | KR4301P22505 642 | KR4201P22853 643 | KR4301P22570 644 | KR4201P22903 645 | KR4301P32306 646 | KR4201P22820 647 | KR4301P22505 648 | KR4201P22804 649 | KR4301P22257 650 | KR4201P22903 651 | KR4201P22820 652 | KR4301P22570 653 | KR4201P22804 654 | KR4301P32306 655 | KR4301P22307 656 | KR4201P22820 657 | KR4301P22174 658 | KR4201P23000 659 | KR4301P22505 660 | KR4201P22978 661 | KR4301P22570 662 | KR4201P33074 663 | KR4301P22570 664 | KR4201P22820 665 | KR4301P22505 666 | KR4201P23000 667 | KR4301P22570 668 | KR4201P22853 669 | KR4301P22505 670 | KR4201P22952 671 | KR4301P22307 672 | KR4201P23000 673 | KR4301P32157 674 | KR4201P32852 675 | KR4301P22604 676 | KR4201P22978 677 | KR4301P22257 678 | KR4201P22820 679 | KR4301P22521 680 | KR4201P33025 681 | KR4201P23000 682 | KR4301P22604 683 | KR4201P22978 684 | KR4301P22521 685 | KR4201P22820 686 | KR4301P22257 687 | KR4201P22978 688 | KR4301P22372 689 | KR4201P33074 690 | KR4301P22323 691 | KR4201P22952 692 | KR4301P22604 693 | KR4201P22978 694 | KR4301P22570 695 | KR4201P22820 696 | KR4301P22273 697 | KR4301P22554 698 | KR4201P22770 699 | KR4201P33009 700 | KR4301P22604 701 | KR4301P22570 702 | KR4201P32829 703 | KR4301P22273 704 | KR4201P22820 705 | KR4301P22554 706 | KR4201P33058 707 | KR4201P23000 708 | KR4301P22604 709 | KR4301P22570 710 | KR4201P22820 711 | KR4301P22471 712 | KR4201P22903 713 | KR4301P22554 714 | KR4301P22604 715 | KR4201P33157 716 | KR4301P22521 717 | KR4201P22820 718 | KR4201P22903 719 | KR4301P22471 720 | KR4201P23174 721 | KR4301P22604 722 | KR4201P22820 723 | KR4301P22570 724 | KR4201P22820 725 | KR4301P32207 726 | KR4201P22820 727 | 
KR4201P33223 728 | KR4301P22604 729 | KR4201P22952 730 | KR4301P22356 731 | KR4301P22471 732 | KR4201P23208 733 | KR4201P23125 734 | KR4301P42156 735 | KR4301P22570 736 | KR4201P23208 737 | KR4201P22820 738 | KR4301P22604 739 | KR4301P32207 740 | KR4201P22754 741 | KR4301P22570 742 | KR4201P22929 743 | KR4201P23059 744 | KR4301P22570 745 | KR4301P22307 746 | KR4201P33009 747 | KR4301P22570 748 | KR4201P23125 749 | KR4301P22307 750 | KR4201P22879 751 | KR4301P22356 752 | KR4201P22754 753 | KR4301P22570 754 | KR4201P22929 755 | KR4301P22372 756 | KR4301P22505 757 | KR4201P22952 758 | KR4301P22521 759 | KR4201P23224 760 | KR4301P22505 761 | KR4201P33025 762 | KR4301P22604 763 | KR4201P32803 764 | KR4301P22570 765 | KR4201P32779 766 | KR4201P23158 767 | KR4301P22307 768 | KR4201P23158 769 | KR4301P22570 770 | KR4201P22952 771 | KR4301P22505 772 | KR4301P22521 773 | KR4301P22356 774 | KR4301P22570 775 | KR4301P22570 776 | KR4301P22570 777 | KR4301P22521 778 | KR4301P22356 779 | KR4301P22570 780 | KR4201P22721 781 | KR4301P22570 782 | KR4301P22323 783 | KR4301P22570 784 | KR4301P22356 785 | KR4301P22356 786 | KR4301P22323 787 | KR4201P22903 788 | KR4201P22929 789 | KR4301P22620 790 | KR4301P32207 791 | KR4201P23000 792 | KR4301P22406 793 | KR4301P22570 794 | KR4201P33108 795 | KR4301P22570 796 | KR4201P32902 797 | KR4301P22570 798 | KR4201P22952 799 | KR4301P22356 800 | KR4201P22952 801 | KR4301P22554 802 | KR4201P22879 803 | KR4301P92003 804 | KR4301P32157 805 | KR4201P22879 806 | KR4301P22422 807 | KR4201P32753 808 | KR4301P22372 809 | KR4201P22978 810 | KR4301P22356 811 | KR4201P22978 812 | KR4301P22604 813 | KR4301P22323 814 | KR4201P22853 815 | KR4301P22604 816 | KR4301P22356 817 | KR4301P22604 818 | KR4301P22455 819 | KR4201P33074 820 | KR4301P32272 821 | KR4201P23000 822 | KR4301P22620 823 | KR4201P23000 824 | KR4301P22604 825 | KR4201P23026 826 | KR4301P22372 827 | KR4201P22978 828 | KR4301P22604 829 | KR4201P22978 830 | KR4301P22356 831 | KR4201P22705 832 | KR4301P22406 833 | KR4201P22929 834 | KR4301P32256 835 | KR4201P22978 836 | KR4301P22604 837 | KR4201P22978 838 | KR4201P32720 839 | KR4301P32157 840 | KR4201P22978 841 | KR4301P22554 842 | KR4301P22604 843 | KR4201P22903 844 | KR4301P22521 845 | KR4201P23059 846 | KR4301P22224 847 | KR4201P22978 848 | KR4201P22929 849 | KR4301P22372 850 | KR4201P33074 851 | KR4301P22307 852 | KR4201P23075 853 | KR4301P22356 854 | KR4201P23059 855 | KR4201P22754 856 | KR4201P22853 857 | KR4301P32256 858 | KR4201P33108 859 | KR4301P22604 860 | KR4201P23075 861 | KR4301P22406 862 | KR4201P22671 863 | KR4301P22323 864 | KR4201P22929 865 | KR4301P22372 866 | KR4201P22770 867 | KR4301P22604 868 | KR4301P22323 869 | KR4201P22952 870 | KR4301P22273 871 | KR4201P23125 872 | KR4301P32256 873 | KR4201P32902 874 | KR4301P22356 875 | KR4301P22604 876 | KR4201P23372 877 | KR4201P23000 878 | KR4301P22406 879 | KR4201P22853 880 | KR4301P22372 881 | KR4201P23109 882 | KR4301P22356 883 | KR4201P22952 884 | KR4301P32322 885 | KR4201P32977 886 | KR4301P22257 887 | KR4201P22929 888 | KR4301P22406 889 | KR4201P32803 890 | KR4301P32207 891 | KR4201P23000 892 | KR4301P22604 893 | KR4201P23372 894 | KR4301P22208 895 | KR4201P22978 896 | KR4301P22174 897 | KR4201P22952 898 | KR4301P22679 899 | KR4201P23406 900 | KR4301P22604 901 | KR4201P22929 902 | KR4201P22952 903 | KR4301P22208 904 | KR4301P22455 905 | KR4201P22952 906 | KR4201P23307 907 | KR4301P22356 908 | KR4201P23323 909 | KR4301P22604 910 | KR4201P23000 911 | KR4301P22455 912 | KR4301P22372 913 | KR4201P23026 914 | 
KR4301P22323 915 | KR4201P33124 916 | KR4301P22604 917 | KR4201P33272 918 | KR4301P22455 919 | KR4201P23000 920 | KR4301P22257 921 | KR4201P23406 922 | KR4301P32157 923 | KR4201P22952 924 | KR4301P22604 925 | KR4201P23059 926 | KR4301P22455 927 | KR4201P33108 928 | KR4301P22208 929 | KR4201P22929 930 | KR4301P22356 931 | KR4201P22952 932 | KR4301P22604 933 | KR4201P33306 934 | KR4301P32173 935 | KR4201P22903 936 | KR4301P22679 937 | KR4201P22952 938 | KR4301P22604 939 | KR4201P23406 940 | KR4301P22554 941 | KR4201P22903 942 | KR4301P22729 943 | KR4201P22952 944 | KR4301P22307 945 | KR4201P23026 946 | KR4301P32157 947 | KR4201P22903 948 | KR4301P22455 949 | KR4201P32852 950 | KR4301P22422 951 | KR4201P22978 952 | KR4301P32173 953 | KR4201P22952 954 | KR4301P22455 955 | KR4201P22903 956 | KR4301P22505 957 | KR4201P22978 958 | KR4201P22903 959 | KR4301P32256 960 | KR4201P22978 961 | KR4301P22273 962 | KR4201P22655 963 | KR4301P22422 964 | KR4301P22257 965 | KR4301P22604 966 | KR4201P22820 967 | KR4301P32223 968 | KR4201P22978 969 | KR4301P22471 970 | KR4201P22978 971 | KR4201P22903 972 | KR4301P32256 973 | KR4201P22952 974 | KR4301P22604 975 | KR4201P22978 976 | KR4301P22554 977 | KR4201P22820 978 | KR4301P22521 979 | KR4201P32829 980 | KR4201P22978 981 | KR4301P22224 982 | KR4201P22952 983 | KR4301P22604 984 | KR4201P32704 985 | KR4301P32421 986 | KR4201P22903 987 | KR4301P22356 988 | KR4201P22978 989 | KR4301P22273 990 | KR4201P22952 991 | KR4301P22307 992 | KR4201P33058 993 | KR4301P22356 994 | KR4201P32902 995 | KR4301P22554 996 | KR4201P22804 997 | KR4301P22604 998 | KR4201P22952 999 | KR4301P22174 1000 | KR4201P23075 1001 | KR4301P22356 1002 | KR4201P22903 1003 | KR4301P22604 1004 | KR4201P22978 1005 | KR4301P22372 1006 | KR4201P22820 1007 | KR4301P22406 1008 | KR4201P23372 1009 | KR4301P22323 1010 | KR4201P23000 1011 | KR4301P22604 1012 | KR4201P22978 1013 | KR4301P22273 1014 | KR4201P22952 1015 | KR4301P32256 1016 | KR4201P22879 1017 | KR4301P22505 1018 | KR4201P23000 1019 | KR4301P22356 1020 | KR4201P22978 1021 | KR4301P22455 1022 | KR4201P22952 1023 | KR4301P22554 1024 | KR4201P32977 1025 | KR4301P22422 1026 | KR4201P22903 1027 | KR4301P22323 1028 | KR4201P23026 1029 | KR4301P22273 1030 | KR4201P22952 1031 | KR4301P22356 1032 | KR4201P22879 1033 | KR4301P22455 1034 | KR4201P22903 1035 | KR4301P32322 1036 | KR4201P22978 1037 | KR4301P32256 1038 | KR4201P23323 1039 | KR4301P22323 1040 | KR4201P22879 1041 | KR4301P22273 1042 | KR4201P22903 1043 | KR4301P22158 1044 | KR4201P22978 1045 | KR4301P32173 1046 | KR4201P23323 1047 | KR4301P22679 1048 | KR4201P22879 1049 | KR4301P32306 1050 | KR4201P22903 1051 | KR4301P22604 1052 | KR4201P22978 1053 | KR4301P22356 1054 | KR4201P22952 1055 | KR4301P22471 1056 | KR4201P22879 1057 | KR4301P22372 1058 | KR4201P22804 1059 | KR4301P22604 1060 | KR4201P22903 1061 | KR4301P32157 1062 | KR4201P22820 1063 | KR4301P22455 1064 | KR4201P22879 1065 | KR4301P22604 1066 | KR4201P22903 1067 | KR4301P22323 1068 | KR4301P32454 1069 | KR4201P22929 1070 | KR4201P22952 1071 | KR4301P22307 1072 | KR4201P23273 1073 | KR4301P32157 1074 | KR4201P22903 1075 | KR4301P22505 1076 | KR4201P22978 1077 | KR4301P22620 1078 | KR4301P22604 1079 | KR4201P22903 1080 | KR4301P22323 1081 | KR4201P22879 1082 | KR4201P23307 1083 | KR4301P22406 1084 | KR4201P22903 1085 | KR4301P22307 1086 | KR4201P22952 1087 | KR4301P32157 1088 | KR4201P22978 1089 | KR4301P32306 1090 | KR4201P23059 1091 | KR4301P22372 1092 | KR4201P22903 1093 | KR4201P22952 1094 | KR4301P22604 1095 | KR4301P22356 1096 | 
KR4201P33025 1097 | KR4301P22752 1098 | KR4201P22879 1099 | KR4301P22422 1100 | KR4201P22903 1101 | KR4301P22323 1102 | KR4201P23356 1103 | KR4301P22471 1104 | KR4301P22471 1105 | KR4201P33074 1106 | KR4301P22604 1107 | -------------------------------------------------------------------------------- /benchmark/tsl/robin_set.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Tessil 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef TSL_ROBIN_SET_H 25 | #define TSL_ROBIN_SET_H 26 | 27 | 28 | #include <cstddef> 29 | #include <functional> 30 | #include <initializer_list> 31 | #include <memory> 32 | #include <type_traits> 33 | #include <utility> 34 | #include "robin_hash.h" 35 | 36 | 37 | namespace tsl { 38 | 39 | 40 | /** 41 | * Implementation of a hash set using open-addressing and the robin hood hashing algorithm with backward shift deletion. 42 | * 43 | * For operations modifying the hash set (insert, erase, rehash, ...), the strong exception guarantee 44 | * is only guaranteed when the expression `std::is_nothrow_swappable<Key>::value && 45 | * std::is_nothrow_move_constructible<Key>::value` is true, otherwise if an exception 46 | * is thrown during the swap or the move, the hash set may end up in an undefined state. Per the standard 47 | * a `Key` with a noexcept copy constructor and no move constructor also satisfies the 48 | * `std::is_nothrow_move_constructible<Key>::value` criterion (and will thus provide the 49 | * strong exception guarantee for the set). 50 | * 51 | * When `StoreHash` is true, 32 bits of the hash are stored alongside the values. It can improve 52 | * the performance during lookups if the `KeyEqual` function takes time (or engenders a cache-miss for example) 53 | * as we then compare the stored hashes before comparing the keys. When `tsl::rh::power_of_two_growth_policy` is used 54 | * as `GrowthPolicy`, it may also speed-up the rehash process as we can avoid recalculating the hash. 55 | * When it is detected that storing the hash will not incur any memory penalty due to alignment (i.e. 56 | * `sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, true>) == 57 | * sizeof(tsl::detail_robin_hash::bucket_entry<ValueType, false>)`) and `tsl::rh::power_of_two_growth_policy` is 58 | * used, the hash will be stored even if `StoreHash` is false so that we can speed-up the rehash (but it will 59 | * not be used on lookups unless `StoreHash` is true). 
60 | * 61 | * `GrowthPolicy` defines how the set grows and consequently how a hash value is mapped to a bucket. 62 | * By default the set uses `tsl::rh::power_of_two_growth_policy`. This policy keeps the number of buckets 63 | * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. 64 | * Other growth policies are available and you may define your own growth policy, 65 | * check `tsl::rh::power_of_two_growth_policy` for the interface. 66 | * 67 | * If the destructor of `Key` throws an exception, the behaviour of the class is undefined. 68 | * 69 | * Iterators invalidation: 70 | * - clear, operator=, reserve, rehash: always invalidate the iterators. 71 | * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators. 72 | * - erase: always invalidate the iterators. 73 | */ 74 | template, 76 | class KeyEqual = std::equal_to, 77 | class Allocator = std::allocator, 78 | bool StoreHash = false, 79 | class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>> 80 | class robin_set { 81 | private: 82 | template 83 | using has_is_transparent = tsl::detail_robin_hash::has_is_transparent; 84 | 85 | class KeySelect { 86 | public: 87 | using key_type = Key; 88 | 89 | const key_type& operator()(const Key& key) const noexcept { 90 | return key; 91 | } 92 | 93 | key_type& operator()(Key& key) noexcept { 94 | return key; 95 | } 96 | }; 97 | 98 | using ht = detail_robin_hash::robin_hash; 100 | 101 | public: 102 | using key_type = typename ht::key_type; 103 | using value_type = typename ht::value_type; 104 | using size_type = typename ht::size_type; 105 | using difference_type = typename ht::difference_type; 106 | using hasher = typename ht::hasher; 107 | using key_equal = typename ht::key_equal; 108 | using allocator_type = typename ht::allocator_type; 109 | using reference = typename ht::reference; 110 | using const_reference = typename ht::const_reference; 111 | using pointer = typename ht::pointer; 112 | using const_pointer = typename ht::const_pointer; 113 | using iterator = typename ht::iterator; 114 | using const_iterator = typename ht::const_iterator; 115 | 116 | 117 | /* 118 | * Constructors 119 | */ 120 | robin_set(): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { 121 | } 122 | 123 | explicit robin_set(size_type bucket_count, 124 | const Hash& hash = Hash(), 125 | const KeyEqual& equal = KeyEqual(), 126 | const Allocator& alloc = Allocator()): 127 | m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) 128 | { 129 | } 130 | 131 | robin_set(size_type bucket_count, 132 | const Allocator& alloc): robin_set(bucket_count, Hash(), KeyEqual(), alloc) 133 | { 134 | } 135 | 136 | robin_set(size_type bucket_count, 137 | const Hash& hash, 138 | const Allocator& alloc): robin_set(bucket_count, hash, KeyEqual(), alloc) 139 | { 140 | } 141 | 142 | explicit robin_set(const Allocator& alloc): robin_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { 143 | } 144 | 145 | template 146 | robin_set(InputIt first, InputIt last, 147 | size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, 148 | const Hash& hash = Hash(), 149 | const KeyEqual& equal = KeyEqual(), 150 | const Allocator& alloc = Allocator()): robin_set(bucket_count, hash, equal, alloc) 151 | { 152 | insert(first, last); 153 | } 154 | 155 | template 156 | robin_set(InputIt first, InputIt last, 157 | size_type bucket_count, 158 | const Allocator& alloc): robin_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) 159 | { 160 | } 161 | 162 | template 163 | robin_set(InputIt 
first, InputIt last, 164 | size_type bucket_count, 165 | const Hash& hash, 166 | const Allocator& alloc): robin_set(first, last, bucket_count, hash, KeyEqual(), alloc) 167 | { 168 | } 169 | 170 | robin_set(std::initializer_list init, 171 | size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, 172 | const Hash& hash = Hash(), 173 | const KeyEqual& equal = KeyEqual(), 174 | const Allocator& alloc = Allocator()): 175 | robin_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) 176 | { 177 | } 178 | 179 | robin_set(std::initializer_list init, 180 | size_type bucket_count, 181 | const Allocator& alloc): 182 | robin_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) 183 | { 184 | } 185 | 186 | robin_set(std::initializer_list init, 187 | size_type bucket_count, 188 | const Hash& hash, 189 | const Allocator& alloc): 190 | robin_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) 191 | { 192 | } 193 | 194 | 195 | robin_set& operator=(std::initializer_list ilist) { 196 | m_ht.clear(); 197 | 198 | m_ht.reserve(ilist.size()); 199 | m_ht.insert(ilist.begin(), ilist.end()); 200 | 201 | return *this; 202 | } 203 | 204 | allocator_type get_allocator() const { return m_ht.get_allocator(); } 205 | 206 | 207 | /* 208 | * Iterators 209 | */ 210 | iterator begin() noexcept { return m_ht.begin(); } 211 | const_iterator begin() const noexcept { return m_ht.begin(); } 212 | const_iterator cbegin() const noexcept { return m_ht.cbegin(); } 213 | 214 | iterator end() noexcept { return m_ht.end(); } 215 | const_iterator end() const noexcept { return m_ht.end(); } 216 | const_iterator cend() const noexcept { return m_ht.cend(); } 217 | 218 | 219 | /* 220 | * Capacity 221 | */ 222 | bool empty() const noexcept { return m_ht.empty(); } 223 | size_type size() const noexcept { return m_ht.size(); } 224 | size_type max_size() const noexcept { return m_ht.max_size(); } 225 | 226 | /* 227 | * Modifiers 228 | */ 229 | void clear() noexcept { m_ht.clear(); } 230 | 231 | 232 | 233 | 234 | std::pair insert(const value_type& value) { 235 | return m_ht.insert(value); 236 | } 237 | 238 | std::pair insert(value_type&& value) { 239 | return m_ht.insert(std::move(value)); 240 | } 241 | 242 | iterator insert(const_iterator hint, const value_type& value) { 243 | return m_ht.insert(hint, value); 244 | } 245 | 246 | iterator insert(const_iterator hint, value_type&& value) { 247 | return m_ht.insert(hint, std::move(value)); 248 | } 249 | 250 | template 251 | void insert(InputIt first, InputIt last) { 252 | m_ht.insert(first, last); 253 | } 254 | 255 | void insert(std::initializer_list ilist) { 256 | m_ht.insert(ilist.begin(), ilist.end()); 257 | } 258 | 259 | 260 | 261 | 262 | /** 263 | * Due to the way elements are stored, emplace will need to move or copy the key-value once. 264 | * The method is equivalent to insert(value_type(std::forward(args)...)); 265 | * 266 | * Mainly here for compatibility with the std::unordered_map interface. 267 | */ 268 | template 269 | std::pair emplace(Args&&... args) { 270 | return m_ht.emplace(std::forward(args)...); 271 | } 272 | 273 | 274 | 275 | /** 276 | * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. 277 | * The method is equivalent to insert(hint, value_type(std::forward(args)...)); 278 | * 279 | * Mainly here for compatibility with the std::unordered_map interface. 280 | */ 281 | template 282 | iterator emplace_hint(const_iterator hint, Args&&... 
args) { 283 | return m_ht.emplace_hint(hint, std::forward(args)...); 284 | } 285 | 286 | 287 | 288 | iterator erase(iterator pos) { return m_ht.erase(pos); } 289 | iterator erase(const_iterator pos) { return m_ht.erase(pos); } 290 | iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } 291 | size_type erase(const key_type& key) { return m_ht.erase(key); } 292 | 293 | /** 294 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 295 | * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 296 | */ 297 | size_type erase(const key_type& key, std::size_t precalculated_hash) { 298 | return m_ht.erase(key, precalculated_hash); 299 | } 300 | 301 | /** 302 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 303 | * If so, K must be hashable and comparable to Key. 304 | */ 305 | template::value>::type* = nullptr> 306 | size_type erase(const K& key) { return m_ht.erase(key); } 307 | 308 | /** 309 | * @copydoc erase(const K& key) 310 | * 311 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 312 | * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 313 | */ 314 | template::value>::type* = nullptr> 315 | size_type erase(const K& key, std::size_t precalculated_hash) { 316 | return m_ht.erase(key, precalculated_hash); 317 | } 318 | 319 | 320 | 321 | void swap(robin_set& other) { other.m_ht.swap(m_ht); } 322 | 323 | 324 | 325 | /* 326 | * Lookup 327 | */ 328 | size_type count(const Key& key) const { return m_ht.count(key); } 329 | 330 | /** 331 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 332 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 333 | */ 334 | size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } 335 | 336 | /** 337 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 338 | * If so, K must be hashable and comparable to Key. 339 | */ 340 | template::value>::type* = nullptr> 341 | size_type count(const K& key) const { return m_ht.count(key); } 342 | 343 | /** 344 | * @copydoc count(const K& key) const 345 | * 346 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 347 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 348 | */ 349 | template::value>::type* = nullptr> 350 | size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } 351 | 352 | 353 | 354 | 355 | iterator find(const Key& key) { return m_ht.find(key); } 356 | 357 | /** 358 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 359 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
360 | */ 361 | iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } 362 | 363 | const_iterator find(const Key& key) const { return m_ht.find(key); } 364 | 365 | /** 366 | * @copydoc find(const Key& key, std::size_t precalculated_hash) 367 | */ 368 | const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } 369 | 370 | /** 371 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 372 | * If so, K must be hashable and comparable to Key. 373 | */ 374 | template::value>::type* = nullptr> 375 | iterator find(const K& key) { return m_ht.find(key); } 376 | 377 | /** 378 | * @copydoc find(const K& key) 379 | * 380 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 381 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 382 | */ 383 | template::value>::type* = nullptr> 384 | iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } 385 | 386 | /** 387 | * @copydoc find(const K& key) 388 | */ 389 | template::value>::type* = nullptr> 390 | const_iterator find(const K& key) const { return m_ht.find(key); } 391 | 392 | /** 393 | * @copydoc find(const K& key) 394 | * 395 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 396 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 397 | */ 398 | template::value>::type* = nullptr> 399 | const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } 400 | 401 | 402 | 403 | 404 | std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } 405 | 406 | /** 407 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 408 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 409 | */ 410 | std::pair equal_range(const Key& key, std::size_t precalculated_hash) { 411 | return m_ht.equal_range(key, precalculated_hash); 412 | } 413 | 414 | std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } 415 | 416 | /** 417 | * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) 418 | */ 419 | std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { 420 | return m_ht.equal_range(key, precalculated_hash); 421 | } 422 | 423 | /** 424 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 425 | * If so, K must be hashable and comparable to Key. 426 | */ 427 | template::value>::type* = nullptr> 428 | std::pair equal_range(const K& key) { return m_ht.equal_range(key); } 429 | 430 | /** 431 | * @copydoc equal_range(const K& key) 432 | * 433 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 434 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
435 | */ 436 | template::value>::type* = nullptr> 437 | std::pair equal_range(const K& key, std::size_t precalculated_hash) { 438 | return m_ht.equal_range(key, precalculated_hash); 439 | } 440 | 441 | /** 442 | * @copydoc equal_range(const K& key) 443 | */ 444 | template::value>::type* = nullptr> 445 | std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } 446 | 447 | /** 448 | * @copydoc equal_range(const K& key, std::size_t precalculated_hash) 449 | */ 450 | template::value>::type* = nullptr> 451 | std::pair equal_range(const K& key, std::size_t precalculated_hash) const { 452 | return m_ht.equal_range(key, precalculated_hash); 453 | } 454 | 455 | 456 | 457 | 458 | /* 459 | * Bucket interface 460 | */ 461 | size_type bucket_count() const { return m_ht.bucket_count(); } 462 | size_type max_bucket_count() const { return m_ht.max_bucket_count(); } 463 | 464 | 465 | /* 466 | * Hash policy 467 | */ 468 | float load_factor() const { return m_ht.load_factor(); } 469 | float max_load_factor() const { return m_ht.max_load_factor(); } 470 | void max_load_factor(float ml) { m_ht.max_load_factor(ml); } 471 | 472 | void rehash(size_type count) { m_ht.rehash(count); } 473 | void reserve(size_type count) { m_ht.reserve(count); } 474 | 475 | 476 | /* 477 | * Observers 478 | */ 479 | hasher hash_function() const { return m_ht.hash_function(); } 480 | key_equal key_eq() const { return m_ht.key_eq(); } 481 | 482 | 483 | /* 484 | * Other 485 | */ 486 | 487 | /** 488 | * Convert a const_iterator to an iterator. 489 | */ 490 | iterator mutable_iterator(const_iterator pos) { 491 | return m_ht.mutable_iterator(pos); 492 | } 493 | 494 | friend bool operator==(const robin_set& lhs, const robin_set& rhs) { 495 | if(lhs.size() != rhs.size()) { 496 | return false; 497 | } 498 | 499 | for(const auto& element_lhs: lhs) { 500 | const auto it_element_rhs = rhs.find(element_lhs); 501 | if(it_element_rhs == rhs.cend()) { 502 | return false; 503 | } 504 | } 505 | 506 | return true; 507 | } 508 | 509 | friend bool operator!=(const robin_set& lhs, const robin_set& rhs) { 510 | return !operator==(lhs, rhs); 511 | } 512 | 513 | friend void swap(robin_set& lhs, robin_set& rhs) { 514 | lhs.swap(rhs); 515 | } 516 | 517 | private: 518 | ht m_ht; 519 | }; 520 | 521 | 522 | /** 523 | * Same as `tsl::robin_set`. 524 | */ 525 | template, 527 | class KeyEqual = std::equal_to, 528 | class Allocator = std::allocator, 529 | bool StoreHash = false> 530 | using robin_pg_set = robin_set; 531 | 532 | } // end namespace tsl 533 | 534 | #endif 535 | 536 | -------------------------------------------------------------------------------- /benchmark/tsl/hopscotch_set.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2017 Tessil 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef TSL_HOPSCOTCH_SET_H 25 | #define TSL_HOPSCOTCH_SET_H 26 | 27 | 28 | #include <algorithm> 29 | #include <cstddef> 30 | #include <functional> 31 | #include <initializer_list> 32 | #include <list> 33 | #include <memory> 34 | #include <type_traits> 35 | #include <utility> 36 | #include "hopscotch_hash.h" 37 | 38 | 39 | namespace tsl { 40 | 41 | /** 42 | * Implementation of a hash set using the hopscotch hashing algorithm. 43 | * 44 | * The Key must be either nothrow move-constructible, copy-constructible or both. 45 | * 46 | * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false. 47 | * When StoreHash is true, 32 bits of the hash will be stored alongside the neighborhood, limiting 48 | * the NeighborhoodSize to <= 30. There is no memory usage difference between 49 | * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'. 50 | * 51 | * Storing the hash may improve performance on insert during the rehash process if the hash takes time 52 | * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss). 53 | * If used with simple Hash and KeyEqual it may slow things down. 54 | * 55 | * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy. 56 | * 57 | * GrowthPolicy defines how the set grows and consequently how a hash value is mapped to a bucket. 58 | * By default the set uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets 59 | * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. 60 | * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface. 61 | * 62 | * If the destructor of Key throws an exception, behaviour of the class is undefined. 63 | * 64 | * Iterators invalidation: 65 | * - clear, operator=, reserve, rehash: always invalidate the iterators. 66 | * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators 67 | * if a displacement is needed to resolve a collision (which means that most of the time, 68 | * insert will invalidate the iterators), or if there is a rehash. 69 | * - erase: the iterator on the erased element is the only one which becomes invalid. 
70 | */ 71 | template, 73 | class KeyEqual = std::equal_to, 74 | class Allocator = std::allocator, 75 | unsigned int NeighborhoodSize = 62, 76 | bool StoreHash = false, 77 | class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> 78 | class hopscotch_set { 79 | private: 80 | template 81 | using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; 82 | 83 | class KeySelect { 84 | public: 85 | using key_type = Key; 86 | 87 | const key_type& operator()(const Key& key) const { 88 | return key; 89 | } 90 | 91 | key_type& operator()(Key& key) { 92 | return key; 93 | } 94 | }; 95 | 96 | 97 | using overflow_container_type = std::list; 98 | using ht = detail_hopscotch_hash::hopscotch_hash; 103 | 104 | public: 105 | using key_type = typename ht::key_type; 106 | using value_type = typename ht::value_type; 107 | using size_type = typename ht::size_type; 108 | using difference_type = typename ht::difference_type; 109 | using hasher = typename ht::hasher; 110 | using key_equal = typename ht::key_equal; 111 | using allocator_type = typename ht::allocator_type; 112 | using reference = typename ht::reference; 113 | using const_reference = typename ht::const_reference; 114 | using pointer = typename ht::pointer; 115 | using const_pointer = typename ht::const_pointer; 116 | using iterator = typename ht::iterator; 117 | using const_iterator = typename ht::const_iterator; 118 | 119 | 120 | /* 121 | * Constructors 122 | */ 123 | hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { 124 | } 125 | 126 | explicit hopscotch_set(size_type bucket_count, 127 | const Hash& hash = Hash(), 128 | const KeyEqual& equal = KeyEqual(), 129 | const Allocator& alloc = Allocator()) : 130 | m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) 131 | { 132 | } 133 | 134 | hopscotch_set(size_type bucket_count, 135 | const Allocator& alloc) : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) 136 | { 137 | } 138 | 139 | hopscotch_set(size_type bucket_count, 140 | const Hash& hash, 141 | const Allocator& alloc) : hopscotch_set(bucket_count, hash, KeyEqual(), alloc) 142 | { 143 | } 144 | 145 | explicit hopscotch_set(const Allocator& alloc) : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { 146 | } 147 | 148 | template 149 | hopscotch_set(InputIt first, InputIt last, 150 | size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, 151 | const Hash& hash = Hash(), 152 | const KeyEqual& equal = KeyEqual(), 153 | const Allocator& alloc = Allocator()) : hopscotch_set(bucket_count, hash, equal, alloc) 154 | { 155 | insert(first, last); 156 | } 157 | 158 | template 159 | hopscotch_set(InputIt first, InputIt last, 160 | size_type bucket_count, 161 | const Allocator& alloc) : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) 162 | { 163 | } 164 | 165 | template 166 | hopscotch_set(InputIt first, InputIt last, 167 | size_type bucket_count, 168 | const Hash& hash, 169 | const Allocator& alloc) : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) 170 | { 171 | } 172 | 173 | hopscotch_set(std::initializer_list init, 174 | size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, 175 | const Hash& hash = Hash(), 176 | const KeyEqual& equal = KeyEqual(), 177 | const Allocator& alloc = Allocator()) : 178 | hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) 179 | { 180 | } 181 | 182 | hopscotch_set(std::initializer_list init, 183 | size_type bucket_count, 184 | const Allocator& alloc) : 185 | hopscotch_set(init.begin(), init.end(), bucket_count, 
Hash(), KeyEqual(), alloc) 186 | { 187 | } 188 | 189 | hopscotch_set(std::initializer_list init, 190 | size_type bucket_count, 191 | const Hash& hash, 192 | const Allocator& alloc) : 193 | hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) 194 | { 195 | } 196 | 197 | 198 | hopscotch_set& operator=(std::initializer_list ilist) { 199 | m_ht.clear(); 200 | 201 | m_ht.reserve(ilist.size()); 202 | m_ht.insert(ilist.begin(), ilist.end()); 203 | 204 | return *this; 205 | } 206 | 207 | allocator_type get_allocator() const { return m_ht.get_allocator(); } 208 | 209 | 210 | /* 211 | * Iterators 212 | */ 213 | iterator begin() noexcept { return m_ht.begin(); } 214 | const_iterator begin() const noexcept { return m_ht.begin(); } 215 | const_iterator cbegin() const noexcept { return m_ht.cbegin(); } 216 | 217 | iterator end() noexcept { return m_ht.end(); } 218 | const_iterator end() const noexcept { return m_ht.end(); } 219 | const_iterator cend() const noexcept { return m_ht.cend(); } 220 | 221 | 222 | /* 223 | * Capacity 224 | */ 225 | bool empty() const noexcept { return m_ht.empty(); } 226 | size_type size() const noexcept { return m_ht.size(); } 227 | size_type max_size() const noexcept { return m_ht.max_size(); } 228 | 229 | /* 230 | * Modifiers 231 | */ 232 | void clear() noexcept { m_ht.clear(); } 233 | 234 | 235 | 236 | 237 | std::pair insert(const value_type& value) { return m_ht.insert(value); } 238 | std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } 239 | 240 | iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); } 241 | iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); } 242 | 243 | template 244 | void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } 245 | void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } 246 | 247 | 248 | 249 | 250 | /** 251 | * Due to the way elements are stored, emplace will need to move or copy the key-value once. 252 | * The method is equivalent to insert(value_type(std::forward(args)...)); 253 | * 254 | * Mainly here for compatibility with the std::unordered_map interface. 255 | */ 256 | template 257 | std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } 258 | 259 | 260 | 261 | 262 | /** 263 | * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. 264 | * The method is equivalent to insert(hint, value_type(std::forward(args)...)); 265 | * 266 | * Mainly here for compatibility with the std::unordered_map interface. 267 | */ 268 | template 269 | iterator emplace_hint(const_iterator hint, Args&&... args) { 270 | return m_ht.emplace_hint(hint, std::forward(args)...); 271 | } 272 | 273 | 274 | 275 | 276 | iterator erase(iterator pos) { return m_ht.erase(pos); } 277 | iterator erase(const_iterator pos) { return m_ht.erase(pos); } 278 | iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } 279 | size_type erase(const key_type& key) { return m_ht.erase(key); } 280 | 281 | /** 282 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 283 | * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 
284 | */ 285 | size_type erase(const key_type& key, std::size_t precalculated_hash) { 286 | return m_ht.erase(key, precalculated_hash); 287 | } 288 | 289 | /** 290 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 291 | * If so, K must be hashable and comparable to Key. 292 | */ 293 | template::value>::type* = nullptr> 294 | size_type erase(const K& key) { return m_ht.erase(key); } 295 | 296 | /** 297 | * @copydoc erase(const K& key) 298 | * 299 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 300 | * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 301 | */ 302 | template::value>::type* = nullptr> 303 | size_type erase(const K& key, std::size_t precalculated_hash) { 304 | return m_ht.erase(key, precalculated_hash); 305 | } 306 | 307 | 308 | 309 | 310 | void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); } 311 | 312 | 313 | /* 314 | * Lookup 315 | */ 316 | size_type count(const Key& key) const { return m_ht.count(key); } 317 | 318 | /** 319 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 320 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 321 | */ 322 | size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } 323 | 324 | /** 325 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 326 | * If so, K must be hashable and comparable to Key. 327 | */ 328 | template::value>::type* = nullptr> 329 | size_type count(const K& key) const { return m_ht.count(key); } 330 | 331 | /** 332 | * @copydoc count(const K& key) const 333 | * 334 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 335 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 336 | */ 337 | template::value>::type* = nullptr> 338 | size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } 339 | 340 | 341 | 342 | 343 | iterator find(const Key& key) { return m_ht.find(key); } 344 | 345 | /** 346 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 347 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 348 | */ 349 | iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } 350 | 351 | const_iterator find(const Key& key) const { return m_ht.find(key); } 352 | 353 | /** 354 | * @copydoc find(const Key& key, std::size_t precalculated_hash) 355 | */ 356 | const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } 357 | 358 | /** 359 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 360 | * If so, K must be hashable and comparable to Key. 361 | */ 362 | template::value>::type* = nullptr> 363 | iterator find(const K& key) { return m_ht.find(key); } 364 | 365 | /** 366 | * @copydoc find(const K& key) 367 | * 368 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 369 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
370 | */ 371 | template::value>::type* = nullptr> 372 | iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } 373 | 374 | /** 375 | * @copydoc find(const K& key) 376 | */ 377 | template::value>::type* = nullptr> 378 | const_iterator find(const K& key) const { return m_ht.find(key); } 379 | 380 | /** 381 | * @copydoc find(const K& key) 382 | * 383 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 384 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 385 | */ 386 | template::value>::type* = nullptr> 387 | const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } 388 | 389 | 390 | 391 | 392 | std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } 393 | 394 | /** 395 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 396 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 397 | */ 398 | std::pair equal_range(const Key& key, std::size_t precalculated_hash) { 399 | return m_ht.equal_range(key, precalculated_hash); 400 | } 401 | 402 | std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } 403 | 404 | /** 405 | * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) 406 | */ 407 | std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { 408 | return m_ht.equal_range(key, precalculated_hash); 409 | } 410 | 411 | /** 412 | * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. 413 | * If so, K must be hashable and comparable to Key. 414 | */ 415 | template::value>::type* = nullptr> 416 | std::pair equal_range(const K& key) { return m_ht.equal_range(key); } 417 | 418 | /** 419 | * @copydoc equal_range(const K& key) 420 | * 421 | * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same 422 | * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
423 | */ 424 | template::value>::type* = nullptr> 425 | std::pair equal_range(const K& key, std::size_t precalculated_hash) { 426 | return m_ht.equal_range(key, precalculated_hash); 427 | } 428 | 429 | /** 430 | * @copydoc equal_range(const K& key) 431 | */ 432 | template::value>::type* = nullptr> 433 | std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } 434 | 435 | /** 436 | * @copydoc equal_range(const K& key, std::size_t precalculated_hash) 437 | */ 438 | template::value>::type* = nullptr> 439 | std::pair equal_range(const K& key, std::size_t precalculated_hash) const { 440 | return m_ht.equal_range(key, precalculated_hash); 441 | } 442 | 443 | 444 | 445 | 446 | /* 447 | * Bucket interface 448 | */ 449 | size_type bucket_count() const { return m_ht.bucket_count(); } 450 | size_type max_bucket_count() const { return m_ht.max_bucket_count(); } 451 | 452 | 453 | /* 454 | * Hash policy 455 | */ 456 | float load_factor() const { return m_ht.load_factor(); } 457 | float max_load_factor() const { return m_ht.max_load_factor(); } 458 | void max_load_factor(float ml) { m_ht.max_load_factor(ml); } 459 | 460 | void rehash(size_type count_) { m_ht.rehash(count_); } 461 | void reserve(size_type count_) { m_ht.reserve(count_); } 462 | 463 | 464 | /* 465 | * Observers 466 | */ 467 | hasher hash_function() const { return m_ht.hash_function(); } 468 | key_equal key_eq() const { return m_ht.key_eq(); } 469 | 470 | 471 | /* 472 | * Other 473 | */ 474 | 475 | /** 476 | * Convert a const_iterator to an iterator. 477 | */ 478 | iterator mutable_iterator(const_iterator pos) { 479 | return m_ht.mutable_iterator(pos); 480 | } 481 | 482 | size_type overflow_size() const noexcept { return m_ht.overflow_size(); } 483 | 484 | friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) { 485 | if(lhs.size() != rhs.size()) { 486 | return false; 487 | } 488 | 489 | for(const auto& element_lhs : lhs) { 490 | const auto it_element_rhs = rhs.find(element_lhs); 491 | if(it_element_rhs == rhs.cend()) { 492 | return false; 493 | } 494 | } 495 | 496 | return true; 497 | } 498 | 499 | friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) { 500 | return !operator==(lhs, rhs); 501 | } 502 | 503 | friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) { 504 | lhs.swap(rhs); 505 | } 506 | 507 | private: 508 | ht m_ht; 509 | }; 510 | 511 | 512 | /** 513 | * Same as `tsl::hopscotch_set`. 514 | */ 515 | template, 517 | class KeyEqual = std::equal_to, 518 | class Allocator = std::allocator, 519 | unsigned int NeighborhoodSize = 62, 520 | bool StoreHash = false> 521 | using hopscotch_pg_set = hopscotch_set; 522 | 523 | } // end namespace tsl 524 | 525 | #endif 526 | --------------------------------------------------------------------------------
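Both tsl headers above document `find`/`count`/`erase` overloads that accept a precalculated hash, so a key that is probed repeatedly only needs to be hashed once. Below is a minimal usage sketch of that overload with `tsl::robin_set`; it is not code from this repository. The `main` function and the literal keys are illustrative (the keys just mirror the format of `data.txt`), and the include path assumes compiling from the `benchmark` directory with `-I.` as in `build.sh`, with the `robin_hash.h` header that `robin_set.h` includes available alongside it.

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include "tsl/robin_set.h"

    int main() {
        tsl::robin_set<std::string> set;
        set.reserve(16);                                // reserve/rehash invalidate iterators, per the header docs
        set.insert({"KR4301P22323", "KR4201P33124"});   // initializer_list insert overload

        const std::string key = "KR4301P22323";
        const std::size_t h = set.hash_function()(key); // precalculated hash; must equal hash_function()(key)
        if (set.find(key, h) != set.end())              // lookup that reuses the precalculated hash
            std::printf("%s count=%zu\n", key.c_str(), set.count(key, h));
        return 0;
    }

The same pattern applies to `tsl::hopscotch_set`, which exposes the identical precalculated-hash overloads.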