├── .bazelrc ├── .gitignore ├── BUILD.bazel ├── BUILD.double_conversion ├── BUILD.folly ├── BUILD.skarupke ├── BUILD.sparsehash ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── WORKSPACE ├── build_info.cc ├── build_info.h ├── dynamic_rehash_effect.cc ├── hashtable_benchmarks.cc ├── hashtable_benchmarks.ipynb ├── hashtable_memory.ipynb ├── results ├── bytell_max_lf.txt ├── ngbronson-2018-10-02.json ├── sonmi-2018-07-19.json └── sonmi-2021-05-02.json ├── sparseconfig.h └── tools └── workspace_status.sh /.bazelrc: -------------------------------------------------------------------------------- 1 | build --cxxopt=--std=c++17 2 | 3 | build --copt=-march=corei7 --copt=-mtune=core-avx2 4 | 5 | build --workspace_status_command=tools/workspace_status.sh 6 | build -c opt 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bazel-* 2 | -------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | package(default_visibility = ["//visibility:public"]) 16 | 17 | cc_library( 18 | name = "sparsehash_config", 19 | hdrs = ["sparseconfig.h"], 20 | include_prefix = "sparsehash/internal", 21 | visibility = ["//visibility:public"], 22 | ) 23 | 24 | cc_library( 25 | name = "build_info", 26 | hdrs = ["build_info.h"], 27 | linkstamp = "build_info.cc", 28 | ) 29 | 30 | cc_binary( 31 | name = "hashtable_benchmarks", 32 | srcs = ["hashtable_benchmarks.cc"], 33 | copts = ["-Wno-deprecated"], 34 | deps = [ 35 | ":build_info", 36 | "@absl//absl/strings", 37 | "@absl//absl/strings:str_format", 38 | "@absl//absl/container:flat_hash_set", 39 | "@absl//absl/container:node_hash_set", 40 | "@boost//:preprocessor", 41 | "@facebook_folly//:folly", 42 | "@gbench//:benchmark_main", 43 | "@google_sparsehash//:sparsehash", 44 | "@skarupke//:skarupke", 45 | ], 46 | stamp = 1, 47 | ) 48 | 49 | cc_binary( 50 | name = "dynamic_rehash_effect", 51 | srcs = ["dynamic_rehash_effect.cc"], 52 | deps = [ 53 | "@skarupke//:skarupke", 54 | ], 55 | ) 56 | -------------------------------------------------------------------------------- /BUILD.double_conversion: -------------------------------------------------------------------------------- 1 | # Bazel(http://bazel.io) BUILD file 2 | 3 | licenses(["notice"]) 4 | 5 | exports_files(["LICENSE"]) 6 | 7 | cc_library( 8 | name = "double-conversion", 9 | srcs = glob(["double-conversion/*.cc"]), 10 | hdrs = glob(["double-conversion/*.h"]), 11 | includes = [ 12 | ".", 13 | ], 14 | linkopts = [ 15 | "-lm", 16 | ], 17 | visibility = ["//visibility:public"], 18 | ) 19 | 20 | cc_test( 21 | name = "cctest", 22 | srcs = glob(["test/cctest/*.cc", "test/cctest/*.h"]), 23 | args = [ 24 | "test-bignum", 25 | "test-bignum-dtoa", 26 | "test-conversions", 27 | "test-dtoa", 28 | "test-fast-dtoa", 29 | "test-fixed-dtoa", 30 | "test-ieee", 31 | "test-strtod", 32 | ], 33 | visibility = ["//visibility:public"], 34 | deps = [":double-conversion"], 35 | ) 36 | 
-------------------------------------------------------------------------------- /BUILD.folly: -------------------------------------------------------------------------------- 1 | cc_library( 2 | name = "folly", 3 | includes = [""], 4 | srcs = [ 5 | "folly/FileUtil.cpp", 6 | "folly/ScopeGuard.cpp", 7 | "folly/container/detail/F14Table.cpp", 8 | "folly/lang/Assume.h", 9 | "folly/lang/ToAscii.cpp", 10 | "folly/lang/SafeAssert.cpp", 11 | "folly/net/NetOps.cpp", 12 | ], 13 | hdrs = glob([ 14 | "folly/**/*.h", 15 | ]), 16 | defines = [ 17 | "FOLLY_NO_CONFIG", 18 | "FOLLY_HAVE_MEMRCHR", 19 | "FOLLY_HAVE_SENDMMSG", 20 | "FOLLY_HAVE_RECVMMSG", 21 | ], 22 | visibility = ["//visibility:public"], 23 | deps = [ 24 | "@com_github_google_glog//:glog", 25 | "@com_github_google_double_conversion//:double-conversion", 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /BUILD.skarupke: -------------------------------------------------------------------------------- 1 | cc_library( 2 | name = "skarupke", 3 | includes = [""], 4 | hdrs = [ 5 | "bytell_hash_map.hpp", 6 | "flat_hash_map.hpp", 7 | "unordered_map.hpp", 8 | ], 9 | visibility = ["//visibility:public"], 10 | ) 11 | -------------------------------------------------------------------------------- /BUILD.sparsehash: -------------------------------------------------------------------------------- 1 | cc_library( 2 | name = "sparsehash", 3 | includes = ["src"], 4 | hdrs = glob([ 5 | "src/google/**/*", 6 | "src/sparsehash/**/*", 7 | ]), 8 | visibility = ["//visibility:public"], 9 | deps = [ 10 | "@hashtable_benchmarks//:sparsehash_config", 11 | ], 12 | ) 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 
There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hashtable Benchmarks 2 | 3 | Benchmarks for comparing hashtable implementations. 4 | 5 | 1. Build: 6 | 7 | ```shell 8 | bazel build :hashtable_benchmarks 9 | ``` 10 | 11 | Note that `-c opt` is the default. 12 | 13 | 2. Run: 14 | 15 | ```shell 16 | ./bazel-bin/hashtable_benchmarks --benchmark_format=json > benchmark-results.json 17 | ``` 18 | 19 | 3. Analyze: 20 | 21 | You can use http://colab.research.google.com along with `hashtable_benchmarks.ipynb` to parse the generated `benchmark-results.json`. 22 | 23 | 4. Contribute: 24 | 25 | We would like this to turn into *the* place for comparing hashtables in C++. We 26 | will accept external dependencies on other hashtable libraries (assuming they 27 | have a compatible licence). We encourage folks to improve and modify both the 28 | analysis and the benchmarks themselves as we learn things. 
Please join the 29 | discussion at 30 | 31 | https://groups.google.com/forum/#!forum/hashtable-benchmarks 32 | 33 | # Disclaimer 34 | 35 | This is not an officially supported Google product. 36 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | workspace(name = "hashtable_benchmarks") 16 | 17 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 18 | 19 | # abseil 20 | http_archive( 21 | name = "absl", 22 | strip_prefix = "abseil-cpp-master", 23 | urls = ["https://github.com/abseil/abseil-cpp/archive/master.zip"], 24 | ) 25 | 26 | # Google benchmark. 
27 | http_archive( 28 | name = "gbench", 29 | strip_prefix = "benchmark-main", 30 | urls = ["https://github.com/google/benchmark/archive/main.zip"], 31 | ) 32 | 33 | # Google dense_hash_set 34 | http_archive( 35 | name = "google_sparsehash", 36 | build_file = "//:BUILD.sparsehash", 37 | strip_prefix = "sparsehash-master", 38 | urls = ["https://github.com/google/sparsehash/archive/master.zip"], 39 | ) 40 | 41 | http_archive( 42 | name = "com_github_gflags_gflags", 43 | strip_prefix = "gflags-master", 44 | urls = ["https://github.com/gflags/gflags/archive/master.zip"], 45 | ) 46 | http_archive( 47 | name = "com_github_google_glog", 48 | strip_prefix = "glog-master", 49 | urls = ["https://github.com/google/glog/archive/master.zip"], 50 | ) 51 | http_archive( 52 | name = "com_github_google_double_conversion", 53 | build_file = "//:BUILD.double_conversion", 54 | strip_prefix = "double-conversion-master", 55 | urls = ["https://github.com/google/double-conversion/archive/master.zip"], 56 | ) 57 | 58 | # Facebook folly 59 | http_archive( 60 | name = "facebook_folly", 61 | build_file = "//:BUILD.folly", 62 | strip_prefix = "folly-main", 63 | urls = ["https://github.com/facebook/folly/archive/main.zip"], 64 | ) 65 | 66 | # ska::flat_hash_set and ska::bytell_hash_set 67 | http_archive( 68 | name = "skarupke", 69 | build_file = "//:BUILD.skarupke", 70 | strip_prefix = "flat_hash_map-master", 71 | urls = ["https://github.com/skarupke/flat_hash_map/archive/master.zip"], 72 | ) 73 | 74 | http_archive( 75 | name = "com_github_nelhage_rules_boost", 76 | strip_prefix = "rules_boost-master", 77 | urls = ["https://github.com/nelhage/rules_boost/archive/master.zip"], 78 | ) 79 | 80 | load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") 81 | 82 | boost_deps() 83 | -------------------------------------------------------------------------------- /build_info.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google 
LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | char const* getBuildScmRevision() { 16 | return BUILD_SCM_REVISION; 17 | } 18 | -------------------------------------------------------------------------------- /build_info.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | char const* getBuildScmRevision(); 16 | -------------------------------------------------------------------------------- /dynamic_rehash_effect.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "bytell_hash_map.hpp" 21 | 22 | // ska::bytell_hash_set has a collision-based rehash decision. 23 | // This program explores the load factor at which that rehash is triggered. 24 | 25 | int main(int argc, char **argv) { 26 | size_t maxSize = 1U << 28; 27 | size_t numReps = 100; 28 | 29 | std::random_device rd; 30 | auto rng = std::mt19937{rd()}; 31 | auto dis = std::uniform_int_distribution{0, (1U << 31) - 1}; 32 | 33 | for (size_t rep = 0; rep < numReps; ++rep) { 34 | ska::bytell_hash_set set; 35 | set.max_load_factor(0.999); 36 | 37 | while (set.size() < maxSize) { 38 | auto key = dis(rng); 39 | size_t prevSize = set.size(); 40 | size_t prevCap = set.bucket_count(); 41 | set.insert(key); 42 | if (set.bucket_count() > prevCap && prevCap > 0) { 43 | auto lf = static_cast(prevSize) / prevCap; 44 | std::cout << prevCap << " " << prevSize << " " << lf << "\n"; 45 | } 46 | } 47 | std::cout << "\n"; 48 | } 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /hashtable_benchmarks.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "build_info.h" 27 | #include "benchmark/benchmark.h" 28 | #include "absl/strings/str_format.h" 29 | #include "absl/base/port.h" 30 | #include "absl/container/flat_hash_set.h" 31 | #include "absl/container/node_hash_set.h" 32 | #include "boost/preprocessor.hpp" 33 | #include "google/dense_hash_set" 34 | #include "folly/container/F14Set.h" 35 | 36 | #if defined(__GNUC__) || defined(__clang__) 37 | #define HT_BENCH_FLATTEN __attribute__((__flatten__)) 38 | #define HT_BENCH_NOINLINE __attribute__((__noinline__)) 39 | #else 40 | #define HT_BENCH_FLATTEN 41 | #define HT_BENCH_NOINLINE 42 | #endif 43 | 44 | // Benchmarks for comparing hash tables. 45 | // 46 | // TL;DR: 47 | // 48 | // $ bazel build -c opt :hashtable_benchmarks 49 | // $ ./bazel-bin/hashtable_benchmarks --benchmark_format=json 50 | // 51 | // This benchmark runs hashtables through a large suite of configurations. 52 | 53 | namespace { 54 | 55 | #if !defined(__clang__) && __GNUC__ == 5 56 | // gcc 5.5 miscompiles LookupHit_Hot's DoNotOptimize(set), resulting in 57 | // an immediate segfault in opt mode. This fallback implementation only 58 | // handles integral non-const lvalue types, but is sufficient for the 59 | // hash table benchmarks. gcc 7 is okay. 
60 | template 61 | std::enable_if_t::value || std::is_pointer::value> 62 | DoNotOptimize(T &val) { 63 | asm volatile("" : "+r"(val) : : "memory"); 64 | } 65 | 66 | template 67 | void DoNotOptimize(T const& val) { 68 | asm volatile("" : : "r,m"(val) : "memory"); 69 | } 70 | #else 71 | using ::benchmark::DoNotOptimize; 72 | #endif 73 | 74 | std::mt19937 MakeRNG() { 75 | std::random_device rd; 76 | return std::mt19937(rd()); 77 | } 78 | 79 | std::mt19937& GetRNG() { 80 | static auto* rng = new auto(MakeRNG()); 81 | return *rng; 82 | } 83 | 84 | template 85 | class Ballast { 86 | char ballast_[kSize]; 87 | }; 88 | 89 | template <> 90 | class Ballast<0> {}; 91 | 92 | // sizeof(Value) == kSize 93 | // alignof(Value) == kSize < 8 ? 4 : 8 94 | template 95 | class alignas(kSize < 8 ? 4 : 8) Value : private Ballast { 96 | public: 97 | static_assert(kSize >= 4, ""); 98 | Value(uint32_t value = 0) : value_(value) {} // NOLINT 99 | operator uint32_t() const { return value_; } // NOLINT 100 | 101 | private: 102 | uint32_t value_; 103 | }; 104 | 105 | // Use a ~zero cost hash function. The purpose of this benchmark is to 106 | // focus on the implementations of the containers, not the quality or 107 | // speed of their hash functions. Since we're bypassing the default hasher 108 | // for SwissTable and disabling the bit mixer that is normally applied to 109 | // untrusted hash functors by F14, we need to ensure that bits 30 and 31 110 | // of the key (which are always 0 and 1 respectively for keys inserted 111 | // to the hash table) don't end up causing collisions or affecting the 112 | // vector filtering step. SwissTable uses bits 0..6 for filtering; F14 113 | // uses bits 56..62. This hasher puts entropy in all bits of the hash 114 | // result except 30..33 (inclusive), which is enough to avoid collisions 115 | // for capacity 2^23 for SwissTable and 2^30 for F14. 
116 | struct Hash { 117 | using folly_is_avalanching = std::true_type; 118 | 119 | size_t operator()(uint32_t x) const noexcept { 120 | return (size_t{x} << 34) | x; 121 | } 122 | }; 123 | 124 | struct Eq { 125 | bool operator()(uint32_t x, uint32_t y) const { 126 | ++num_calls; 127 | return x == y; 128 | } 129 | static size_t num_calls; 130 | }; 131 | 132 | size_t Eq::num_calls; 133 | 134 | // The highest order bit is set <=> it's a special value. 135 | constexpr uint32_t kEmpty = 1U << 31; 136 | constexpr uint32_t kDeleted = 3U << 30; 137 | 138 | uint32_t RandomNonSpecial() { 139 | std::uniform_int_distribution dis(0, (1U << 31) - 1); 140 | return dis(GetRNG()); 141 | } 142 | 143 | // See Hash 144 | uint32_t RandomExistent() { return RandomNonSpecial() | (1U << 30); } 145 | uint32_t RandomNonexistent() { return RandomNonSpecial() & ~(1U << 30); } 146 | 147 | template 148 | void Init(Container* c) {} 149 | 150 | template 151 | void Reserve(Container* c, size_t n) { 152 | return c->reserve(n); 153 | } 154 | 155 | template 156 | void Init(google::dense_hash_set* s) { 157 | s->set_empty_key(kEmpty); 158 | s->set_deleted_key(kDeleted); 159 | } 160 | 161 | template 162 | void Reserve(google::dense_hash_set* s, size_t n) { 163 | s->resize(n); 164 | } 165 | 166 | template 167 | void Reserve(__gnu_cxx::hash_set* c, size_t n) {} 168 | 169 | template 170 | double MaxLoadFactor(const Container& c) { 171 | return c.max_load_factor(); 172 | } 173 | 174 | template 175 | double MaxLoadFactor(const __gnu_cxx::hash_set& c) { 176 | return 1.; 177 | } 178 | 179 | template 180 | double LoadFactor(const Container& c) { 181 | // Do not use load_factor() because hash_map does not provide such function. 182 | return 1. 
* c.size() / c.bucket_count(); 183 | } 184 | 185 | template 186 | double RelativeLoadFactor(const Container& c) { 187 | return LoadFactor(c) / MaxLoadFactor(c); 188 | } 189 | 190 | enum class Density { 191 | kMin, // minimum load factor 192 | kMax, // maximum load factor 193 | }; 194 | 195 | // Returns a set filled with random data with size at least min_size and either 196 | // low or high load factor depending on the requested density. Calling this 197 | // function multiple times with the same arguments will yield sets with the same 198 | // size and the same number of buckets. Their elements can be different. 199 | template 200 | Set GenerateSet(size_t min_size, Density density) { 201 | Set set; 202 | Init(&set); 203 | Reserve(&set, min_size - 1); // -1 is a workaround for dense_hash_set 204 | std::vector v; 205 | v.reserve(min_size); 206 | // +1 is to ensure the final set size is at least min_size. 207 | while (set.size() < (density == Density::kMax ? min_size + 1 : min_size)) { 208 | uint32_t elem = RandomExistent(); 209 | if (set.insert(elem).second) v.push_back(elem); 210 | } 211 | size_t bucket_count = set.bucket_count(); 212 | while (true) { 213 | uint32_t elem = RandomExistent(); 214 | if (!set.insert(elem).second) continue; 215 | v.push_back(elem); 216 | if (set.bucket_count() > bucket_count) { 217 | if (density == Density::kMax) { 218 | Set empty; 219 | Init(&empty); 220 | set.swap(empty); 221 | // Drop two elements instead of one as a workaround for dense_hash_set. 222 | assert(v.size() >= 2); 223 | set.insert(v.begin(), v.end() - 2); 224 | } 225 | return set; 226 | } 227 | } 228 | } 229 | 230 | // Generates several random sets with GenerateSet(min_size, density). The 231 | // sum of the set sizes is at least min_total_size. 
232 | template 233 | HT_BENCH_NOINLINE std::vector GenerateSets(size_t min_size, 234 | size_t min_total_size, 235 | Density density) { 236 | GetRNG() = MakeRNG(); 237 | size_t total_size = 0; 238 | std::vector res; 239 | res.reserve(min_total_size / min_size + 1); 240 | while (total_size < min_total_size) { 241 | Set set = GenerateSet(min_size, density); 242 | total_size += set.size(); 243 | res.push_back(std::move(set)); 244 | // This requirement makes benchmarks a bit simpler but it can be removed 245 | // when it becomes necessary to benchmark classes that violate it. 246 | assert(res.front().size() == res.back().size()); 247 | } 248 | return res; 249 | } 250 | 251 | template 252 | std::vector ToVector(const Set& set) { 253 | std::vector res(set.size()); 254 | std::copy(set.begin(), set.end(), res.begin()); 255 | return res; 256 | } 257 | 258 | template