├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── COPYING ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── hashmap.cpp ├── hashset.cpp └── serialization_and_memsize.cpp ├── include └── tudocomp │ └── util │ ├── compact_hash │ ├── decomposed_key_t.hpp │ ├── entry_t.hpp │ ├── hash_functions.hpp │ ├── index_structure │ │ ├── cv_bvs_t.hpp │ │ ├── displacement_t.hpp │ │ ├── elias_gamma_displacement_table_t.hpp │ │ ├── layered_displacement_table_t.hpp │ │ └── naive_displacement_table_t.hpp │ ├── map │ │ ├── hashmap_t.hpp │ │ ├── satellite_data_t.hpp │ │ ├── typedefs.hpp │ │ ├── val_quot_bucket_layout_t.hpp │ │ └── val_quot_ptrs_t.hpp │ ├── set │ │ ├── hashset_t.hpp │ │ ├── no_satellite_data_t.hpp │ │ ├── quot_bucket_layout_t.hpp │ │ ├── quot_ptr_t.hpp │ │ └── typedefs.hpp │ ├── size_manager_t.hpp │ ├── storage │ │ ├── bucket_t.hpp │ │ ├── buckets_bv_t.hpp │ │ ├── plain_sentinel_t.hpp │ │ └── sparse_pos_t.hpp │ └── util.hpp │ ├── heap_size.hpp │ ├── object_size_t.hpp │ └── serialization.hpp └── test ├── CMakeLists.txt ├── compact_hash_displacement_tests.cpp ├── compact_hash_elias_displacement_tests.cpp ├── compact_hash_tests.cpp ├── compact_hash_tests.template.hpp ├── compact_hashset_tests.template.hpp ├── compact_sparse_hash_displacement_tests.cpp ├── compact_sparse_hash_elias_displacement_tests.cpp ├── compact_sparse_hash_tests.cpp ├── compact_sparse_hashset_displacement_tests.cpp ├── compact_sparse_hashset_elias_displacement_tests.cpp ├── compact_sparse_hashset_serialization_tests.cpp ├── compact_sparse_hashset_tests.cpp ├── sandbox_test.cpp └── v2_tests.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/bit_span"] 2 | path = submodules/bit_span 3 | url = https://github.com/tudocomp/bit_span.git 4 | [submodule "submodules/build_system"] 5 | path = submodules/build_system 6 | url = https://github.com/tudocomp/build_system 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2 FATAL_ERROR) 2 | 3 | project (compact_sparse_hash) 4 | 5 | # Check if this project is build standalone 6 | # 7 | # We do this in case we want to use this repo as a GIT submodule, 8 | # because then we only need the source files themselves 9 | if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) 10 | set(CSH_STANDALONE 1) 11 | endif() 12 | 13 | if(CSH_STANDALONE) 14 | # init build system 15 | execute_process(COMMAND git submodule update --init -- build_system 16 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/submodules) 17 | 18 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/submodules/build_system/cmakemodules") 19 | include(tdc_init) 20 | 21 | # downloadable dependencies 22 | include(depend_glog) 23 | 24 | # quit if dependencies aren't met 25 | tdc_check_hard_deps() 26 | if(TDC_DEPS_MISSING) 27 | return() 28 | endif() 29 | 30 | # soft dependencies 31 | include(softdepend_gtest) 32 | 33 | # submodules 34 | include(git_submodule_subdirectories) 35 | git_submodule_subdirectory(submodules/build_system) 36 | git_submodule_subdirectory(submodules/bit_span) 37 | endif() 38 | 39 | # Main target 40 | add_library(compact_sparse_hash INTERFACE) 41 | 
target_link_libraries(compact_sparse_hash INTERFACE bit_span) 42 | target_include_directories(compact_sparse_hash INTERFACE include) 43 | 44 | if(CSH_STANDALONE) 45 | # Unit tests 46 | add_subdirectory(test) 47 | 48 | # Examples 49 | add_subdirectory(examples) 50 | 51 | # Disclaimer 52 | MESSAGE(STATUS "Built Type: " ${CMAKE_BUILD_TYPE} ) 53 | endif() 54 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | TuDoComp - TU Dortmund lossless compression framework 2 | Copyright (C) 2016 Patrick Dinklage, Dominik Köppl, Marvin Löbel, Johannes Fischer 3 | Contact found at: https://ls11-www.cs.tu-dortmund.de/staff/koeppl 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Compact Sparse Hash Table 2 | ======== 3 | 4 | The compact sparse hash table is a blend of compact hashing [1] and 5 | [Google's sparse hash table](https://github.com/sparsehash/sparsehash). 6 | Our hash table is more memory efficient than both variants when the hash table is not much filled. 7 | The restriction is that it can only hash integer keys, but of arbitrary bit width. 8 | 9 | # Why? 
10 | The main idea is to use the compact sparse hash table as a dynamic dictionary for
11 | maintaining a set of (key,value)-pairs, or kv-pairs for short, where the keys are integer values.
12 | It is especially useful when memory efficiency is a priority, since the table stores the keys bit-aligned.
13 | Therefore, it is crucial to specify the bit width of a key. The bit width can be updated online.
14 | For instance, compact hash tables and sparse hash tables are already used for computing LZ78 [2].
15 | 
16 | # Usage
17 | 
18 | A minimal example is
19 | ```C++
20 | #include
21 | ...
22 | // creates a hash table with zero entries and sets the bit width of the keys to four
23 | auto map = tdc::compact_hash::map::sparse_cv_hashmap_t(0, 4);
24 | for(int i = 0; i <= 15; ++i) { // the interval [0..15] can be represented by four bits
25 |     map.insert(i, i*i); // insert key i, value i*i
26 |     std::cout << i << " -> " << map[i] << std::endl; // map[i] returns the value i*i stored with key i
27 | }
28 | ```
29 | 
30 | # How it works
31 | The idea of a hash table is to maintain a set of (key,value)-pairs, or kv-pairs for short.
32 | 
33 | Our table applies the approach of Cleary [1], in which a _bijective_ hash function
34 | determines the _initial position_, i.e., the position at which we first try to store a kv-pair
35 | (in case of a collision the pair cannot be stored there).
36 | The bijective hash function allows us to store only a fragment of the key, called the _quotient_, in the hash table.
37 | The complete key of a kv-pair can be restored from the quotient and the additional knowledge of the initial address of the kv-pair.
38 | Unfortunately, due to collisions, it can happen that a kv-pair is misplaced (i.e., it is not
39 | stored at its initial address).
40 | The initial address can be restored by additionally maintaining two bit vectors, and by restricting the
41 | collision resolution to linear probing.
42 | The bit vectors track the misplacements such that we can recompute the initial address of a stored kv-pair.
43 | Each of the two additional bit vectors stores one bit for each position in the hash table.
44 | In summary, this technique saves space by not storing the full keys, but only their quotients.
45 | 
46 | To further slim down the space footprint, we apply the trick of the sparse hash table:
47 | instead of allocating a large hash table, we allocate a vector of pointers to buckets.
48 | Each bucket represents a section of length `B` of the hash table, such that we have `n/B` buckets if the hash table is of size `n`
49 | (we ensure that `n` is divisible by `B` such that all buckets have the same length `B`).
50 | Although a bucket can store up to `B` elements, it only allocates space for the kv-pairs actually stored in it.
51 | For that, it stores a bit vector of length `B` that marks with a one all positions in its section of the hash table that are actually occupied by a
52 | kv-pair.
53 | The kv-pair corresponding to the `i`-th one in the bit vector (i.e., the `i`-th one in the bit vector has rank `i`)
54 | is the `i`-th element stored in the bucket.
55 | Given that we want to access the element at the `j`-th position of the section belonging to a bucket,
56 | we know that the `j`-th position is marked with a one in the bit vector, but not the rank of this one.
57 | To compute the rank of the one at the `j`-th position, we count how many ones up to the `j`-th position are stored in the bit vector of the bucket.
58 | Remember that the rank is the entry number of the element in the bucket that we want to access.
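The following is a minimal, self-contained sketch of this lookup for a single bucket with `B = 64`; it only illustrates the rank-via-`popcount` idea and is not the library's actual `bucket_t` implementation (which additionally stores quotients and values bit-packed):

```C++
#include <bitset>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Toy bucket covering a section of B = 64 table positions.
// `occupied` marks the used positions; only those entries are physically
// stored, in position order, in the compact `entries` vector.
struct toy_bucket {
    uint64_t occupied = 0;
    std::vector<uint64_t> entries;

    // rank(j): number of occupied positions strictly before position j,
    // i.e., the index of position j's entry inside `entries`.
    size_t rank(size_t j) const {
        assert(j < 64);
        uint64_t before = (j == 0) ? 0 : (occupied & (~uint64_t(0) >> (64 - j)));
        return std::bitset<64>(before).count(); // a single popcount on modern CPUs
    }

    bool is_occupied(size_t j) const { return (occupied >> j) & 1; }

    // Insert at position j: set its bit and splice the entry in at its rank,
    // shifting the following entries to the right (as in a std::vector).
    void insert(size_t j, uint64_t value) {
        assert(!is_occupied(j));
        entries.insert(entries.begin() + rank(j), value);
        occupied |= uint64_t(1) << j;
    }

    uint64_t get(size_t j) const {
        assert(is_occupied(j));
        return entries[rank(j)];
    }
};

int main() {
    toy_bucket b;
    b.insert(5, 500);
    b.insert(2, 200);
    b.insert(60, 6000);
    std::cout << b.get(2) << " " << b.get(5) << " " << b.get(60) << "\n"; // 200 500 6000
}
```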
59 | By keeping `B` small enough, we argue that the entire bucket can be stored in cache, allowing us to work on the bit vector
60 | using modern CPU instructions like `popcount`.
61 | When inserting a new kv-pair into a bucket, we update the bit vector and shift the stored elements accordingly
62 | (as in a standard `std::vector`). However, this is not a performance bottleneck since, again, with a sufficiently small bucket size,
63 | this operation is computed efficiently on modern hardware.
64 | Currently, we have set the bucket size `B` to 64.
65 | 
66 | 
67 | # API
68 | We have a `set` and a `map` interface to the (sparse) compact hash table:
69 | - `tdc::compact_hash::set::hashset_t`
70 | - `tdc::compact_hash::map::hashmap_t`
71 | Each of these hash table classes is templated by the following parameters:
72 | - the hash function,
73 | - how the storage of the hash table is represented (e.g., sparse),
74 | - how entries that are not stored at their initial address are maintained, i.e., how the displacement works:
75 |   - `cv_bvs_t`: the approach of Cleary using two bit vectors that store a virgin and a change bit per position
76 |   - `displacement_t`: using a displacement array represented by `T`, which can be
77 |     - `layered_displacement_table_t`: the recursive m-Bonsai approach of [3], where we implemented the simpler practical variant that uses an integer array with fixed bit width `i` and an auxiliary `std::unordered_map` for storing displacement values that cannot be represented with `i` bits,
78 |     - `elias_gamma_displacement_table_t`: the gamma m-Bonsai approach of [3],
79 |     - `naive_displacement_table_t`: stores the displacement array as a plain array of `size_t` integers (for debugging purposes).
80 | 
81 | The `hashset_t` class has the following helpful methods:
82 | - `lookup(key)` looks up a key and returns an `entry_t`,
83 | - `lookup_insert(key)` additionally inserts `key` if it is not present,
84 | - `lookup_insert_key_width(key, key_width)` works like the above, but additionally increases the bit width of the keys to `key_width`,
85 | - `grow_key_width(key_width)` increases the bit width of the keys to `key_width`.
86 | 
87 | All `lookup*` methods return an `entry_t` object, which contains an _id_ (`uint64_t`)
88 | that is unique and immutable until the hash table needs to be rehashed.
89 | This _id_ is computed based on the displacement setting:
90 | - For `displacement_t` it is the position in the hash table the entry was hashed to. The id needs `log2(table_size)` bits.
91 | - For `cv_bvs_t` it is composed of the initial address and the local position within the entry's group (`cv_bvs_t` clusters all entries with the same initial address into one group):
92 |   `id = initial_address | (local_position << log2(table_size))`. The id needs `log2(table_size) + log2(x)` bits, where `x` is the size of the specific group (which is at most the maximal number of collisions at an initial address).
93 | 
94 | It is possible to let the hash table call an event handler before it rehashes its contents.
95 | For that, methods that can cause a rehashing provide a template parameter `on_resize_t` that can be set to an event handler.
96 | See the class `default_on_resize_t` in `hashset_t` for an example.
97 | 
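As a usage sketch combining the methods above (the header paths and template arguments are assumptions derived from the repository layout and from `examples/hashset.cpp`, so treat this as illustrative rather than authoritative):

```C++
#include <cstdint>
#include <iostream>

#include <tudocomp/util/compact_hash/hash_functions.hpp>
#include <tudocomp/util/compact_hash/index_structure/cv_bvs_t.hpp>
#include <tudocomp/util/compact_hash/set/hashset_t.hpp>

using set_type = tdc::compact_hash::set::hashset_t<
    tdc::compact_hash::poplar_xorshift_t, // bijective hash function
    tdc::compact_hash::cv_bvs_t           // Cleary-style c/v bit vectors
>;

int main() {
    // capacity 0, key bit width 4
    auto set = set_type(0, 4);

    // insert keys that fit into 4 bits
    for (uint64_t key = 0; key < 16; ++key) {
        auto e = set.lookup_insert(key);
        (void) e; // e.id() stays valid until the table has to be rehashed
    }

    // insert a key that needs 8 bits, growing the key width on the fly ...
    set.lookup_insert_key_width(200, 8);
    // ... or grow the width explicitly before inserting:
    // set.grow_key_width(8);

    // look up a key without inserting it
    auto r = set.lookup(200);
    if (r.found()) {
        std::cout << "found, id = " << r.id() << std::endl;
    }
}
```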
98 | # Constraints
99 | 
100 | * keys have to be integers
101 | * linear probing for collision handling
102 | * hash table size is always a power of two
103 | * hash function must be bijective
104 | * API is not STL-conformant
105 | 
106 | # Features
107 | * The bit width of the keys can be updated online.
108 |   Changing the bit width causes a rehashing of the complete hash table.
109 | * Supports multiple hash functions. Currently, a `xorshift` hash function is implemented.
110 | * On resizing the hash table, each bucket of the old hash table is rehashed and subsequently freed,
111 |   so that there is no large memory peak as in traditional hash tables, which need to keep the entire old and new hash table
112 |   in RAM during a resize operation.
113 | 
114 | # Serialization
115 | 
116 | We offer a serialization API for the `set` interface:
117 | 
118 | ```c++
119 | #include
120 | 
121 | using tdc::serialize;
122 | using table_t = tdc::compact_hash::set::hashset_t<...>;
123 | 
124 | table_t a = table_t(...);
125 | 
126 | std::stringstream ss;
127 | 
128 | // serialize to any std::ostream:
129 | serialize<table_t>::write(ss, a);
130 | 
131 | // deserialize from any std::istream:
132 | table_t b = serialize<table_t>::read(ss);
133 | ```
134 | 
135 | # Dependencies
136 | 
137 | The project is written in modern `C++14`.
138 | It uses `cmake` to build the library.
139 | 
140 | The external dependencies are:
141 | 
142 | * [Google Logging (glog)](https://github.com/google/glog) (0.34 or later).
143 | * [Google Test](https://github.com/google/googletest) (1.7.0 or later) __[Just for running the unit tests]__.
144 | 
145 | `cmake` first searches for the external dependencies on the system
146 | and otherwise downloads and builds them automatically from their official repositories.
147 | Hence, a prior installation of the dependencies is not required.
148 | 
149 | # License
150 | 
151 | The code in this repository is published under the
152 | [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
153 | 
154 | # Todo
155 | * When additionally restricting the values to be integers, we can avoid padding:
156 |   we currently byte-align the values to allow the reinterpretation of their contents (just by casting).
157 |   By restricting to integer values, we can write the values bit-compactly in a bit vector.
158 | * Additionally, in the case that the values are integers,
159 |   we want to support setting the bit width of the values online to further slim down memory consumption.
160 | * The hash table currently does not support the deletion of a kv-pair.
161 | * Support variable bucket sizes `B`
162 | 
163 | # Related Work
164 | * [Dynpdt: dynamic path-decomposed trie](https://github.com/kampersanda/dynpdt), a space-efficient dynamic keyword dictionary. It supports strings as values.
165 | * [mame-Bonsai](https://github.com/Poyias/mBonsai), a compact hash table implementation used as a trie data structure
166 | * [Bonsai trie reimplementation](https://github.com/kampersanda/bonsais), a reimplementation of the previous trie data structure
167 | 
168 | # References
169 | * [1] J. G. Cleary. Compact hash tables using bidirectional linear probing. IEEE Trans. Computers, 33(9): 828-834, 1984.
170 | * [2] J. Fischer, D. Köppl: Practical Evaluation of Lempel-Ziv-78 and Lempel-Ziv-Welch Tries. SPIRE 2017: 191-207.
171 | * [3] A. Poyias, R. Raman: Improved Practical Compact Dynamic Tries.
SPIRE 2015: 324-336 172 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_custom_target(examples) 2 | 3 | # Create executable for every *.cpp file 4 | FILE(GLOB children RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) 5 | FOREACH(child ${children}) 6 | get_filename_component(executable ${child} NAME_WE) 7 | 8 | add_executable( 9 | ${executable} 10 | 11 | ${child} 12 | ) 13 | 14 | target_include_directories(${executable} INTERFACE include) 15 | 16 | target_link_libraries( 17 | ${executable} 18 | 19 | compact_sparse_hash 20 | ) 21 | add_dependencies(examples ${executable}) 22 | 23 | ENDFOREACH() 24 | -------------------------------------------------------------------------------- /examples/hashmap.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | using map_type = tdc::compact_hash::map::plain_elias_hashmap_t; 10 | 11 | int main() { 12 | // creates a hash table with zero entries, set the bit-width of the keys to four 13 | auto map = map_type(0, 4); 14 | for(int i = 0; i <= 15; ++i) { // interval [0..15] can be represented by four bits 15 | map.insert(i, std::move(i*i)); // insert key i, value i*i 16 | std::cout << i << " -> " << map[i] << std::endl; // map[i] returns value i*i with key i 17 | 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /examples/hashset.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using set_type = tdc::compact_hash::set::hashset_t< 12 | tdc::compact_hash::poplar_xorshift_t, 13 | tdc::compact_hash::cv_bvs_t 14 | >; 15 | 16 | int main() { 17 | // creates a set with capacity zero and bit-width five 18 | auto set = set_type(0, 5); 19 | for(int i = 0; i <= 4; ++i) { // can hash keys in the range [0..2**5-1] 20 | set.lookup_insert(i*i); 21 | } 22 | for(int i = 0; i <= 15; ++i) { 23 | auto ret = set.lookup(i); 24 | if(ret.found()) { 25 | std::cout << "Id of node : " << ret.id() << std::endl; // returns the unique ID of the entry. This ID does not change until resizing occurs. 
26 | std::cout << i << " -> " << ret.found() << std::endl; // checks whether set[i] is set 27 | std::cout << std::endl; 28 | } 29 | } 30 | std::stringstream ss; 31 | tdc::serialize::write(ss, set); 32 | std::cout << ss.str() << std::endl; 33 | 34 | } 35 | -------------------------------------------------------------------------------- /examples/serialization_and_memsize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | template 12 | using map_type = tdc::compact_hash::map::sparse_elias_hashmap_t; 13 | 14 | int main() { 15 | // creates a hash table with default capacity and initial bit widths 16 | auto map = map_type(); 17 | for(int i = 0; i < 1000; ++i) { 18 | auto key = i; 19 | auto val = i*i + 42; 20 | 21 | map.insert_kv_width(key, std::move(val), tdc::bits_for(key), tdc::bits_for(val)); 22 | } 23 | 24 | std::cout << "elements in map: " << map.size() << std::endl; 25 | std::cout << "key width: " << map.key_width() << " bits" << std::endl; 26 | std::cout << "value width: " << map.value_width() << " bits" << std::endl; 27 | 28 | // this could just be an `ofstream` for outputting to a file. 29 | std::stringstream output_stream; 30 | 31 | // compute size of the datastructure 32 | auto heap_object_size = tdc::heap_size_compute(map); 33 | 34 | // serialize the datastructure 35 | auto written_object_size = tdc::serialize_write(output_stream, map); 36 | 37 | std::cout << "total heap size of initial map: " << heap_object_size.size_in_bytes() << std::endl; 38 | std::cout << "serialized bytes: " << written_object_size.size_in_bytes() << std::endl; 39 | 40 | auto deserialized_map = tdc::serialize_read>(output_stream); 41 | auto heap_object_size2 = tdc::heap_size_compute(deserialized_map); 42 | 43 | std::cout << "total heap size of deserialized map: " << heap_object_size2.size_in_bytes() << std::endl; 44 | } 45 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/decomposed_key_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace tdc {namespace compact_hash { 6 | struct decomposed_key_t { 7 | size_t initial_address; // initial address of key in table 8 | uint64_t stored_quotient; // quotient value stored in table 9 | }; 10 | }} 11 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/entry_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace tdc {namespace compact_hash { 4 | 5 | template 6 | class generic_entry_t { 7 | uint64_t m_id; 8 | bool m_key_already_exist; 9 | bool m_not_found; 10 | entry_ptr m_ptr; 11 | 12 | inline generic_entry_t(uint64_t id, bool key_already_exist, bool not_found, entry_ptr ptr): 13 | m_id(id), 14 | m_key_already_exist(key_already_exist), 15 | m_not_found(not_found), 16 | m_ptr(ptr) {} 17 | public: 18 | /// Creates a `entry_t` for a key that already exists in the table. 19 | /// 20 | /// The _id_ is an integer that uniquely describes the key, 21 | /// while only taking up approximately log2(table_size) bits. 22 | /// It gets invalidated if the underlying table needs to be resized. 
23 | inline static generic_entry_t found_exist(uint64_t id, entry_ptr ptr) { 24 | return generic_entry_t { 25 | id, 26 | true, 27 | false, 28 | ptr, 29 | }; 30 | } 31 | 32 | /// Creates a `entry_t` for a new key in the table. 33 | /// 34 | /// The _id_ is an integer that uniquely describes the key, 35 | /// while only taking up approximately log2(table_size) bits. 36 | /// It gets invalidated if the underlying table needs to be resized. 37 | inline static generic_entry_t found_new(uint64_t id, entry_ptr ptr) { 38 | return generic_entry_t { 39 | id, 40 | false, 41 | false, 42 | ptr, 43 | }; 44 | } 45 | 46 | /// Creates a `entry_t` for a key that could not be found in the table. 47 | inline static generic_entry_t not_found() { 48 | return generic_entry_t { 49 | 0, 50 | false, 51 | true, 52 | entry_ptr(), 53 | }; 54 | } 55 | 56 | /// Returns true if the key exists in the table. 57 | inline bool found() const { 58 | return !m_not_found; 59 | } 60 | 61 | /// Returns the _id_ of the key. 62 | /// 63 | /// The _id_ is an integer that uniquely describes the key, 64 | /// while only taking up approximately log2(table_size) bits. 65 | /// It gets invalidated if the underlying table needs to be resized. 66 | inline uint64_t id() const { 67 | DCHECK(found()); 68 | return m_id; 69 | } 70 | 71 | /// Returns true if the key already exists in the table. 72 | inline bool key_already_exist() const { 73 | DCHECK(found()); 74 | return m_key_already_exist; 75 | } 76 | 77 | /// Return the ptr to the data, if it exists. 78 | inline entry_ptr ptr() const { 79 | DCHECK(found()); 80 | return m_ptr; 81 | } 82 | }; 83 | 84 | }} 85 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/hash_functions.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // Source: https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/bijective_hash.hpp 7 | namespace poplar{namespace bijective_hash { 8 | 9 | // (p, q): p < 2**w is a prime and q < 2**w is an integer such that pq mod m = 1 10 | constexpr uint64_t PRIME_TABLE[][2][3] = { 11 | {{0ULL, 0ULL, 0ULL}, {0ULL, 0ULL, 0ULL}}, // 0 12 | {{1ULL, 1ULL, 1ULL}, {1ULL, 1ULL, 1ULL}}, // 1 13 | {{3ULL, 1ULL, 3ULL}, {3ULL, 1ULL, 3ULL}}, // 2 14 | {{7ULL, 5ULL, 3ULL}, {7ULL, 5ULL, 3ULL}}, // 3 15 | {{13ULL, 11ULL, 7ULL}, {5ULL, 3ULL, 7ULL}}, // 4 16 | {{31ULL, 29ULL, 23ULL}, {31ULL, 21ULL, 7ULL}}, // 5 17 | {{61ULL, 59ULL, 53ULL}, {21ULL, 51ULL, 29ULL}}, // 6 18 | {{127ULL, 113ULL, 109ULL}, {127ULL, 17ULL, 101ULL}}, // 7 19 | {{251ULL, 241ULL, 239ULL}, {51ULL, 17ULL, 15ULL}}, // 8 20 | {{509ULL, 503ULL, 499ULL}, {341ULL, 455ULL, 315ULL}}, // 9 21 | {{1021ULL, 1019ULL, 1013ULL}, {341ULL, 819ULL, 93ULL}}, // 10 22 | {{2039ULL, 2029ULL, 2027ULL}, {455ULL, 1509ULL, 195ULL}}, // 11 23 | {{4093ULL, 4091ULL, 4079ULL}, {1365ULL, 819ULL, 3855ULL}}, // 12 24 | {{8191ULL, 8179ULL, 8171ULL}, {8191ULL, 4411ULL, 4291ULL}}, // 13 25 | {{16381ULL, 16369ULL, 16363ULL}, {5461ULL, 4369ULL, 12483ULL}}, // 14 26 | {{32749ULL, 32719ULL, 32717ULL}, {13797ULL, 10031ULL, 1285ULL}}, // 15 27 | {{65521ULL, 65519ULL, 65497ULL}, {4369ULL, 3855ULL, 36969ULL}}, // 16 28 | {{131071ULL, 131063ULL, 131059ULL}, {131071ULL, 29127ULL, 110907ULL}}, // 17 29 | {{262139ULL, 262133ULL, 262127ULL}, {209715ULL, 95325ULL, 200463ULL}}, // 18 30 | {{524287ULL, 524269ULL, 524261ULL}, {524287ULL, 275941ULL, 271853ULL}}, // 19 31 | {{1048573ULL, 1048571ULL, 1048559ULL}, 
{349525ULL, 209715ULL, 986895ULL}}, // 20 32 | {{2097143ULL, 2097133ULL, 2097131ULL}, {1864135ULL, 1324517ULL, 798915ULL}}, // 21 33 | {{4194301ULL, 4194287ULL, 4194277ULL}, {1398101ULL, 986895ULL, 3417581ULL}}, // 22 34 | {{8388593ULL, 8388587ULL, 8388581ULL}, {1118481ULL, 798915ULL, 3417581ULL}}, // 23 35 | {{16777213ULL, 16777199ULL, 16777183ULL}, {5592405ULL, 986895ULL, 15760415ULL}}, // 24 36 | {{33554393ULL, 33554383ULL, 33554371ULL}, {17207401ULL, 31500079ULL, 15952107ULL}}, // 25 37 | {{67108859ULL, 67108837ULL, 67108819ULL}, {53687091ULL, 62137837ULL, 50704475ULL}}, // 26 38 | {{134217689ULL, 134217649ULL, 134217617ULL}, {17207401ULL, 113830225ULL, 82223473ULL}}, // 27 39 | {{268435399ULL, 268435367ULL, 268435361ULL}, {131863031ULL, 96516119ULL, 186492001ULL}}, // 28 40 | {{536870909ULL, 536870879ULL, 536870869ULL}, {357913941ULL, 32537631ULL, 274678141ULL}}, // 29 41 | {{1073741789ULL, 1073741783ULL, 1073741741ULL}, {889671797ULL, 1047552999ULL, 349289509ULL}}, // 30 42 | {{2147483647ULL, 2147483629ULL, 2147483587ULL}, {2147483647ULL, 1469330917ULL, 1056139499ULL}}, // 31 43 | {{4294967291ULL, 4294967279ULL, 4294967231ULL}, {858993459ULL, 252645135ULL, 1057222719ULL}}, // 32 44 | {{8589934583ULL, 8589934567ULL, 8589934543ULL}, {7635497415ULL, 1030792151ULL, 3856705327ULL}}, // 33 45 | {{17179869143ULL, 17179869107ULL, 17179869071ULL}, {9637487591ULL, 11825104763ULL, 12618841967ULL}}, // 34 46 | {{34359738337ULL, 34359738319ULL, 34359738307ULL}, {1108378657ULL, 21036574511ULL, 22530975979ULL}}, // 35 47 | {{68719476731ULL, 68719476719ULL, 68719476713ULL}, {13743895347ULL, 64677154575ULL, 8963410009ULL}}, // 36 48 | {{137438953447ULL, 137438953441ULL, 137438953427ULL}, {43980465111ULL, 35468117025ULL, 70246576219ULL}}, // 37 49 | {{274877906899ULL, 274877906857ULL, 274877906837ULL}, {207685529691ULL, 41073710233ULL, 208085144509ULL}}, // 38 50 | {{549755813881ULL, 549755813869ULL, 549755813821ULL}, {78536544841ULL, 347214198245ULL, 369238979477ULL}}, // 39 51 | {{1099511627689ULL, 1099511627609ULL, 1099511627581ULL}, {315951617177ULL, 928330176745ULL, 343949791253ULL}}, // 40 52 | {{2199023255531ULL, 2199023255521ULL, 2199023255497ULL}, {209430786243ULL, 1134979744801ULL, 1119502748281ULL}}, // 41 53 | {{4398046511093ULL, 4398046511087ULL, 4398046511071ULL}, {1199467230301ULL, 3363212037903ULL, 3331853417503ULL}}, // 42 54 | {{8796093022151ULL, 8796093022141ULL, 8796093022091ULL}, {8178823336439ULL, 918994793365ULL, 2405769031715ULL}}, // 43 55 | {{17592186044399ULL, 17592186044299ULL, 17592186044297ULL}, {16557351571215ULL, 2405769031715ULL, 2365335938745ULL}}, // 44 56 | {{35184372088777ULL, 35184372088763ULL, 35184372088751ULL}, {27507781814905ULL, 17847145262451ULL, 11293749065551ULL}}, // 45 57 | {{70368744177643ULL, 70368744177607ULL, 70368744177601ULL}, {13403570319555ULL, 34567102403063ULL, 4467856773185ULL}}, // 46 58 | {{140737488355213ULL, 140737488355201ULL, 140737488355181ULL}, {88113905752901ULL, 4432676798593ULL, 22020151239269ULL}}, // 47 59 | {{281474976710597ULL, 281474976710591ULL, 281474976710567ULL}, {100186008659725ULL, 4330384257087ULL, 123342967322647ULL}}, // 48 60 | {{562949953421231ULL, 562949953421201ULL, 562949953421189ULL}, {222399981598543ULL, 25358106009969ULL, 366146311168333ULL}}, // 49 61 | {{1125899906842597ULL, 1125899906842589ULL, 1125899906842573ULL}, {667199944795629ULL, 289517118902389ULL, 286994093901061ULL}}, // 50 62 | {{2251799813685119ULL, 2251799813685109ULL, 2251799813685083ULL}, {558586000294015ULL, 161999986596061ULL, 
232003617167571ULL}}, // 51 63 | {{4503599627370449ULL, 4503599627370353ULL, 4503599627370323ULL}, {3449565672028465ULL, 3558788516733329ULL, 3514369651416283ULL}}, // 52 64 | {{9007199254740881ULL, 9007199254740847ULL, 9007199254740761ULL}, {2840107873116529ULL, 496948924399503ULL, 4991002184445225ULL}}, // 53 65 | {{18014398509481951ULL, 18014398509481931ULL, 18014398509481853ULL}, {16922616781634591ULL, 13595772459986403ULL, 6600695637062101ULL}}, // 54 66 | {{36028797018963913ULL, 36028797018963901ULL, 36028797018963869ULL}, {20962209174669945ULL, 20434243085382549ULL, 11645671763705525ULL}}, // 55 67 | {{72057594037927931ULL, 72057594037927909ULL, 72057594037927889ULL}, {14411518807585587ULL, 18681598454277613ULL, 21463964181510449ULL}}, // 56 68 | {{144115188075855859ULL, 144115188075855823ULL, 144115188075855811ULL}, {88686269585142075ULL, 44116894308935471ULL, 18900352534538475ULL}}, // 57 69 | {{288230376151711687ULL, 288230376151711681ULL, 288230376151711607ULL}, {126416831645487607ULL, 18300341342965825ULL, 136751638320155207ULL}}, // 58 70 | {{576460752303423263ULL, 576460752303423061ULL, 576460752303422971ULL}, {5124095576030431ULL, 2700050362076925ULL, 198471980483577139ULL}}, // 59 71 | {{1152921504606846883ULL, 1152921504606846803ULL, 1152921504606846697ULL}, {12397005425880075ULL, 566464323072728283ULL, 4132335141960025ULL}}, // 60 72 | {{2305843009213693951ULL, 2305843009213693669ULL, 2305843009213693613ULL}, {2305843009213693951ULL, 1768084568902373101ULL, 360500529464087845ULL}}, // 61 73 | {{4611686018427387733ULL, 4611686018427387421ULL, 4611686018427387271ULL}, {4557748170258646525ULL, 152768066863019061ULL, 1515372340968241207ULL}}, // 62 74 | {{9223372036854775291ULL, 9223372036854775279ULL, 9223372036854775181ULL}, {3657236494304118067ULL, 2545580940228350223ULL, 3339243145719352645ULL}}, // 63 75 | {{9223372036854775291ULL, 9223372036854775279ULL, 9223372036854775181ULL}, {3657236494304118067ULL, 11768952977083126031ULL, 3339243145719352645ULL}}, // 64 76 | }; 77 | 78 | class Xorshift { 79 | public: 80 | /// runtime initilization arguments, if any 81 | struct config_args {}; 82 | 83 | /// get the config of this instance 84 | inline config_args current_config() const { return config_args{}; } 85 | 86 | Xorshift() = default; 87 | 88 | inline Xorshift(uint32_t univ_bits, config_args config): 89 | m_bits(univ_bits), 90 | m_shift(univ_bits / 2 + 1) 91 | { 92 | DCHECK_LT(0U, m_bits); 93 | DCHECK_LE(m_bits, 64U); 94 | DCHECK_LT(0, mask()); 95 | } 96 | 97 | inline uint64_t hash(uint64_t x) const { 98 | DCHECK_LE(x, mask()); 99 | x = hash_<0>(x); 100 | x = hash_<1>(x); 101 | x = hash_<2>(x); 102 | return x; 103 | } 104 | 105 | inline uint64_t hash_inv(uint64_t x) const { 106 | DCHECK_LE(x, mask()); 107 | x = hash_inv_<2>(x); 108 | x = hash_inv_<1>(x); 109 | x = hash_inv_<0>(x); 110 | return x; 111 | } 112 | 113 | /// STL compability 114 | inline uint64_t operator()(uint64_t x) const { 115 | return hash(x); 116 | } 117 | 118 | inline uint64_t bits() const { 119 | return m_bits; 120 | } 121 | 122 | inline uint64_t mask() const { 123 | return (-1ULL >> (64-m_bits)); 124 | } 125 | 126 | void show_stat(std::ostream& os) const { 127 | os << "Statistics of Xorshift\n"; 128 | os << " - mask: " << mask() << "\n"; 129 | os << " - bits: " << bits() << "\n"; 130 | } 131 | 132 | private: 133 | uint32_t m_bits{}; 134 | uint32_t m_shift{}; 135 | 136 | template 137 | friend struct ::tdc::serialize; 138 | 139 | template 140 | friend struct ::tdc::heap_size; 141 | 142 | template 143 | 
uint64_t hash_(uint64_t x) const { 144 | DCHECK_LE(x, mask()); 145 | x = x ^ (x >> (m_shift + N)); 146 | x = (x * PRIME_TABLE[bits()][0][N]) & mask(); 147 | return x; 148 | } 149 | template 150 | uint64_t hash_inv_(uint64_t x) const { 151 | x = (x * PRIME_TABLE[bits()][1][N]) & mask(); 152 | x = x ^ (x >> (m_shift + N)); 153 | return x; 154 | } 155 | }; 156 | 157 | }} //ns - poplar::bijective_hash 158 | 159 | 160 | namespace tdc {namespace compact_hash { 161 | 162 | class xorshift_t { 163 | uint64_t m_j; 164 | uint64_t m_w_mask; 165 | 166 | template 167 | friend struct ::tdc::serialize; 168 | 169 | template 170 | friend struct ::tdc::heap_size; 171 | 172 | xorshift_t() = default; 173 | public: 174 | /// runtime initilization arguments, if any 175 | struct config_args {}; 176 | 177 | /// get the config of this instance 178 | inline config_args current_config() const { return config_args{}; } 179 | 180 | /// Constructs a hash function for values with a width of `w` bits. 181 | xorshift_t(uint32_t w, config_args config): 182 | m_j((w / 2ull) + 1) 183 | { 184 | DCHECK_LT((w / 2ull), m_j); 185 | DCHECK_NE(w, 0U); 186 | 187 | // NB: Two shifts because a single shift with w == 64 is undefined 188 | // behavior for a uint64_t according to the C++ standard. 189 | m_w_mask = (1ull << (w - 1ull) << 1ull) - 1ull; 190 | } 191 | 192 | /// This takes a value `x` with a width of `w` bits, 193 | /// and calculates a hash value with a width of `w` bits. 194 | inline uint64_t hash(uint64_t x) const { 195 | uint64_t j = m_j; 196 | uint64_t w_mask = m_w_mask; 197 | 198 | return (x xor ((x << j) & w_mask)) & w_mask; 199 | } 200 | 201 | /// This takes a hash value `x` with a width of `w` bits, 202 | /// and reverses the hash function to the original value. 203 | inline uint64_t hash_inv(uint64_t x) const { 204 | return hash(x); 205 | } 206 | }; 207 | 208 | using poplar_xorshift_t = poplar::bijective_hash::Xorshift; 209 | 210 | } 211 | 212 | template<> 213 | struct heap_size { 214 | using T = compact_hash::xorshift_t; 215 | 216 | static object_size_t compute(T const& val) { 217 | using namespace compact_hash; 218 | 219 | auto bytes = object_size_t::empty(); 220 | 221 | bytes += heap_size::compute(val.m_j); 222 | bytes += heap_size::compute(val.m_w_mask); 223 | 224 | return bytes; 225 | } 226 | }; 227 | 228 | template<> 229 | struct serialize { 230 | using T = compact_hash::xorshift_t; 231 | 232 | static object_size_t write(std::ostream& out, T const& val) { 233 | using namespace compact_hash; 234 | 235 | auto bytes = object_size_t::empty(); 236 | 237 | bytes += serialize::write(out, val.m_j); 238 | bytes += serialize::write(out, val.m_w_mask); 239 | 240 | return bytes; 241 | } 242 | static T read(std::istream& in) { 243 | using namespace compact_hash; 244 | 245 | T ret; 246 | ret.m_j = serialize::read(in); 247 | ret.m_w_mask = serialize::read(in); 248 | return ret; 249 | } 250 | static bool equal_check(T const& lhs, T const& rhs) { 251 | return gen_equal_check(m_j) 252 | && gen_equal_check(m_w_mask); 253 | } 254 | }; 255 | 256 | template<> 257 | struct heap_size { 258 | using T = poplar::bijective_hash::Xorshift; 259 | 260 | static object_size_t compute(T const& val) { 261 | using namespace compact_hash; 262 | 263 | auto bytes = object_size_t::empty(); 264 | 265 | bytes += heap_size::compute(val.m_shift); 266 | bytes += heap_size::compute(val.m_bits); 267 | 268 | return bytes; 269 | } 270 | }; 271 | 272 | template<> 273 | struct serialize { 274 | using T = poplar::bijective_hash::Xorshift; 275 | 276 | static 
object_size_t write(std::ostream& out, T const& val) { 277 | using namespace compact_hash; 278 | 279 | auto bytes = object_size_t::empty(); 280 | 281 | bytes += serialize::write(out, val.m_shift); 282 | bytes += serialize::write(out, val.m_bits); 283 | 284 | return bytes; 285 | } 286 | static T read(std::istream& in) { 287 | using namespace compact_hash; 288 | 289 | T ret; 290 | ret.m_shift = serialize::read(in); 291 | ret.m_bits = serialize::read(in); 292 | return ret; 293 | } 294 | static bool equal_check(T const& lhs, T const& rhs) { 295 | return gen_equal_check(m_shift) 296 | && gen_equal_check(m_bits); 297 | } 298 | }; 299 | 300 | } 301 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/index_structure/cv_bvs_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include "../entry_t.hpp" 8 | 9 | #include 10 | 11 | namespace tdc {namespace compact_hash { 12 | 13 | class cv_bvs_t { 14 | template 15 | friend struct ::tdc::serialize; 16 | 17 | template 18 | friend struct ::tdc::heap_size; 19 | 20 | IntVector> m_cv; 21 | inline cv_bvs_t(IntVector>&& cv): m_cv(std::move(cv)) {} 22 | 23 | public: 24 | /// runtime initilization arguments, if any 25 | struct config_args {}; 26 | 27 | /// get the config of this instance 28 | inline config_args current_config() const { return config_args{}; } 29 | 30 | inline cv_bvs_t(size_t table_size, config_args config) { 31 | m_cv.reserve(table_size); 32 | m_cv.resize(table_size); 33 | } 34 | 35 | /// A Group is a half-open range [group_start, group_end) 36 | /// that corresponds to a group of elements in the hashtable that 37 | /// belong to the same initial_address. 38 | /// 39 | /// This means that `c[group_start] == 1`, and 40 | /// `c[group_start < x < group_end] == 0`. 41 | /// 42 | /// `groups_terminator` points to the next free location 43 | /// inside the hashtable. 44 | struct Group { 45 | size_t group_start; // Group that belongs to the key. 46 | size_t group_end; // It's a half-open range: [start .. end). 47 | size_t groups_terminator; // Next free location. 48 | }; 49 | 50 | template 51 | struct context_t { 52 | using satellite_t = typename storage_t::satellite_t_export; 53 | using entry_width_t = typename satellite_t::entry_bit_width_t; 54 | using entry_ptr_t = typename satellite_t::entry_ptr_t; 55 | using entry_t = generic_entry_t; 56 | using table_pos_t = typename storage_t::table_pos_t; 57 | 58 | IntVector>& m_cv; 59 | size_t const table_size; 60 | entry_width_t widths; 61 | size_mgr_t const& size_mgr; 62 | storage_t& storage; 63 | 64 | /// Getter for the v bit at table position `pos`. 65 | inline bool get_v(size_t pos) { 66 | return (m_cv[pos] & 0b01) != 0; 67 | } 68 | 69 | /// Getter for the c bit at table position `pos`. 70 | inline bool get_c(size_t pos) { 71 | return (m_cv[pos] & 0b10) != 0; 72 | } 73 | 74 | /// Setter for the v bit at table position `pos`. 75 | inline void set_v(size_t pos, bool v) { 76 | auto x = m_cv[pos] & 0b10; 77 | m_cv[pos] = x | (0b01 * v); 78 | } 79 | 80 | /// Setter for the c bit at table position `pos`. 81 | inline void set_c(size_t pos, bool c) { 82 | auto x = m_cv[pos] & 0b01; 83 | m_cv[pos] = x | (0b10 * c); 84 | } 85 | 86 | /// Setter for the c and v bit at table position `pos`. 87 | inline void set_cv(size_t pos, uint8_t v) { 88 | m_cv[pos] = v; 89 | } 90 | 91 | // Assumption: There exists a group at the initial address of `key`. 
92 | // This group is either the group belonging to key, 93 | // or the one after it in the case that no group for `key` exists yet. 94 | inline Group search_existing_group(uint64_t initial_address) { 95 | auto sctx = storage.context(table_size, widths); 96 | auto ret = Group(); 97 | size_t cursor = initial_address; 98 | 99 | // Walk forward from the initial address until we find a empty location. 100 | // TODO: This search could maybe be accelerated by: 101 | // - checking whole blocks in the bucket bitvector for == or != 0 102 | size_t v_counter = 0; 103 | DCHECK_EQ(get_v(cursor), true); 104 | for(; 105 | !sctx.pos_is_empty(sctx.table_pos(cursor)); 106 | cursor = size_mgr.mod_add(cursor)) 107 | { 108 | v_counter += get_v(cursor); 109 | } 110 | DCHECK_GE(v_counter, 1U); 111 | ret.groups_terminator = cursor; 112 | 113 | // Walk back again to find the end of the group 114 | // belonging to the initial address. 115 | size_t c_counter = v_counter; 116 | for(; c_counter != 1; cursor = size_mgr.mod_sub(cursor)) { 117 | c_counter -= get_c(size_mgr.mod_sub(cursor)); 118 | } 119 | ret.group_end = cursor; 120 | 121 | // Walk further back to find the start of the group 122 | // belonging to the initial address 123 | for(; c_counter != 0; cursor = size_mgr.mod_sub(cursor)) { 124 | c_counter -= get_c(size_mgr.mod_sub(cursor)); 125 | } 126 | ret.group_start = cursor; 127 | 128 | return ret; 129 | } 130 | 131 | /// Search a quotient inside an existing Group. 132 | /// 133 | /// This returns a pointer to the value if its found, or null 134 | /// otherwise. 135 | inline entry_t search_in_group(Group const& group, 136 | uint64_t stored_quotient) { 137 | auto sctx = storage.context(table_size, widths); 138 | for(size_t i = group.group_start; i != group.group_end; i = size_mgr.mod_add(i)) { 139 | auto sparse_entry = sctx.at(sctx.table_pos(i)); 140 | 141 | if (sparse_entry.get_quotient() == stored_quotient) { 142 | uint64_t in_group_offset = size_mgr.mod_sub(i, group.group_start); 143 | return entry_t::found_exist(in_group_offset, sparse_entry); 144 | } 145 | } 146 | return entry_t::not_found(); 147 | } 148 | 149 | /// Inserts a new key-value pair after an existing 150 | /// group, shifting all following entries one to the right as needed. 151 | inline entry_ptr_t insert_value_after_group( 152 | Group const& group, uint64_t stored_quotient) 153 | { 154 | auto sctx = storage.context(table_size, widths); 155 | auto end_pos = sctx.table_pos(group.group_end); 156 | if (sctx.pos_is_empty(end_pos)) { 157 | // if there is no following group, just append the new entry 158 | return sctx.allocate_pos(end_pos); 159 | } else { 160 | // else, shift all following elements one to the right 161 | return shift_groups_and_insert(group.group_end, 162 | group.groups_terminator, 163 | stored_quotient); 164 | } 165 | } 166 | 167 | /// Shifts all values and `c` bits of the half-open range [from, to) 168 | /// inside the table one to the right, and inserts the new value 169 | /// at the now-empty location `from`. 170 | /// 171 | /// The position `to` needs to be empty. 
172 | inline entry_ptr_t shift_groups_and_insert( 173 | size_t from, size_t to, uint64_t stored_quotient) 174 | { 175 | DCHECK_NE(from, to); 176 | 177 | for(size_t i = to; i != from;) { 178 | size_t next_i = size_mgr.mod_sub(i, size_t(1)); 179 | 180 | set_c(i, get_c(next_i)); 181 | 182 | i = next_i; 183 | } 184 | set_c(from, false); 185 | 186 | return shift_elements_and_insert(from, to); 187 | } 188 | 189 | /// Shifts all values of the half-open range [from, to) 190 | /// inside the table one to the right, and inserts the new value 191 | /// at the now-empty location `from`. 192 | /// 193 | /// The position `to` needs to be empty. 194 | inline entry_ptr_t shift_elements_and_insert( 195 | size_t from, size_t to) 196 | { 197 | auto sctx = storage.context(table_size, widths); 198 | // move from...to one to the right, then insert at from 199 | 200 | DCHECK(from != to); 201 | 202 | table_pos_t from_pos; 203 | 204 | if (to < from) { 205 | // if the range wraps around, we decompose into two ranges: 206 | // [ | | ] 207 | // | to^ ^from | 208 | // ^start end^ 209 | // [ 2 ] [ 1 ] 210 | // 211 | // NB: because we require from != to, and insert 1 additional element, 212 | // we are always dealing with a minimum 2 element range, 213 | // and thus can not end up with a split range with length == 0. 214 | 215 | from_pos = sparse_shift(from, table_size); 216 | if (to > 0) { 217 | auto start_pos = sparse_shift(0, to); 218 | sctx.at(from_pos).swap_with(sctx.at(start_pos)); 219 | } 220 | } else { 221 | // [ | | ] 222 | // from^ ^to 223 | 224 | from_pos = sparse_shift(from, to); 225 | } 226 | 227 | // insert the element from the end of the range at the free 228 | // position to the right of it. 229 | auto new_loc = sctx.allocate_pos(sctx.table_pos(to)); 230 | 231 | auto from_ptrs = sctx.at(from_pos); 232 | new_loc.init_from(from_ptrs); 233 | from_ptrs.uninitialize(); 234 | 235 | return from_ptrs; 236 | } 237 | 238 | /// Shifts all elements one to the right, 239 | /// moving the last element to the front position, 240 | /// and returns a ptr pair to it. 
241 | inline table_pos_t sparse_shift(size_t from, size_t to) { 242 | DCHECK_LT(from, to); 243 | auto sctx = storage.context(table_size, widths); 244 | 245 | // initialize iterators like this: 246 | // [ ] 247 | // ^from to^ 248 | // || 249 | // <- src^| 250 | // <- dest^ 251 | 252 | auto from_loc = sctx.table_pos(from); 253 | auto from_iter = sctx.make_iter(from_loc); 254 | 255 | auto last = sctx.table_pos(to - 1); 256 | auto src = sctx.make_iter(last); 257 | auto dst = sctx.make_iter(sctx.table_pos(to)); 258 | 259 | // move the element at the last position to a temporary position 260 | auto tmp_p = sctx.at(last); 261 | auto tmp = tmp_p.move_out(); 262 | 263 | // move all elements one to the right 264 | // TODO: Could be optimized 265 | // to memcpies for different underlying layouts 266 | while(src != from_iter) { 267 | // Decrement first for backward iteration 268 | src.decrement(); 269 | dst.decrement(); 270 | 271 | // Get access to the value/quotient at src and dst 272 | auto src_be = src.get(); 273 | auto dst_be = dst.get(); 274 | 275 | // Copy value/quotient over 276 | dst_be.move_from(src_be); 277 | } 278 | 279 | // move last element to the front 280 | auto from_p = sctx.at(from_loc); 281 | from_p.set(std::move(tmp)); 282 | return from_loc; 283 | } 284 | 285 | inline uint64_t local_id_to_global_id(uint64_t initial_address, uint64_t local_id) { 286 | local_id <<= size_mgr.capacity_log2(); 287 | local_id |= initial_address; 288 | return local_id; 289 | } 290 | 291 | entry_t lookup_id(uint64_t id) { 292 | uint64_t local_id = id >> size_mgr.capacity_log2(); 293 | uint64_t initial_address = id & ((1ull << size_mgr.capacity_log2()) - 1); 294 | 295 | auto group = search_existing_group(initial_address); 296 | auto position = size_mgr.mod_add(group.group_start, local_id); 297 | 298 | auto sctx = storage.context(table_size, widths); 299 | auto sparse_entry = sctx.at(sctx.table_pos(position)); 300 | 301 | return entry_t::found_exist(id, sparse_entry); 302 | } 303 | 304 | entry_t lookup_insert(uint64_t initial_address, 305 | uint64_t stored_quotient) 306 | { 307 | auto sctx = storage.context(table_size, widths); 308 | auto ia_pos = sctx.table_pos(initial_address); 309 | 310 | // cases: 311 | // - initial address empty. 312 | // - initial address occupied, there is an element for this key 313 | // (v[initial address] = 1). 314 | // - initial address occupied, there is no element for this key 315 | // (v[initial address] = 0). 316 | 317 | if (sctx.pos_is_empty(ia_pos)) { 318 | // check if we can insert directly 319 | 320 | auto location = sctx.allocate_pos(ia_pos); 321 | location.set_quotient(stored_quotient); 322 | 323 | // we created a new group, so update the bitflags 324 | set_cv(initial_address, 0b11); 325 | 326 | uint64_t global_id = local_id_to_global_id(initial_address, 0); 327 | return entry_t::found_new(global_id, location); 328 | } else { 329 | // check if there already is a group for this key 330 | bool const group_exists = get_v(initial_address); 331 | 332 | if (group_exists) { 333 | auto const group = search_existing_group(initial_address); 334 | 335 | // check if element already exists 336 | auto r = search_in_group(group, stored_quotient); 337 | 338 | if (r.found()) { 339 | // There is a value for this key already. 
340 | DCHECK_EQ(r.ptr().get_quotient(), stored_quotient); 341 | 342 | uint64_t global_id = local_id_to_global_id( 343 | initial_address, r.id()); 344 | return entry_t::found_exist(global_id, r.ptr()); 345 | } else { 346 | // Insert a new value 347 | auto p = insert_value_after_group(group, stored_quotient); 348 | p.set_quotient(stored_quotient); 349 | 350 | uint64_t in_group_offset = size_mgr.mod_sub( 351 | group.group_end, group.group_start); 352 | uint64_t global_id = local_id_to_global_id( 353 | initial_address, in_group_offset); 354 | return entry_t::found_new(global_id, p); 355 | } 356 | } else { 357 | // insert a new group 358 | 359 | // pretend we already inserted the new group 360 | // this makes table_insert_value_after_group() find the group 361 | // at the location _before_ the new group 362 | set_v(initial_address, true); 363 | auto const group = search_existing_group(initial_address); 364 | 365 | // insert the element after the found group 366 | auto p = insert_value_after_group(group, stored_quotient); 367 | p.set_quotient(stored_quotient); 368 | 369 | // mark the inserted element as the start of a new group, 370 | // thus fixing-up the v <-> c mapping 371 | set_c(group.group_end, true); 372 | 373 | uint64_t global_id = local_id_to_global_id( 374 | initial_address, 0); 375 | return entry_t::found_new(global_id, p); 376 | } 377 | } 378 | } 379 | 380 | template 381 | inline void for_all_allocated(F f) { 382 | auto sctx = storage.context(table_size, widths); 383 | 384 | // first, skip forward to the first empty location 385 | // so that iteration can start at the beginning of the first complete group 386 | 387 | size_t i = 0; 388 | for(;;i++) { 389 | if (sctx.pos_is_empty(sctx.table_pos(i))) { 390 | break; 391 | } 392 | } 393 | 394 | // Remember our startpoint so that we can recognize it when 395 | // we wrapped around back to it 396 | size_t const original_start = i; 397 | 398 | // We proceed to the next position so that we can iterate until 399 | // we reach `original_start` again. 
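            // Note on the invariant this iteration relies on (sketch): the
            // bitvectors maintained via set_c()/set_v() encode
            //   v[p] == 1  iff at least one stored element has initial address p,
            //   c[q] == 1  iff the element in slot q is the first of its group,
            // and groups are stored in the same relative order as their initial
            // addresses. So whenever a c-bit is encountered below, the matching
            // initial address is the next position with a set v-bit.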
400 | uint64_t initial_address = i; 401 | i = size_mgr.mod_add(i); 402 | 403 | while(true) { 404 | auto sctx = storage.context(table_size, widths); 405 | while (sctx.pos_is_empty(sctx.table_pos(i))) { 406 | if (i == original_start) { 407 | return; 408 | } 409 | 410 | initial_address = i; 411 | i = size_mgr.mod_add(i); 412 | } 413 | 414 | // If start of group, find next v bit to find initial address 415 | if (get_c(i)) { 416 | initial_address = size_mgr.mod_add(initial_address); 417 | while(!get_v(initial_address)) { 418 | initial_address = size_mgr.mod_add(initial_address); 419 | } 420 | } 421 | 422 | f(initial_address, i); 423 | 424 | i = size_mgr.mod_add(i); 425 | } 426 | } 427 | 428 | void print_all() { 429 | auto sctx = storage.context(table_size, widths); 430 | std::cout << "/////////////////\n"; 431 | for(size_t i = 0; i < table_size; i++) { 432 | auto p = sctx.table_pos(i); 433 | if(sctx.pos_is_empty(p)) { 434 | std::cout << "-- -\n"; 435 | } else { 436 | std::cout << int(get_c(i)) << int(get_v(i)) << " #\n"; 437 | } 438 | } 439 | std::cout << "/////////////////\n"; 440 | } 441 | 442 | template 443 | inline void drain_all(F f) { 444 | table_pos_t drain_start; 445 | bool first = true; 446 | 447 | for_all_allocated([&](auto initial_address, auto i) { 448 | auto sctx = storage.context(table_size, widths); 449 | auto p = sctx.table_pos(i); 450 | 451 | if (first) { 452 | first = false; 453 | drain_start = p; 454 | } 455 | 456 | sctx.trim_storage(&drain_start, p); 457 | f(initial_address, sctx.at(p)); 458 | }); 459 | } 460 | 461 | inline entry_t search(uint64_t initial_address, uint64_t stored_quotient) { 462 | //std::cout << "search on cv(ia="< 478 | inline auto context(storage_t& storage, 479 | size_t table_size, 480 | typename storage_t::satellite_t_export::entry_bit_width_t const& widths, 481 | size_mgr_t const& size_mgr) { 482 | return context_t { 483 | m_cv, table_size, widths, size_mgr, storage 484 | }; 485 | } 486 | }; 487 | 488 | } 489 | 490 | template<> 491 | struct heap_size { 492 | using T = compact_hash::cv_bvs_t; 493 | 494 | static object_size_t compute(T const& val, size_t table_size) { 495 | DCHECK_EQ(val.m_cv.size(), table_size); 496 | auto size = val.m_cv.stat_allocation_size_in_bytes(); 497 | 498 | return object_size_t::exact(size); 499 | } 500 | }; 501 | 502 | template<> 503 | struct serialize { 504 | using T = compact_hash::cv_bvs_t; 505 | 506 | static object_size_t write(std::ostream& out, T const& val, 507 | size_t table_size) { 508 | DCHECK_EQ(val.m_cv.size(), table_size); 509 | auto data = (char const*) val.m_cv.data(); 510 | auto size = val.m_cv.stat_allocation_size_in_bytes(); 511 | 512 | out.write(data, size); 513 | 514 | return object_size_t::exact(size); 515 | } 516 | 517 | static T read(std::istream& in, 518 | size_t table_size) { 519 | auto cv = IntVector>(); 520 | cv.reserve(table_size); 521 | cv.resize(table_size); 522 | auto data = (char*) cv.data(); 523 | auto size = cv.stat_allocation_size_in_bytes(); 524 | 525 | in.read(data, size); 526 | 527 | return T { 528 | std::move(cv) 529 | }; 530 | } 531 | 532 | static bool equal_check(T const& lhs, T const& rhs, size_t table_size) { 533 | return gen_equal_diagnostic(lhs.m_cv == rhs.m_cv); 534 | } 535 | }; 536 | 537 | } 538 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/index_structure/displacement_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 
#include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "../entry_t.hpp" 13 | 14 | #include 15 | 16 | namespace tdc {namespace compact_hash { 17 | 18 | template 19 | class displacement_t { 20 | template 21 | friend struct ::tdc::serialize; 22 | 23 | template 24 | friend struct ::tdc::heap_size; 25 | 26 | displacement_table_t m_displace; 27 | 28 | displacement_t(displacement_table_t&& table): 29 | m_displace(std::move(table)) {} 30 | 31 | public: 32 | displacement_table_t& displacement_table() { return m_displace; } 33 | /// runtime initilization arguments, if any 34 | struct config_args { 35 | typename displacement_table_t::config_args table_config; 36 | }; 37 | 38 | /// get the config of this instance 39 | inline config_args current_config() const { 40 | return config_args { m_displace.current_config() }; 41 | } 42 | 43 | inline displacement_t(size_t table_size, config_args config): 44 | m_displace(table_size, config.table_config) {} 45 | 46 | template 47 | struct context_t { 48 | using satellite_t = typename storage_t::satellite_t_export; 49 | using entry_width_t = typename satellite_t::entry_bit_width_t; 50 | using entry_t = generic_entry_t; 51 | using table_pos_t = typename storage_t::table_pos_t; 52 | 53 | displacement_table_t& m_displace; 54 | size_t const table_size; 55 | entry_width_t widths; 56 | size_mgr_t const& size_mgr; 57 | storage_t& storage; 58 | 59 | entry_t lookup_id(uint64_t id) { 60 | uint64_t position = id; 61 | 62 | auto sctx = storage.context(table_size, widths); 63 | auto sparse_entry = sctx.at(sctx.table_pos(position)); 64 | 65 | return entry_t::found_exist(id, sparse_entry); 66 | } 67 | 68 | entry_t lookup_insert(uint64_t initial_address, 69 | uint64_t stored_quotient) 70 | { 71 | auto sctx = storage.context(table_size, widths); 72 | 73 | auto cursor = initial_address; 74 | while(true) { 75 | auto pos = sctx.table_pos(cursor); 76 | 77 | if (sctx.pos_is_empty(pos)) { 78 | auto ptrs = sctx.allocate_pos(pos); 79 | m_displace.set(cursor, size_mgr.mod_sub(cursor, initial_address)); 80 | ptrs.set_quotient(stored_quotient); 81 | return entry_t::found_new(cursor, ptrs); 82 | } 83 | 84 | if(m_displace.get(cursor) == size_mgr.mod_sub(cursor, initial_address)) { 85 | auto ptrs = sctx.at(pos); 86 | if (ptrs.get_quotient() == stored_quotient) { 87 | return entry_t::found_exist(cursor, ptrs); 88 | } 89 | } 90 | 91 | cursor = size_mgr.mod_add(cursor); 92 | DCHECK_NE(cursor, initial_address); 93 | } 94 | 95 | DCHECK(false) << "unreachable"; 96 | return entry_t::not_found(); 97 | } 98 | 99 | template 100 | inline void for_all_allocated(F f) { 101 | auto sctx = storage.context(table_size, widths); 102 | 103 | // first, skip forward to the first empty location 104 | // so that iteration can start at the beginning of the first complete group 105 | 106 | size_t i = 0; 107 | for(;;i++) { 108 | if (sctx.pos_is_empty(sctx.table_pos(i))) { 109 | break; 110 | } 111 | } 112 | 113 | // Remember our startpoint so that we can recognize it when 114 | // we wrapped around back to it 115 | size_t const original_start = i; 116 | 117 | // We proceed to the next position so that we can iterate until 118 | // we reach `original_start` again. 
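            // Note (sketch): in this displacement-based variant every occupied
            // slot i stores its distance to the element's initial address in
            // m_displace, so the initial address can be recovered directly as
            //     initial_address = size_mgr.mod_sub(i, m_displace.get(i));
            // which is what the loop body below does, and what lookup_insert()
            // and search() compare against while probing forward from the
            // initial address.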
119 | i = size_mgr.mod_add(i); 120 | 121 | while(true) { 122 | auto sctx = storage.context(table_size, widths); 123 | while (sctx.pos_is_empty(sctx.table_pos(i))) { 124 | if (i == original_start) { 125 | return; 126 | } 127 | 128 | i = size_mgr.mod_add(i); 129 | } 130 | 131 | auto disp = m_displace.get(i); 132 | uint64_t initial_address = size_mgr.mod_sub(i, disp); 133 | 134 | f(initial_address, i); 135 | 136 | i = size_mgr.mod_add(i); 137 | } 138 | } 139 | 140 | template 141 | inline void drain_all(F f) { 142 | table_pos_t drain_start; 143 | bool first = true; 144 | 145 | for_all_allocated([&](auto initial_address, auto i) { 146 | auto sctx = storage.context(table_size, widths); 147 | auto p = sctx.table_pos(i); 148 | 149 | if (first) { 150 | first = false; 151 | drain_start = p; 152 | } 153 | 154 | sctx.trim_storage(&drain_start, p); 155 | f(initial_address, sctx.at(p)); 156 | }); 157 | } 158 | 159 | inline entry_t search(uint64_t const initial_address, 160 | uint64_t stored_quotient) { 161 | auto sctx = storage.context(table_size, widths); 162 | auto cursor = initial_address; 163 | while(true) { 164 | auto pos = sctx.table_pos(cursor); 165 | 166 | if (sctx.pos_is_empty(pos)) { 167 | return entry_t::not_found(); 168 | } 169 | 170 | if(m_displace.get(cursor) == size_mgr.mod_sub(cursor, initial_address)) { 171 | auto ptrs = sctx.at(pos); 172 | if (ptrs.get_quotient() == stored_quotient) { 173 | return entry_t::found_exist(cursor, ptrs); 174 | } 175 | } 176 | 177 | cursor = size_mgr.mod_add(cursor); 178 | DCHECK_NE(cursor, initial_address); 179 | } 180 | 181 | DCHECK(false) << "unreachable"; 182 | return entry_t::not_found(); 183 | } 184 | }; 185 | template 186 | inline auto context(storage_t& storage, 187 | size_t table_size, 188 | typename storage_t::satellite_t_export::entry_bit_width_t const& widths, 189 | size_mgr_t const& size_mgr) { 190 | return context_t { 191 | m_displace, table_size, widths, size_mgr, storage 192 | }; 193 | } 194 | }; 195 | 196 | } 197 | 198 | template 199 | struct heap_size> { 200 | using T = compact_hash::displacement_t; 201 | 202 | static object_size_t compute(T const& val, size_t table_size) { 203 | return heap_size::compute(val.m_displace, table_size); 204 | } 205 | }; 206 | 207 | template 208 | struct serialize> { 209 | using T = compact_hash::displacement_t; 210 | 211 | static object_size_t write(std::ostream& out, T const& val, size_t table_size) { 212 | return serialize::write(out, val.m_displace, table_size); 213 | } 214 | 215 | static T read(std::istream& in, size_t table_size) { 216 | auto displace = 217 | serialize::read(in, table_size); 218 | 219 | return T { 220 | std::move(displace) 221 | }; 222 | } 223 | static bool equal_check(T const& lhs, T const& rhs, size_t table_size) { 224 | return gen_equal_check(m_displace, table_size); 225 | } 226 | }; 227 | 228 | } 229 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/index_structure/layered_displacement_table_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace tdc {namespace compact_hash { 16 | 17 | template 18 | struct static_layered_bit_width_t { 19 | using elem_t = uint_t; 20 | 21 | /// runtime initilization arguments, if any 22 | struct config_args {}; 23 | 24 | /// get the config of this instance 25 | inline config_args 
current_config() const { return config_args{}; } 26 | 27 | static_layered_bit_width_t() = default; 28 | static_layered_bit_width_t(config_args config) {} 29 | 30 | inline void set_width(IntVector& iv) const {} 31 | inline uint64_t max() const { return std::numeric_limits::max(); } 32 | }; 33 | 34 | struct dynamic_layered_bit_width_t { 35 | using elem_t = dynamic_t; 36 | 37 | size_t m_width; 38 | 39 | /// runtime initilization arguments, if any 40 | struct config_args { size_t width = 4; }; 41 | 42 | /// get the config of this instance 43 | inline config_args current_config() const { return config_args{ m_width }; } 44 | 45 | dynamic_layered_bit_width_t() = default; 46 | dynamic_layered_bit_width_t(config_args config): m_width(config.width) {} 47 | 48 | inline void set_width(IntVector& iv) const { 49 | iv.width(m_width); 50 | } 51 | inline uint64_t max() const { return (1ull << m_width) - 1; } 52 | }; 53 | 54 | /// Stores displacement entries as integers with a bit width given by 55 | /// `bit_width_t`. Displacement value larger than that 56 | /// will be spilled into a `std::unordered_map`. 57 | template 58 | class layered_displacement_table_t { 59 | template 60 | friend struct ::tdc::serialize; 61 | 62 | template 63 | friend struct ::tdc::heap_size; 64 | 65 | using elem_t = typename bit_width_t::elem_t; 66 | using elem_val_t = typename IntVector::value_type; 67 | 68 | IntVector m_displace; 69 | std::unordered_map m_spill; 70 | bit_width_t m_bit_width; 71 | 72 | layered_displacement_table_t() = default; 73 | public: 74 | /// runtime initilization arguments, if any 75 | struct config_args { 76 | typename bit_width_t::config_args bit_width_config; 77 | }; 78 | 79 | /// get the config of this instance 80 | inline config_args current_config() const { 81 | return config_args{ m_bit_width.current_config() }; 82 | } 83 | 84 | inline layered_displacement_table_t(size_t table_size, 85 | config_args config): 86 | m_bit_width(config.bit_width_config) 87 | { 88 | m_bit_width.set_width(m_displace); 89 | m_displace.reserve(table_size); 90 | m_displace.resize(table_size); 91 | } 92 | inline size_t get(size_t pos) { 93 | size_t max = m_bit_width.max(); 94 | size_t tmp = elem_val_t(m_displace[pos]); 95 | if (tmp == max) { 96 | return m_spill[pos]; 97 | } else { 98 | return tmp; 99 | } 100 | } 101 | inline void set(size_t pos, size_t val) { 102 | size_t max = m_bit_width.max(); 103 | if (val >= max) { 104 | m_displace[pos] = max; 105 | m_spill[pos] = val; 106 | } else { 107 | m_displace[pos] = val; 108 | } 109 | } 110 | }; 111 | 112 | } 113 | 114 | template 115 | struct heap_size> { 116 | using T = compact_hash::layered_displacement_table_t; 117 | 118 | static object_size_t compute(T const& val, size_t table_size) { 119 | auto bytes = object_size_t::empty(); 120 | 121 | DCHECK_EQ(val.m_displace.size(), table_size); 122 | auto size = val.m_displace.stat_allocation_size_in_bytes(); 123 | bytes += object_size_t::exact(size); 124 | bytes += heap_size_compute(val.m_bit_width); 125 | 126 | size_t unordered_map_size_guess 127 | = sizeof(decltype(val.m_spill)) 128 | + val.m_spill.size() * sizeof(size_t) * 2; 129 | 130 | bytes += object_size_t::unknown_extra_data(unordered_map_size_guess); 131 | 132 | return bytes; 133 | } 134 | }; 135 | 136 | template 137 | struct serialize> { 138 | using T = compact_hash::layered_displacement_table_t; 139 | 140 | static object_size_t write(std::ostream& out, T const& val, size_t table_size) { 141 | auto bytes = object_size_t::empty(); 142 | 143 | DCHECK_EQ(val.m_displace.size(), 
table_size); 144 | 145 | bytes += serialize_write(out, val.m_bit_width); 146 | 147 | auto data = (char const*) val.m_displace.data(); 148 | auto size = val.m_displace.stat_allocation_size_in_bytes(); 149 | out.write(data, size); 150 | bytes += object_size_t::exact(size); 151 | 152 | size_t spill_size = val.m_spill.size(); 153 | out.write((char*) &spill_size, sizeof(size_t)); 154 | bytes += object_size_t::exact(sizeof(size_t)); 155 | 156 | for (auto pair : val.m_spill) { 157 | size_t k = pair.first; 158 | size_t v = pair.second; 159 | out.write((char*) &k, sizeof(size_t)); 160 | out.write((char*) &v, sizeof(size_t)); 161 | bytes += object_size_t::exact(sizeof(size_t) * 2); 162 | spill_size--; 163 | } 164 | 165 | DCHECK_EQ(spill_size, 0U); 166 | 167 | return bytes; 168 | } 169 | 170 | static T read(std::istream& in, size_t table_size) { 171 | T ret; 172 | serialize_read_into(in, ret.m_bit_width); 173 | ret.m_bit_width.set_width(ret.m_displace); 174 | ret.m_displace.reserve(table_size); 175 | ret.m_displace.resize(table_size); 176 | auto data = (char*) ret.m_displace.data(); 177 | auto size = ret.m_displace.stat_allocation_size_in_bytes(); 178 | in.read(data, size); 179 | 180 | auto& spill = ret.m_spill; 181 | size_t spill_size; 182 | in.read((char*) &spill_size, sizeof(size_t)); 183 | 184 | for (size_t i = 0; i < spill_size; i++) { 185 | size_t k; 186 | size_t v; 187 | in.read((char*) &k, sizeof(size_t)); 188 | in.read((char*) &v, sizeof(size_t)); 189 | 190 | spill[k] = v; 191 | } 192 | 193 | return ret; 194 | } 195 | 196 | static bool equal_check(T const& lhs, T const& rhs, size_t table_size) { 197 | return gen_equal_diagnostic(lhs.m_displace == rhs.m_displace) 198 | && gen_equal_diagnostic(lhs.m_spill == rhs.m_spill) 199 | && gen_equal_check(m_bit_width); 200 | } 201 | }; 202 | 203 | template 204 | struct heap_size> { 205 | using T = compact_hash::static_layered_bit_width_t; 206 | 207 | static object_size_t compute(T const& val) { 208 | return object_size_t::empty(); 209 | } 210 | }; 211 | 212 | template 213 | struct serialize> { 214 | using T = compact_hash::static_layered_bit_width_t; 215 | 216 | static object_size_t write(std::ostream& out, T const& val) { 217 | return object_size_t::empty(); 218 | } 219 | 220 | static T read(std::istream& in) { 221 | return T(); 222 | } 223 | 224 | static bool equal_check(T const& lhs, T const& rhs) { 225 | return true; 226 | } 227 | }; 228 | 229 | template<> 230 | struct heap_size { 231 | using T = compact_hash::dynamic_layered_bit_width_t; 232 | 233 | static object_size_t compute(T const& val) { 234 | return object_size_t::exact(sizeof(T)); 235 | } 236 | }; 237 | 238 | template<> 239 | struct serialize { 240 | using T = compact_hash::dynamic_layered_bit_width_t; 241 | 242 | static object_size_t write(std::ostream& out, T const& val) { 243 | auto bytes = object_size_t::empty(); 244 | bytes += serialize_write(out, val.m_width); 245 | return bytes; 246 | } 247 | 248 | static T read(std::istream& in) { 249 | T ret; 250 | serialize_read_into(in, ret.m_width); 251 | return ret; 252 | } 253 | 254 | static bool equal_check(T const& lhs, T const& rhs) { 255 | return gen_equal_check(m_width); 256 | } 257 | }; 258 | 259 | 260 | } 261 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/index_structure/naive_displacement_table_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 
#include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace tdc {namespace compact_hash { 16 | 17 | /// Stores displacement entries as `size_t` integers. 18 | struct naive_displacement_table_t { 19 | template 20 | friend struct ::tdc::serialize; 21 | 22 | /// runtime initilization arguments, if any 23 | struct config_args {}; 24 | 25 | /// get the config of this instance 26 | inline config_args current_config() const { return config_args{}; } 27 | 28 | std::vector m_displace; 29 | inline naive_displacement_table_t(size_t table_size, 30 | config_args config) { 31 | m_displace.reserve(table_size); 32 | m_displace.resize(table_size); 33 | } 34 | inline size_t get(size_t pos) const { 35 | return m_displace[pos]; 36 | } 37 | inline void set(size_t pos, size_t val) { 38 | m_displace[pos] = val; 39 | } 40 | }; 41 | 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/map/satellite_data_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "val_quot_ptrs_t.hpp" 6 | #include "val_quot_bucket_layout_t.hpp" 7 | 8 | namespace tdc {namespace compact_hash{namespace map { 9 | 10 | template 11 | struct satellite_data_t { 12 | private: 13 | using qvd_t = val_quot_bucket_layout_t; 14 | using widths_t = typename qvd_t::QVWidths; 15 | public: 16 | static constexpr bool has_sentinel = true; 17 | using entry_ptr_t = val_quot_ptrs_t; 18 | using entry_bit_width_t = widths_t; 19 | 20 | using bucket_data_layout_t = qvd_t; 21 | 22 | using sentinel_value_type = typename cbp::cbp_repr_t::value_type; 23 | }; 24 | 25 | }}} 26 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/map/typedefs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace tdc {namespace compact_hash {namespace map { 14 | 15 | template 16 | using plain_cv_hashmap_t 17 | = hashmap_t; 18 | 19 | template 20 | using sparse_cv_hashmap_t 21 | = hashmap_t; 22 | 23 | template 24 | using plain_layered_hashmap_t 25 | = hashmap_t< 26 | val_t, hash_t, plain_sentinel_t, 27 | displacement_t>>; 28 | 29 | template 30 | using sparse_layered_hashmap_t 31 | = hashmap_t< 32 | val_t, hash_t, buckets_bv_t, 33 | displacement_t>>; 34 | 35 | template 36 | using plain_elias_hashmap_t 37 | = hashmap_t< 38 | val_t, hash_t, plain_sentinel_t, 39 | displacement_t>>; 41 | 42 | template 43 | using sparse_elias_hashmap_t 44 | = hashmap_t< 45 | val_t, hash_t, buckets_bv_t, 46 | displacement_t>>; 48 | 49 | }}} 50 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/map/val_quot_bucket_layout_t.hpp: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include "val_quot_ptrs_t.hpp" 12 | 13 | namespace tdc {namespace compact_hash{namespace map { 14 | 15 | template 16 | struct val_quot_bucket_layout_t { 17 | struct QVWidths { 18 | uint8_t quot_width; 19 | uint8_t val_width; 20 | }; 21 | 22 | /// Calculates the offsets of the two different arrays inside the allocation. 
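    ///
    /// Rough sketch (added for illustration): both arrays live bit-packed in a
    /// single uint64_t allocation, values first, quotients second. For example,
    /// with size = 64, val_width = 8 and quot_width = 5 the values occupy
    /// 64 * 8 = 512 bits and the quotients 64 * 5 = 320 bits, i.e. roughly 13
    /// 64-bit words overall; the exact word count and any per-array alignment
    /// are determined by cbp::bit_layout_t in calc_sizes() below.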
23 | struct Layout { 24 | cbp::cbp_layout_element_t vals_layout; 25 | cbp::cbp_layout_element_t quots_layout; 26 | size_t overall_qword_size; 27 | 28 | inline Layout(): vals_layout(), quots_layout(), overall_qword_size(0) { 29 | } 30 | }; 31 | inline static Layout calc_sizes(size_t size, QVWidths widths) { 32 | DCHECK_NE(size, 0U); 33 | DCHECK_LE(alignof(val_t), alignof(uint64_t)); 34 | 35 | auto layout = cbp::bit_layout_t(); 36 | 37 | // The values 38 | auto values = layout.cbp_elements(size, widths.val_width); 39 | 40 | // The quotients 41 | auto quots = layout.cbp_elements(size, widths.quot_width); 42 | 43 | Layout r; 44 | r.vals_layout = values; 45 | r.quots_layout = quots; 46 | r.overall_qword_size = layout.get_size_in_uint64_t_units(); 47 | return r; 48 | } 49 | 50 | /// Creates the pointers to the beginnings of the two arrays inside 51 | /// the allocation. 52 | inline static val_quot_ptrs_t ptr(uint64_t* alloc, size_t size, QVWidths widths) { 53 | DCHECK_NE(size, 0U); 54 | auto layout = calc_sizes(size, widths); 55 | 56 | return val_quot_ptrs_t { 57 | layout.vals_layout.ptr_relative_to(alloc), 58 | layout.quots_layout.ptr_relative_to(alloc), 59 | }; 60 | } 61 | 62 | // Run destructors of each element in the bucket. 63 | inline static void destroy_vals(uint64_t* alloc, size_t size, QVWidths widths) { 64 | if (size != 0) { 65 | auto start = ptr(alloc, size, widths).val_ptr(); 66 | auto end = start + size; 67 | 68 | for(; start != end; start++) { 69 | cbp::cbp_repr_t::call_destructor(start); 70 | } 71 | } 72 | } 73 | 74 | /// Returns a `val_quot_ptrs_t` to position `pos`, 75 | /// or a sentinel value that acts as a one-pass-the-end pointer for the empty case. 76 | inline static val_quot_ptrs_t at(uint64_t* alloc, size_t size, size_t pos, QVWidths widths) { 77 | if(size != 0) { 78 | auto ps = ptr(alloc, size, widths); 79 | return val_quot_ptrs_t(ps.val_ptr() + pos, ps.quot_ptr() + pos); 80 | } else { 81 | DCHECK_EQ(pos, 0U); 82 | return val_quot_ptrs_t(); 83 | } 84 | } 85 | }; 86 | 87 | }}} 88 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/map/val_quot_ptrs_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace tdc {namespace compact_hash{namespace map { 12 | 13 | /// Represents a pair of pointers to value and quotient inside a bucket. 
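///
/// Added note: because buckets deliberately leave their payload uninitialized
/// (see the warning in bucket_t), `init_from()` and the `*_no_drop()` members
/// placement-construct a value into raw storage via cbp::cbp_repr_t, whereas
/// `set()` / `set_val()` / `move_from()` assign over an already constructed
/// value, and `uninitialize()` runs the destructor explicitly.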
14 | template 15 | class val_quot_ptrs_t { 16 | ValPtr m_val_ptr; 17 | mutable QuotPtr m_quot_ptr; 18 | 19 | public: 20 | using value_type = typename cbp::cbp_repr_t::value_type; 21 | 22 | struct my_value_type { 23 | uint64_t quot; 24 | value_type val; 25 | }; 26 | 27 | inline val_quot_ptrs_t(ValPtr val_ptr, 28 | QuotPtr quot_ptr): 29 | m_val_ptr(val_ptr), 30 | m_quot_ptr(quot_ptr) 31 | { 32 | } 33 | 34 | inline val_quot_ptrs_t(): 35 | m_val_ptr(), m_quot_ptr() {} 36 | 37 | inline uint64_t get_quotient() const { 38 | return uint64_t(*m_quot_ptr); 39 | } 40 | 41 | inline void set_quotient(uint64_t v) const { 42 | *m_quot_ptr = v; 43 | } 44 | 45 | inline void swap_quotient(uint64_t& other) const { 46 | uint64_t tmp = uint64_t(*m_quot_ptr); 47 | std::swap(other, tmp); 48 | *m_quot_ptr = tmp; 49 | } 50 | 51 | inline ValPtr val_ptr() const { 52 | return m_val_ptr; 53 | } 54 | 55 | inline QuotPtr quot_ptr() const { 56 | return m_quot_ptr; 57 | } 58 | 59 | inline void increment_ptr() { 60 | m_quot_ptr++; 61 | m_val_ptr++; 62 | } 63 | inline void decrement_ptr() { 64 | m_quot_ptr--; 65 | m_val_ptr--; 66 | } 67 | 68 | inline friend bool operator==(val_quot_ptrs_t const& lhs, 69 | val_quot_ptrs_t const& rhs) 70 | { 71 | return lhs.m_val_ptr == rhs.m_val_ptr; 72 | } 73 | 74 | inline friend bool operator!=(val_quot_ptrs_t const& lhs, 75 | val_quot_ptrs_t const& rhs) 76 | { 77 | return lhs.m_val_ptr != rhs.m_val_ptr; 78 | } 79 | 80 | inline void set(value_type&& val, 81 | uint64_t quot) { 82 | set_quotient(quot); 83 | *val_ptr() = std::move(val); 84 | } 85 | 86 | inline void set_no_drop(value_type&& val, 87 | uint64_t quot) { 88 | set_quotient(quot); 89 | cbp::cbp_repr_t::construct_val_from_rval(val_ptr(), std::move(val)); 90 | } 91 | 92 | inline void set_val(value_type&& val) { 93 | *val_ptr() = std::move(val); 94 | } 95 | 96 | inline void set_val_no_drop(value_type&& val) { 97 | cbp::cbp_repr_t::construct_val_from_rval(val_ptr(), std::move(val)); 98 | } 99 | 100 | inline void move_from(val_quot_ptrs_t other) { 101 | *val_ptr() = std::move(*other.val_ptr()); 102 | set_quotient(other.get_quotient()); 103 | } 104 | 105 | inline void init_from(val_quot_ptrs_t other) { 106 | cbp::cbp_repr_t::construct_val_from_ptr(val_ptr(), other.val_ptr()); 107 | set_quotient(other.get_quotient()); 108 | } 109 | 110 | inline void swap_with(val_quot_ptrs_t other) { 111 | value_type tmp_val = std::move(*val_ptr()); 112 | uint64_t tmp_quot = get_quotient(); 113 | 114 | move_from(other); 115 | other.set(std::move(tmp_val), tmp_quot); 116 | } 117 | 118 | inline void uninitialize() { 119 | cbp::cbp_repr_t::call_destructor(val_ptr()); 120 | } 121 | 122 | inline bool contents_eq(val_quot_ptrs_t rhs) const { 123 | return (get_quotient() == rhs.get_quotient()) && (*val_ptr() == *rhs.val_ptr()); 124 | } 125 | 126 | inline my_value_type move_out() const { 127 | return my_value_type { 128 | get_quotient(), 129 | std::move(*val_ptr()), 130 | }; 131 | } 132 | 133 | inline void set(my_value_type&& val) { 134 | set_quotient(val.quot); 135 | *val_ptr() = std::move(val.val); 136 | } 137 | }; 138 | 139 | }}} 140 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/set/hashset_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | 12 | namespace tdc {namespace compact_hash {namespace set { 13 | 14 | template 15 | class hashset_t { 
16 | using storage_t = buckets_bv_t; 17 | using satellite_t = typename storage_t::satellite_t_export; 18 | public: 19 | /// runtime initilization arguments for the template config parameters 20 | struct config_args { 21 | typename size_manager_t::config_args size_manager_config; 22 | typename hash_t::config_args hash_config; 23 | typename storage_t::config_args storage_config; 24 | typename placement_t::config_args displacement_config; 25 | }; 26 | 27 | /// this is called during a resize to copy over internal config values 28 | inline config_args current_config() const { 29 | auto r = config_args{}; 30 | r.size_manager_config = m_sizing.current_config(); 31 | r.hash_config = m_hash.current_config(); 32 | r.storage_config = m_storage.current_config(); 33 | r.displacement_config = m_placement.current_config(); 34 | return r; 35 | } 36 | 37 | /// Default value of the `key_width` parameter of the constructor. 38 | static constexpr size_t DEFAULT_KEY_WIDTH = 1; 39 | static constexpr size_t DEFAULT_TABLE_SIZE = 0; 40 | 41 | inline hashset_t(hashset_t&& other): 42 | m_sizing(std::move(other.m_sizing)), 43 | m_key_width(std::move(other.m_key_width)), 44 | m_storage(std::move(other.m_storage)), 45 | m_placement(std::move(other.m_placement)), 46 | m_hash(std::move(other.m_hash)) 47 | { 48 | } 49 | inline hashset_t& operator=(hashset_t&& other) { 50 | m_sizing = std::move(other.m_sizing); 51 | m_key_width = std::move(other.m_key_width); 52 | m_storage = std::move(other.m_storage); 53 | m_placement = std::move(other.m_placement); 54 | m_hash = std::move(other.m_hash); 55 | 56 | return *this; 57 | } 58 | // NB: These just exist to catch bugs, and could be removed 59 | inline hashset_t(hashset_t const& other) = delete; 60 | inline hashset_t& operator=(hashset_t const& other) = delete; 61 | 62 | /// Constructs a hashtable with a initial table size `size`, 63 | /// and a initial key bit-width `key_width`. 64 | inline hashset_t(size_t size = DEFAULT_TABLE_SIZE, 65 | size_t key_width = DEFAULT_KEY_WIDTH, 66 | config_args config = config_args{}): 67 | m_sizing(size, config.size_manager_config), 68 | m_key_width(key_width), 69 | m_storage(table_size(), storage_widths(), config.storage_config), 70 | m_placement(table_size(), config.displacement_config), 71 | m_hash(real_width(), config.hash_config) 72 | { 73 | } 74 | 75 | /// Returns the amount of elements inside the datastructure. 76 | inline size_t size() const { 77 | return m_sizing.size(); 78 | } 79 | 80 | /// Returns the current size of the hashtable. 81 | /// This value is greater-or-equal the amount of the elements 82 | /// currently contained in it, which is represented by `size()`. 83 | inline size_t table_size() const { 84 | return m_sizing.capacity(); 85 | } 86 | 87 | /// Current width of the keys stored in this datastructure. 88 | inline size_t key_width() const { 89 | return m_key_width; 90 | } 91 | 92 | /// Amount of bits of the key, that are stored implicitly 93 | /// by its position in the table. 94 | inline size_t initial_address_width() const { 95 | return m_sizing.capacity_log2(); 96 | } 97 | 98 | /// Amount of bits of the key, that are stored explicitly 99 | /// in the buckets. 100 | inline size_t quotient_width() const { 101 | return real_width() - m_sizing.capacity_log2(); 102 | } 103 | 104 | /// Sets the maximum load factor 105 | /// (how full the table can get before re-allocating). 106 | /// 107 | /// Expects a value `0.0 < z < 1.0`. 
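    ///
    /// Example: with the default factor of 0.5, a table of capacity 1024 holds
    /// at most 512 elements; inserting the 513th element triggers a grow to
    /// capacity 2048 (see size_manager_t::needs_to_grow_capacity()).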
108 | inline void max_load_factor(float z) { 109 | m_sizing.max_load_factor(z); 110 | } 111 | 112 | /// Returns the maximum load factor. 113 | inline float max_load_factor() const noexcept { 114 | return m_sizing.max_load_factor(); 115 | } 116 | 117 | struct default_on_resize_t { 118 | /// Will be called in case of an resize. 119 | inline void on_resize(size_t table_size) {} 120 | /// Will be called after `on_resize()` for each element 121 | /// that gets re-inserted into the new set. 122 | inline void on_reinsert(uint64_t key, uint64_t id) {} 123 | }; 124 | 125 | using entry_t = generic_entry_t; 126 | 127 | /// Looks up the key `key` in the set, inserting it if 128 | /// it doesn't already exist. 129 | /// 130 | /// The returned `entry_t` contains both an _id_ that is unique for each 131 | /// element in the set for a given table size, 132 | /// and a boolean indicating if the key already exists. 133 | /// 134 | /// If the set needs to be resized, the observer `on_resize` will be 135 | /// used to notify the code about the changed size and new key-id mappings. 136 | template 137 | inline entry_t lookup_insert(uint64_t key, 138 | on_resize_t&& on_resize = on_resize_t()) { 139 | return lookup_insert_key_width(key, key_width(), on_resize); 140 | } 141 | 142 | /// Looks up the key `key` in the set, inserting it if 143 | /// it doesn't already exist, and grows the key width to `key_width` 144 | /// bits. 145 | /// 146 | /// The returned `entry_t` contains both an _id_ that is unique for each 147 | /// element in the set for a given table size, 148 | /// and a boolean indicating if the key already exists. 149 | /// 150 | /// If the set needs to be resized, the observer `on_resize` will be 151 | /// used to notify the code about the changed size and new key-id mappings. 152 | template 153 | inline entry_t lookup_insert_key_width(uint64_t key, 154 | uint8_t key_width, 155 | on_resize_t&& on_resize = on_resize_t()) { 156 | auto raw_key_width = std::max(key_width, this->key_width()); 157 | return grow_and_insert(key, raw_key_width, on_resize); 158 | } 159 | 160 | /// Grow the key width as needed. 161 | /// 162 | /// Note that it is more efficient to change the width directly during 163 | /// insertion of a new value. 164 | template 165 | inline void grow_key_width(size_t key_width, 166 | on_resize_t&& on_resize = on_resize_t()) { 167 | auto raw_key_width = std::max(key_width, this->key_width()); 168 | grow_if_needed(size(), raw_key_width, on_resize); 169 | } 170 | 171 | /// Search for a key inside the hashset. 172 | /// 173 | /// The returned `entry_t` contains a boolean indicating if the key was found. 174 | /// If it is, then it contains the corresponding _id_ of the entry. 175 | inline entry_t lookup(uint64_t key) { 176 | auto dkey = decompose_key(key); 177 | auto pctx = m_placement.context(m_storage, table_size(), storage_widths(), m_sizing); 178 | return pctx.search(dkey.initial_address, dkey.stored_quotient); 179 | } 180 | 181 | /// Takes an ID as returned by `entry_t::id()`, and returns the corresponding `entry_t`. 182 | /// 183 | /// The bavior is undefined if the id does not exist in the data structure, or after an 184 | /// intermediate rehash. 185 | inline entry_t lookup_id(uint64_t id) { 186 | auto pctx = m_placement.context(m_storage, table_size(), storage_widths(), m_sizing); 187 | auto result = pctx.lookup_id(id); 188 | 189 | return result; 190 | } 191 | 192 | /// Swap this instance of the data structure with another one. 
193 | inline void swap(hashset_t& other) { 194 | std::swap(*this, other); 195 | } 196 | 197 | /// Moves the contents of this hashtable 198 | /// into another table. 199 | /// 200 | /// This method tries to eagerly free memory in 201 | /// order to keep the total consumption low, if possible. 202 | /// 203 | /// The target hashtable will grow as needed. To prevent that, ensure its 204 | /// capacity and bit widths are already large enough. 205 | /// 206 | /// The `on_resize` handler will call `on_reinsert()` for 207 | /// each moved element. It will not be called for growth operations 208 | /// of the target hashtable. 209 | template 210 | inline void move_into(hashset_t& other, 211 | on_resize_t&& on_resize = on_resize_t()) { 212 | auto pctx = m_placement.context(m_storage, table_size(), storage_widths(), m_sizing); 213 | pctx.drain_all([&](auto initial_address, auto kv) { 214 | auto stored_quotient = kv.get_quotient(); 215 | auto key = this->compose_key(initial_address, stored_quotient); 216 | auto r = other.lookup_insert(key); 217 | DCHECK(r.found()); 218 | DCHECK(!r.key_already_exist()); 219 | on_resize.on_reinsert(key, r.id()); 220 | }); 221 | } 222 | 223 | /// Check wether for the `new_size` this hashtable would need 224 | /// to perform a grow of the capacity. 225 | inline bool needs_to_grow_capacity(size_t new_size) const { 226 | return m_sizing.needs_to_grow_capacity(m_sizing.capacity(), new_size); 227 | } 228 | 229 | /// Check wether for the `new_size` and `new_key_width` this 230 | /// hashtable would need to reallocate. 231 | inline bool needs_to_realloc(size_t new_size, 232 | size_t new_key_width) const { 233 | return needs_to_grow_capacity(new_size) 234 | || (new_key_width != key_width()); 235 | } 236 | 237 | /// Compute the new capacity the hashmap would have after a grow 238 | /// operation for `new_size`. 239 | inline size_t grown_capacity(size_t new_size) const { 240 | size_t new_capacity = m_sizing.capacity(); 241 | while (m_sizing.needs_to_grow_capacity(new_capacity, new_size)) { 242 | new_capacity = m_sizing.grown_capacity(new_capacity); 243 | } 244 | return new_capacity; 245 | } 246 | 247 | /// Pseudo-Pointer to a key. 248 | /// 249 | /// Does not actually point at a memory location, and defines equality 250 | /// based on the key value and wether this is in its `null` state. 251 | class pointer_type { 252 | uint64_t m_key; 253 | bool m_empty; 254 | public: 255 | pointer_type(uint64_t key): m_key(key), m_empty(false) {} 256 | pointer_type(): m_key(-1), m_empty(true) {} 257 | 258 | inline uint64_t& operator*() { 259 | return m_key; 260 | } 261 | inline uint64_t* operator->() { 262 | return &m_key; 263 | } 264 | inline bool operator==(pointer_type const& other) const { 265 | return (m_empty == other.m_empty) && (m_key == other.m_key); 266 | } 267 | inline bool operator!=(pointer_type const& other) const { 268 | return !(*this == other); 269 | } 270 | inline bool operator==(uint64_t const* const& other) const { 271 | return (other != nullptr) && (m_key == *other); 272 | } 273 | inline bool operator!=(uint64_t const* const& other) const { 274 | return !(*this == other); 275 | } 276 | }; 277 | 278 | /// Search for a key inside the hashtable. 279 | /// 280 | /// This returns a pseudo-pointer to the key if its found, or null 281 | /// otherwise. This method exists for STL-compability. 
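    ///
    /// Usage sketch (illustrative):
    ///
    ///     auto p = set.find(key);
    ///     if (p != decltype(set)::pointer_type()) { /* key is present */ }
    ///     // equivalently:
    ///     if (set.count(key) == 1) { /* key is present */ }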
282 | inline pointer_type find(uint64_t key) { 283 | if (count(key)) { 284 | return pointer_type(key); 285 | } else { 286 | return pointer_type(); 287 | } 288 | } 289 | 290 | /// Count the number of occurrences of `key`, as defined on STL containers. 291 | /// 292 | /// It will return either 0 or 1. 293 | inline size_t count(uint64_t key) { 294 | return lookup(key).found(); 295 | } 296 | 297 | private: 298 | using quot_width_t = typename satellite_t::entry_bit_width_t; 299 | 300 | /// Size of table, and width of the stored keys and values 301 | size_manager_t m_sizing; 302 | uint8_t m_key_width; 303 | 304 | /// Storage of the table elements 305 | storage_t m_storage; 306 | 307 | /// Placement management structures 308 | placement_t m_placement; 309 | 310 | /// Hash function 311 | hash_t m_hash {1}; 312 | 313 | template 314 | friend struct ::tdc::serialize; 315 | 316 | template 317 | friend struct ::tdc::heap_size; 318 | 319 | /// The actual amount of bits currently usable for 320 | /// storing a key in the hashtable. 321 | /// 322 | /// Due to implementation details, this can be 323 | /// larger than `key_width()`. 324 | /// 325 | /// Specifically, there are currently two cases: 326 | /// - If all bits of the the key fit into the initial-address space, 327 | /// then the quotient bitvector inside the buckets would 328 | /// have to store integers of width 0. This is undefined behavior 329 | /// with the current code, so we add a padding bit. 330 | /// - Otherwise the current maximum key width `m_key_width` 331 | /// determines the real width. 332 | inline size_t real_width() const { 333 | return std::max(m_sizing.capacity_log2() + 1, m_key_width); 334 | } 335 | 336 | inline quot_width_t storage_widths() const { 337 | return uint8_t(quotient_width()); 338 | } 339 | 340 | /// Debug check that a key does not occupy more bits than the 341 | /// hashtable currently allows. 342 | inline bool dcheck_key_width(uint64_t key) { 343 | uint64_t key_mask = (1ull << (key_width() - 1ull) << 1ull) - 1ull; 344 | bool key_is_too_large = key & ~key_mask; 345 | return !key_is_too_large; 346 | } 347 | 348 | /// Decompose a key into its initial address and quotient. 349 | inline decomposed_key_t decompose_key(uint64_t key) { 350 | DCHECK(dcheck_key_width(key)) << "Attempt to decompose key " << key << ", which requires more than the current set maximum of " << key_width() << " bits, but should not."; 351 | 352 | uint64_t hres = m_hash.hash(key); 353 | 354 | DCHECK_EQ(m_hash.hash_inv(hres), key); 355 | 356 | return m_sizing.decompose_hashed_value(hres); 357 | } 358 | 359 | /// Compose a key from its initial address and quotient. 360 | inline uint64_t compose_key(uint64_t initial_address, uint64_t quotient) { 361 | uint64_t harg = m_sizing.compose_hashed_value(initial_address, quotient); 362 | uint64_t key = m_hash.hash_inv(harg); 363 | 364 | DCHECK(dcheck_key_width(key)) << "Composed key " << key << ", which requires more than the current set maximum of " << key_width() << " bits, but should not."; 365 | return key; 366 | } 367 | 368 | /// Access the element represented by `handler` under 369 | /// the key `key` with the, possibly new, width of `key_width` bits. 370 | /// 371 | /// `handler` is a type that allows reacting correctly to different ways 372 | /// to access or create a new or existing value in the hashtable. 373 | /// See `InsertHandler` and `AddressDefaultHandler` below. 
374 | template 375 | inline auto grow_and_insert(uint64_t key, size_t key_width, on_resize_t& onr) { 376 | grow_if_needed(this->size() + 1, key_width, onr); 377 | auto const dkey = this->decompose_key(key); 378 | 379 | DCHECK_EQ(key, this->compose_key(dkey.initial_address, dkey.stored_quotient)); 380 | 381 | auto pctx = m_placement.context(m_storage, table_size(), storage_widths(), m_sizing); 382 | 383 | auto result = pctx.lookup_insert(dkey.initial_address, dkey.stored_quotient); 384 | 385 | if (!result.key_already_exist()) { 386 | m_sizing.set_size(m_sizing.size() + 1); 387 | } 388 | 389 | return result; 390 | } 391 | 392 | /// Check the current key width and table site against the arguments, 393 | /// and grows the table or quotient bitvectors as needed. 394 | template 395 | inline void grow_if_needed(size_t const new_size, 396 | size_t const new_key_width, 397 | on_resize_t& onr) { 398 | /* 399 | std::cout 400 | << "buckets size/cap: " << m_buckets.size() 401 | << ", size: " << m_sizing.size() 402 | << "\n"; 403 | */ 404 | 405 | // TODO: Could reuse the existing table if only m_key_width changes 406 | // TODO: The iterators is inefficient since it does redundant 407 | // memory lookups and address calculations. 408 | 409 | if (needs_to_realloc(new_size, new_key_width)) { 410 | size_t new_capacity = grown_capacity(new_size); 411 | auto config = this->current_config(); 412 | auto new_table = hashset_t( 413 | new_capacity, new_key_width, config); 414 | 415 | /* 416 | std::cout 417 | << "grow to cap " << new_table.table_size() 418 | << ", key_width: " << new_table.key_width() 419 | << ", val_width: " << new_table.value_width() 420 | << ", real_width: " << new_table.real_width() 421 | << ", quot width: " << new_table.quotient_width() 422 | << "\n"; 423 | */ 424 | 425 | onr.on_resize(new_capacity); 426 | 427 | move_into(new_table, onr); 428 | 429 | *this = std::move(new_table); 430 | } 431 | 432 | DCHECK(!needs_to_realloc(new_size, new_key_width)); 433 | } 434 | }; 435 | 436 | }} 437 | 438 | template 439 | struct heap_size> { 440 | using T = compact_hash::set::hashset_t; 441 | using storage_t = typename T::storage_t; 442 | 443 | static object_size_t compute(T const& val) { 444 | using namespace compact_hash::set; 445 | using namespace compact_hash; 446 | 447 | auto bytes = object_size_t::empty(); 448 | 449 | bytes += heap_size::compute(val.m_sizing); 450 | bytes += heap_size::compute(val.m_key_width); 451 | bytes += heap_size::compute(val.m_hash); 452 | bytes += heap_size::compute( 453 | val.m_storage, val.table_size(), val.storage_widths()); 454 | bytes += heap_size::compute( 455 | val.m_placement, val.table_size()); 456 | 457 | return bytes; 458 | } 459 | }; 460 | 461 | template 462 | struct serialize> { 463 | using T = compact_hash::set::hashset_t; 464 | using storage_t = typename T::storage_t; 465 | 466 | static object_size_t write(std::ostream& out, T const& val) { 467 | using namespace compact_hash::set; 468 | using namespace compact_hash; 469 | 470 | auto bytes = object_size_t::empty(); 471 | 472 | bytes += serialize::write(out, val.m_sizing); 473 | bytes += serialize::write(out, val.m_key_width); 474 | bytes += serialize::write(out, val.m_hash); 475 | bytes += serialize::write( 476 | out, val.m_storage, val.table_size(), val.storage_widths()); 477 | bytes += serialize::write( 478 | out, val.m_placement, val.table_size()); 479 | 480 | return bytes; 481 | } 482 | static T read(std::istream& in) { 483 | using namespace compact_hash::set; 484 | using namespace compact_hash; 485 | 486 | T 
ret; 487 | 488 | auto sizing = serialize::read(in); 489 | auto key_width = serialize::read(in); 490 | auto hash = serialize::read(in); 491 | ret.m_sizing = std::move(sizing); 492 | ret.m_key_width = std::move(key_width); 493 | ret.m_hash = std::move(hash); 494 | 495 | auto storage = serialize::read(in, ret.table_size(), ret.storage_widths()); 496 | auto placement = serialize::read(in, ret.table_size()); 497 | 498 | ret.m_storage = std::move(storage); 499 | ret.m_placement = std::move(placement); 500 | 501 | return ret; 502 | } 503 | static bool equal_check(T const& lhs, T const& rhs) { 504 | if (!(gen_equal_check(table_size()) && gen_equal_check(storage_widths()))) { 505 | return false; 506 | } 507 | 508 | auto table_size = lhs.table_size(); 509 | auto storage_widths = lhs.storage_widths(); 510 | 511 | bool deep_eq = gen_equal_check(m_sizing) 512 | && gen_equal_check(m_key_width) 513 | && gen_equal_check(m_hash) 514 | && gen_equal_check(m_storage, table_size, storage_widths) 515 | && gen_equal_check(m_placement, table_size); 516 | 517 | return deep_eq; 518 | } 519 | }; 520 | 521 | } 522 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/set/no_satellite_data_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "quot_ptr_t.hpp" 5 | #include "quot_bucket_layout_t.hpp" 6 | 7 | namespace tdc {namespace compact_hash {namespace set { 8 | 9 | struct no_satellite_data_t { 10 | using entry_ptr_t = quot_ptr_t; 11 | using entry_bit_width_t = uint8_t; 12 | using bucket_data_layout_t = quot_bucket_layout_t; 13 | using sentinel_value_type = void; 14 | }; 15 | 16 | }}} 17 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/set/quot_bucket_layout_t.hpp: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include "quot_ptr_t.hpp" 12 | 13 | namespace tdc {namespace compact_hash {namespace set { 14 | 15 | struct quot_bucket_layout_t { 16 | /// Calculates the offsets of the two different arrays inside the allocation. 17 | struct Layout { 18 | cbp::cbp_layout_element_t quots_layout; 19 | size_t overall_qword_size; 20 | 21 | inline Layout(): quots_layout(), overall_qword_size(0) { 22 | } 23 | }; 24 | inline static Layout calc_sizes(size_t size, uint8_t quot_width) { 25 | DCHECK_NE(size, 0U); 26 | 27 | auto layout = cbp::bit_layout_t(); 28 | 29 | // The quotients 30 | auto quots = layout.cbp_elements(size, quot_width); 31 | 32 | Layout r; 33 | r.quots_layout = quots; 34 | r.overall_qword_size = layout.get_size_in_uint64_t_units(); 35 | return r; 36 | } 37 | 38 | /// Creates the pointers to the beginnings of the two arrays inside 39 | /// the allocation. 40 | inline static quot_ptr_t ptr(uint64_t* alloc, size_t size, uint8_t quot_width) { 41 | DCHECK_NE(size, 0U); 42 | auto layout = calc_sizes(size, quot_width); 43 | 44 | return layout.quots_layout.ptr_relative_to(alloc); 45 | } 46 | 47 | // Run destructors of each element in the bucket. 48 | inline static void destroy_vals(uint64_t*, size_t, uint8_t) { 49 | // NB: this does not contain values 50 | } 51 | 52 | /// Returns a `val_quot_ptr_t` to position `pos`, 53 | /// or a sentinel value that acts as a one-pass-the-end pointer for the empty case. 
54 | inline static quot_ptr_t at(uint64_t* alloc, size_t size, size_t pos, uint8_t quot_width) { 55 | if(size != 0) { 56 | auto ps = ptr(alloc, size, quot_width); 57 | return quot_ptr_t(ps.quot_ptr() + pos); 58 | } else { 59 | DCHECK_EQ(pos, 0U); 60 | return quot_ptr_t(); 61 | } 62 | } 63 | }; 64 | 65 | }}} 66 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/set/quot_ptr_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace tdc {namespace compact_hash {namespace set { 12 | 13 | /// Represents a pair of pointers to value and quotient inside a bucket. 14 | class quot_ptr_t { 15 | mutable QuotPtr m_quot_ptr; 16 | 17 | public: 18 | struct my_value_type { 19 | uint64_t quot; 20 | }; 21 | 22 | inline quot_ptr_t(QuotPtr quot_ptr): 23 | m_quot_ptr(quot_ptr) 24 | { 25 | } 26 | 27 | inline quot_ptr_t(): 28 | m_quot_ptr() {} 29 | 30 | inline uint64_t get_quotient() const { 31 | return uint64_t(*m_quot_ptr); 32 | } 33 | 34 | inline void set_quotient(uint64_t v) const { 35 | *m_quot_ptr = v; 36 | } 37 | 38 | inline void swap_quotient(uint64_t& other) const { 39 | uint64_t tmp = uint64_t(*m_quot_ptr); 40 | std::swap(other, tmp); 41 | *m_quot_ptr = tmp; 42 | } 43 | 44 | inline QuotPtr quot_ptr() const { 45 | return m_quot_ptr; 46 | } 47 | 48 | inline void increment_ptr() { 49 | m_quot_ptr++; 50 | } 51 | inline void decrement_ptr() { 52 | m_quot_ptr--; 53 | } 54 | 55 | inline friend bool operator==(quot_ptr_t const& lhs, 56 | quot_ptr_t const& rhs) 57 | { 58 | return lhs.m_quot_ptr == rhs.m_quot_ptr; 59 | } 60 | 61 | inline friend bool operator!=(quot_ptr_t const& lhs, 62 | quot_ptr_t const& rhs) 63 | { 64 | return lhs.m_quot_ptr != rhs.m_quot_ptr; 65 | } 66 | 67 | inline void set(uint64_t quot) { 68 | set_quotient(quot); 69 | } 70 | 71 | inline void set_no_drop(uint64_t quot) { 72 | set_quotient(quot); 73 | } 74 | 75 | inline void move_from(quot_ptr_t other) { 76 | set_quotient(other.get_quotient()); 77 | } 78 | 79 | inline void init_from(quot_ptr_t other) { 80 | set_quotient(other.get_quotient()); 81 | } 82 | 83 | inline void swap_with(quot_ptr_t other) { 84 | uint64_t tmp_quot = get_quotient(); 85 | move_from(other); 86 | other.set(tmp_quot); 87 | } 88 | 89 | inline void uninitialize() { 90 | } 91 | 92 | inline bool contents_eq(quot_ptr_t rhs) const { 93 | return get_quotient() == rhs.get_quotient(); 94 | } 95 | 96 | inline my_value_type move_out() const { 97 | return my_value_type { 98 | get_quotient(), 99 | }; 100 | } 101 | 102 | inline void set(my_value_type&& val) { 103 | set_quotient(val.quot); 104 | } 105 | }; 106 | 107 | }}} 108 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/set/typedefs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace tdc {namespace compact_hash {namespace set { 12 | 13 | template 14 | using sparse_cv_hashset_t 15 | = hashset_t; 16 | 17 | template 18 | using sparse_layered_hashset_t 19 | = hashset_t>>; 20 | 21 | template 22 | using sparse_elias_hashset_t 23 | = hashset_t>>; 25 | 26 | }}} 27 | -------------------------------------------------------------------------------- 
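A minimal usage sketch for the set typedefs above (see also examples/hashset.cpp
in this repository). The hash function name poplar_xorshift_t and the exact
template parameter list of sparse_cv_hashset_t are assumptions here, since the
template argument lists were lost in this dump; treat it as an illustration of
the hashset_t interface rather than verbatim library code.

#include <iostream>
#include <sstream>

#include <tudocomp/util/compact_hash/set/typedefs.hpp>
#include <tudocomp/util/serialization.hpp>

int main() {
    using namespace tdc::compact_hash;

    // initial table size 0 (grows on demand), keys up to 16 bits wide
    auto set = set::sparse_cv_hashset_t<poplar_xorshift_t>(0, 16);

    auto a = set.lookup_insert(4711);              // first time: newly inserted
    auto b = set.lookup_insert(4711);              // second time: already present
    std::cout << a.key_already_exist() << "\n";    // 0
    std::cout << b.key_already_exist() << "\n";    // 1
    std::cout << set.count(4711) << "\n";          // 1

    // round-trip through the serialization facility
    std::stringstream ss;
    tdc::serialize<decltype(set)>::write(ss, set);
    auto restored = tdc::serialize<decltype(set)>::read(ss);
    std::cout << restored.count(4711) << "\n";     // 1
}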
/include/tudocomp/util/compact_hash/size_manager_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "decomposed_key_t.hpp" 6 | 7 | #include 8 | #include 9 | 10 | namespace tdc {namespace compact_hash { 11 | 12 | /// This manages the size of the hashtable, and related calculations. 13 | class size_manager_t { 14 | /* 15 | * TODO: This is currently hardcoded to work with power-of-two table sizes. 16 | * Generalize it to allows arbitrary growth functions. 17 | */ 18 | 19 | uint8_t m_capacity_log2; 20 | size_t m_size; 21 | float m_load_factor = 0.5; 22 | 23 | template 24 | friend struct ::tdc::serialize; 25 | 26 | template 27 | friend struct ::tdc::heap_size; 28 | 29 | /// Adjust the user-specified size of the table as needed 30 | /// by the current implementation. 31 | /// 32 | /// In this case, the grow function multiplies the capacity by two, 33 | /// so we need to start at a value != 0. 34 | inline static size_t adjust_size(size_t size) { 35 | return (size < 2) ? 2 : size; 36 | } 37 | 38 | size_manager_t() = default; 39 | 40 | public: 41 | /// runtime initilization arguments, if any 42 | struct config_args { 43 | config_args() {} 44 | config_args(float load_factor): load_factor(load_factor) {} 45 | 46 | float load_factor = 0.5; 47 | }; 48 | 49 | /// get the config of this instance 50 | inline config_args current_config() const { 51 | return config_args { 52 | m_load_factor, 53 | }; 54 | } 55 | 56 | /// Create the size manager with an initial table size `capacity` 57 | inline size_manager_t(size_t capacity, config_args config = config_args{}) { 58 | capacity = adjust_size(capacity); 59 | 60 | m_size = 0; 61 | m_load_factor = config.load_factor; 62 | CHECK(is_pot(capacity)); 63 | m_capacity_log2 = log2_upper(capacity); 64 | } 65 | 66 | /// Returns the amount of elements currently stored in the hashtable. 67 | inline size_t size() const { 68 | return m_size; 69 | } 70 | 71 | /// Update the amount of elements currently stored in the hashtable 72 | inline void set_size(size_t new_size) { 73 | DCHECK_LT(new_size, capacity()); 74 | m_size = new_size; 75 | } 76 | 77 | /// The amount of bits used by the current table size. 78 | // TODO: Remove/make private 79 | inline uint8_t capacity_log2() const { 80 | return m_capacity_log2; 81 | } 82 | 83 | /// The current table size. 84 | inline size_t capacity() const { 85 | return 1ull << m_capacity_log2; 86 | } 87 | 88 | /// Check if the capacity needs to grow for the size given as the 89 | /// argument. 90 | inline bool needs_to_grow_capacity(size_t capacity, size_t new_size) const { 91 | // Capacity, at which a re-allocation is needed 92 | size_t trigger_capacity = size_t(float(capacity) * m_load_factor); 93 | 94 | // Make sure we have always a minimum of 1 free space in the table. 95 | trigger_capacity = std::min(capacity - 1, trigger_capacity); 96 | 97 | bool ret = trigger_capacity < new_size; 98 | return ret; 99 | } 100 | 101 | /// Returns the new capacity after growth. 102 | inline size_t grown_capacity(size_t capacity) const { 103 | DCHECK_GE(capacity, 1U); 104 | return capacity * 2; 105 | } 106 | 107 | /// Decompose the hash value such that `initial_address` 108 | /// covers the entire table, and `quotient` contains 109 | /// the remaining bits. 
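    ///
    /// Worked example: with capacity 16 (capacity_log2() == 4) a hashed value
    /// hres = 0b10110110 (182) decomposes into
    ///     initial_address = hres & 0b1111 == 0b0110 (6)
    ///     quotient        = hres >> 4     == 0b1011 (11)
    /// and compose_hashed_value(6, 11) == (11 << 4) | 6 reproduces 182.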
110 | inline decomposed_key_t decompose_hashed_value(uint64_t hres) { 111 | uint64_t shift = capacity_log2(); 112 | 113 | return decomposed_key_t { 114 | hres & ((1ull << shift) - 1ull), 115 | hres >> shift, 116 | }; 117 | } 118 | 119 | /// Composes a hash value from an `initial_address` and `quotient`. 120 | inline uint64_t compose_hashed_value(uint64_t initial_address, uint64_t quotient) { 121 | uint64_t shift = capacity_log2(); 122 | uint64_t harg = (quotient << shift) | initial_address; 123 | return harg; 124 | } 125 | 126 | /// Adds the `add` value to `v`, and wraps it around the current capacity. 127 | template 128 | inline int_t mod_add(int_t v, int_t add = 1) const { 129 | size_t mask = capacity() - 1; 130 | return (v + add) & mask; 131 | } 132 | 133 | /// Subtracts the `sub` value to `v`, and wraps it around the current capacity. 134 | template 135 | inline int_t mod_sub(int_t v, int_t sub = 1) const { 136 | size_t mask = capacity() - 1; 137 | return (v - sub) & mask; 138 | } 139 | 140 | /// Sets the maximum load factor 141 | /// (how full the table can get before re-allocating). 142 | /// 143 | /// Expects a value `0.0 < z < 1.0`. 144 | inline void max_load_factor(float z) { 145 | DCHECK_GT(z, 0.0); 146 | DCHECK_LE(z, 1.0); 147 | m_load_factor = z; 148 | } 149 | 150 | /// Returns the maximum load factor. 151 | inline float max_load_factor() const noexcept { 152 | return m_load_factor; 153 | } 154 | }; 155 | 156 | } 157 | 158 | template<> 159 | struct heap_size { 160 | using T = compact_hash::size_manager_t; 161 | 162 | static object_size_t compute(T const& val) { 163 | using namespace compact_hash; 164 | 165 | auto bytes = object_size_t::empty(); 166 | 167 | bytes += heap_size::compute(val.m_capacity_log2); 168 | bytes += heap_size::compute(val.m_size); 169 | bytes += heap_size::compute(val.m_load_factor); 170 | 171 | return bytes; 172 | } 173 | }; 174 | 175 | template<> 176 | struct serialize { 177 | using T = compact_hash::size_manager_t; 178 | 179 | static object_size_t write(std::ostream& out, T const& val) { 180 | using namespace compact_hash; 181 | 182 | auto bytes = object_size_t::empty(); 183 | 184 | bytes += serialize::write(out, val.m_capacity_log2); 185 | bytes += serialize::write(out, val.m_size); 186 | bytes += serialize::write(out, val.m_load_factor); 187 | 188 | return bytes; 189 | } 190 | static T read(std::istream& in) { 191 | using namespace compact_hash; 192 | 193 | T ret; 194 | ret.m_capacity_log2 = serialize::read(in); 195 | ret.m_size = serialize::read(in); 196 | ret.m_load_factor = serialize::read(in); 197 | return ret; 198 | } 199 | static bool equal_check(T const& lhs, T const& rhs) { 200 | return gen_equal_check(m_capacity_log2) 201 | && gen_equal_check(m_size) 202 | && gen_equal_check(m_load_factor); 203 | } 204 | }; 205 | 206 | } 207 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/storage/bucket_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace tdc {namespace compact_hash { 13 | using namespace compact_hash; 14 | 15 | /// A bucket of quotient-value pairs in a sparse compact hashtable. 16 | /// 17 | /// It consists of a pointer to a single heap allocation, that contains: 18 | /// - A 64-bit bitvector of currently stored elements. 19 | /// - A dynamic-width array of quotients. 
20 | /// - A potentially dynamic-width array of satellite values. 21 | /// 22 | /// An empty bucket does not allocate any memory. 23 | /// 24 | /// WARNING: 25 | /// To prevent the overhead of unnecessary default-constructions, 26 | /// the bucket does not initialize or destroy the value and quotient parts 27 | /// of the allocation in its constructor/destructor. 28 | /// Instead, it relies on the surrounding container to initialize and destroy 29 | /// the values correctly. 30 | // TODO: Investigate changing this semantic to automatic initialization 31 | // and destruction. 32 | template 33 | class bucket_t { 34 | std::unique_ptr m_data; 35 | 36 | template 37 | friend struct ::tdc::serialize; 38 | 39 | template 40 | friend struct ::tdc::heap_size; 41 | 42 | using entry_ptr_t = typename satellite_t::entry_ptr_t; 43 | using entry_bit_width_t = typename satellite_t::entry_bit_width_t; 44 | public: 45 | /// Maps hashtable position to position of the corresponding bucket, 46 | /// and the position inside of it. 47 | struct bucket_layout_t: satellite_t::bucket_data_layout_t { 48 | static constexpr size_t BVS_WIDTH_SHIFT = 6; 49 | static constexpr size_t BVS_WIDTH_MASK = 0b111111; 50 | 51 | static inline size_t table_pos_to_idx_of_bucket(size_t pos) { 52 | return pos >> BVS_WIDTH_SHIFT; 53 | } 54 | 55 | static inline size_t table_pos_to_idx_inside_bucket(size_t pos) { 56 | return pos & BVS_WIDTH_MASK; 57 | } 58 | 59 | static inline size_t table_size_to_bucket_size(size_t size) { 60 | return (size + BVS_WIDTH_MASK) >> BVS_WIDTH_SHIFT; 61 | } 62 | }; 63 | 64 | inline bucket_t(): m_data() {} 65 | 66 | /// Construct a bucket, reserving space according to the bitvector 67 | /// `bv` and `quot_width`. 68 | inline bucket_t(uint64_t bv, entry_bit_width_t width) { 69 | if (bv != 0) { 70 | auto qvd_size = qvd_data_size(size(bv), width); 71 | 72 | m_data = std::make_unique(qvd_size + 1); 73 | m_data[0] = bv; 74 | 75 | // NB: We call this for its alignment asserts 76 | ptr(width); 77 | } else { 78 | m_data.reset(); 79 | } 80 | } 81 | 82 | inline bucket_t(bucket_t&& other) = default; 83 | inline bucket_t& operator=(bucket_t&& other) = default; 84 | 85 | /// Returns the bitvector of contained elements. 86 | inline uint64_t bv() const { 87 | if (!is_empty()) { 88 | return m_data[0]; 89 | } else { 90 | return 0; 91 | } 92 | } 93 | 94 | /// Returns the amount of elements in the bucket. 95 | inline size_t size() const { 96 | return size(bv()); 97 | } 98 | 99 | // Run destructors of each element in the bucket. 100 | inline void destroy_vals(entry_bit_width_t widths) { 101 | if (is_allocated()) { 102 | bucket_layout_t::destroy_vals(get_qv(), size(), widths); 103 | } 104 | } 105 | 106 | /// Returns a `entry_ptr_t` to position `pos`, 107 | /// or a sentinel value that acts as a one-pass-the-end pointer. 
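/// Usage sketch (hedged; `b` and `width` are placeholder names for a non-empty
/// bucket and the current entry bit widths): positions `0 .. b.size()-1`
/// address the stored entries, and `b.at(b.size(), width)` acts as the end
/// sentinel, so a linear scan can be written as
///
///     auto it  = b.at(0, width);
///     auto end = b.at(b.size(), width);
///     while (it != end) {
///         // ... read or initialize the entry through `it` ...
///         it.increment_ptr();
///     }
///
/// This mirrors the copy loops used by `insert_at()` below.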
108 | inline entry_ptr_t at(size_t pos, entry_bit_width_t width) const { 109 | return bucket_layout_t::at(get_qv(), size(), pos, width); 110 | } 111 | 112 | inline bool is_allocated() const { 113 | return bool(m_data); 114 | } 115 | 116 | inline bool is_empty() const { 117 | return !bool(m_data); 118 | } 119 | 120 | inline size_t stat_allocation_size_in_bytes(entry_bit_width_t width) const { 121 | if (!is_empty()) { 122 | return (qvd_data_size(size(), width) + 1) * sizeof(uint64_t); 123 | } else { 124 | return 0; 125 | } 126 | } 127 | 128 | /// Insert a new element into the bucket, growing it as needed 129 | inline entry_ptr_t insert_at( 130 | size_t new_elem_bucket_pos, 131 | uint64_t new_elem_bv_bit, 132 | entry_bit_width_t width) 133 | { 134 | // Just a sanity check that can not live inside or outside `bucket_t` itself. 135 | static_assert(sizeof(bucket_t) == sizeof(void*), "unique_ptr is more than 1 ptr large!"); 136 | 137 | // TODO: check out different sizing strategies 138 | // eg, the known sparse_hash repo uses overallocation for small buckets 139 | 140 | // create a new bucket with enough size for the new element 141 | // NB: The elements in it are uninitialized 142 | auto new_bucket = bucket_t(bv() | new_elem_bv_bit, width); 143 | 144 | auto new_iter = new_bucket.at(0, width); 145 | auto old_iter = at(0, width); 146 | 147 | auto const new_iter_midpoint = new_bucket.at(new_elem_bucket_pos, width); 148 | auto const new_iter_end = new_bucket.at(new_bucket.size(), width); 149 | 150 | entry_ptr_t ret; 151 | 152 | // move all elements before the new element's location from old bucket into new bucket 153 | while(new_iter != new_iter_midpoint) { 154 | new_iter.init_from(old_iter); 155 | new_iter.increment_ptr(); 156 | old_iter.increment_ptr(); 157 | } 158 | 159 | // move new element into its location in the new bucket 160 | { 161 | ret = new_iter; 162 | new_iter.increment_ptr(); 163 | } 164 | 165 | // move all elements after the new element's location from old bucket into new bucket 166 | while(new_iter != new_iter_end) { 167 | new_iter.init_from(old_iter); 168 | new_iter.increment_ptr(); 169 | old_iter.increment_ptr(); 170 | } 171 | 172 | // destroy old empty elements, and overwrite with new bucket 173 | destroy_vals(width); 174 | *this = std::move(new_bucket); 175 | 176 | return ret; 177 | } 178 | private: 179 | inline static size_t size(uint64_t bv) { 180 | return popcount(bv); 181 | } 182 | 183 | inline uint64_t* get_qv() const { 184 | return static_cast(m_data.get()) + 1; 185 | } 186 | 187 | inline static size_t qvd_data_size(size_t size, entry_bit_width_t width) { 188 | return bucket_layout_t::calc_sizes(size, width).overall_qword_size; 189 | } 190 | 191 | /// Creates the pointers to the beginnings of the two arrays inside 192 | /// the allocation. 
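/// Rough picture of the single allocation behind `m_data` (a sketch; the exact
/// bit packing is delegated to `bucket_layout_t::calc_sizes`):
///
///     m_data[0]       64-bit occupancy bitvector, returned by `bv()`
///     m_data[1 ..]    bit-packed quotients followed by the (possibly
///                     dynamic-width) satellite values, occupying
///                     `qvd_data_size(size, width)` qwords in total
///
/// `get_qv()` therefore returns `m_data.get() + 1`, and `ptr(width)` carves
/// that region into the two typed pointers.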
193 | inline entry_ptr_t ptr(entry_bit_width_t width) const { 194 | return bucket_layout_t::ptr(get_qv(), size(), width); 195 | } 196 | }; 197 | 198 | } 199 | 200 | template 201 | struct heap_size> { 202 | using T = compact_hash::bucket_t; 203 | using entry_bit_width_t = typename T::entry_bit_width_t; 204 | 205 | static object_size_t compute(T const& val, entry_bit_width_t const& widths) { 206 | using namespace compact_hash; 207 | 208 | auto bytes = object_size_t::empty(); 209 | 210 | size_t size = val.size(); 211 | 212 | if (size > 0) { 213 | size_t raw_size = T::qvd_data_size(size, widths) + 1; 214 | bytes += heap_size>::compute(val.m_data, raw_size); 215 | } 216 | 217 | return bytes; 218 | } 219 | }; 220 | 221 | template 222 | struct serialize> { 223 | using T = compact_hash::bucket_t; 224 | using entry_bit_width_t = typename T::entry_bit_width_t; 225 | 226 | static object_size_t write(std::ostream& out, T const& val, entry_bit_width_t const& widths) { 227 | using namespace compact_hash; 228 | 229 | auto bytes = object_size_t::empty(); 230 | 231 | bytes += serialize::write(out, val.bv()); 232 | size_t size = val.size(); 233 | 234 | if (size > 0) { 235 | size_t raw_size = T::qvd_data_size(size, widths) + 1; 236 | for (size_t i = 1; i < raw_size; i++) { 237 | bytes += serialize::write(out, val.m_data[i]); 238 | } 239 | } 240 | 241 | return bytes; 242 | } 243 | static T read(std::istream& in, entry_bit_width_t const& widths) { 244 | using namespace compact_hash; 245 | 246 | T ret; 247 | 248 | uint64_t bv = serialize::read(in); 249 | size_t size = T::size(bv); 250 | 251 | if (size > 0) { 252 | size_t raw_size = T::qvd_data_size(size, widths) + 1; 253 | ret.m_data = std::make_unique(raw_size); 254 | ret.m_data[0] = bv; 255 | for (size_t i = 1; i < raw_size; i++) { 256 | ret.m_data[i] = serialize::read(in); 257 | } 258 | } 259 | 260 | return ret; 261 | } 262 | }; 263 | 264 | } 265 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/storage/buckets_bv_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | // Table for uninitalized elements 12 | 13 | namespace tdc {namespace compact_hash { 14 | template 15 | struct buckets_bv_t { 16 | using satellite_t_export = satellite_t; 17 | using entry_ptr_t = typename satellite_t::entry_ptr_t; 18 | using entry_bit_width_t = typename satellite_t::entry_bit_width_t; 19 | 20 | using my_bucket_t = bucket_t<8, satellite_t>; 21 | using bucket_layout_t = typename my_bucket_t::bucket_layout_t; 22 | using buckets_t = std::unique_ptr; 23 | using qvd_t = typename satellite_t::bucket_data_layout_t; 24 | 25 | buckets_t m_buckets; 26 | 27 | template 28 | friend struct ::tdc::serialize; 29 | 30 | /// runtime initilization arguments, if any 31 | struct config_args {}; 32 | 33 | /// get the config of this instance 34 | inline config_args current_config() const { return config_args{}; } 35 | 36 | inline buckets_bv_t() {} 37 | inline buckets_bv_t(size_t table_size, 38 | entry_bit_width_t widths, 39 | config_args config) { 40 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 41 | 42 | m_buckets = std::make_unique(buckets_size); 43 | } 44 | using table_pos_t = sparse_pos_t; 45 | 46 | // pseudo-iterator for iterating over bucket elements 47 | // NB: does not wrap around! 
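// Behavioural sketch (hedged; `ctx`, `a` and `b` are hypothetical names):
// `get()` exposes the current end position as an `entry_ptr_t`, and
// `decrement()` steps it one element backwards. When the start of the current
// bucket is reached, `decrement()` skips to the previous bucket whose
// occupancy bitvector is non-zero and continues from its last element:
//
//     auto it  = ctx.make_iter(ctx.table_pos(b));   // end of a probed range
//     auto fin = ctx.make_iter(ctx.table_pos(a));   // start of that range
//     while (it != fin) {
//         it.decrement();
//         entry_ptr_t e = it.get();                 // element just reached
//     }
//
// As noted above, the iterator does not wrap around the end of the table.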
48 | struct iter_t { 49 | my_bucket_t const* m_bucket; 50 | entry_ptr_t m_b_start; 51 | entry_ptr_t m_b_end; 52 | entry_bit_width_t m_widths; 53 | 54 | inline void set_bucket_elem_range(size_t end_offset) { 55 | size_t start_offset = 0; 56 | DCHECK_LE(start_offset, end_offset); 57 | 58 | m_b_start = m_bucket->at(start_offset, m_widths); 59 | m_b_end = m_bucket->at(end_offset, m_widths); 60 | } 61 | 62 | inline iter_t(my_bucket_t const* buckets, 63 | size_t buckets_size, 64 | table_pos_t const& pos, 65 | entry_bit_width_t const& widths): 66 | m_widths(widths) 67 | { 68 | // NB: Using pointer arithmetic here, because 69 | // we can (intentionally) end up with the address 1-past 70 | // the end of the vector, which represents an end-iterator. 71 | m_bucket = buckets + pos.idx_of_bucket; 72 | 73 | if(pos.idx_of_bucket < buckets_size) { 74 | set_bucket_elem_range(pos.offset_in_bucket()); 75 | } else { 76 | // use default constructed nullptr entry_ptr_t 77 | } 78 | } 79 | 80 | inline entry_ptr_t get() { 81 | return m_b_end; 82 | } 83 | 84 | inline void decrement() { 85 | if (m_b_start != m_b_end) { 86 | m_b_end.decrement_ptr(); 87 | } else { 88 | do { 89 | --m_bucket; 90 | } while(m_bucket->bv() == 0); 91 | set_bucket_elem_range(m_bucket->size() - 1); 92 | } 93 | } 94 | 95 | inline bool operator!=(iter_t& other) { 96 | return m_b_end != other.m_b_end; 97 | } 98 | }; 99 | 100 | template 101 | struct context_t { 102 | buckets_t& m_buckets; 103 | size_t const table_size; 104 | entry_bit_width_t widths; 105 | 106 | /// Run the destructors of the elements of the `i`-th bucket, 107 | /// and drop it from the hashtable, replacing it with an empty one. 108 | inline void drop_bucket(size_t i) { 109 | DCHECK_LT(i, bucket_layout_t::table_size_to_bucket_size(table_size)); 110 | m_buckets[i].destroy_vals(widths); 111 | m_buckets[i] = my_bucket_t(); 112 | } 113 | 114 | inline void destroy_vals() { 115 | if(m_buckets == nullptr) return; // stop when this is an instance after std::move 116 | 117 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 118 | 119 | for(size_t i = 0; i < buckets_size; i++) { 120 | m_buckets[i].destroy_vals(widths); 121 | } 122 | } 123 | inline table_pos_t table_pos(size_t pos) { 124 | return table_pos_t { pos, m_buckets.get() }; 125 | } 126 | inline entry_ptr_t allocate_pos(table_pos_t pos) { 127 | DCHECK(!pos.exists_in_bucket()); 128 | 129 | auto& bucket = pos.bucket(); 130 | auto offset_in_bucket = pos.offset_in_bucket(); 131 | uint64_t new_bucket_bv = bucket.bv() | pos.bit_mask_in_bucket; 132 | 133 | return bucket.insert_at(offset_in_bucket, new_bucket_bv, widths); 134 | } 135 | inline entry_ptr_t at(table_pos_t pos) { 136 | DCHECK(pos.exists_in_bucket()); 137 | 138 | auto& bucket = pos.bucket(); 139 | auto offset_in_bucket = pos.offset_in_bucket(); 140 | 141 | return bucket.at(offset_in_bucket, widths); 142 | } 143 | inline bool pos_is_empty(table_pos_t pos) { 144 | return !pos.exists_in_bucket(); 145 | } 146 | inline iter_t make_iter(table_pos_t const& pos) { 147 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 148 | return iter_t(m_buckets.get(), buckets_size, pos, widths); 149 | } 150 | inline void trim_storage(table_pos_t* last_start, table_pos_t const& end) { 151 | // Check if end lies on a bucket boundary, then drop all buckets before it 152 | 153 | if (end.offset_in_bucket() == 0) { 154 | 155 | // ignore buckets if we start in the middle of one 156 | if ((*last_start).offset_in_bucket() != 0) { 157 | // TODO: Just 
iterate forward to the first valid one 158 | *last_start = end; 159 | } 160 | 161 | auto bstart = (*last_start).idx_of_bucket; 162 | auto bend = end.idx_of_bucket; 163 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 164 | 165 | for (size_t i = bstart; i != bend; i = (i + 1) % buckets_size) { 166 | drop_bucket(i); 167 | } 168 | 169 | *last_start = end; 170 | } 171 | } 172 | }; 173 | inline auto context(size_t table_size, entry_bit_width_t const& widths) { 174 | // DCHECK(m_buckets); // this needs to be commented out for swapping two CHTs with std::move. 175 | return context_t { 176 | m_buckets, table_size, widths 177 | }; 178 | } 179 | inline auto context(size_t table_size, entry_bit_width_t const& widths) const { 180 | DCHECK(m_buckets); 181 | return context_t { 182 | m_buckets, table_size, widths 183 | }; 184 | } 185 | }; 186 | } 187 | 188 | template 189 | struct heap_size> { 190 | using T = compact_hash::buckets_bv_t; 191 | using bucket_t = typename T::my_bucket_t; 192 | using entry_bit_width_t = typename T::entry_bit_width_t; 193 | using bucket_layout_t = typename T::bucket_layout_t; 194 | 195 | static object_size_t compute(T const& val, size_t table_size, entry_bit_width_t const& widths) { 196 | using namespace compact_hash; 197 | 198 | auto bytes = object_size_t::empty(); 199 | bytes += object_size_t::exact(sizeof(decltype(val.m_buckets))); 200 | 201 | auto ctx = val.context(table_size, widths); 202 | 203 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 204 | for(size_t i = 0; i < buckets_size; i++) { 205 | auto& bucket = ctx.m_buckets[i]; 206 | bytes += heap_size::compute(bucket, widths); 207 | } 208 | 209 | return bytes; 210 | } 211 | }; 212 | 213 | template 214 | struct serialize> { 215 | using T = compact_hash::buckets_bv_t; 216 | using bucket_t = typename T::my_bucket_t; 217 | using entry_bit_width_t = typename T::entry_bit_width_t; 218 | using bucket_layout_t = typename T::bucket_layout_t; 219 | 220 | static object_size_t write(std::ostream& out, T const& val, size_t table_size, entry_bit_width_t const& widths) { 221 | using namespace compact_hash; 222 | 223 | auto bytes = object_size_t::empty(); 224 | 225 | auto ctx = val.context(table_size, widths); 226 | 227 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 228 | for(size_t i = 0; i < buckets_size; i++) { 229 | auto& bucket = ctx.m_buckets[i]; 230 | bytes += serialize::write(out, bucket, widths); 231 | } 232 | 233 | return bytes; 234 | } 235 | static T read(std::istream& in, size_t table_size, entry_bit_width_t const& widths) { 236 | using namespace compact_hash; 237 | 238 | T val { table_size, widths, {} }; 239 | 240 | auto ctx = val.context(table_size, widths); 241 | 242 | size_t buckets_size = bucket_layout_t::table_size_to_bucket_size(table_size); 243 | for(size_t i = 0; i < buckets_size; i++) { 244 | auto& bucket = ctx.m_buckets[i]; 245 | bucket = serialize::read(in, widths); 246 | } 247 | 248 | return val; 249 | } 250 | static bool equal_check(T const& lhs, T const& rhs, size_t table_size, entry_bit_width_t const& widths) { 251 | auto lhsc = lhs.context(table_size, widths); 252 | auto rhsc = rhs.context(table_size, widths); 253 | 254 | for (size_t i = 0; i < table_size; i++) { 255 | auto lhspos = lhsc.table_pos(i); 256 | auto rhspos = rhsc.table_pos(i); 257 | if (!gen_equal_diagnostic(lhsc.pos_is_empty(lhspos) == rhsc.pos_is_empty(rhspos))) { 258 | return false; 259 | } 260 | if (!lhsc.pos_is_empty(lhspos)) { 261 | auto lhsptrs 
= lhsc.at(lhspos); 262 | auto rhsptrs = rhsc.at(rhspos); 263 | 264 | if (!gen_equal_diagnostic(lhsptrs.contents_eq(rhsptrs))) { 265 | return false; 266 | } 267 | } 268 | } 269 | 270 | return true; 271 | } 272 | }; 273 | 274 | } 275 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/storage/plain_sentinel_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // Table for uninitalized elements 11 | 12 | namespace tdc {namespace compact_hash { 13 | template 14 | struct plain_sentinel_t { 15 | using satellite_t_export = satellite_t; 16 | using entry_ptr_t = typename satellite_t::entry_ptr_t; 17 | using entry_bit_width_t = typename satellite_t::entry_bit_width_t; 18 | using qvd_t = typename satellite_t::bucket_data_layout_t; 19 | using value_type = typename satellite_t::sentinel_value_type; 20 | 21 | template 22 | friend struct ::tdc::serialize; 23 | 24 | std::unique_ptr m_alloc; 25 | value_type m_empty_value; 26 | 27 | /// runtime initilization arguments, if any 28 | struct config_args { 29 | value_type empty_value = value_type(); 30 | }; 31 | 32 | /// get the config of this instance 33 | inline config_args current_config() const { 34 | return config_args{ 35 | m_empty_value, 36 | }; 37 | } 38 | 39 | inline plain_sentinel_t() {} 40 | inline plain_sentinel_t(size_t table_size, 41 | entry_bit_width_t widths, 42 | config_args config): 43 | m_empty_value(config.empty_value) 44 | { 45 | size_t alloc_size = qvd_t::calc_sizes(table_size, widths).overall_qword_size; 46 | m_alloc = std::make_unique(alloc_size); 47 | 48 | auto ctx = context(table_size, widths); 49 | 50 | for(size_t i = 0; i < table_size; i++) { 51 | // NB: Using at because allocate_pos() 52 | // destroys the location first. 53 | auto elem = ctx.at(ctx.table_pos(i)); 54 | elem.set_no_drop(value_type(m_empty_value), 0); 55 | } 56 | } 57 | struct table_pos_t { 58 | size_t offset; 59 | inline table_pos_t(): offset(-1) {} 60 | inline table_pos_t(size_t o): offset(o) {} 61 | inline table_pos_t& operator=(table_pos_t const& other) = default; 62 | inline table_pos_t(table_pos_t const& other) = default; 63 | }; 64 | // pseudo-iterator for iterating over bucket elements 65 | // NB: does not wrap around! 66 | struct iter_t { 67 | entry_ptr_t m_end; 68 | value_type const& m_empty_value; 69 | 70 | inline iter_t(entry_ptr_t endpos, 71 | value_type const& empty_value): 72 | m_end(endpos), 73 | m_empty_value(empty_value) 74 | { 75 | } 76 | 77 | inline entry_ptr_t get() { 78 | return m_end; 79 | } 80 | 81 | inline void decrement() { 82 | do { 83 | m_end.decrement_ptr(); 84 | } while(*m_end.val_ptr() == m_empty_value); 85 | } 86 | 87 | inline bool operator!=(iter_t& other) { 88 | return m_end != other.m_end; 89 | } 90 | }; 91 | 92 | template 93 | struct context_t { 94 | alloc_type& m_alloc; 95 | value_type const& m_empty_value; 96 | size_t const table_size; 97 | entry_bit_width_t widths; 98 | 99 | inline void destroy_vals() { 100 | qvd_t::destroy_vals(m_alloc.get(), table_size, widths); 101 | } 102 | 103 | inline table_pos_t table_pos(size_t pos) { 104 | return table_pos_t { pos }; 105 | } 106 | inline entry_ptr_t allocate_pos(table_pos_t pos) { 107 | DCHECK_LT(pos.offset, table_size); 108 | auto tmp = at(pos); 109 | 110 | // NB: allocate_pos returns a unitialized location, 111 | // but all locations are per default initialized with a empty_value. 
112 | // Therefore we destroy the existing value first. 113 | tmp.uninitialize(); 114 | 115 | return tmp; 116 | } 117 | inline entry_ptr_t at(table_pos_t pos) { 118 | DCHECK_LT(pos.offset, table_size); 119 | return qvd_t::at(m_alloc.get(), table_size, pos.offset, widths); 120 | } 121 | inline bool pos_is_empty(table_pos_t pos) { 122 | DCHECK_LT(pos.offset, table_size); 123 | return *at(pos).val_ptr() == m_empty_value; 124 | } 125 | inline iter_t make_iter(table_pos_t const& pos) { 126 | // NB: One-pass-the-end is acceptable for a end iterator 127 | DCHECK_LE(pos.offset, table_size); 128 | return iter_t { 129 | qvd_t::at(m_alloc.get(), table_size, pos.offset, widths), 130 | m_empty_value, 131 | }; 132 | } 133 | inline void trim_storage(table_pos_t* last_start, table_pos_t const& end) { 134 | // Nothing to be done 135 | } 136 | }; 137 | inline auto context(size_t table_size, entry_bit_width_t const& widths) { 138 | return context_t> { 139 | m_alloc, m_empty_value, table_size, widths, 140 | }; 141 | } 142 | inline auto context(size_t table_size, entry_bit_width_t const& widths) const { 143 | return context_t const> { 144 | m_alloc, m_empty_value, table_size, widths, 145 | }; 146 | } 147 | }; 148 | } 149 | 150 | template 151 | struct heap_size> { 152 | using T = compact_hash::plain_sentinel_t; 153 | using entry_bit_width_t = typename T::entry_bit_width_t; 154 | using value_type = typename T::value_type; 155 | using qvd_t = typename T::qvd_t; 156 | 157 | static object_size_t compute(T const& val, size_t table_size, entry_bit_width_t const& widths) { 158 | using namespace compact_hash; 159 | 160 | auto bytes = object_size_t::empty(); 161 | 162 | auto alloc_size = qvd_t::calc_sizes(table_size, widths).overall_qword_size; 163 | 164 | bytes += heap_size::compute(val.m_empty_value); 165 | bytes += heap_size>::compute(val.m_alloc, alloc_size); 166 | 167 | return bytes; 168 | } 169 | }; 170 | 171 | template 172 | struct serialize> { 173 | using T = compact_hash::plain_sentinel_t; 174 | using entry_bit_width_t = typename T::entry_bit_width_t; 175 | using value_type = typename T::value_type; 176 | using qvd_t = typename T::qvd_t; 177 | 178 | static object_size_t write(std::ostream& out, T const& val, size_t table_size, entry_bit_width_t const& widths) { 179 | using namespace compact_hash; 180 | 181 | auto bytes = object_size_t::empty(); 182 | 183 | auto alloc_size = qvd_t::calc_sizes(table_size, widths).overall_qword_size; 184 | 185 | bytes += serialize::write(out, val.m_empty_value); 186 | for (size_t i = 0; i < alloc_size; i++) { 187 | bytes += serialize::write(out, val.m_alloc[i]); 188 | } 189 | 190 | return bytes; 191 | } 192 | static T read(std::istream& in, size_t table_size, entry_bit_width_t const& widths) { 193 | using namespace compact_hash; 194 | 195 | auto alloc_size = qvd_t::calc_sizes(table_size, widths).overall_qword_size; 196 | 197 | T ret; 198 | ret.m_empty_value = serialize::read(in); 199 | ret.m_alloc = std::make_unique(alloc_size); 200 | 201 | for (size_t i = 0; i < alloc_size; i++) { 202 | ret.m_alloc[i] = serialize::read(in); 203 | } 204 | 205 | return ret; 206 | } 207 | static bool equal_check(T const& lhs, T const& rhs, size_t table_size, entry_bit_width_t const& widths) { 208 | auto lhsc = lhs.context(table_size, widths); 209 | auto rhsc = rhs.context(table_size, widths); 210 | 211 | for (size_t i = 0; i < table_size; i++) { 212 | auto lhspos = lhsc.table_pos(i); 213 | auto rhspos = rhsc.table_pos(i); 214 | if (!gen_equal_diagnostic(lhsc.pos_is_empty(lhspos) == 
rhsc.pos_is_empty(rhspos))) { 215 | return false; 216 | } 217 | if (!lhsc.pos_is_empty(lhspos)) { 218 | auto lhsptrs = lhsc.at(lhspos); 219 | auto rhsptrs = rhsc.at(rhspos); 220 | 221 | if (!gen_equal_diagnostic(lhsptrs.get_quotient() == rhsptrs.get_quotient())) { 222 | return false; 223 | } 224 | if (!gen_equal_diagnostic(*lhsptrs.val_ptr() == *rhsptrs.val_ptr())) { 225 | return false; 226 | } 227 | } 228 | } 229 | 230 | return true; 231 | } 232 | }; 233 | 234 | } 235 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/storage/sparse_pos_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace tdc {namespace compact_hash { 11 | 12 | /// This type represents a position inside the compact sparse hashtable. 13 | /// 14 | /// It is valid to have a sparse_pos_t one-past-the-end of the underlying 15 | /// bucket vector, to act as an end-iterator. 16 | template 17 | class sparse_pos_t { 18 | private: 19 | bucket_t* m_buckets; 20 | 21 | public: 22 | /// Index of bucket inside the hashtable 23 | size_t idx_of_bucket; 24 | 25 | /// Bit mask of the element inside the bucket 26 | uint64_t bit_mask_in_bucket; 27 | 28 | inline sparse_pos_t(size_t pos, bucket_t* buckets): 29 | m_buckets(buckets), 30 | idx_of_bucket(bucket_layout_t::table_pos_to_idx_of_bucket(pos)), 31 | bit_mask_in_bucket(1ull << bucket_layout_t::table_pos_to_idx_inside_bucket(pos)) 32 | {} 33 | 34 | inline sparse_pos_t(): m_buckets(nullptr) {} 35 | inline sparse_pos_t(sparse_pos_t const& other) = default; 36 | inline sparse_pos_t& operator=(sparse_pos_t const& other) = default; 37 | 38 | /// Accesses the bucket at this sparse position. 39 | inline bucket_t& bucket() const { 40 | //DCHECK_LT(idx_of_bucket, m_buckets->size()); 41 | return m_buckets[idx_of_bucket]; 42 | } 43 | 44 | /// Check if the sparse position exists in the corresponding bucket. 45 | inline bool exists_in_bucket() const { 46 | // bitvector of the bucket 47 | uint64_t bv = bucket().bv(); 48 | 49 | return (bv & bit_mask_in_bucket) != 0; 50 | } 51 | 52 | /// Get the idx of the element inside the corresponding bucket. 53 | /// 54 | /// It is legal to call this method even if the element at 55 | /// the sparse position does not exists, to calculate a position 56 | /// at which it should be inserted. 57 | inline size_t offset_in_bucket() const { 58 | // bitvector of the bucket 59 | uint64_t bv = bucket().bv(); 60 | 61 | return popcount(bv & (bit_mask_in_bucket - 1)); 62 | } 63 | }; 64 | 65 | }} 66 | -------------------------------------------------------------------------------- /include/tudocomp/util/compact_hash/util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace tdc {namespace compact_hash { 11 | 12 | inline uint8_t log2_upper(uint64_t v) { // TODO: this is slow. 
Use the highest set bit 13 | uint8_t m = 0; 14 | uint64_t n = v; 15 | while(n) { 16 | n >>= 1; 17 | m++; 18 | } 19 | m--; 20 | return m; 21 | } 22 | 23 | inline bool is_pot(size_t n) { 24 | return (n > 0ull && ((n & (n - 1ull)) == 0ull)); 25 | } 26 | 27 | using QuotPtr = typename cbp::cbp_repr_t::pointer_t; 28 | 29 | template 30 | using ValPtr = typename cbp::cbp_repr_t::pointer_t; 31 | template 32 | using ValRef = typename cbp::cbp_repr_t::reference_t; 33 | 34 | inline size_t popcount(uint64_t value) { 35 | return __builtin_popcountll(value); 36 | } 37 | 38 | }} 39 | -------------------------------------------------------------------------------- /include/tudocomp/util/heap_size.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace tdc { 11 | template 12 | struct heap_size { 13 | static object_size_t compute(T const& val) { 14 | return object_size_t::unknown_extra_data(sizeof(T)); 15 | } 16 | }; 17 | 18 | template 19 | inline object_size_t heap_size_compute(T const& val) { 20 | return heap_size::compute(val); 21 | } 22 | 23 | #define gen_heap_size_without_indirection(...) \ 24 | template<>\ 25 | struct heap_size<__VA_ARGS__> {\ 26 | static object_size_t compute(__VA_ARGS__ const& val) {\ 27 | return object_size_t::exact(sizeof(__VA_ARGS__));\ 28 | }\ 29 | }; 30 | 31 | gen_heap_size_without_indirection(bool) 32 | gen_heap_size_without_indirection(unsigned char) 33 | gen_heap_size_without_indirection(signed char) 34 | gen_heap_size_without_indirection(char) 35 | gen_heap_size_without_indirection(unsigned short int) 36 | gen_heap_size_without_indirection(unsigned int) 37 | gen_heap_size_without_indirection(unsigned long int) 38 | gen_heap_size_without_indirection(unsigned long long int) 39 | gen_heap_size_without_indirection(signed short int) 40 | gen_heap_size_without_indirection(signed int) 41 | gen_heap_size_without_indirection(signed long int) 42 | gen_heap_size_without_indirection(signed long long int) 43 | gen_heap_size_without_indirection(float) 44 | gen_heap_size_without_indirection(double) 45 | 46 | template 47 | struct heap_size> { 48 | static object_size_t compute(std::unique_ptr const& val, size_t size) { 49 | auto bytes = object_size_t::exact(sizeof(val)); 50 | 51 | for (size_t i = 0; i < size; i++) { 52 | bytes += heap_size::compute(val[i]); 53 | } 54 | 55 | return bytes; 56 | } 57 | }; 58 | } 59 | -------------------------------------------------------------------------------- /include/tudocomp/util/object_size_t.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace tdc { 9 | /// Represents the total size of some object. 10 | /// 11 | /// For example, it can represent the number of bytes written to a file, 12 | /// or the total heap size of a object in memory. 13 | /// 14 | /// It is possible for a datastructure to not have a known size. In that 15 | /// case, this datastructure should contain the closest lower approximation 16 | /// of one, and `is_exact()` returns `false`. 
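/// Usage sketch (relying only on the accessors shown below): sizes compose
/// with `+` / `+=`, and an unknown contribution keeps the byte count as a
/// lower bound while clearing exactness:
///
///     auto total = object_size_t::exact(4 * sizeof(uint64_t))    // 32 bytes, exact
///                + object_size_t::unknown_extra_data(100);       // >= 100 bytes
///
///     total.size_in_bytes();   // 132
///     total.is_exact();        // false; streamed as ">=0.128906 KiB"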
17 | class object_size_t { 18 | size_t m_bytes = 0; 19 | bool m_has_unknown_parts = false; 20 | 21 | object_size_t() = default; 22 | object_size_t(size_t bytes, bool has_unknown_parts): 23 | m_bytes(bytes), m_has_unknown_parts(has_unknown_parts) {} 24 | public: 25 | inline static object_size_t empty() { 26 | return object_size_t(0, false); 27 | } 28 | inline static object_size_t exact(size_t size) { 29 | return object_size_t(size, false); 30 | } 31 | inline static object_size_t unknown_extra_data(size_t size) { 32 | return object_size_t(size, true); 33 | } 34 | 35 | inline object_size_t operator+(object_size_t const& other) const { 36 | return object_size_t( 37 | m_bytes + other.m_bytes, 38 | m_has_unknown_parts || other.m_has_unknown_parts); 39 | } 40 | inline object_size_t& operator+=(object_size_t const& other) { 41 | m_bytes += other.m_bytes; 42 | m_has_unknown_parts |= other.m_has_unknown_parts; 43 | return *this; 44 | } 45 | 46 | inline size_t size_in_bytes() const { 47 | return m_bytes; 48 | } 49 | 50 | inline double size_in_kibibytes() const { 51 | return double(m_bytes) / 1024.0; 52 | } 53 | 54 | inline double size_in_mebibytes() const { 55 | return double(m_bytes) / 1024.0 / 1024.0; 56 | } 57 | 58 | inline bool is_exact() const { 59 | return !m_has_unknown_parts; 60 | } 61 | 62 | inline friend std::ostream& operator<<(std::ostream& out, object_size_t const& v) { 63 | if (!v.is_exact()) { 64 | out << ">="; 65 | } 66 | out << v.size_in_kibibytes(); 67 | out << " KiB"; 68 | return out; 69 | } 70 | }; 71 | } 72 | -------------------------------------------------------------------------------- /include/tudocomp/util/serialization.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace tdc { 10 | inline bool equal_diagnostic(bool v, char const* msg) { 11 | if (!v) { 12 | std::cerr << "not equal: " << msg << "\n"; 13 | } 14 | return v; 15 | } 16 | #define gen_equal_diagnostic(e) \ 17 | equal_diagnostic(e, #e) 18 | 19 | #define gen_equal_check(field, ...) \ 20 | gen_equal_diagnostic( \ 21 | serialize::equal_check(lhs.field, rhs.field, ##__VA_ARGS__)) 22 | 23 | template 24 | struct serialize { 25 | //* 26 | static object_size_t write(std::ostream& out, T const& val) { 27 | CHECK(false) << "Need to specialize `tdc::serialize` for type " << typeid(T).name(); 28 | return object_size_t::unknown_extra_data(0); 29 | } 30 | static T read(std::istream& in) { 31 | CHECK(false) << "Need to specialize `tdc::serialize` for type " << typeid(T).name(); 32 | } 33 | static bool equal_check(T const& lhs, T const& rhs) { 34 | CHECK(false) << "Need to specialize `tdc::serialize` for type " << typeid(T).name(); 35 | return false; 36 | } 37 | //*/ 38 | }; 39 | 40 | template 41 | inline object_size_t serialize_write(std::ostream& out, T const& val) { 42 | return serialize::write(out, val); 43 | } 44 | 45 | template 46 | inline T serialize_read(std::istream& inp) { 47 | return serialize::read(inp); 48 | } 49 | 50 | template 51 | inline void serialize_read_into(std::istream& inp, T& out) { 52 | out = serialize::read(inp); 53 | } 54 | 55 | #define gen_direct_serialization(...) 
\ 56 | template<>\ 57 | struct serialize<__VA_ARGS__> {\ 58 | using T = __VA_ARGS__;\ 59 | static object_size_t write(std::ostream& out, T const& val) {\ 60 | out.write((char const*) &val, sizeof(T));\ 61 | return object_size_t::exact(sizeof(T));\ 62 | }\ 63 | static T read(std::istream& in) {\ 64 | T val;\ 65 | in.read((char*) &val, sizeof(T));\ 66 | return val;\ 67 | }\ 68 | static bool equal_check(T const& lhs, T const& rhs) {\ 69 | return gen_equal_diagnostic(lhs == rhs);\ 70 | }\ 71 | }; 72 | 73 | gen_direct_serialization(bool) 74 | gen_direct_serialization(unsigned char) 75 | gen_direct_serialization(signed char) 76 | gen_direct_serialization(char) 77 | gen_direct_serialization(unsigned short int) 78 | gen_direct_serialization(unsigned int) 79 | gen_direct_serialization(unsigned long int) 80 | gen_direct_serialization(unsigned long long int) 81 | gen_direct_serialization(signed short int) 82 | gen_direct_serialization(signed int) 83 | gen_direct_serialization(signed long int) 84 | gen_direct_serialization(signed long long int) 85 | gen_direct_serialization(float) 86 | gen_direct_serialization(double) 87 | 88 | } 89 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(NOT GTEST_FOUND) 2 | MESSAGE(STATUS "gtest is not available - tests disabled!") 3 | return() 4 | endif() 5 | 6 | include(tdc_testsuite) 7 | 8 | run_test(compact_sparse_hash_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 9 | run_test(compact_hash_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 10 | run_test(compact_sparse_hash_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 11 | run_test(compact_hash_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 12 | run_test(compact_sparse_hash_elias_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 13 | run_test(compact_hash_elias_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 14 | 15 | run_test(v2_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 16 | run_test(sandbox_test DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 17 | 18 | run_test(compact_sparse_hashset_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 19 | run_test(compact_sparse_hashset_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 20 | run_test(compact_sparse_hashset_elias_displacement_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 21 | 22 | run_test(compact_sparse_hashset_serialization_tests DEPS ${TDC_TEST_DEPS} compact_sparse_hash) 23 | -------------------------------------------------------------------------------- /test/compact_hash_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::plain_layered_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- /test/compact_hash_elias_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::plain_elias_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- /test/compact_hash_tests.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::plain_cv_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- /test/compact_hashset_tests.template.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace tdc; 8 | using namespace tdc::compact_hash; 9 | using namespace tdc::compact_hash::set; 10 | 11 | using compact_hash_type = COMPACT_TABLE; 12 | 13 | struct shadow_sets_t { 14 | std::unordered_set keys; 15 | std::unordered_set ids; 16 | compact_hash_type& table; 17 | 18 | shadow_sets_t(compact_hash_type& t): table(t) {} 19 | 20 | struct on_resize_t { 21 | shadow_sets_t& self; 22 | 23 | inline void on_resize(size_t table_size) { 24 | self.keys.clear(); 25 | self.ids.clear(); 26 | } 27 | inline void on_reinsert(uint64_t key, uint64_t id) { 28 | self.new_key(key, id); 29 | } 30 | }; 31 | auto on_resize() { 32 | return on_resize_t { *this }; 33 | } 34 | 35 | void new_key(uint64_t key, uint64_t id) { 36 | // std::cout << "insert(key=" << key << ", id=" << id << ")\n"; 37 | EXPECT_TRUE(keys.count(key) == 0) << "Key " << key << " already exists"; 38 | EXPECT_TRUE(ids.count(id) == 0) << "Id " << id << " already exists"; 39 | keys.insert(key); 40 | ids.insert(id); 41 | } 42 | 43 | void existing_key(uint64_t key, uint64_t id) { 44 | EXPECT_TRUE(keys.count(key) == 1) << "Key " << key << " does not exists"; 45 | EXPECT_TRUE(ids.count(id) == 1) << "Id " << id << " does not exists"; 46 | } 47 | 48 | auto lookup(uint64_t key) { 49 | auto r = table.lookup(key); 50 | 51 | if (r.found()) { 52 | EXPECT_TRUE(r.key_already_exist()); 53 | existing_key(key, r.id()); 54 | } 55 | 56 | return r; 57 | } 58 | auto lookup_insert(uint64_t key) { 59 | auto r = table.lookup_insert(key, on_resize()); 60 | 61 | if (r.key_already_exist()) { 62 | existing_key(key, r.id()); 63 | } else { 64 | new_key(key, r.id()); 65 | } 66 | 67 | return r; 68 | } 69 | auto lookup_insert_key_width(uint64_t key, uint8_t key_width) { 70 | auto r = table.lookup_insert_key_width(key, key_width, on_resize()); 71 | 72 | if (r.key_already_exist()) { 73 | existing_key(key, r.id()); 74 | } else { 75 | new_key(key, r.id()); 76 | } 77 | 78 | return r; 79 | } 80 | void max_load_factor(double v) { 81 | table.max_load_factor(v); 82 | } 83 | }; 84 | 85 | /// Assert that a element exists in the hashtable 86 | inline void debug_check_single(compact_hash_type& table, uint64_t key) { 87 | auto r = table.lookup(key); 88 | ASSERT_TRUE(r.found()) << "key " << key << " not found!"; 89 | 90 | auto c = table.count(key); 91 | ASSERT_EQ(c, 1U) << "key " << key << " not found!"; 92 | 93 | auto p = table.find(key); 94 | ASSERT_TRUE(p != decltype(p)()) << "key " << key << " not found!"; 95 | ASSERT_TRUE(p != nullptr) << "key " << key << " not found!"; 96 | ASSERT_TRUE(*p == key) << "key " << key << " not found!"; 97 | } 98 | 99 | /// Assert that a element exists in the hashtable 100 | inline void debug_check_single_id(compact_hash_type& table, uint64_t id) { 101 | auto r = table.lookup_id(id); 102 | ASSERT_TRUE(r.found()) << "id " << id << " not found!"; 103 | ASSERT_EQ(r.id(), id) << "lookup id is " << r.id() << " instead of " << id; 104 | } 105 | 106 | /// Assert that a element exists in the hashtable 107 | 
inline void debug_check_single(shadow_sets_t& table, uint64_t key) { 108 | auto r = table.lookup(key); 109 | ASSERT_TRUE(r.found()) << "key " << key << " not found!"; 110 | table.existing_key(key, r.id()); 111 | } 112 | 113 | template 114 | void test_hashfn() { 115 | for(uint32_t w = 1; w < 64; w++) { 116 | hashfn_t fn { w, {} }; 117 | 118 | size_t max_val = std::min(((1 << w) - 1), 1000); 119 | 120 | for (size_t i = 0; i < (max_val + 1); i++) { 121 | auto hi = fn.hash(i); 122 | auto hhi = fn.hash_inv(hi); 123 | /* 124 | std::cout 125 | << w << ", " 126 | << i << ", " 127 | << hi << ", " 128 | << hhi << "\n"; 129 | */ 130 | 131 | ASSERT_EQ(i, hhi); 132 | } 133 | } 134 | } 135 | 136 | TEST(hashfn, xorshift) { 137 | test_hashfn(); 138 | } 139 | 140 | TEST(hashfn, poplar_xorshift) { 141 | test_hashfn(); 142 | } 143 | 144 | TEST(hash, lookup_insert) { 145 | auto chx = compact_hash_type(256, 16); 146 | auto ch = shadow_sets_t(chx); 147 | 148 | ch.lookup_insert(44); 149 | ch.lookup_insert(45); 150 | ch.lookup_insert(45); 151 | ch.lookup_insert(44 + 256); 152 | ch.lookup_insert(45 + 256); 153 | ch.lookup_insert(46); 154 | 155 | ch.lookup_insert(44); 156 | ch.lookup_insert(45); 157 | ch.lookup_insert(44 + 256); 158 | ch.lookup_insert(45 + 256); 159 | ch.lookup_insert(46); 160 | 161 | //ch.lookup_insert(0); 162 | //ch.lookup_insert(4); 163 | //ch.lookup_insert(9); 164 | //ch.lookup_insert(128); 165 | 166 | //std::cout << "=======================\n"; 167 | //std::cout << ch.debug_state() << "\n"; 168 | //std::cout << "=======================\n"; 169 | 170 | } 171 | 172 | TEST(hash, lookup_insert_wrap) { 173 | auto chx = compact_hash_type(4, 16); 174 | auto ch = shadow_sets_t(chx); 175 | ch.max_load_factor(1.0); 176 | 177 | ch.lookup_insert(3); 178 | ch.lookup_insert(7); 179 | ch.lookup_insert(15); 180 | 181 | //std::cout << "=======================\n"; 182 | //std::cout << ch.debug_state() << "\n"; 183 | //std::cout << "=======================\n"; 184 | 185 | } 186 | 187 | TEST(hash, lookup_insert_move_wrap) { 188 | auto chx = compact_hash_type(8, 16); 189 | auto ch = shadow_sets_t(chx); 190 | ch.max_load_factor(1.0); 191 | 192 | ch.lookup_insert(3); 193 | ch.lookup_insert(3 + 8); 194 | 195 | ch.lookup_insert(5); 196 | ch.lookup_insert(5 + 8); 197 | ch.lookup_insert(5 + 16); 198 | ch.lookup_insert(5 + 24); 199 | 200 | ch.lookup_insert(4); 201 | 202 | //std::cout << "=======================\n"; 203 | //std::cout << ch.debug_state() << "\n"; 204 | //std::cout << "=======================\n"; 205 | 206 | debug_check_single(ch, 3); 207 | debug_check_single(ch, 3 + 8); 208 | debug_check_single(ch, 5); 209 | debug_check_single(ch, 5 + 8); 210 | debug_check_single(ch, 5 + 16); 211 | debug_check_single(ch, 5 + 24); 212 | debug_check_single(ch, 4); 213 | } 214 | 215 | TEST(hash, cornercase) { 216 | auto chx = compact_hash_type(8, 16); 217 | auto ch = shadow_sets_t(chx); 218 | 219 | ch.lookup_insert(0); 220 | ch.lookup_insert(0 + 8); 221 | 222 | debug_check_single(ch, 0); 223 | debug_check_single(ch, 0 + 8); 224 | 225 | //std::cout << "=======================\n"; 226 | //std::cout << ch.debug_state() << "\n"; 227 | //std::cout << "=======================\n"; 228 | 229 | } 230 | 231 | TEST(hash, grow) { 232 | std::vector lookup_inserted; 233 | 234 | auto chx = compact_hash_type(0, 10); // check that it grows to minimum 2 235 | auto ch = shadow_sets_t(chx); 236 | 237 | auto add = [&](auto key) { 238 | ch.lookup_insert(key); 239 | //lookup_inserted.clear(); 240 | lookup_inserted.push_back(key); 241 | for (auto& k 
: lookup_inserted) { 242 | debug_check_single(ch, k); 243 | } 244 | }; 245 | 246 | 247 | for(size_t i = 0; i < 1000; i++) { 248 | add(i); 249 | } 250 | 251 | //std::cout << "=======================\n"; 252 | //std::cout << ch.debug_state() << "\n"; 253 | //std::cout << "=======================\n"; 254 | 255 | } 256 | 257 | TEST(hash, grow_bits) { 258 | std::vector lookup_inserted; 259 | 260 | auto chx = compact_hash_type(0, 10); // check that it grows to minimum 2 261 | auto ch = shadow_sets_t(chx); 262 | 263 | uint8_t bits = 1; 264 | 265 | auto add = [&](auto key) { 266 | bits = std::max(bits, bits_for(key)); 267 | 268 | ch.lookup_insert_key_width(key, bits); 269 | //lookup_inserted.clear(); 270 | lookup_inserted.push_back(key); 271 | for (auto& k : lookup_inserted) { 272 | debug_check_single(ch, k); 273 | } 274 | }; 275 | 276 | 277 | for(size_t i = 0; i < 1000; i++) { 278 | add(i); 279 | } 280 | 281 | //std::cout << "=======================\n"; 282 | //std::cout << ch.debug_state() << "\n"; 283 | //std::cout << "=======================\n"; 284 | 285 | } 286 | 287 | TEST(hash, grow_bits_larger) { 288 | std::vector lookup_inserted; 289 | 290 | auto chx = compact_hash_type(0, 0); // check that it grows to minimum 2 291 | auto ch = shadow_sets_t(chx); 292 | 293 | uint8_t bits = 1; 294 | 295 | auto add = [&](auto key) { 296 | bits = std::max(bits, bits_for(key)); 297 | 298 | ch.lookup_insert_key_width(key, bits); 299 | lookup_inserted.clear(); 300 | lookup_inserted.push_back(key); 301 | for (auto& k : lookup_inserted) { 302 | debug_check_single(ch, k); 303 | } 304 | }; 305 | 306 | 307 | for(size_t i = 0; i < 10000; i++) { 308 | add(i*13ull); 309 | } 310 | } 311 | 312 | TEST(hash, grow_bits_larger_address) { 313 | std::vector lookup_inserted; 314 | 315 | auto chx = compact_hash_type(0, 0); // check that it grows to minimum 2 316 | auto ch = shadow_sets_t(chx); 317 | 318 | uint8_t bits = 1; 319 | 320 | auto add = [&](auto key) { 321 | bits = std::max(bits, bits_for(key)); 322 | 323 | auto r = ch.lookup_insert_key_width(key, bits); 324 | ASSERT_FALSE(r.key_already_exist()); 325 | lookup_inserted.clear(); 326 | lookup_inserted.push_back(key); 327 | for (auto& k : lookup_inserted) { 328 | debug_check_single(ch, k); 329 | } 330 | }; 331 | 332 | 333 | for(size_t i = 0; i < 10000; i++) { 334 | add(i*13ull); 335 | } 336 | 337 | //std::cout << "=======================\n"; 338 | //std::cout << ch.debug_state() << "\n"; 339 | //std::cout << "=======================\n"; 340 | } 341 | 342 | constexpr size_t load_max = 100000; 343 | //constexpr size_t load_max = 100; 344 | 345 | void load_factor_test(float z) { 346 | auto tablex = compact_hash_type(0, 1); 347 | auto table = shadow_sets_t(tablex); 348 | // TODO DEBUG 349 | // table.debug_state(); 350 | 351 | table.max_load_factor(z); 352 | for(size_t i = 0; i < load_max; i++) { 353 | table.lookup_insert_key_width(i, bits_for(i)); 354 | } 355 | //std::cout << table.debug_print_storage() << "\n"; 356 | for(size_t i = 0; i < load_max; i++) { 357 | auto r = table.lookup(i); 358 | ASSERT_TRUE(r.found()); 359 | } 360 | auto r = table.lookup(load_max); 361 | ASSERT_FALSE(r.found()); 362 | 363 | // TODO DEBUG 364 | /* 365 | auto stats = table.stat_gather(); 366 | 367 | std::cout << "stats.buckets: " << stats.buckets << "\n"; 368 | std::cout << "stats.allocated_buckets: " << stats.allocated_buckets << "\n"; 369 | std::cout << "stats.buckets_real_allocated_capacity_in_bytes: " << stats.buckets_real_allocated_capacity_in_bytes << "\n"; 370 | std::cout << 
"stats.real_allocated_capacity_in_bytes: " << stats.real_allocated_capacity_in_bytes << "\n"; 371 | std::cout << "stats.theoretical_minimum_size_in_bits: " << stats.theoretical_minimum_size_in_bits << "\n"; 372 | */ 373 | } 374 | 375 | TEST(hash_load, max_load_10) { 376 | load_factor_test(0.1); 377 | } 378 | TEST(hash_load, max_load_20) { 379 | load_factor_test(0.2); 380 | } 381 | TEST(hash_load, max_load_30) { 382 | load_factor_test(0.3); 383 | } 384 | TEST(hash_load, max_load_40) { 385 | load_factor_test(0.4); 386 | } 387 | TEST(hash_load, max_load_50) { 388 | load_factor_test(0.5); 389 | } 390 | TEST(hash_load, max_load_60) { 391 | load_factor_test(0.6); 392 | } 393 | TEST(hash_load, max_load_70) { 394 | load_factor_test(0.7); 395 | } 396 | TEST(hash_load, max_load_80) { 397 | load_factor_test(0.8); 398 | } 399 | TEST(hash_load, max_load_90) { 400 | load_factor_test(0.9); 401 | } 402 | TEST(hash_load, max_load_100) { 403 | load_factor_test(1.0); 404 | } 405 | 406 | TEST(hash, swap) { 407 | auto a = compact_hash_type(8, 16); 408 | { 409 | auto& ch = a; 410 | ch.max_load_factor(1.0); 411 | ch.lookup_insert(3); 412 | ch.lookup_insert(3 + 8); 413 | ch.lookup_insert(5); 414 | ch.lookup_insert(5 + 8); 415 | ch.lookup_insert(5 + 16); 416 | ch.lookup_insert(5 + 24); 417 | ch.lookup_insert(4); 418 | } 419 | auto b = compact_hash_type(8, 16); 420 | { 421 | auto& ch = b; 422 | ch.max_load_factor(1.0); 423 | ch.lookup_insert(3); 424 | ch.lookup_insert(3 + 8); 425 | ch.lookup_insert(5); 426 | ch.lookup_insert(5 + 8); 427 | ch.lookup_insert(5 + 16); 428 | ch.lookup_insert(5 + 24); 429 | ch.lookup_insert(4); 430 | } 431 | 432 | a.swap(b); 433 | std::swap(a, b); 434 | } 435 | 436 | TEST(hash, grow_bits_larger_id_lookup) { 437 | auto ch = compact_hash_type(0, 1); // check that it grows to minimum 2 438 | 439 | uint8_t bits = 1; 440 | 441 | auto add = [&](auto key) { 442 | bits = std::max(bits, bits_for(key)); 443 | 444 | auto entry = ch.lookup_insert_key_width(key, bits); 445 | uint64_t id = entry.id(); 446 | debug_check_single(ch, key); 447 | debug_check_single_id(ch, id); 448 | }; 449 | 450 | 451 | for(size_t i = 0; i < 10000; i++) { 452 | add(i*13ull); 453 | } 454 | } 455 | 456 | -------------------------------------------------------------------------------- /test/compact_sparse_hash_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::sparse_layered_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- /test/compact_sparse_hash_elias_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::sparse_elias_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- /test/compact_sparse_hash_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | template 9 | using COMPACT_TABLE = tdc::compact_hash::map::sparse_cv_hashmap_t; 10 | 11 | #include "compact_hash_tests.template.hpp" 12 | -------------------------------------------------------------------------------- 
/test/compact_sparse_hashset_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | using COMPACT_TABLE = tdc::compact_hash::set::sparse_layered_hashset_t<>; 9 | 10 | #include "compact_hashset_tests.template.hpp" 11 | -------------------------------------------------------------------------------- /test/compact_sparse_hashset_elias_displacement_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | using COMPACT_TABLE = tdc::compact_hash::set::sparse_elias_hashset_t<>; 9 | 10 | #include "compact_hashset_tests.template.hpp" 11 | -------------------------------------------------------------------------------- /test/compact_sparse_hashset_serialization_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace tdc::compact_hash; 20 | using namespace tdc::compact_hash::set; 21 | using namespace tdc::compact_hash::map; 22 | 23 | template 24 | void serialize_test_builder(build_func f) { 25 | using tdc::serialize; 26 | using tdc::heap_size; 27 | auto a = f(); 28 | 29 | std::stringstream ss; 30 | auto bytes = serialize::write(ss, a); 31 | size_t stream_bytes = ss.tellp(); 32 | ASSERT_EQ(bytes.size_in_bytes(), stream_bytes); 33 | 34 | auto b = serialize::read(ss); 35 | 36 | ASSERT_TRUE(serialize::equal_check(a, b)); 37 | 38 | auto c = f(); 39 | 40 | ASSERT_TRUE(serialize::equal_check(a, c)); 41 | ASSERT_TRUE(serialize::equal_check(b, c)); 42 | 43 | std::cout << "heap size: " 44 | << heap_size::compute(a) 45 | << ", written bytes: " 46 | << bytes 47 | << "\n"; 48 | } 49 | 50 | 51 | template 52 | void serialize_test_set() { 53 | serialize_test_builder([] { 54 | auto ch = table_t(8, 16); 55 | ch.max_load_factor(1.0); 56 | ch.lookup_insert(3); 57 | ch.lookup_insert(3 + 8); 58 | ch.lookup_insert(5); 59 | ch.lookup_insert(5 + 8); 60 | ch.lookup_insert(5 + 16); 61 | ch.lookup_insert(5 + 24); 62 | ch.lookup_insert(4); 63 | return ch; 64 | }); 65 | serialize_test_builder([] { 66 | auto ch = table_t(8, 16); 67 | ch.max_load_factor(1.0); 68 | ch.lookup_insert(3); 69 | ch.lookup_insert(3 + 8); 70 | ch.lookup_insert(5); 71 | ch.lookup_insert(5 + 8); 72 | ch.lookup_insert(5 + 16); 73 | ch.lookup_insert(5 + 24); 74 | ch.lookup_insert(4); 75 | return ch; 76 | }); 77 | 78 | serialize_test_builder([] { 79 | auto ch = table_t(0, 10); 80 | 81 | auto add = [&](auto key) { 82 | ch.lookup_insert(key); 83 | }; 84 | 85 | for(size_t i = 0; i < 1000; i++) { 86 | add(i); 87 | } 88 | 89 | return ch; 90 | }); 91 | 92 | serialize_test_builder([] { 93 | auto ch = table_t(0, 10); 94 | 95 | uint8_t bits = 1; 96 | 97 | auto add = [&](auto key) { 98 | bits = std::max(bits, tdc::bits_for(key)); 99 | 100 | ch.lookup_insert_key_width(key, bits); 101 | 102 | }; 103 | 104 | for(size_t i = 0; i < 1000; i++) { 105 | add(i); 106 | } 107 | 108 | return ch; 109 | }); 110 | 111 | serialize_test_builder([] { 112 | auto ch = table_t(0, 0); 113 | 114 | uint8_t bits = 1; 115 | 116 | auto add = [&](auto key) { 117 | bits = std::max(bits, tdc::bits_for(key)); 118 | ch.lookup_insert_key_width(key, bits); 119 | }; 120 | 121 | 122 | for(size_t i = 0; i < 10000; i++) { 123 | 
add(i*13ull); 124 | } 125 | 126 | return ch; 127 | }); 128 | } 129 | 130 | #define gen_test_set(name, ...) \ 131 | TEST(serialize, name) { \ 132 | serialize_test_set<__VA_ARGS__>(); \ 133 | } 134 | 135 | gen_test_set(set_poplar_displacement_compact_fixed_4, 136 | hashset_t< 137 | poplar_xorshift_t, 138 | displacement_t< 139 | layered_displacement_table_t> 140 | > 141 | > 142 | ) 143 | 144 | gen_test_set(set_poplar_displacement_compact_dynamic, 145 | hashset_t< 146 | poplar_xorshift_t, 147 | displacement_t< 148 | layered_displacement_table_t 149 | > 150 | > 151 | ) 152 | 153 | gen_test_set(set_poplar_cv, 154 | hashset_t< 155 | poplar_xorshift_t, 156 | cv_bvs_t 157 | > 158 | ) 159 | 160 | gen_test_set(set_poplar_displacement_elias_fixed_1024, 161 | hashset_t< 162 | poplar_xorshift_t, 163 | displacement_t< 164 | elias_gamma_displacement_table_t< 165 | fixed_elias_gamma_bucket_size_t<1024> 166 | > 167 | > 168 | > 169 | ) 170 | 171 | gen_test_set(set_poplar_displacement_elias_growing, 172 | hashset_t< 173 | poplar_xorshift_t, 174 | displacement_t< 175 | elias_gamma_displacement_table_t< 176 | growing_elias_gamma_bucket_size_t 177 | > 178 | > 179 | > 180 | ) 181 | 182 | gen_test_set(set_poplar_displacement_elias_dynamic, 183 | hashset_t< 184 | poplar_xorshift_t, 185 | displacement_t< 186 | elias_gamma_displacement_table_t< 187 | dynamic_fixed_elias_gamma_bucket_size_t 188 | > 189 | > 190 | > 191 | ) 192 | 193 | template 194 | void serialize_test_map() { 195 | serialize_test_builder([] { 196 | auto ch = table_t(8, 16); 197 | ch.max_load_factor(1.0); 198 | ch.insert(3, 42); 199 | ch.insert(3 + 8, 43); 200 | ch.insert(5, 44); 201 | ch.insert(5 + 8, 45); 202 | ch.insert(5 + 16, 46); 203 | ch.insert(5 + 24, 47); 204 | ch.insert(4, 48); 205 | return ch; 206 | }); 207 | serialize_test_builder([] { 208 | auto ch = table_t(8, 16); 209 | ch.max_load_factor(1.0); 210 | ch.insert(3, 49); 211 | ch.insert(3 + 8, 50); 212 | ch.insert(5, 51); 213 | ch.insert(5 + 8, 52); 214 | ch.insert(5 + 16, 53); 215 | ch.insert(5 + 24, 54); 216 | ch.insert(4, 55); 217 | return ch; 218 | }); 219 | 220 | serialize_test_builder([] { 221 | auto ch = table_t(0, 10); 222 | 223 | auto add = [&](auto key) { 224 | ch.insert(key, key * 3); 225 | }; 226 | 227 | for(size_t i = 0; i < 1000; i++) { 228 | add(i); 229 | } 230 | 231 | return ch; 232 | }); 233 | 234 | serialize_test_builder([] { 235 | auto ch = table_t(0, 10); 236 | 237 | uint8_t bits = 1; 238 | 239 | auto add = [&](auto key) { 240 | bits = std::max(bits, tdc::bits_for(key)); 241 | 242 | ch.insert_key_width(key, key * 4, bits); 243 | 244 | }; 245 | 246 | for(size_t i = 0; i < 1000; i++) { 247 | add(i); 248 | } 249 | 250 | return ch; 251 | }); 252 | 253 | serialize_test_builder([] { 254 | auto ch = table_t(0, 0); 255 | 256 | uint8_t bits = 1; 257 | 258 | auto add = [&](auto key) { 259 | bits = std::max(bits, tdc::bits_for(key)); 260 | ch.insert_key_width(key, key * 5, bits); 261 | }; 262 | 263 | 264 | for(size_t i = 0; i < 10000; i++) { 265 | add(i*13ull); 266 | } 267 | 268 | return ch; 269 | }); 270 | } 271 | 272 | #define gen_test_map(name, ...) 
\ 273 | TEST(serialize, name) { \ 274 | serialize_test_map<__VA_ARGS__>(); \ 275 | } 276 | 277 | using val_t = uint64_t; 278 | 279 | gen_test_map(map_poplar_bbv_displacement_compact_fixed_4, 280 | hashmap_t< 281 | val_t, 282 | poplar_xorshift_t, 283 | buckets_bv_t, 284 | displacement_t< 285 | layered_displacement_table_t> 286 | > 287 | > 288 | ) 289 | 290 | gen_test_map(map_poplar_bbv_displacement_compact_dynamic, 291 | hashmap_t< 292 | val_t, 293 | poplar_xorshift_t, 294 | buckets_bv_t, 295 | displacement_t< 296 | layered_displacement_table_t 297 | > 298 | > 299 | ) 300 | 301 | gen_test_map(map_poplar_bbv_cv, 302 | hashmap_t< 303 | val_t, 304 | poplar_xorshift_t, 305 | buckets_bv_t, 306 | cv_bvs_t 307 | > 308 | ) 309 | 310 | gen_test_map(map_poplar_bbv_displacement_elias_fixed_1024, 311 | hashmap_t< 312 | val_t, 313 | poplar_xorshift_t, 314 | buckets_bv_t, 315 | displacement_t< 316 | elias_gamma_displacement_table_t< 317 | fixed_elias_gamma_bucket_size_t<1024> 318 | > 319 | > 320 | > 321 | ) 322 | 323 | gen_test_map(map_poplar_bbv_displacement_elias_growing, 324 | hashmap_t< 325 | val_t, 326 | poplar_xorshift_t, 327 | buckets_bv_t, 328 | displacement_t< 329 | elias_gamma_displacement_table_t< 330 | growing_elias_gamma_bucket_size_t 331 | > 332 | > 333 | > 334 | ) 335 | 336 | gen_test_map(map_poplar_bbv_displacement_elias_dynamic, 337 | hashmap_t< 338 | val_t, 339 | poplar_xorshift_t, 340 | buckets_bv_t, 341 | displacement_t< 342 | elias_gamma_displacement_table_t< 343 | dynamic_fixed_elias_gamma_bucket_size_t 344 | > 345 | > 346 | > 347 | ) 348 | 349 | gen_test_map(map_poplar_ps_displacement_compact_fixed_4, 350 | hashmap_t< 351 | val_t, 352 | poplar_xorshift_t, 353 | plain_sentinel_t, 354 | displacement_t< 355 | layered_displacement_table_t> 356 | > 357 | > 358 | ) 359 | 360 | gen_test_map(map_poplar_ps_displacement_compact_dynamic, 361 | hashmap_t< 362 | val_t, 363 | poplar_xorshift_t, 364 | plain_sentinel_t, 365 | displacement_t< 366 | layered_displacement_table_t 367 | > 368 | > 369 | ) 370 | 371 | gen_test_map(map_poplar_ps_cv, 372 | hashmap_t< 373 | val_t, 374 | poplar_xorshift_t, 375 | plain_sentinel_t, 376 | cv_bvs_t 377 | > 378 | ) 379 | 380 | gen_test_map(map_poplar_ps_displacement_elias_fixed_1024, 381 | hashmap_t< 382 | val_t, 383 | poplar_xorshift_t, 384 | plain_sentinel_t, 385 | displacement_t< 386 | elias_gamma_displacement_table_t< 387 | fixed_elias_gamma_bucket_size_t<1024> 388 | > 389 | > 390 | > 391 | ) 392 | 393 | gen_test_map(map_poplar_ps_displacement_elias_growing, 394 | hashmap_t< 395 | val_t, 396 | poplar_xorshift_t, 397 | plain_sentinel_t, 398 | displacement_t< 399 | elias_gamma_displacement_table_t< 400 | growing_elias_gamma_bucket_size_t 401 | > 402 | > 403 | > 404 | ) 405 | 406 | gen_test_map(map_poplar_ps_displacement_elias_dynamic, 407 | hashmap_t< 408 | val_t, 409 | poplar_xorshift_t, 410 | plain_sentinel_t, 411 | displacement_t< 412 | elias_gamma_displacement_table_t< 413 | dynamic_fixed_elias_gamma_bucket_size_t 414 | > 415 | > 416 | > 417 | ) 418 | -------------------------------------------------------------------------------- /test/compact_sparse_hashset_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | using COMPACT_TABLE = tdc::compact_hash::set::sparse_cv_hashset_t<>; 9 | 10 | #include "compact_hashset_tests.template.hpp" 11 | -------------------------------------------------------------------------------- /test/sandbox_test.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | using namespace tdc; 10 | using namespace tdc::compact_hash::map; 11 | using namespace tdc::compact_hash; 12 | using namespace std; 13 | 14 | TEST(Sandbox, example) { 15 | auto map = sparse_cv_hashmap_t(0, 4); // creates a hash table with zero entries and sets the bit-width of the keys to four 16 | std::cout << "Key Width: " << map.key_width() << std::endl; 17 | std::cout << "Add i -> i*i from i = 0 up to 15" << std::endl; 18 | for(int i = 0; i <= 15; ++i) { // interval [0..15] can be represented by 4 bits 19 | map.insert(i, std::move(i*i)); // insert key i, value i*i 20 | std::cout << i << " -> " << map[i] << std::endl; // map[i] returns value with key i 21 | } 22 | std::cout << "Size: " << map.size() << std::endl; 23 | std::cout << "Update all values, set to i -> i" << std::endl; 24 | for(int i = 0; i <= 15; ++i) { 25 | std::cout << "Previously: " << i << " -> " << map[i] << std::endl; // map[i] returns value with key i 26 | map[i] = i; 27 | std::cout << "Now: " << i << " -> " << map[i] << std::endl; 28 | } 29 | std::cout << "Size: " << map.size() << std::endl; 30 | std::cout << "Add 10 additional elements with key-width 9" << std::endl; 31 | for(int i = 1; i < 11; ++i) { // keys i<<5 for i in [1..10] can be represented by 9 bits 32 | map.insert_key_width(i<<5, std::move(i+1), 9); // insert key i<<5, value i+1; keys now have a width of 9 33 | std::cout << (i<<5) << " -> " << map[i<<5] << std::endl; // map[i<<5] returns the value with key i<<5 34 | } 35 | std::cout << "Key Width: " << map.key_width() << std::endl; 36 | std::cout << "Size: " << map.size() << std::endl; 37 | std::cout << "Old values are still stored: " << std::endl; 38 | for(int i = 0; i <= 15; ++i) { 39 | std::cout << i << " -> " << map[i] << std::endl; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /test/v2_tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace tdc::compact_hash::map; 18 | using namespace tdc::compact_hash; 19 | using namespace tdc; 20 | 21 | template 22 | using map_bucket_t = bucket_t<8, satellite_data_t>; 23 | 24 | template 25 | void BucketTest() { 26 | using widths_t = typename satellite_data_t::entry_bit_width_t; 27 | 28 | auto b = map_bucket_t(); 29 | 30 | widths_t ws { 5, 7 }; 31 | b = map_bucket_t(0b10, ws); 32 | 33 | ASSERT_EQ(b.bv(), 2U); 34 | ASSERT_EQ(b.size(), 1U); 35 | ASSERT_EQ(b.is_empty(), false); 36 | 37 | auto p1 = b.at(0, ws); 38 | p1.set_no_drop(3, 4); 39 | 40 | b.stat_allocation_size_in_bytes(ws); 41 | 42 | auto p2 = b.insert_at(0, 0b11, ws); 43 | p2.set_no_drop(5, 6); 44 | 45 | p2.set(7, 8); 46 | 47 | b.destroy_vals(ws); 48 | } 49 | 50 | #define MakeBucketTest(tname) \ 51 | TEST(Bucket, tname##_test) { \ 52 | BucketTest(); \ 53 | } 54 | 55 | using uint_t40 = uint_t<40>; 56 | MakeBucketTest(uint8_t); 57 | MakeBucketTest(uint64_t); 58 | MakeBucketTest(dynamic_t); 59 | MakeBucketTest(uint_t40); 60 | 61 | template typename table_t, typename val_t> 62 | void TableTest() { 63 | using tab_t = table_t>; 64 | using widths_t = typename satellite_data_t::entry_bit_width_t; 65 | 66 | { 67 | auto t = tab_t(); 68 | 69 | widths_t ws { 5, 7 }; 70 | size_t
table_size = 16; 71 | t = tab_t(table_size, ws, {}); 72 | auto ctx = t.context(table_size, ws); 73 | 74 | for(size_t i = 0; i < table_size; i++) { 75 | auto pos = ctx.table_pos(i); 76 | ASSERT_EQ(ctx.pos_is_empty(pos), true); 77 | 78 | auto elem = ctx.allocate_pos(pos); 79 | elem.set_no_drop(i + 1, i + 2); 80 | } 81 | 82 | for(size_t i = 0; i < table_size; i++) { 83 | auto pos = ctx.table_pos(i); 84 | ASSERT_EQ(ctx.pos_is_empty(pos), false); 85 | 86 | auto elem = ctx.at(pos); 87 | ASSERT_EQ(*elem.val_ptr(), i + 1); 88 | ASSERT_EQ(elem.get_quotient(), i + 2); 89 | } 90 | } 91 | 92 | { 93 | widths_t ws { 5, 7 }; 94 | size_t table_size = 128; 95 | auto t = tab_t(table_size, ws, {}); 96 | auto ctx = t.context(table_size, ws); 97 | 98 | for(size_t i = 60; i < 80; i++) { 99 | auto pos = ctx.table_pos(i); 100 | ASSERT_EQ(ctx.pos_is_empty(pos), true); 101 | 102 | auto elem = ctx.allocate_pos(pos); 103 | elem.set_no_drop(i - 60 + 1, i - 60 + 2); 104 | 105 | ASSERT_EQ(ctx.pos_is_empty(pos), false); 106 | } 107 | 108 | auto iter = ctx.make_iter(ctx.table_pos(80)); 109 | (void) iter; 110 | for(size_t i = 0; i < 20; i++) { 111 | iter.decrement(); 112 | auto elem = iter.get(); 113 | 114 | ASSERT_EQ(*elem.val_ptr(), 20 - i); 115 | ASSERT_EQ(elem.get_quotient(), 20 - i + 1); 116 | } 117 | } 118 | 119 | } 120 | 121 | #define MakeTableTest(tab, tname) \ 122 | TEST(Table, tab##_##tname##_test) { \ 123 | TableTest(); \ 124 | } 125 | 126 | MakeTableTest(plain_sentinel_t, uint8_t); 127 | MakeTableTest(buckets_bv_t, uint8_t); 128 | MakeTableTest(plain_sentinel_t, uint64_t); 129 | MakeTableTest(buckets_bv_t, uint64_t); 130 | MakeTableTest(plain_sentinel_t, dynamic_t); 131 | MakeTableTest(buckets_bv_t, dynamic_t); 132 | MakeTableTest(plain_sentinel_t, uint_t40); 133 | MakeTableTest(buckets_bv_t, uint_t40); 134 | 135 | template typename table_t, typename val_t> 136 | void CVTableTest() { 137 | using tab_t = table_t>; 138 | using widths_t = typename satellite_data_t::entry_bit_width_t; 139 | using value_type = typename cbp::cbp_repr_t::value_type; 140 | 141 | widths_t ws { 5, 7 }; 142 | auto size_mgr = size_manager_t(128); 143 | 144 | auto t = tab_t(size_mgr.capacity(), ws, {}); 145 | auto p = placement_t(size_mgr.capacity(), {}); 146 | 147 | auto tctx = t.context(size_mgr.capacity(), ws); 148 | auto pctx = p.context(t, size_mgr.capacity(), ws, size_mgr); 149 | 150 | auto check_insert = [&](auto ia, auto value, auto sq, bool should_exists) { 151 | auto res = pctx.lookup_insert(ia, sq); 152 | ASSERT_EQ(res.key_already_exist(), should_exists); 153 | ASSERT_EQ(res.ptr().get_quotient(), sq); 154 | *res.ptr().val_ptr() = value; 155 | }; 156 | auto table_state = [&](std::vector> const& should) { 157 | std::vector> r; 158 | for (size_t i = 0; i < size_mgr.capacity(); i++) { 159 | auto tpos = tctx.table_pos(i); 160 | if (!tctx.pos_is_empty(tpos)) { 161 | // TODO: Replace with search() 162 | auto ptr = tctx.at(tpos); 163 | r.push_back(std::array{ 164 | i, value_type(*ptr.val_ptr()), ptr.get_quotient() 165 | }); 166 | } 167 | } 168 | auto is = r; 169 | ASSERT_EQ(is, should); 170 | }; 171 | 172 | check_insert(60, 1, 5U, false); 173 | table_state({ 174 | {60, 1, 5}, 175 | }); 176 | 177 | check_insert(66, 2, 5U, false); 178 | table_state({ 179 | {60, 1, 5}, 180 | {66, 2, 5}, 181 | }); 182 | 183 | check_insert(64, 3, 5U, false); 184 | table_state({ 185 | {60, 1, 5}, 186 | {64, 3, 5}, 187 | {66, 2, 5}, 188 | }); 189 | 190 | check_insert(62, 4, 5U, false); 191 | table_state({ 192 | {60, 1, 5}, 193 | {62, 4, 5}, 194 | {64, 3, 5}, 
195 | {66, 2, 5}, 196 | }); 197 | 198 | check_insert(62, 5, 6U, false); 199 | table_state({ 200 | {60, 1, 5}, 201 | {62, 4, 5}, 202 | {63, 5, 6}, 203 | {64, 3, 5}, 204 | {66, 2, 5}, 205 | }); 206 | 207 | check_insert(62, 10, 6U, true); 208 | table_state({ 209 | {60, 1, 5}, 210 | {62, 4, 5}, 211 | {63, 10, 6}, 212 | {64, 3, 5}, 213 | {66, 2, 5}, 214 | }); 215 | 216 | check_insert(62, 9, 7U, false); 217 | table_state({ 218 | {60, 1, 5}, 219 | {62, 4, 5}, 220 | {63, 10, 6}, 221 | {64, 9, 7}, 222 | {65, 3, 5}, 223 | {66, 2, 5}, 224 | }); 225 | 226 | /* 227 | Test: 228 | - multiple independ inserts 229 | - appends to same group 230 | - appends to displaced group 231 | 232 | */ 233 | } 234 | 235 | #define MakeCVTableTest(place, tab, tname) \ 236 | TEST(CVTable, place##_##tab##_##tname##_test) { \ 237 | CVTableTest(); \ 238 | } 239 | 240 | MakeCVTableTest(cv_bvs_t, plain_sentinel_t, uint8_t); 241 | MakeCVTableTest(cv_bvs_t, plain_sentinel_t, uint64_t); 242 | MakeCVTableTest(cv_bvs_t, plain_sentinel_t, dynamic_t); 243 | MakeCVTableTest(cv_bvs_t, plain_sentinel_t, uint_t40); 244 | MakeCVTableTest(cv_bvs_t, buckets_bv_t, uint8_t); 245 | MakeCVTableTest(cv_bvs_t, buckets_bv_t, uint64_t); 246 | MakeCVTableTest(cv_bvs_t, buckets_bv_t, dynamic_t); 247 | MakeCVTableTest(cv_bvs_t, buckets_bv_t, uint_t40); 248 | 249 | template typename table_t, typename val_t> 250 | void DPTableTest() { 251 | using tab_t = table_t>; 252 | using widths_t = typename satellite_data_t::entry_bit_width_t; 253 | using value_type = typename cbp::cbp_repr_t::value_type; 254 | 255 | struct TestSizeMgr { 256 | size_t table_size; 257 | inline size_t mod_add(size_t i, size_t delta = 1) const { 258 | return (i + delta) % table_size; 259 | } 260 | inline size_t mod_sub(size_t i, size_t delta = 1) const { 261 | return (i + table_size - delta) % table_size; 262 | } 263 | }; 264 | 265 | widths_t ws { 5, 7 }; 266 | auto size_mgr = TestSizeMgr { 128 }; 267 | auto t = tab_t(size_mgr.table_size, ws, {}); 268 | auto p = placement_t(size_mgr.table_size, {}); 269 | 270 | auto tctx = t.context(size_mgr.table_size, ws); 271 | auto pctx = p.context(t, size_mgr.table_size, ws, size_mgr); 272 | 273 | auto check_insert = [&](auto ia, auto value, auto sq, bool should_exists) { 274 | auto res = pctx.lookup_insert(ia, sq); 275 | ASSERT_EQ(res.key_already_exist(), should_exists); 276 | ASSERT_EQ(res.ptr().get_quotient(), sq); 277 | *res.ptr().val_ptr() = value; 278 | }; 279 | auto table_state = [&](std::vector> const& should) { 280 | std::vector> r; 281 | for (size_t i = 0; i < size_mgr.table_size; i++) { 282 | auto tpos = tctx.table_pos(i); 283 | if (!tctx.pos_is_empty(tpos)) { 284 | // TODO: Replace with search() 285 | auto ptr = tctx.at(tpos); 286 | r.push_back(std::array{ 287 | i, value_type(*ptr.val_ptr()), ptr.get_quotient() 288 | }); 289 | } 290 | } 291 | auto is = r; 292 | ASSERT_EQ(is, should); 293 | }; 294 | 295 | check_insert(60, 1, 5U, false); 296 | table_state({ 297 | {60, 1, 5}, 298 | }); 299 | 300 | check_insert(66, 2, 5U, false); 301 | table_state({ 302 | {60, 1, 5}, 303 | {66, 2, 5}, 304 | }); 305 | 306 | check_insert(64, 3, 5U, false); 307 | table_state({ 308 | {60, 1, 5}, 309 | {64, 3, 5}, 310 | {66, 2, 5}, 311 | }); 312 | 313 | check_insert(62, 4, 5U, false); 314 | table_state({ 315 | {60, 1, 5}, 316 | {62, 4, 5}, 317 | {64, 3, 5}, 318 | {66, 2, 5}, 319 | }); 320 | 321 | check_insert(62, 5, 6U, false); 322 | table_state({ 323 | {60, 1, 5}, 324 | {62, 4, 5}, 325 | {63, 5, 6}, 326 | {64, 3, 5}, 327 | {66, 2, 5}, 328 | }); 329 | 330 | 
check_insert(62, 10, 6U, true); 331 | table_state({ 332 | {60, 1, 5}, 333 | {62, 4, 5}, 334 | {63, 10, 6}, 335 | {64, 3, 5}, 336 | {66, 2, 5}, 337 | }); 338 | 339 | check_insert(62, 9, 7U, false); 340 | table_state({ 341 | {60, 1, 5}, 342 | {62, 4, 5}, 343 | {63, 10, 6}, 344 | {64, 3, 5}, 345 | {65, 9, 7}, 346 | {66, 2, 5}, 347 | }); 348 | 349 | /* 350 | Test: 351 | - multiple independ inserts 352 | - appends to same group 353 | - appends to displaced group 354 | 355 | */ 356 | } 357 | 358 | #define MakeDPTableTest(place, tab, tname) \ 359 | TEST(DPTable, place##_##tab##_##tname##_test) { \ 360 | DPTableTest(); \ 361 | } 362 | 363 | using naive_displacement_t = displacement_t; 364 | MakeDPTableTest(naive_displacement_t, plain_sentinel_t, uint8_t); 365 | MakeDPTableTest(naive_displacement_t, plain_sentinel_t, uint64_t); 366 | MakeDPTableTest(naive_displacement_t, plain_sentinel_t, dynamic_t); 367 | MakeDPTableTest(naive_displacement_t, plain_sentinel_t, uint_t40); 368 | MakeDPTableTest(naive_displacement_t, buckets_bv_t, uint8_t); 369 | MakeDPTableTest(naive_displacement_t, buckets_bv_t, uint64_t); 370 | MakeDPTableTest(naive_displacement_t, buckets_bv_t, dynamic_t); 371 | MakeDPTableTest(naive_displacement_t, buckets_bv_t, uint_t40); 372 | 373 | using layered_displacement_t = displacement_t>>; 374 | MakeDPTableTest(layered_displacement_t, plain_sentinel_t, uint8_t); 375 | MakeDPTableTest(layered_displacement_t, plain_sentinel_t, uint64_t); 376 | MakeDPTableTest(layered_displacement_t, plain_sentinel_t, dynamic_t); 377 | MakeDPTableTest(layered_displacement_t, plain_sentinel_t, uint_t40); 378 | MakeDPTableTest(layered_displacement_t, buckets_bv_t, uint8_t); 379 | MakeDPTableTest(layered_displacement_t, buckets_bv_t, uint64_t); 380 | MakeDPTableTest(layered_displacement_t, buckets_bv_t, dynamic_t); 381 | MakeDPTableTest(layered_displacement_t, buckets_bv_t, uint_t40); 382 | 383 | using layered_displacement2_t = displacement_t>; 384 | MakeDPTableTest(layered_displacement2_t, plain_sentinel_t, uint8_t); 385 | MakeDPTableTest(layered_displacement2_t, plain_sentinel_t, uint64_t); 386 | MakeDPTableTest(layered_displacement2_t, plain_sentinel_t, dynamic_t); 387 | MakeDPTableTest(layered_displacement2_t, plain_sentinel_t, uint_t40); 388 | MakeDPTableTest(layered_displacement2_t, buckets_bv_t, uint8_t); 389 | MakeDPTableTest(layered_displacement2_t, buckets_bv_t, uint64_t); 390 | MakeDPTableTest(layered_displacement2_t, buckets_bv_t, dynamic_t); 391 | MakeDPTableTest(layered_displacement2_t, buckets_bv_t, uint_t40); 392 | 393 | using elias_gamma_displacement_t = displacement_t>>; 394 | MakeDPTableTest(elias_gamma_displacement_t, plain_sentinel_t, uint8_t); 395 | MakeDPTableTest(elias_gamma_displacement_t, plain_sentinel_t, uint64_t); 396 | MakeDPTableTest(elias_gamma_displacement_t, plain_sentinel_t, dynamic_t); 397 | MakeDPTableTest(elias_gamma_displacement_t, plain_sentinel_t, uint_t40); 398 | MakeDPTableTest(elias_gamma_displacement_t, buckets_bv_t, uint8_t); 399 | MakeDPTableTest(elias_gamma_displacement_t, buckets_bv_t, uint64_t); 400 | MakeDPTableTest(elias_gamma_displacement_t, buckets_bv_t, dynamic_t); 401 | MakeDPTableTest(elias_gamma_displacement_t, buckets_bv_t, uint_t40); 402 | 403 | using elias_gamma_displacement2_t = displacement_t>; 404 | MakeDPTableTest(elias_gamma_displacement2_t, plain_sentinel_t, uint8_t); 405 | MakeDPTableTest(elias_gamma_displacement2_t, plain_sentinel_t, uint64_t); 406 | MakeDPTableTest(elias_gamma_displacement2_t, plain_sentinel_t, dynamic_t); 407 | 
MakeDPTableTest(elias_gamma_displacement2_t, plain_sentinel_t, uint_t40); 408 | MakeDPTableTest(elias_gamma_displacement2_t, buckets_bv_t, uint8_t); 409 | MakeDPTableTest(elias_gamma_displacement2_t, buckets_bv_t, uint64_t); 410 | MakeDPTableTest(elias_gamma_displacement2_t, buckets_bv_t, dynamic_t); 411 | MakeDPTableTest(elias_gamma_displacement2_t, buckets_bv_t, uint_t40); 412 | 413 | using elias_gamma_displacement3_t = displacement_t>; 414 | MakeDPTableTest(elias_gamma_displacement3_t, plain_sentinel_t, uint8_t); 415 | MakeDPTableTest(elias_gamma_displacement3_t, plain_sentinel_t, uint64_t); 416 | MakeDPTableTest(elias_gamma_displacement3_t, plain_sentinel_t, dynamic_t); 417 | MakeDPTableTest(elias_gamma_displacement3_t, plain_sentinel_t, uint_t40); 418 | MakeDPTableTest(elias_gamma_displacement3_t, buckets_bv_t, uint8_t); 419 | MakeDPTableTest(elias_gamma_displacement3_t, buckets_bv_t, uint64_t); 420 | MakeDPTableTest(elias_gamma_displacement3_t, buckets_bv_t, dynamic_t); 421 | MakeDPTableTest(elias_gamma_displacement3_t, buckets_bv_t, uint_t40); 422 | 423 | template typename table_t, typename val_t> 424 | void FullTableTest() { 425 | { 426 | table_t table; 427 | 428 | table.insert_kv_width(42, 124, 8, 8); 429 | 430 | auto r = table[42]; 431 | ASSERT_EQ(r, 124u); 432 | } 433 | { 434 | table_t table; 435 | 436 | auto tchk = [&](size_t end) { 437 | for (uint64_t w = 1; w < end; w++) { 438 | auto r = table[w]; 439 | ASSERT_EQ(r, w); 440 | } 441 | auto nptr = typename table_t::pointer_type(); 442 | for (uint64_t w = 1; w < end; w++) { 443 | auto r = table.search(w); 444 | ASSERT_NE(r, nptr); 445 | ASSERT_EQ(*r, w); 446 | } 447 | }; 448 | bool quick = true; 449 | 450 | size_t last_bits = 0; 451 | for (uint64_t v = 1; v < 1000; v++) { 452 | size_t bits = bits_for(v); 453 | if (last_bits != bits) { 454 | //std::cout << "bits: " << bits << "\n"; 455 | last_bits = bits; 456 | } 457 | table.insert_kv_width(v, std::move(v), bits, bits); 458 | 459 | if (!quick) { 460 | tchk(v + 1); 461 | } 462 | } 463 | if (quick) { 464 | tchk(1000); 465 | } 466 | } 467 | } 468 | 469 | #define MakeFullTableTest(tab, tname) \ 470 | TEST(FullTable, tab##_##tname##_test) { \ 471 | FullTableTest(); \ 472 | } 473 | 474 | template 475 | using csh_test_t = hashmap_t; 476 | template 477 | using ch_test_t = hashmap_t; 478 | 479 | template 480 | using csh_disp_test_t = hashmap_t; 481 | template 482 | using ch_disp_test_t = hashmap_t; 483 | 484 | MakeFullTableTest(csh_test_t, uint16_t) 485 | MakeFullTableTest(csh_test_t, uint64_t) 486 | MakeFullTableTest(csh_test_t, dynamic_t) 487 | MakeFullTableTest(csh_test_t, uint_t40) 488 | MakeFullTableTest(ch_test_t, uint16_t) 489 | MakeFullTableTest(ch_test_t, uint64_t) 490 | MakeFullTableTest(ch_test_t, dynamic_t) 491 | MakeFullTableTest(ch_test_t, uint_t40) 492 | MakeFullTableTest(csh_disp_test_t, uint16_t) 493 | MakeFullTableTest(csh_disp_test_t, uint64_t) 494 | MakeFullTableTest(csh_disp_test_t, dynamic_t) 495 | MakeFullTableTest(csh_disp_test_t, uint_t40) 496 | MakeFullTableTest(ch_disp_test_t, uint16_t) 497 | MakeFullTableTest(ch_disp_test_t, uint64_t) 498 | MakeFullTableTest(ch_disp_test_t, dynamic_t) 499 | MakeFullTableTest(ch_disp_test_t, uint_t40) 500 | --------------------------------------------------------------------------------
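The serialization tests above all exercise the same round trip: build a table, write it into a stream with serialize::write, read it back with serialize::read, and compare the two structurally with serialize::equal_check, printing heap_size::compute for reference. Because the angle-bracketed template arguments were stripped from this listing, the following is only a minimal sketch of that pattern, assuming that tdc::serialize<T> and tdc::heap_size<T> are specialized for the table types and that the include paths follow the repository layout; the header paths and the explicit <set_t> arguments below are inferred, not copied from the sources.

// Sketch of the write -> read -> equal_check round trip used by the
// serialization tests. Assumptions (not verbatim from the sources): the
// include paths below, and that tdc::serialize<T> / tdc::heap_size<T>
// are specialized for sparse_cv_hashset_t<>.
#include <cstdint>
#include <iostream>
#include <sstream>

#include <tudocomp/util/compact_hash/set/hashset_t.hpp>  // assumed path
#include <tudocomp/util/serialization.hpp>               // assumed path
#include <tudocomp/util/heap_size.hpp>                   // assumed path

int main() {
    using set_t = tdc::compact_hash::set::sparse_cv_hashset_t<>;

    // Build a small set: initial table size 8, key width 16, as in the tests.
    set_t a(8, 16);
    a.max_load_factor(1.0);
    for (uint64_t key : {3u, 11u, 5u, 13u, 21u, 29u, 4u}) {
        a.lookup_insert(key);
    }

    // Serialize into a stream; the tests compare the reported byte count
    // against the resulting stream position.
    std::stringstream ss;
    auto bytes = tdc::serialize<set_t>::write(ss, a);
    bool size_ok = bytes.size_in_bytes() == static_cast<size_t>(ss.tellp());

    // Read the table back and compare it structurally with the original.
    auto b = tdc::serialize<set_t>::read(ss);
    bool equal = tdc::serialize<set_t>::equal_check(a, b);

    std::cout << "size_ok=" << size_ok << ", equal=" << equal
              << ", heap size: " << tdc::heap_size<set_t>::compute(a) << "\n";
    return (size_ok && equal) ? 0 : 1;
}

The map builders in the same test file follow the identical pattern, with insert(key, value) and insert_key_width(key, value, bits) taking the place of lookup_insert and lookup_insert_key_width.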