├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bench ├── CMakeLists.txt ├── bench_lambdas.cpp ├── bench_load_factors.cpp ├── bench_maps.cpp ├── cmdline.h └── common.hpp ├── include ├── CMakeLists.txt ├── poplar.hpp └── poplar │ ├── basics.hpp │ ├── bijective_hash.hpp │ ├── bit_tools.hpp │ ├── bit_vector.hpp │ ├── compact_bonsai_nlm.hpp │ ├── compact_bonsai_trie.hpp │ ├── compact_fkhash_nlm.hpp │ ├── compact_fkhash_trie.hpp │ ├── compact_hash_table.hpp │ ├── compact_vector.hpp │ ├── exception.hpp │ ├── hash.hpp │ ├── map.hpp │ ├── plain_bonsai_nlm.hpp │ ├── plain_bonsai_trie.hpp │ ├── plain_fkhash_nlm.hpp │ ├── plain_fkhash_trie.hpp │ ├── poplar_config.hpp │ ├── standard_hash_table.hpp │ └── vbyte.hpp ├── poplar_config.hpp.in ├── sample ├── CMakeLists.txt └── sample.cpp └── test ├── CMakeLists.txt ├── CMakeLists.txt.in ├── bijective_hash_test.cpp ├── bit_vector_test.cpp ├── compact_hash_table_test.cpp ├── hash_trie_test.cpp ├── map_test.cpp ├── test_common.hpp └── words.txt /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: Left 9 | AlignOperands: true 10 | AlignTrailingComments: false 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: Empty 15 | AllowShortIfStatementsOnASingleLine: true 16 | AllowShortLoopsOnASingleLine: true 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: 
false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | AfterExternBlock: false 33 | BeforeCatch: false 34 | BeforeElse: false 35 | IndentBraces: false 36 | SplitEmptyFunction: true 37 | SplitEmptyRecord: true 38 | SplitEmptyNamespace: true 39 | BreakBeforeBinaryOperators: None 40 | BreakBeforeBraces: Attach 41 | BreakBeforeInheritanceComma: false 42 | BreakBeforeTernaryOperators: true 43 | BreakConstructorInitializersBeforeComma: false 44 | BreakConstructorInitializers: BeforeColon 45 | BreakAfterJavaFieldAnnotations: false 46 | BreakStringLiterals: true 47 | ColumnLimit: 120 48 | CommentPragmas: '^ IWYU pragma:' 49 | CompactNamespaces: false 50 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 51 | ConstructorInitializerIndentWidth: 4 52 | ContinuationIndentWidth: 4 53 | Cpp11BracedListStyle: true 54 | DerivePointerAlignment: false 55 | DisableFormat: false 56 | ExperimentalAutoDetectBinPacking: false 57 | FixNamespaceComments: true 58 | ForEachMacros: 59 | - foreach 60 | - Q_FOREACH 61 | - BOOST_FOREACH 62 | IncludeBlocks: Preserve 63 | IncludeCategories: 64 | - Regex: '^' 65 | Priority: 2 66 | - Regex: '^<.*\.h>' 67 | Priority: 1 68 | - Regex: '^<.*' 69 | Priority: 2 70 | - Regex: '.*' 71 | Priority: 3 72 | IncludeIsMainRegex: '([-_](test|unittest))?$' 73 | IndentCaseLabels: true 74 | IndentPPDirectives: None 75 | IndentWidth: 4 76 | IndentWrappedFunctionNames: false 77 | JavaScriptQuotes: Leave 78 | JavaScriptWrapImports: true 79 | KeepEmptyLinesAtTheStartOfBlocks: false 80 | MacroBlockBegin: '' 81 | MacroBlockEnd: '' 82 | MaxEmptyLinesToKeep: 1 83 | NamespaceIndentation: None 84 | ObjCBlockIndentWidth: 2 85 | ObjCSpaceAfterProperty: false 86 | ObjCSpaceBeforeProtocolList: false 87 | PenaltyBreakAssignment: 2 88 | PenaltyBreakBeforeFirstCallParameter: 1 89 | PenaltyBreakComment: 300 90 | PenaltyBreakFirstLessLess: 120 91 | PenaltyBreakString: 1000 92 | 
PenaltyExcessCharacter: 1000000 93 | PenaltyReturnTypeOnItsOwnLine: 200 94 | PointerAlignment: Left 95 | RawStringFormats: 96 | - Delimiter: pb 97 | Language: TextProto 98 | BasedOnStyle: google 99 | ReflowComments: true 100 | SortIncludes: true 101 | SortUsingDeclarations: true 102 | SpaceAfterCStyleCast: false 103 | SpaceAfterTemplateKeyword: true 104 | SpaceBeforeAssignmentOperators: true 105 | SpaceBeforeParens: ControlStatements 106 | SpaceInEmptyParentheses: false 107 | SpacesBeforeTrailingComments: 2 108 | SpacesInAngles: false 109 | SpacesInContainerLiterals: true 110 | SpacesInCStyleCastParentheses: false 111 | SpacesInParentheses: false 112 | SpacesInSquareBrackets: false 113 | Standard: Auto 114 | TabWidth: 8 115 | UseTab: Never 116 | ... 117 | 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # My Def 35 | build*/ 36 | release-build/ 37 | cmake-build-debug/ 38 | .idea/ 39 | .DS_Store 40 | .vscode/ 41 | # include/poplar/poplar_config.hpp 42 | extra/ 43 | # pyscripts/ 44 | # とりあえず 45 | .travis.yml -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(poplar_trie) 3 | 4 | if (NOT CMAKE_BUILD_TYPE) 5 | set(CMAKE_BUILD_TYPE Release) 6 | endif () 7 | 8 | # C++17 compiler check 9 | if (CMAKE_CXX_COMPILER MATCHES ".*clang.*" OR 
CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 10 | set(CMAKE_COMPILER_IS_CLANGXX 1) 11 | endif () 12 | if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") 13 | set(CMAKE_COMPILER_IS_INTEL 1) 14 | endif () 15 | if ((CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 7.0) OR (CMAKE_COMPILER_IS_CLANGXX AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.0)) 16 | message(FATAL_ERROR "Your C++ compiler does not support C++17. Please install g++ 7.0 (or greater) or clang 4.0 (or greater)") 17 | else () 18 | message(STATUS "Compiler is recent enough to support C++17.") 19 | endif () 20 | 21 | set(BUILTIN_POPCNT 0) 22 | 23 | if (DISABLE_SSE4_2) 24 | message(STATUS "sse4.2 disabled") 25 | elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") 26 | # Use /proc/cpuinfo to get the information 27 | file(STRINGS "/proc/cpuinfo" _cpuinfo) 28 | if(_cpuinfo MATCHES "(sse4_2)|(sse4a)") 29 | set(BUILTIN_POPCNT 1) 30 | endif() 31 | elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") 32 | execute_process(COMMAND sysctl -n machdep.cpu.features OUTPUT_VARIABLE _cpuinfo OUTPUT_STRIP_TRAILING_WHITESPACE) 33 | if(_cpuinfo MATCHES "SSE4.2") 34 | set(BUILTIN_POPCNT 1) 35 | endif() 36 | endif() 37 | 38 | if(BUILTIN_POPCNT) 39 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") 40 | endif() 41 | 42 | set(GCC_WARNINGS "-Wall -Werror=return-type") 43 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1z -pthread ${GCC_WARNINGS}") 44 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -march=native") 45 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb -DDEBUG") 46 | 47 | message(STATUS "BUILD_TYPE is ${CMAKE_BUILD_TYPE}") 48 | message(STATUS "CXX_FLAGS are ${CMAKE_CXX_FLAGS}") 49 | message(STATUS "CXX_FLAGS_DEBUG are ${CMAKE_CXX_FLAGS_DEBUG}") 50 | message(STATUS "CXX_FLAGS_RELEASE are ${CMAKE_CXX_FLAGS_RELEASE}") 51 | 52 | option(POPLAR_EXTRA_STATS 53 | "Enable to compute extra statistics." 
54 | OFF) 55 | 56 | configure_file( 57 | ${CMAKE_CURRENT_SOURCE_DIR}/poplar_config.hpp.in 58 | ${CMAKE_CURRENT_SOURCE_DIR}/include/poplar/poplar_config.hpp 59 | ) 60 | 61 | enable_testing() 62 | include_directories(include) 63 | 64 | add_subdirectory(include) 65 | add_subdirectory(bench) 66 | add_subdirectory(sample) 67 | add_subdirectory(test) 68 | 69 | 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018–2019 Shunsuke Kanda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Poplar-trie: A C++17 implementation of memory-efficient dynamic tries 2 | 3 | Poplar-trie is a C++17 library of a memory-efficient associative array whose keys are strings. The data structure is based on a dynamic path-decomposed trie (DynPDT) described in the paper, Shunsuke Kanda, Dominik Köppl, Yasuo Tabei, Kazuhiro Morita, and Masao Fuketa: [Dynamic Path-decomposed Tries](https://arxiv.org/abs/1906.06015), *ACM Journal of Experimental Algorithmics (JEA)*, *25*(1): 1–28, 2020. 4 | 5 | ## Implementation overview 6 | 7 | Poplar-trie is a memory-efficient updatable associative array implementation which maps key strings to values of any type like `std::map`. 8 | DynPDT is composed of two structures: dynamic trie and node label map (NLM) structures. 9 | This library contains some implementations of those structures, as follows. 10 | 11 | ### Implementations based on m-Bonsai 12 | 13 | - Classes [`plain_bonsai_trie`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/plain_bonsai_trie.hpp) and [`compact_bonsai_trie`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/compact_bonsai_trie.hpp) are dynamic trie implementations based on [m-Bonsai](https://github.com/Poyias/mBonsai). 14 | - Classes [`plain_bonsai_nlm`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/plain_bonsai_nlm.hpp) and [`compact_bonsai_nlm`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/compact_bonsai_nlm.hpp) are NLM implementations designed for these dynamic tries. 
15 | 16 | ### Implementations based on FK-hash 17 | 18 | - Classes [`plain_fkhash_trie`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/plain_fkhash_trie.hpp) and [`compact_fkhash_trie`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/compact_fkhash_trie.hpp) are dynamic trie implementations based on [HashTrie](https://github.com/tudocomp/tudocomp) developed by Fischer and Köppl. 19 | - Classes [`plain_fkhash_nlm`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/plain_fkhash_nlm.hpp) and [`compact_fkhash_nlm`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/compact_fkhash_nlm.hpp) are NLM implementations designed for these dynamic tries. 20 | 21 | ### Aliases 22 | 23 | Class [`map`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar/map.hpp) takes these classes as the template arguments and implements the associative array. 24 | So, several implementation combinations are possible. 25 | In [`poplar.hpp`](https://github.com/kampersanda/poplar-trie/blob/master/include/poplar.hpp), the following aliases are provided. 26 | 27 | | Alias | Trie Impl. | NLM Impl. | 28 | | :------------------------ | :-------------------- | :------------------- | 29 | | `plain_bonsai_map` | `plain_bonsai_trie` | `plain_bonsai_nlm` | 30 | | `semi_compact_bonsai_map` | `plain_bonsai_trie` | `compact_bonsai_nlm` | 31 | | `compact_bonsai_map` | `compact_bonsai_trie` | `compact_bonsai_nlm` | 32 | | `plain_fkhash_map` | `plain_fkhash_trie` | `plain_fkhash_nlm` | 33 | | `semi_compact_fkhash_map` | `plain_fkhash_trie` | `compact_fkhash_nlm` | 34 | | `compact_fkhash_map` | `compact_fkhash_trie` | `compact_fkhash_nlm` | 35 | 36 | 37 | ## Install 38 | 39 | This library consists of only header files. 40 | Please add the directory [`poplar-trie/include`](https://github.com/kampersanda/poplar-trie/tree/master/include) to your compiler's include path. 
41 | 42 | 43 | ## Build instructions 44 | 45 | You can download and compile Poplar-trie with the following commands. 46 | 47 | ``` 48 | $ git clone https://github.com/kampersanda/poplar-trie.git 49 | $ cd poplar-trie 50 | $ mkdir build 51 | $ cd build 52 | $ cmake .. 53 | $ make 54 | $ make install 55 | ``` 56 | 57 | The library uses C++17, so please install g++ 7.0 (or greater) or clang 4.0 (or greater). 58 | In addition, CMake 2.8 (or greater) has to be installed to compile the library. 59 | 60 | By default, the library attempts to use `SSE4.2` for popcount primitives. 61 | If you do not want to use it, please set `DISABLE_SSE4_2` at build time, e.g., `cmake .. -DDISABLE_SSE4_2=1`. 62 | 63 | ## Easy example 64 | 65 | The following code is an easy example of inserting and searching for key-value pairs. 66 | 67 | ```c++ 68 | #include <iostream> 69 | #include <poplar.hpp> 70 | 71 | int main() { 72 | std::vector<std::string> keys = {"Aoba", "Yun", "Hajime", "Hihumi", "Kou", 73 | "Rin", "Hazuki", "Umiko", "Nene"}; 74 | const auto num_keys = static_cast<int>(keys.size()); 75 | 76 | poplar::plain_bonsai_map<int> map; 77 | 78 | try { 79 | for (int i = 0; i < num_keys; ++i) { 80 | int* ptr = map.update(keys[i]); 81 | *ptr = i + 1; 82 | } 83 | for (int i = 0; i < num_keys; ++i) { 84 | const int* ptr = map.find(keys[i]); 85 | if (ptr == nullptr or *ptr != i + 1) { 86 | return 1; 87 | } 88 | std::cout << keys[i] << ": " << *ptr << std::endl; 89 | } 90 | { 91 | const int* ptr = map.find("Hotaru"); 92 | if (ptr != nullptr) { 93 | return 1; 94 | } 95 | std::cout << "Hotaru: " << -1 << std::endl; 96 | } 97 | } catch (const poplar::exception& ex) { 98 | std::cerr << ex.what() << std::endl; 99 | return 1; 100 | } 101 | 102 | std::cout << "#keys = " << map.size() << std::endl; 103 | 104 | return 0; 105 | } 106 | ``` 107 | 108 | The output will be 109 | 110 | ``` 111 | Aoba: 1 112 | Yun: 2 113 | Hajime: 3 114 | Hihumi: 4 115 | Kou: 5 116 | Rin: 6 117 | Hazuki: 7 118 | Umiko: 8 119 | Nene: 9 120 | Hotaru: -1 121 | #keys = 9 
122 | ``` 123 | 124 | ### Note: Deletion implementation 125 | 126 | Since DynPDT cannot support garbage collection for deleted keys, Poplar-trie does not provide deletion functions. However, you can easily implement that function by setting the value associated with a deleted key to an invalid value. For example, 127 | 128 | ```c++ 129 | int* ptr = map.update(deleted_key); 130 | *ptr = -1; // invalid value 131 | ``` 132 | 133 | In this approach, the memory used for deleted keys is not released, although it may be reused for keys inserted subsequently. 134 | 135 | ## Benchmarks 136 | 137 | Comparison experiments were conducted on one core of a quad-core Intel Xeon CPU E5-2680 v2 clocked at 2.80 GHz in a machine with 256 GB of RAM, running the 64-bit version of CentOS 6.10 based on Linux 2.6. 138 | The source code was compiled with g++ (version 7.3.0) in optimization mode -O3. 139 | 140 | To measure the performance, we inserted the strings of a dataset into a data structure in random order, and measured the maximum resident set size and insertion time. 141 | The lookup time was measured by retrieving a million strings randomly extracted from the dataset. 142 | 143 | The source code for the experiments is available at [dictionary_bench](https://github.com/kampersanda/dictionary_bench). 144 | 145 | ### Page Titles of English Wikipedia 146 | 147 | - Dataset: All page titles from English Wikipedia in Sep. 
2018 148 | - Number of keys: 14,130,439 149 | - File size: 0.28 GiB 150 | 151 | | Implementation | Space (GiB) | Insert (us/key) | Lookup (us/key) | 152 | | -------------------------------------------------------------------------------- | ----------: | --------------: | --------------: | 153 | | [`poplar::plain_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 0.64 | 0.98 | 0.68 | 154 | | [`poplar::semi_compact_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 0.28 | 1.60 | 0.96 | 155 | | [`poplar::compact_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 0.24 | 1.71 | 1.02 | 156 | | [`poplar::plain_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 0.67 | 0.79 | 0.86 | 157 | | [`poplar::semi_compact_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 0.31 | 0.96 | 1.15 | 158 | | [`poplar::compact_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 0.27 | 1.14 | 1.22 | 159 | | [`std::unordered_map`](http://en.cppreference.com/w/cpp/container/unordered_map) | 1.29 | 0.50 | 0.27 | 160 | | [`google::dense_hash_map`](https://github.com/sparsehash/sparsehash) | 1.64 | 0.54 | 0.14 | 161 | | [`spp::sparse_hash_map`](https://github.com/greg7mdp/sparsepp) | 0.97 | 0.69 | 0.18 | 162 | | [`tsl::hopscotch_map`](https://github.com/Tessil/hopscotch-map) | 1.08 | 0.42 | 0.13 | 163 | | [`tsl::robin_map`](https://github.com/Tessil/robin-map) | 1.83 | 0.41 | 0.12 | 164 | | [`tsl::array_map`](https://github.com/Tessil/array-hash) | 0.69 | 0.73 | 0.14 | 165 | | [`tsl::htrie_map`](https://github.com/Tessil/hat-trie) | 0.43 | 0.60 | 0.27 | 166 | | [`JudySL`](http://judy.sourceforge.net) | 0.66 | 0.92 | 0.74 | 167 | | [`libart`](https://github.com/armon/libart) | 1.23 | 1.00 | 0.73 | 168 | | [`cedar::da`](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/) (reduced trie) | 1.19 | 0.89 | 0.59 | 169 | | [`cedar::da`](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/) (prefix trie) | 0.63 | 0.89 | 0.61 | 170 | 171 | ### URLs of UK domain 172 | 173 
| - Dataset: URLs obtained from a 2005 crawl of the `.uk` domain performed by UbiCrawler 174 | - Number of keys: 39,459,925 175 | - File size: 2.7 GiB 176 | 177 | | Implementation | Space (GiB) | Insert (us/key) | Lookup (us/key) | 178 | | -------------------------------------------------------------------------------- | ----------: | --------------: | --------------: | 179 | | [`poplar::plain_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 2.32 | 1.45 | 0.94 | 180 | | [`poplar::semi_compact_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 1.26 | 2.76 | 1.44 | 181 | | [`poplar::compact_bonsai_map`](https://github.com/kampersanda/poplar-trie) | 1.09 | 2.87 | 1.44 | 182 | | [`poplar::plain_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 2.32 | 1.27 | 1.24 | 183 | | [`poplar::semi_compact_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 1.38 | 1.74 | 1.93 | 184 | | [`poplar::compact_fkhash_map`](https://github.com/kampersanda/poplar-trie) | 1.21 | 2.04 | 2.02 | 185 | | [`std::unordered_map`](http://en.cppreference.com/w/cpp/container/unordered_map) | 6.05 | 0.67 | 0.50 | 186 | | [`google::dense_hash_map`](https://github.com/sparsehash/sparsehash) | 10.50 | 1.09 | 0.27 | 187 | | [`spp::sparse_hash_map`](https://github.com/greg7mdp/sparsepp) | 5.06 | 0.96 | 0.37 | 188 | | [`tsl::hopscotch_map`](https://github.com/Tessil/hopscotch-map) | 6.23 | 0.75 | 0.25 | 189 | | [`tsl::robin_map`](https://github.com/Tessil/robin-map) | 9.23 | 0.63 | 0.25 | 190 | | [`tsl::array_map`](https://github.com/Tessil/array-hash) | 5.91 | 1.16 | 0.28 | 191 | | [`tsl::htrie_map`](https://github.com/Tessil/hat-trie) | 2.68 | 1.08 | 0.51 | 192 | | [`JudySL`](http://judy.sourceforge.net) | 2.21 | 1.88 | 1.59 | 193 | | [`libart`](https://github.com/armon/libart) | 5.17 | 1.64 | 1.19 | 194 | | [`cedar::da`](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/) (reduced trie) | 7.37 | 2.24 | 2.30 | 195 | | 
[`cedar::da`](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/) (prefix trie) | 2.02 | 2.20 | 2.28 | 196 | 197 | ## Todo 198 | 199 | - Add comments to the codes 200 | - Create the API document 201 | 202 | ## Licensing 203 | 204 | This library is free software provided under [MIT License](https://github.com/kampersanda/poplar-trie/blob/master/LICENSE). 205 | 206 | If you use the library, please cite the following paper: 207 | 208 | ```tex 209 | @article{kanda2020dynamic, 210 | title={Dynamic Path-decomposed Tries}, 211 | author={Kanda, Shunsuke and K{\"o}ppl, Dominik and Tabei, Yasuo and Morita, Kazuhiro and Fuketa, Masao}, 212 | journal={Journal of Experimental Algorithmics (JEA)}, 213 | volume={25}, 214 | number={1}, 215 | pages={1--28}, 216 | year={2020}, 217 | publisher={ACM} 218 | } 219 | ``` 220 | 221 | ## Related work 222 | 223 | - [compact\_sparse\_hash](https://github.com/tudocomp/compact_sparse_hash) is an efficient implementation of a compact associative array with integer keys. 224 | - [mBonsai](https://github.com/Poyias/mBonsai) is the original implementation of succinct dynamic tries. 225 | - [tudocomp](https://github.com/tudocomp/tudocomp) includes many dynamic trie implementations for LZ factorization. 226 | 227 | ## Special thanks 228 | 229 | Thanks to [Dr. Dominik Köppl](https://github.com/koeppl) I was able to create the bijective hash function in `bijective_hash.hpp`. 
230 | -------------------------------------------------------------------------------- /bench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(bench_load_factors bench_load_factors.cpp) 2 | add_executable(bench_maps bench_maps.cpp) 3 | add_executable(bench_lambdas bench_lambdas.cpp) 4 | -------------------------------------------------------------------------------- /bench/bench_lambdas.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #include 25 | 26 | #include "cmdline.h" 27 | #include "common.hpp" 28 | 29 | namespace { 30 | 31 | using namespace poplar; 32 | 33 | template 34 | void build(const std::string& key_name, uint32_t capa_bits, uint64_t lambda, bool detail) { 35 | uint64_t process_size = get_process_size(); 36 | 37 | std::ifstream ifs{key_name}; 38 | if (!ifs) { 39 | std::cerr << "error: failed to open " << key_name << std::endl; 40 | std::exit(1); 41 | } 42 | 43 | uint64_t num_keys = 0; 44 | double elapsed_sec = 0.0; 45 | 46 | Map map{capa_bits, lambda}; 47 | 48 | std::string key; 49 | key.reserve(1024); 50 | 51 | timer t; 52 | while (std::getline(ifs, key)) { 53 | map.update(make_char_range(key)); 54 | ++num_keys; 55 | } 56 | 57 | elapsed_sec = t.get<>(); 58 | process_size = get_process_size() - process_size; 59 | 60 | #ifdef POPLAR_EXTRA_STATS 61 | std::cout << lambda << '\t' << process_size << '\t' << elapsed_sec << '\t' << map.rate_steps() << '\t' 62 | << map.num_resize() << std::endl; 63 | #else 64 | std::cout << lambda << '\t' << process_size << '\t' << elapsed_sec << std::endl; 65 | #endif 66 | 67 | if (detail) { 68 | show_member(std::cout, "", "map"); 69 | map.show_stats(std::cout, 1); 70 | } 71 | } 72 | 73 | } // namespace 74 | 75 | int main(int argc, char* argv[]) { 76 | std::ios::sync_with_stdio(false); 77 | 78 | cmdline::parser p; 79 | p.add("key_fn", 'k', "input file name of keywords", true); 80 | p.add("map_type", 't', "cbm | cfkm", true); 81 | p.add("capa_bits", 'b', "#bits of initial capacity", false, 16); 82 | p.add("detail", 'd', "show detail stats?", false, false); 83 | p.parse_check(argc, argv); 84 | 85 | auto key_fn = p.get("key_fn"); 86 | auto map_type = p.get("map_type"); 87 | auto capa_bits = p.get("capa_bits"); 88 | auto detail = p.get("detail"); 89 | 90 | #ifdef POPLAR_EXTRA_STATS 91 | std::cout << "lambda\tprocess_size\telapsed_sec\trate_steps\tnum_resize" << std::endl; 92 | #else 93 | std::cout << "lambda\tprocess_size\telapsed_sec" << 
std::endl; 94 | #endif 95 | 96 | try { 97 | for (uint64_t lambda = 4; lambda <= 1024; lambda *= 2) { 98 | if (map_type == "cbm") { 99 | build>(key_fn, capa_bits, lambda, detail); 100 | } 101 | if (map_type == "cfkm") { 102 | build>(key_fn, capa_bits, lambda, detail); 103 | } 104 | } 105 | } catch (const exception& ex) { 106 | std::cerr << ex.what() << std::endl; 107 | } 108 | 109 | return 1; 110 | } 111 | -------------------------------------------------------------------------------- /bench/bench_load_factors.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #include 25 | 26 | #include "cmdline.h" 27 | #include "common.hpp" 28 | 29 | namespace { 30 | 31 | using namespace poplar; 32 | 33 | template 34 | void build(const std::string& key_name, uint32_t capa_bits, uint64_t lambda) { 35 | uint64_t process_size = get_process_size(); 36 | 37 | std::ifstream ifs{key_name}; 38 | if (!ifs) { 39 | std::cerr << "error: failed to open " << key_name << std::endl; 40 | exit(1); 41 | } 42 | 43 | size_t num_keys = 0; 44 | double elapsed_sec = 0.0; 45 | 46 | Map map{capa_bits, lambda}; 47 | 48 | try { 49 | std::string key; 50 | key.reserve(1024); 51 | 52 | timer t; 53 | while (std::getline(ifs, key)) { 54 | map.update(make_char_range(key)); 55 | ++num_keys; 56 | } 57 | elapsed_sec = t.get<>(); 58 | process_size = get_process_size() - process_size; 59 | } catch (const exception& ex) { 60 | std::cerr << ex.what() << std::endl; 61 | } 62 | 63 | std::ostream& out = std::cout; 64 | auto indent = get_indent(0); 65 | 66 | show_stat(out, indent, "map_name", short_realname()); 67 | show_stat(out, indent, "key_name", key_name); 68 | show_stat(out, indent, "init_capa_bits", capa_bits); 69 | show_stat(out, indent, "num_keys", num_keys); 70 | show_stat(out, indent, "elapsed_sec", elapsed_sec); 71 | show_stat(out, indent, "rss_bytes", process_size); 72 | show_stat(out, indent, "rss_MiB", process_size / (1024.0 * 1024.0)); 73 | 74 | show_member(out, indent, "map"); 75 | map.show_stats(out, 1); 76 | 77 | out << "-----" << std::endl; 78 | } 79 | 80 | } // namespace 81 | 82 | int main(int argc, char* argv[]) { 83 | std::ios::sync_with_stdio(false); 84 | 85 | cmdline::parser p; 86 | p.add("key_fn", 'k', "input file name of keywords", true); 87 | p.add("capa_bits", 'b', "#bits of initial capacity", false, 16); 88 | p.add("lambda", 'l', "lambda", false, 32); 89 | p.parse_check(argc, argv); 90 | 91 | auto key_fn = p.get("key_fn"); 92 | auto capa_bits = p.get("capa_bits"); 93 | auto lambda = p.get("lambda"); 94 | 95 | using nlm_type = 
compact_bonsai_nlm; 96 | 97 | using map_80_3_type = map, nlm_type>; 98 | using map_85_3_type = map, nlm_type>; 99 | using map_90_3_type = map, nlm_type>; 100 | using map_95_3_type = map, nlm_type>; 101 | 102 | using map_80_4_type = map, nlm_type>; 103 | using map_85_4_type = map, nlm_type>; 104 | using map_90_4_type = map, nlm_type>; 105 | using map_95_4_type = map, nlm_type>; 106 | 107 | using map_80_5_type = map, nlm_type>; 108 | using map_85_5_type = map, nlm_type>; 109 | using map_90_5_type = map, nlm_type>; 110 | using map_95_5_type = map, nlm_type>; 111 | 112 | build(key_fn, capa_bits, lambda); 113 | build(key_fn, capa_bits, lambda); 114 | build(key_fn, capa_bits, lambda); 115 | build(key_fn, capa_bits, lambda); 116 | 117 | build(key_fn, capa_bits, lambda); 118 | build(key_fn, capa_bits, lambda); 119 | build(key_fn, capa_bits, lambda); 120 | build(key_fn, capa_bits, lambda); 121 | 122 | build(key_fn, capa_bits, lambda); 123 | build(key_fn, capa_bits, lambda); 124 | build(key_fn, capa_bits, lambda); 125 | build(key_fn, capa_bits, lambda); 126 | 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /bench/bench_maps.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #include 25 | 26 | #include "cmdline.h" 27 | #include "common.hpp" 28 | 29 | namespace { 30 | 31 | using namespace poplar; 32 | 33 | using value_type = int; 34 | 35 | inline double get_average(const std::vector& ary) { 36 | double sum = 0.0; 37 | for (auto v : ary) { 38 | sum += v; 39 | } 40 | return sum / ary.size(); 41 | } 42 | inline double get_min(const std::vector& ary) { 43 | double min = std::numeric_limits::max(); 44 | for (auto v : ary) { 45 | if (v < min) { 46 | min = v; 47 | } 48 | } 49 | return min; 50 | } 51 | 52 | template 53 | int bench(const cmdline::parser& p) { 54 | auto key_fn = p.get("key_fn"); 55 | auto query_fn = p.get("query_fn"); 56 | auto capa_bits = p.get("capa_bits"); 57 | auto lambda = p.get("lambda"); 58 | auto runs = p.get("runs"); 59 | auto detail = p.get("detail"); 60 | 61 | uint64_t num_keys = 0, num_queries = 0; 62 | uint64_t ok = 0, ng = 0; 63 | uint64_t process_size = get_process_size(); 64 | 65 | double insert_us_per_key = 0.0, search_us_per_query = 0.0; 66 | double best_insert_us_per_key = 0.0, best_search_us_per_query = 0.0; 67 | 68 | auto map = std::make_unique(capa_bits, lambda); 69 | { 70 | std::ifstream ifs{key_fn}; 71 | if (!ifs) { 72 | std::cerr << "error: failed to open " << key_fn << std::endl; 73 | return 1; 74 | } 75 | 76 | std::string key; 77 | key.reserve(1024); 78 | 79 | while (std::getline(ifs, key)) { 80 | *map->update(key) = 1; 81 | ++num_keys; 82 | } 83 | process_size = get_process_size() - 
process_size; 84 | } 85 | 86 | std::shared_ptr> keys; 87 | std::shared_ptr> queries; 88 | 89 | { 90 | std::ifstream ifs{key_fn}; 91 | if (!ifs) { 92 | std::cerr << "Error: failed to open " << key_fn << std::endl; 93 | return 1; 94 | } 95 | 96 | keys = std::make_shared>(); 97 | keys->reserve(num_keys); 98 | 99 | for (std::string line; std::getline(ifs, line);) { 100 | keys->push_back(line); 101 | } 102 | } 103 | 104 | if (query_fn != "-") { 105 | std::ifstream ifs{query_fn}; 106 | if (!ifs) { 107 | std::cerr << "Error: failed to open " << query_fn << std::endl; 108 | return 1; 109 | } 110 | 111 | queries = std::make_shared>(); 112 | 113 | for (std::string line; std::getline(ifs, line);) { 114 | queries->push_back(line); 115 | } 116 | queries->shrink_to_fit(); 117 | } else { 118 | queries = keys; 119 | } 120 | 121 | { 122 | std::vector insert_times(runs); 123 | std::vector search_times(runs); 124 | 125 | for (int i = 0; i < runs; ++i) { 126 | auto map = std::make_unique(capa_bits, lambda); 127 | 128 | // insertion 129 | { 130 | timer t; 131 | for (const std::string& key : *keys) { 132 | *map->update(key) = 1; 133 | } 134 | insert_times[i] = t.get() / keys->size(); 135 | } 136 | 137 | // retrieval 138 | size_t _ok = 0, _ng = 0; 139 | { 140 | timer t; 141 | for (const std::string& query : *queries) { 142 | auto ptr = map->find(query); 143 | if (ptr != nullptr and *ptr == 1) { 144 | ++_ok; 145 | } else { 146 | ++_ng; 147 | } 148 | } 149 | search_times[i] = t.get() / queries->size(); 150 | } 151 | 152 | if (i != 0) { 153 | if ((ok != _ok) or (ng != _ng)) { 154 | std::cerr << "critical error for search results" << std::endl; 155 | return 1; 156 | } 157 | } 158 | 159 | ok = _ok; 160 | ng = _ng; 161 | } 162 | 163 | num_keys = keys->size(); 164 | num_queries = queries->size(); 165 | insert_us_per_key = get_average(insert_times); 166 | best_insert_us_per_key = get_min(insert_times); 167 | search_us_per_query = get_average(search_times); 168 | best_search_us_per_query = 
get_min(search_times); 169 | } 170 | 171 | std::ostream& out = std::cout; 172 | auto indent = get_indent(0); 173 | 174 | show_stat(out, indent, "map_name", short_realname()); 175 | show_stat(out, indent, "key_fn", key_fn); 176 | show_stat(out, indent, "query_fn", query_fn); 177 | show_stat(out, indent, "init_capa_bits", capa_bits); 178 | 179 | show_stat(out, indent, "rss_bytes", process_size); 180 | show_stat(out, indent, "rss_MiB", process_size / (1024.0 * 1024.0)); 181 | 182 | show_stat(out, indent, "num_keys", num_keys); 183 | show_stat(out, indent, "num_queries", num_queries); 184 | 185 | show_stat(out, indent, "runs", runs); 186 | show_stat(out, indent, "insert_us_per_key", insert_us_per_key); 187 | show_stat(out, indent, "best_insert_us_per_key", best_insert_us_per_key); 188 | show_stat(out, indent, "search_us_per_query", search_us_per_query); 189 | show_stat(out, indent, "best_search_us_per_query", best_search_us_per_query); 190 | 191 | show_stat(out, indent, "ok", ok); 192 | show_stat(out, indent, "ng", ng); 193 | 194 | if (detail) { 195 | show_member(out, indent, "map"); 196 | map->show_stats(out, 1); 197 | } 198 | 199 | return 0; 200 | } 201 | 202 | } // namespace 203 | 204 | int main(int argc, char* argv[]) { 205 | std::ios::sync_with_stdio(false); 206 | 207 | cmdline::parser p; 208 | p.add("key_fn", 'k', "input file name of keywords", true); 209 | p.add("query_fn", 'q', "input file name of queries", false, "-"); 210 | p.add("map_type", 't', "pbm | scbm | cbm | pfkm | scfkm | cfkm", true); 211 | p.add("chunk_size", 'c', "8 | 16 | 32 | 64 (for scbm, cbm, scfkm and cfkm)", false, 16); 212 | p.add("capa_bits", 'b', "#bits of initial capacity", false, 16); 213 | p.add("lambda", 'l', "lambda", false, 32); 214 | p.add("runs", 'r', "# of runs", false, 10); 215 | p.add("detail", 'd', "show detail stats?", false, false); 216 | p.parse_check(argc, argv); 217 | 218 | auto map_type = p.get("map_type"); 219 | auto chunk_size = p.get("chunk_size"); 220 | 221 | try { 
222 | if (map_type == "pbm") { 223 | return bench>(p); 224 | } 225 | if (map_type == "scbm") { 226 | switch (chunk_size) { 227 | case 8: 228 | return bench>(p); 229 | case 16: 230 | return bench>(p); 231 | case 32: 232 | return bench>(p); 233 | case 64: 234 | return bench>(p); 235 | default: 236 | std::cerr << p.usage() << std::endl; 237 | return 1; 238 | } 239 | } 240 | if (map_type == "cbm") { 241 | switch (chunk_size) { 242 | case 8: 243 | return bench>(p); 244 | case 16: 245 | return bench>(p); 246 | case 32: 247 | return bench>(p); 248 | case 64: 249 | return bench>(p); 250 | default: 251 | std::cerr << p.usage() << std::endl; 252 | return 1; 253 | } 254 | } 255 | if (map_type == "pfkm") { 256 | return bench>(p); 257 | } 258 | if (map_type == "scfkm") { 259 | switch (chunk_size) { 260 | case 8: 261 | return bench>(p); 262 | case 16: 263 | return bench>(p); 264 | case 32: 265 | return bench>(p); 266 | case 64: 267 | return bench>(p); 268 | default: 269 | std::cerr << p.usage() << std::endl; 270 | return 1; 271 | } 272 | } 273 | if (map_type == "cfkm") { 274 | switch (chunk_size) { 275 | case 8: 276 | return bench>(p); 277 | case 16: 278 | return bench>(p); 279 | case 32: 280 | return bench>(p); 281 | case 64: 282 | return bench>(p); 283 | default: 284 | std::cerr << p.usage() << std::endl; 285 | return 1; 286 | } 287 | } 288 | } catch (const exception& ex) { 289 | std::cerr << ex.what() << std::endl; 290 | } 291 | 292 | std::cerr << p.usage() << std::endl; 293 | return 1; 294 | } 295 | -------------------------------------------------------------------------------- /bench/common.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including 
without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_COMMON_HPP 25 | #define POPLAR_TRIE_COMMON_HPP 26 | 27 | #ifdef __APPLE__ 28 | #include 29 | #endif 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | 40 | namespace poplar { 41 | 42 | class timer { 43 | public: 44 | using hrc = std::chrono::high_resolution_clock; 45 | 46 | timer() = default; 47 | 48 | template > 49 | double get() const { 50 | return std::chrono::duration(hrc::now() - tp_).count(); 51 | } 52 | 53 | private: 54 | hrc::time_point tp_{hrc::now()}; 55 | }; 56 | 57 | // From Cedar (http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar/) 58 | inline size_t get_process_size() { 59 | #ifdef __APPLE__ 60 | struct task_basic_info t_info; 61 | mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; 62 | task_info(current_task(), TASK_BASIC_INFO, reinterpret_cast(&t_info), &t_info_count); 63 | return t_info.resident_size; 64 | #else 65 | FILE* fp = std::fopen("/proc/self/statm", "r"); 66 | size_t dummy(0), vm(0); 67 | std::fscanf(fp, "%ld %ld ", &dummy, &vm); // 
get resident (see procfs) 68 | std::fclose(fp); 69 | return vm * ::getpagesize(); 70 | #endif 71 | } 72 | 73 | template 74 | inline std::string realname() { 75 | int status; 76 | return abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status); 77 | } 78 | template 79 | inline std::string short_realname() { 80 | auto name = realname(); 81 | name = std::regex_replace(name, std::regex{R"( |poplar::)"}, ""); 82 | name = std::regex_replace(name, std::regex{R"((\d+)ul{0,2})"}, "$1"); 83 | return name; 84 | } 85 | 86 | template 87 | inline double get_average(const std::array& ary) { 88 | double sum = 0.0; 89 | for (auto v : ary) { 90 | sum += v; 91 | } 92 | return sum / N; 93 | } 94 | 95 | inline std::vector load_keys(const char* key_name) { 96 | std::ifstream ifs{key_name}; 97 | if (!ifs) { 98 | std::cerr << "Error: failed to open " << key_name << std::endl; 99 | exit(1); 100 | } 101 | 102 | std::vector keys; 103 | for (std::string line; std::getline(ifs, line);) { 104 | keys.push_back(line); 105 | } 106 | 107 | return keys; 108 | } 109 | 110 | } // namespace poplar 111 | 112 | #endif // POPLAR_TRIE_COMMON_HPP 113 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB HEADERS "poplar/*.hpp") 2 | install(FILES poplar.hpp DESTINATION include) 3 | install(FILES ${HEADERS} DESTINATION include/poplar) 4 | -------------------------------------------------------------------------------- /include/poplar.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_POPLAR_HPP 25 | #define POPLAR_TRIE_POPLAR_HPP 26 | 27 | #include "poplar/compact_bonsai_trie.hpp" 28 | #include "poplar/compact_fkhash_trie.hpp" 29 | #include "poplar/plain_bonsai_trie.hpp" 30 | #include "poplar/plain_fkhash_trie.hpp" 31 | 32 | #include "poplar/compact_bonsai_nlm.hpp" 33 | #include "poplar/compact_fkhash_nlm.hpp" 34 | #include "poplar/plain_bonsai_nlm.hpp" 35 | #include "poplar/plain_fkhash_nlm.hpp" 36 | 37 | #include "poplar/map.hpp" 38 | 39 | namespace poplar { 40 | 41 | template 42 | using plain_bonsai_map = map, plain_bonsai_nlm>; 43 | 44 | template 45 | using semi_compact_bonsai_map = map, compact_bonsai_nlm>; 46 | 47 | template 48 | using compact_bonsai_map = map, compact_bonsai_nlm>; 49 | 50 | template 51 | using plain_fkhash_map = map, plain_fkhash_nlm>; 52 | 53 | template 54 | using semi_compact_fkhash_map = map, compact_fkhash_nlm>; 55 | 56 | template 57 | using compact_fkhash_map = map, compact_fkhash_nlm>; 58 | 59 | } // namespace poplar 60 | 61 | #endif // POPLAR_TRIE_POPLAR_HPP 62 | 
-------------------------------------------------------------------------------- /include/poplar/basics.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_BASICS_HPP 25 | #define POPLAR_TRIE_BASICS_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "poplar_config.hpp" 37 | 38 | namespace poplar { 39 | 40 | using std::uint16_t; 41 | using std::uint32_t; 42 | using std::uint64_t; 43 | using std::uint8_t; 44 | 45 | enum class trie_type_ids : uint8_t { BONSAI_TRIE, FKHASH_TRIE }; 46 | 47 | struct char_range { 48 | const uint8_t* begin = nullptr; 49 | const uint8_t* end = nullptr; 50 | 51 | uint8_t operator[](uint64_t i) const { 52 | return begin[i]; 53 | } 54 | bool empty() const { 55 | return begin == end; 56 | } 57 | uint64_t length() const { 58 | return static_cast(end - begin); 59 | } 60 | }; 61 | 62 | inline char_range make_char_range(const char* str) { 63 | auto ptr = reinterpret_cast(str); 64 | return {ptr, ptr + (std::strlen(str) + 1)}; 65 | } 66 | inline char_range make_char_range(const std::string& str) { 67 | auto ptr = reinterpret_cast(str.c_str()); 68 | return {ptr, ptr + (str.size() + 1)}; 69 | } 70 | 71 | constexpr bool is_power2(uint64_t n) { 72 | return n != 0 and (n & (n - 1)) == 0; 73 | } 74 | 75 | constexpr uint32_t bits_to_bytes(uint32_t bits) { 76 | if (bits == 0) { 77 | return 0; 78 | } 79 | return bits / 8 + (bits % 8 == 0 ? 
0 : 1); 80 | } 81 | 82 | constexpr void copy_bytes(uint8_t* dst, const uint8_t* src, uint64_t num) { 83 | for (uint64_t i = 0; i < num; ++i) { 84 | dst[i] = src[i]; 85 | } 86 | } 87 | 88 | // 89 | template 90 | constexpr std::pair decompose_value(uint64_t x) { 91 | return {x / N, x % N}; 92 | } 93 | 94 | class size_p2 { 95 | public: 96 | size_p2() = default; 97 | 98 | explicit size_p2(uint32_t bits) : bits_{bits}, mask_{(1ULL << bits) - 1} {} 99 | 100 | uint32_t bits() const { 101 | return bits_; 102 | } 103 | uint64_t mask() const { 104 | return mask_; 105 | } 106 | uint64_t size() const { 107 | return mask_ + 1; 108 | } 109 | 110 | private: 111 | uint32_t bits_ = 0; 112 | uint64_t mask_ = 0; 113 | }; 114 | 115 | inline std::string get_indent(int n) { 116 | return std::string(n * 4, ' '); 117 | } 118 | inline void show_member(std::ostream& os, const std::string& indent, const char* k) { 119 | os << indent << k << ":\n"; 120 | } 121 | template 122 | inline void show_stat(std::ostream& os, const std::string& indent, const char* k, const V& v) { 123 | os << indent << k << ':' << v << '\n'; 124 | } 125 | 126 | template 127 | struct chunk_type_traits; 128 | template <> 129 | struct chunk_type_traits<8> { 130 | using type = uint8_t; 131 | }; 132 | template <> 133 | struct chunk_type_traits<16> { 134 | using type = uint16_t; 135 | }; 136 | template <> 137 | struct chunk_type_traits<32> { 138 | using type = uint32_t; 139 | }; 140 | template <> 141 | struct chunk_type_traits<64> { 142 | using type = uint64_t; 143 | }; 144 | 145 | } // namespace poplar 146 | 147 | #endif // POPLAR_TRIE_BASICS_HPP 148 | -------------------------------------------------------------------------------- /include/poplar/bijective_hash.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this 
software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_BIJECTIVE_HASH_HPP 25 | #define POPLAR_TRIE_BIJECTIVE_HASH_HPP 26 | 27 | #include "basics.hpp" 28 | 29 | namespace poplar::bijective_hash { 30 | 31 | // clang-format off 32 | constexpr uint64_t PRIME_TABLE[][2][3] = { 33 | {{0ULL, 0ULL, 0ULL}, {0ULL, 0ULL, 0ULL}}, // 0 34 | {{1ULL, 1ULL, 1ULL}, {1ULL, 1ULL, 1ULL}}, // 1 35 | {{3ULL, 1ULL, 3ULL}, {3ULL, 1ULL, 3ULL}}, // 2 36 | {{7ULL, 5ULL, 3ULL}, {7ULL, 5ULL, 3ULL}}, // 3 37 | {{13ULL, 11ULL, 7ULL}, {5ULL, 3ULL, 7ULL}}, // 4 38 | {{31ULL, 29ULL, 23ULL}, {31ULL, 21ULL, 7ULL}}, // 5 39 | {{61ULL, 59ULL, 53ULL}, {21ULL, 51ULL, 29ULL}}, // 6 40 | {{127ULL, 113ULL, 109ULL}, {127ULL, 17ULL, 101ULL}}, // 7 41 | {{251ULL, 241ULL, 239ULL}, {51ULL, 17ULL, 15ULL}}, // 8 42 | {{509ULL, 503ULL, 499ULL}, {341ULL, 455ULL, 315ULL}}, // 9 43 | {{1021ULL, 1019ULL, 1013ULL}, {341ULL, 819ULL, 93ULL}}, // 10 44 | {{2039ULL, 2029ULL, 2027ULL}, {455ULL, 1509ULL, 195ULL}}, // 11 45 | {{4093ULL, 4091ULL, 4079ULL}, {1365ULL, 819ULL, 3855ULL}}, // 12 46 | {{8191ULL, 8179ULL, 8171ULL}, {8191ULL, 4411ULL, 4291ULL}}, // 13 47 | {{16381ULL, 16369ULL, 16363ULL}, {5461ULL, 4369ULL, 12483ULL}}, // 14 48 | {{32749ULL, 32719ULL, 32717ULL}, {13797ULL, 10031ULL, 1285ULL}}, // 15 49 | {{65521ULL, 65519ULL, 65497ULL}, {4369ULL, 3855ULL, 36969ULL}}, // 16 50 | {{131071ULL, 131063ULL, 131059ULL}, {131071ULL, 29127ULL, 110907ULL}}, // 17 51 | {{262139ULL, 262133ULL, 262127ULL}, {209715ULL, 95325ULL, 200463ULL}}, // 18 52 | {{524287ULL, 524269ULL, 524261ULL}, {524287ULL, 275941ULL, 271853ULL}}, // 19 53 | {{1048573ULL, 1048571ULL, 1048559ULL}, {349525ULL, 209715ULL, 986895ULL}}, // 20 54 | {{2097143ULL, 2097133ULL, 2097131ULL}, {1864135ULL, 1324517ULL, 798915ULL}}, // 21 55 | {{4194301ULL, 4194287ULL, 4194277ULL}, {1398101ULL, 986895ULL, 3417581ULL}}, // 22 56 | {{8388593ULL, 8388587ULL, 8388581ULL}, {1118481ULL, 798915ULL, 3417581ULL}}, // 23 57 | {{16777213ULL, 16777199ULL, 16777183ULL}, {5592405ULL, 986895ULL, 
15760415ULL}}, // 24 58 | {{33554393ULL, 33554383ULL, 33554371ULL}, {17207401ULL, 31500079ULL, 15952107ULL}}, // 25 59 | {{67108859ULL, 67108837ULL, 67108819ULL}, {53687091ULL, 62137837ULL, 50704475ULL}}, // 26 60 | {{134217689ULL, 134217649ULL, 134217617ULL}, {17207401ULL, 113830225ULL, 82223473ULL}}, // 27 61 | {{268435399ULL, 268435367ULL, 268435361ULL}, {131863031ULL, 96516119ULL, 186492001ULL}}, // 28 62 | {{536870909ULL, 536870879ULL, 536870869ULL}, {357913941ULL, 32537631ULL, 274678141ULL}}, // 29 63 | {{1073741789ULL, 1073741783ULL, 1073741741ULL}, {889671797ULL, 1047552999ULL, 349289509ULL}}, // 30 64 | {{2147483647ULL, 2147483629ULL, 2147483587ULL}, {2147483647ULL, 1469330917ULL, 1056139499ULL}}, // 31 65 | {{4294967291ULL, 4294967279ULL, 4294967231ULL}, {858993459ULL, 252645135ULL, 1057222719ULL}}, // 32 66 | {{8589934583ULL, 8589934567ULL, 8589934543ULL}, {7635497415ULL, 1030792151ULL, 3856705327ULL}}, // 33 67 | {{17179869143ULL, 17179869107ULL, 17179869071ULL}, {9637487591ULL, 11825104763ULL, 12618841967ULL}}, // 34 68 | {{34359738337ULL, 34359738319ULL, 34359738307ULL}, {1108378657ULL, 21036574511ULL, 22530975979ULL}}, // 35 69 | {{68719476731ULL, 68719476719ULL, 68719476713ULL}, {13743895347ULL, 64677154575ULL, 8963410009ULL}}, // 36 70 | {{137438953447ULL, 137438953441ULL, 137438953427ULL}, {43980465111ULL, 35468117025ULL, 70246576219ULL}}, // 37 71 | {{274877906899ULL, 274877906857ULL, 274877906837ULL}, {207685529691ULL, 41073710233ULL, 208085144509ULL}}, // 38 72 | {{549755813881ULL, 549755813869ULL, 549755813821ULL}, {78536544841ULL, 347214198245ULL, 369238979477ULL}}, // 39 73 | {{1099511627689ULL, 1099511627609ULL, 1099511627581ULL}, {315951617177ULL, 928330176745ULL, 343949791253ULL}}, // 40 74 | {{2199023255531ULL, 2199023255521ULL, 2199023255497ULL}, {209430786243ULL, 1134979744801ULL, 1119502748281ULL}}, // 41 75 | {{4398046511093ULL, 4398046511087ULL, 4398046511071ULL}, {1199467230301ULL, 3363212037903ULL, 3331853417503ULL}}, // 42 76 | 
{{8796093022151ULL, 8796093022141ULL, 8796093022091ULL}, {8178823336439ULL, 918994793365ULL, 2405769031715ULL}}, // 43 77 | {{17592186044399ULL, 17592186044299ULL, 17592186044297ULL}, {16557351571215ULL, 2405769031715ULL, 2365335938745ULL}}, // 44 78 | {{35184372088777ULL, 35184372088763ULL, 35184372088751ULL}, {27507781814905ULL, 17847145262451ULL, 11293749065551ULL}}, // 45 79 | {{70368744177643ULL, 70368744177607ULL, 70368744177601ULL}, {13403570319555ULL, 34567102403063ULL, 4467856773185ULL}}, // 46 80 | {{140737488355213ULL, 140737488355201ULL, 140737488355181ULL}, {88113905752901ULL, 4432676798593ULL, 22020151239269ULL}}, // 47 81 | {{281474976710597ULL, 281474976710591ULL, 281474976710567ULL}, {100186008659725ULL, 4330384257087ULL, 123342967322647ULL}}, // 48 82 | {{562949953421231ULL, 562949953421201ULL, 562949953421189ULL}, {222399981598543ULL, 25358106009969ULL, 366146311168333ULL}}, // 49 83 | {{1125899906842597ULL, 1125899906842589ULL, 1125899906842573ULL}, {667199944795629ULL, 289517118902389ULL, 286994093901061ULL}}, // 50 84 | {{2251799813685119ULL, 2251799813685109ULL, 2251799813685083ULL}, {558586000294015ULL, 161999986596061ULL, 232003617167571ULL}}, // 51 85 | {{4503599627370449ULL, 4503599627370353ULL, 4503599627370323ULL}, {3449565672028465ULL, 3558788516733329ULL, 3514369651416283ULL}}, // 52 86 | {{9007199254740881ULL, 9007199254740847ULL, 9007199254740761ULL}, {2840107873116529ULL, 496948924399503ULL, 4991002184445225ULL}}, // 53 87 | {{18014398509481951ULL, 18014398509481931ULL, 18014398509481853ULL}, {16922616781634591ULL, 13595772459986403ULL, 6600695637062101ULL}}, // 54 88 | {{36028797018963913ULL, 36028797018963901ULL, 36028797018963869ULL}, {20962209174669945ULL, 20434243085382549ULL, 11645671763705525ULL}}, // 55 89 | {{72057594037927931ULL, 72057594037927909ULL, 72057594037927889ULL}, {14411518807585587ULL, 18681598454277613ULL, 21463964181510449ULL}}, // 56 90 | {{144115188075855859ULL, 144115188075855823ULL, 
144115188075855811ULL}, {88686269585142075ULL, 44116894308935471ULL, 18900352534538475ULL}}, // 57 91 | {{288230376151711687ULL, 288230376151711681ULL, 288230376151711607ULL}, {126416831645487607ULL, 18300341342965825ULL, 136751638320155207ULL}}, // 58 92 | {{576460752303423263ULL, 576460752303423061ULL, 576460752303422971ULL}, {5124095576030431ULL, 2700050362076925ULL, 198471980483577139ULL}}, // 59 93 | {{1152921504606846883ULL, 1152921504606846803ULL, 1152921504606846697ULL}, {12397005425880075ULL, 566464323072728283ULL, 4132335141960025ULL}}, // 60 94 | {{2305843009213693951ULL, 2305843009213693669ULL, 2305843009213693613ULL}, {2305843009213693951ULL, 1768084568902373101ULL, 360500529464087845ULL}}, // 61 95 | {{4611686018427387733ULL, 4611686018427387421ULL, 4611686018427387271ULL}, {4557748170258646525ULL, 152768066863019061ULL, 1515372340968241207ULL}}, // 62 96 | {{9223372036854775291ULL, 9223372036854775279ULL, 9223372036854775181ULL}, {3657236494304118067ULL, 2545580940228350223ULL, 3339243145719352645ULL}} // 63 97 | }; 98 | // clang-format on 99 | 100 | class split_mix_hasher { 101 | public: 102 | split_mix_hasher() = default; 103 | 104 | explicit split_mix_hasher(uint32_t univ_bits) { 105 | assert(0 < univ_bits && univ_bits < 64); 106 | 107 | shift_ = univ_bits / 2 + 1; 108 | univ_size_ = size_p2{univ_bits}; 109 | } 110 | 111 | uint64_t hash(uint64_t x) const { 112 | assert(x < univ_size_.size()); 113 | x = hash_<0>(x); 114 | x = hash_<1>(x); 115 | x = hash_<2>(x); 116 | return x; 117 | } 118 | 119 | uint64_t hash_inv(uint64_t x) const { 120 | assert(x < univ_size_.size()); 121 | x = hash_inv_<2>(x); 122 | x = hash_inv_<1>(x); 123 | x = hash_inv_<0>(x); 124 | return x; 125 | } 126 | 127 | uint64_t size() const { 128 | return univ_size_.size(); 129 | } 130 | 131 | uint64_t bits() const { 132 | return univ_size_.bits(); 133 | } 134 | 135 | void show_stats(std::ostream& os, int n = 0) const { 136 | auto indent = get_indent(n); 137 | show_stat(os, indent, 
"name", "split_mix_hasher"); 138 | show_stat(os, indent, "size", size()); 139 | show_stat(os, indent, "bits", bits()); 140 | } 141 | 142 | private: 143 | uint32_t shift_ = 0; 144 | size_p2 univ_size_; 145 | 146 | template 147 | uint64_t hash_(uint64_t x) const { 148 | x = x ^ (x >> (shift_ + N)); 149 | x = (x * PRIME_TABLE[univ_size_.bits()][0][N]) & univ_size_.mask(); 150 | return x; 151 | } 152 | 153 | template 154 | uint64_t hash_inv_(uint64_t x) const { 155 | x = (x * PRIME_TABLE[univ_size_.bits()][1][N]) & univ_size_.mask(); 156 | x = x ^ (x >> (shift_ + N)); 157 | return x; 158 | } 159 | }; 160 | 161 | } // namespace poplar::bijective_hash 162 | 163 | #endif // POPLAR_TRIE_BIJECTIVE_HASH_HPP 164 | -------------------------------------------------------------------------------- /include/poplar/bit_tools.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_BIT_TOOLS_HPP 25 | #define POPLAR_TRIE_BIT_TOOLS_HPP 26 | 27 | #ifdef __SSE4_2__ 28 | #include 29 | #endif 30 | 31 | #include "basics.hpp" 32 | 33 | namespace poplar::bit_tools { 34 | 35 | // Gets a bit 36 | inline bool get_bit(uint8_t x, uint64_t i) { 37 | assert(i < 8); 38 | return (x & (1U << i)) != 0; 39 | } 40 | inline bool get_bit(uint16_t x, uint64_t i) { 41 | assert(i < 16); 42 | return (x & (1U << i)) != 0; 43 | } 44 | inline bool get_bit(uint32_t x, uint64_t i) { 45 | assert(i < 32); 46 | return (x & (1U << i)) != 0; 47 | } 48 | inline bool get_bit(uint64_t x, uint64_t i) { 49 | assert(i < 64); 50 | return (x & (1ULL << i)) != 0; 51 | } 52 | 53 | // Sets a bit 54 | inline void set_bit(uint8_t& x, uint64_t i, bool bit = true) { 55 | assert(i < 8); 56 | if (bit) { 57 | x |= (1U << i); 58 | } else { 59 | x &= ~(1U << i); 60 | } 61 | } 62 | inline void set_bit(uint16_t& x, uint64_t i, bool bit = true) { 63 | assert(i < 16); 64 | if (bit) { 65 | x |= (1U << i); 66 | } else { 67 | x &= ~(1U << i); 68 | } 69 | } 70 | inline void set_bit(uint32_t& x, uint64_t i, bool bit = true) { 71 | assert(i < 32); 72 | if (bit) { 73 | x |= (1U << i); 74 | } else { 75 | x &= ~(1U << i); 76 | } 77 | } 78 | inline void set_bit(uint64_t& x, uint64_t i, bool bit = true) { 79 | assert(i < 64); 80 | if (bit) { 81 | x |= (1ULL << i); 82 | } else { 83 | x &= ~(1ULL << i); 84 | } 85 | } 86 | 87 | constexpr uint8_t POPCNT_TABLE[256] = { 88 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 89 | 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 90 | 3, 
4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 91 | 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 92 | 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 93 | 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 94 | 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; 95 | 96 | // Popcount 97 | inline uint64_t popcnt(uint8_t x) { 98 | return POPCNT_TABLE[x]; 99 | } 100 | inline uint64_t popcnt(uint16_t x) { 101 | return POPCNT_TABLE[x & UINT8_MAX] + POPCNT_TABLE[x >> 8]; 102 | } 103 | inline uint64_t popcnt(uint32_t x) { 104 | #ifdef __SSE4_2__ 105 | return static_cast(__builtin_popcount(x)); 106 | #else 107 | x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1); 108 | x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2); 109 | x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4); 110 | x *= 0x01010101U; 111 | return x >> 24; 112 | #endif 113 | } 114 | inline uint64_t popcnt(uint64_t x) { 115 | #ifdef __SSE4_2__ 116 | return static_cast(__builtin_popcountll(x)); 117 | #else 118 | x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1); 119 | x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2); 120 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4); 121 | x *= 0x0101010101010101ULL; 122 | return x >> 56; 123 | #endif 124 | } 125 | 126 | // Masked Popcount 127 | inline uint64_t popcnt(uint8_t x, uint64_t i) { 128 | assert(i < 8); 129 | return popcnt(static_cast(x & ((1U << i) - 1))); 130 | } 131 | inline uint64_t popcnt(uint16_t x, uint64_t i) { 132 | assert(i < 16); 133 | return popcnt(static_cast(x & ((1U << i) - 1))); 134 | } 135 | inline uint64_t popcnt(uint32_t x, uint64_t i) { 136 | assert(i < 32); 137 | return popcnt(static_cast(x & 
((1U << i) - 1))); 138 | } 139 | inline uint64_t popcnt(uint64_t x, uint64_t i) { 140 | assert(i < 64); 141 | return popcnt(static_cast(x & ((1ULL << i) - 1))); 142 | } 143 | 144 | // From sdsl-lite (https://github.com/simongog/sdsl-lite) 145 | constexpr uint64_t PS_OVERFLOW[65] = { 146 | 0x8080808080808080ULL, 0x7f7f7f7f7f7f7f7fULL, 0x7e7e7e7e7e7e7e7eULL, 0x7d7d7d7d7d7d7d7dULL, 0x7c7c7c7c7c7c7c7cULL, 147 | 0x7b7b7b7b7b7b7b7bULL, 0x7a7a7a7a7a7a7a7aULL, 0x7979797979797979ULL, 0x7878787878787878ULL, 0x7777777777777777ULL, 148 | 0x7676767676767676ULL, 0x7575757575757575ULL, 0x7474747474747474ULL, 0x7373737373737373ULL, 0x7272727272727272ULL, 149 | 0x7171717171717171ULL, 0x7070707070707070ULL, 0x6f6f6f6f6f6f6f6fULL, 0x6e6e6e6e6e6e6e6eULL, 0x6d6d6d6d6d6d6d6dULL, 150 | 0x6c6c6c6c6c6c6c6cULL, 0x6b6b6b6b6b6b6b6bULL, 0x6a6a6a6a6a6a6a6aULL, 0x6969696969696969ULL, 0x6868686868686868ULL, 151 | 0x6767676767676767ULL, 0x6666666666666666ULL, 0x6565656565656565ULL, 0x6464646464646464ULL, 0x6363636363636363ULL, 152 | 0x6262626262626262ULL, 0x6161616161616161ULL, 0x6060606060606060ULL, 0x5f5f5f5f5f5f5f5fULL, 0x5e5e5e5e5e5e5e5eULL, 153 | 0x5d5d5d5d5d5d5d5dULL, 0x5c5c5c5c5c5c5c5cULL, 0x5b5b5b5b5b5b5b5bULL, 0x5a5a5a5a5a5a5a5aULL, 0x5959595959595959ULL, 154 | 0x5858585858585858ULL, 0x5757575757575757ULL, 0x5656565656565656ULL, 0x5555555555555555ULL, 0x5454545454545454ULL, 155 | 0x5353535353535353ULL, 0x5252525252525252ULL, 0x5151515151515151ULL, 0x5050505050505050ULL, 0x4f4f4f4f4f4f4f4fULL, 156 | 0x4e4e4e4e4e4e4e4eULL, 0x4d4d4d4d4d4d4d4dULL, 0x4c4c4c4c4c4c4c4cULL, 0x4b4b4b4b4b4b4b4bULL, 0x4a4a4a4a4a4a4a4aULL, 157 | 0x4949494949494949ULL, 0x4848484848484848ULL, 0x4747474747474747ULL, 0x4646464646464646ULL, 0x4545454545454545ULL, 158 | 0x4444444444444444ULL, 0x4343434343434343ULL, 0x4242424242424242ULL, 0x4141414141414141ULL, 0x4040404040404040ULL}; 159 | 160 | // From sdsl-lite (https://github.com/simongog/sdsl-lite) 161 | constexpr uint8_t LT_SEL[256 * 8] = { 162 | 0, 0, 1, 0, 2, 0, 1, 
0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 163 | 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 164 | 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 165 | 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 166 | 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 167 | 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 168 | 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 169 | 170 | 0, 0, 0, 1, 0, 2, 2, 1, 0, 3, 3, 1, 3, 2, 2, 1, 0, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 5, 5, 1, 5, 171 | 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 6, 6, 1, 6, 2, 2, 1, 6, 3, 172 | 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 173 | 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 174 | 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 175 | 3, 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 176 | 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 177 | 178 | 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 3, 3, 2, 0, 0, 0, 4, 0, 4, 4, 2, 0, 4, 4, 3, 4, 3, 3, 2, 0, 0, 0, 5, 0, 179 | 5, 5, 2, 0, 5, 5, 3, 5, 3, 3, 2, 0, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 0, 0, 0, 6, 0, 6, 6, 2, 0, 6, 180 | 6, 3, 6, 3, 3, 2, 0, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 0, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 181 | 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 
3, 4, 3, 3, 2, 0, 0, 0, 7, 0, 7, 7, 2, 0, 7, 7, 3, 7, 3, 3, 2, 0, 7, 7, 4, 182 | 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 0, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 183 | 4, 4, 3, 4, 3, 3, 2, 0, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 184 | 3, 2, 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 185 | 186 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 4, 4, 3, 0, 0, 0, 0, 0, 187 | 0, 0, 5, 0, 0, 0, 5, 0, 5, 5, 3, 0, 0, 0, 5, 0, 5, 5, 4, 0, 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 188 | 0, 6, 0, 6, 6, 3, 0, 0, 0, 6, 0, 6, 6, 4, 0, 6, 6, 4, 6, 4, 4, 3, 0, 0, 0, 6, 0, 6, 6, 5, 0, 6, 6, 5, 6, 5, 5, 189 | 3, 0, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 3, 0, 0, 0, 7, 190 | 0, 7, 7, 4, 0, 7, 7, 4, 7, 4, 4, 3, 0, 0, 0, 7, 0, 7, 7, 5, 0, 7, 7, 5, 7, 5, 5, 3, 0, 7, 7, 5, 7, 5, 5, 4, 7, 191 | 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 3, 0, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 192 | 4, 3, 0, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 193 | 194 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 195 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 5, 0, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196 | 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, 197 | 5, 0, 0, 0, 6, 0, 6, 6, 5, 0, 6, 6, 5, 6, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 198 | 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 4, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 5, 0, 0, 0, 7, 0, 7, 7, 5, 0, 199 | 7, 7, 5, 7, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 200 | 6, 4, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 
5, 0, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 201 | 202 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 205 | 6, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 206 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 207 | 0, 0, 7, 0, 7, 7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 208 | 7, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 5, 209 | 210 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 213 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 214 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 215 | 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216 | 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, 217 | 218 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 219 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 222 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 223 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}; 225 | 226 | // From sdsl-lite (https://github.com/simongog/sdsl-lite) 227 | constexpr uint64_t select(uint64_t x, uint64_t i) { 228 | assert(i != 0); 229 | #ifdef __SSE4_2__ 230 | uint64_t s = x; 231 | s = s - ((s >> 1) & 0x5555555555555555ULL); 232 | s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 233 | s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 234 | s = 0x0101010101010101ULL * s; 235 | uint64_t b = (s + PS_OVERFLOW[i]) & 0x8080808080808080ULL; 236 | int byte_nr = __builtin_ctzll(b) >> 3; // byte nr in [0..7] 237 | s <<= 8; 238 | i -= (s >> (byte_nr << 3)) & 0xFFULL; 239 | return (byte_nr << 3) + LT_SEL[((i - 1) << 8) + ((x >> (byte_nr << 3)) & 0xFFULL)]; 240 | #else 241 | uint64_t s = x; // s = sum 242 | s = s - ((s >> 1) & 0x5555555555555555ULL); 243 | s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 244 | s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 245 | s = 0x0101010101010101ULL * s; 246 | uint64_t b = (s + PS_OVERFLOW[i]); //&0x8080808080808080ULL;// add something to the partial sums to cause overflow 247 | i = (i - 1) << 8; 248 | if (b & 0x0000000080000000ULL) // byte <=3 249 | if (b & 0x0000000000008000ULL) // byte <= 1 250 | if (b & 0x0000000000000080ULL) 251 | return LT_SEL[(x & 0xFFULL) + i]; 252 | else 253 | return 8 + LT_SEL[(((x >> 8) & 0xFFULL) + i - ((s & 0xFFULL) << 8)) & 0x7FFULL]; // byte 1; 254 | else // byte >1 255 | if (b & 0x0000000000800000ULL) // byte <=2 256 | return 16 + LT_SEL[(((x >> 16) & 0xFFULL) + 
i - (s & 0xFF00ULL)) & 0x7FFULL]; // byte 2; 257 | else 258 | return 24 + LT_SEL[(((x >> 24) & 0xFFULL) + i - ((s >> 8) & 0xFF00ULL)) & 0x7FFULL]; // byte 3; 259 | else // byte > 3 260 | if (b & 0x0000800000000000ULL) // byte <=5 261 | if (b & 0x0000008000000000ULL) // byte <=4 262 | return 32 + LT_SEL[(((x >> 32) & 0xFFULL) + i - ((s >> 16) & 0xFF00ULL)) & 0x7FFULL]; // byte 4; 263 | else 264 | return 40 + LT_SEL[(((x >> 40) & 0xFFULL) + i - ((s >> 24) & 0xFF00ULL)) & 0x7FFULL]; // byte 5; 265 | else // byte >5 266 | if (b & 0x0080000000000000ULL) // byte<=6 267 | return 48 + LT_SEL[(((x >> 48) & 0xFFULL) + i - ((s >> 32) & 0xFF00ULL)) & 0x7FFULL]; // byte 6; 268 | else 269 | return 56 + LT_SEL[(((x >> 56) & 0xFFULL) + i - ((s >> 40) & 0xFF00ULL)) & 0x7FFULL]; // byte 7; 270 | return 0; 271 | #endif 272 | } 273 | 274 | #ifndef __SSE4_2__ 275 | constexpr uint32_t MSB_TABLE[256] = { 276 | 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 277 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 278 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 279 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 280 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 281 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 282 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; 283 | #endif 284 | 285 | constexpr uint32_t msb(uint64_t x) { 286 | #ifdef __SSE4_2__ 287 | return x == 0 ? 
0 : 63 - __builtin_clzll(x); 288 | #else 289 | uint64_t x1 = x >> 32; 290 | if (x1 > 0) { // 32 < |x| <= 64 291 | uint64_t x2 = x1 >> 16; 292 | if (x2 > 0) { // 48 < |x| <= 64 293 | x1 = x2 >> 8; 294 | if (x1 > 0) { // 56 < |x| <= 64 295 | return MSB_TABLE[x1] + 56; 296 | } else { // 48 < |x| <= 56 297 | return MSB_TABLE[x2] + 48; 298 | } 299 | } else { // 32 < |x| <= 48 300 | x2 = x1 >> 8; 301 | if (x2 > 0) { // 40 < |x| <= 48 302 | return MSB_TABLE[x2] + 40; 303 | } else { // 32 < |x| <= 40 304 | return MSB_TABLE[x1] + 32; 305 | } 306 | } 307 | } else { // 0 < |x| <= 32 308 | uint64_t x2 = x >> 16; 309 | if (x2 > 0) { // 16 < |x| <= 32 310 | x1 = x2 >> 8; 311 | if (x1 > 0) { // 24 < |x| <= 32 312 | return MSB_TABLE[x1] + 24; 313 | } else { // 16 < |x| <= 24 314 | return MSB_TABLE[x2] + 16; 315 | } 316 | } else { // 0 < |x| <= 16 317 | x1 = x >> 8; 318 | if (x1 > 0) { // 8 < |x| <= 16 319 | return MSB_TABLE[x1] + 8; 320 | } else { // 0 < |x| <= 8 321 | return MSB_TABLE[x]; 322 | } 323 | } 324 | } 325 | #endif 326 | } 327 | 328 | constexpr uint32_t ceil_log2(uint64_t x) { 329 | return (x > 1) ? 
msb(x - 1) + 1 : 0; 330 | } 331 | 332 | template 333 | constexpr uint64_t words_for(uint64_t bits) { 334 | uint64_t word_bits = sizeof(T) * 8; 335 | return (bits + word_bits - 1) / word_bits; 336 | } 337 | 338 | } // namespace poplar::bit_tools 339 | 340 | #endif // POPLAR_TRIE_BIT_TOOLS_HPP 341 | -------------------------------------------------------------------------------- /include/poplar/bit_vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_BIT_VECTOR_HPP 25 | #define POPLAR_TRIE_BIT_VECTOR_HPP 26 | 27 | #include 28 | 29 | #include "bit_tools.hpp" 30 | 31 | namespace poplar { 32 | 33 | class bit_vector { 34 | public: 35 | bit_vector() = default; 36 | 37 | explicit bit_vector(uint64_t size) { 38 | chunks_.resize(bit_tools::words_for(size)); 39 | size_ = size; 40 | } 41 | 42 | void reserve(uint64_t capa) { 43 | chunks_.reserve(bit_tools::words_for(capa)); 44 | } 45 | 46 | ~bit_vector() = default; 47 | 48 | bool operator[](uint64_t i) const { 49 | return get(i); 50 | } 51 | bool get(uint64_t i) const { 52 | assert(i < size_); 53 | return bit_tools::get_bit(chunks_[i / 64], i % 64); 54 | } 55 | void set(uint64_t i, bool bit = true) { 56 | assert(i < size_); 57 | bit_tools::set_bit(chunks_[i / 64], i % 64, bit); 58 | } 59 | 60 | uint64_t get_bits(uint64_t pos, uint32_t len) const { 61 | assert(pos + len <= size()); 62 | if (len == 0) { 63 | return 0; 64 | } 65 | uint64_t chunk_id = pos / 64; 66 | uint64_t pos_in_chunk = pos % 64; 67 | uint64_t mask = -(len == 64) | ((1ULL << len) - 1); 68 | if (pos_in_chunk + len <= 64) { 69 | return (chunks_[chunk_id] >> pos_in_chunk) & mask; 70 | } else { 71 | return (chunks_[chunk_id] >> pos_in_chunk) | ((chunks_[chunk_id + 1] << (64 - pos_in_chunk)) & mask); 72 | } 73 | } 74 | 75 | void append_bit(bool bit) { 76 | uint64_t pos_in_chunk = size_ % 64; 77 | if (pos_in_chunk == 0) { 78 | chunks_.emplace_back(0); 79 | } 80 | chunks_.back() |= static_cast(bit) << pos_in_chunk; 81 | ++size_; 82 | } 83 | void append_bits(uint64_t bits, uint32_t len) { 84 | assert((len == 64) or (bits >> len) == 0); 85 | 86 | if (len == 0) { 87 | return; 88 | } 89 | 90 | uint64_t pos_in_chunk = size_ % 64; 91 | size_ += len; 92 | if (pos_in_chunk == 0) { 93 | chunks_.push_back(bits); 94 | } else { 95 | chunks_.back() |= (bits << pos_in_chunk); 96 | if (len > 64 - pos_in_chunk) { 97 | chunks_.push_back(bits >> (64 - pos_in_chunk)); 98 | } 99 | } 100 | } 101 | 102 
| uint64_t size() const { 103 | return size_; 104 | } 105 | 106 | bit_vector(const bit_vector&) = delete; 107 | bit_vector& operator=(const bit_vector&) = delete; 108 | 109 | bit_vector(bit_vector&& rhs) noexcept = default; 110 | bit_vector& operator=(bit_vector&& rhs) noexcept = default; 111 | 112 | private: 113 | std::vector chunks_; 114 | uint64_t size_ = 0; 115 | }; 116 | 117 | } // namespace poplar 118 | 119 | #endif // POPLAR_TRIE_BIT_VECTOR_HPP 120 | -------------------------------------------------------------------------------- /include/poplar/compact_bonsai_nlm.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_BONSAI_NLM_HPP 25 | #define POPLAR_TRIE_COMPACT_BONSAI_NLM_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include "vbyte.hpp" 32 | 33 | namespace poplar { 34 | 35 | template 36 | class compact_bonsai_nlm { 37 | public: 38 | using this_type = compact_bonsai_nlm; 39 | using value_type = Value; 40 | using chunk_type = typename chunk_type_traits::type; 41 | 42 | static constexpr auto trie_type_id = trie_type_ids::BONSAI_TRIE; 43 | 44 | public: 45 | compact_bonsai_nlm() = default; 46 | 47 | explicit compact_bonsai_nlm(uint32_t capa_bits) : ptrs_((1ULL << capa_bits) / ChunkSize), chunks_(ptrs_.size()) {} 48 | 49 | ~compact_bonsai_nlm() = default; 50 | 51 | std::pair compare(uint64_t pos, const char_range& key) const { 52 | auto [chunk_id, pos_in_chunk] = decompose_value(pos); 53 | 54 | assert(ptrs_[chunk_id]); 55 | assert(bit_tools::get_bit(chunks_[chunk_id], pos_in_chunk)); 56 | 57 | const uint8_t* ptr = ptrs_[chunk_id].get(); 58 | const uint64_t offset = bit_tools::popcnt(chunks_[chunk_id], pos_in_chunk); 59 | 60 | uint64_t alloc = 0; 61 | for (uint64_t i = 0; i < offset; ++i) { 62 | ptr += vbyte::decode(ptr, alloc); 63 | ptr += alloc; 64 | } 65 | ptr += vbyte::decode(ptr, alloc); 66 | 67 | if (key.empty()) { 68 | return {reinterpret_cast(ptr), 0}; 69 | } 70 | 71 | uint64_t length = alloc - sizeof(value_type); 72 | for (uint64_t i = 0; i < length; ++i) { 73 | if (key[i] != ptr[i]) { 74 | return {nullptr, i}; 75 | } 76 | } 77 | 78 | if (key[length] != '\0') { 79 | return {nullptr, length}; 80 | } 81 | 82 | // +1 considers the terminator '\0' 83 | return {reinterpret_cast(ptr + length), length + 1}; 84 | }; 85 | 86 | value_type* insert(uint64_t pos, const char_range& key) { 87 | auto [chunk_id, pos_in_chunk] = decompose_value(pos); 88 | 89 | assert(!bit_tools::get_bit(chunks_[chunk_id], pos_in_chunk)); 90 | bit_tools::set_bit(chunks_[chunk_id], pos_in_chunk); 91 | 92 | ++size_; 93 | 94 | #ifdef POPLAR_EXTRA_STATS 95 
| max_length_ = std::max(max_length_, key.length()); 96 | sum_length_ += key.length(); 97 | #endif 98 | 99 | if (!ptrs_[chunk_id]) { 100 | // First association in the group 101 | uint64_t length = key.empty() ? 0 : key.length() - 1; 102 | uint64_t new_alloc = vbyte::size(length + sizeof(value_type)) + length + sizeof(value_type); 103 | label_bytes_ += new_alloc; 104 | 105 | ptrs_[chunk_id] = std::make_unique(new_alloc); 106 | uint8_t* ptr = ptrs_[chunk_id].get(); 107 | 108 | ptr += vbyte::encode(ptr, length + sizeof(value_type)); 109 | copy_bytes(ptr, key.begin, length); 110 | 111 | auto ret_ptr = reinterpret_cast(ptr + length); 112 | *ret_ptr = static_cast(0); 113 | 114 | return ret_ptr; 115 | } 116 | 117 | // Second and subsequent association in the group 118 | auto fr_alloc = get_allocs_(chunk_id, pos_in_chunk); 119 | 120 | const uint64_t len = key.empty() ? 0 : key.length() - 1; 121 | const uint64_t new_alloc = vbyte::size(len + sizeof(value_type)) + len + sizeof(value_type); 122 | label_bytes_ += new_alloc; 123 | 124 | auto new_unique = std::make_unique(fr_alloc.first + new_alloc + fr_alloc.second); 125 | 126 | // Get raw pointers 127 | const uint8_t* orig_ptr = ptrs_[chunk_id].get(); 128 | uint8_t* new_ptr = new_unique.get(); 129 | 130 | // Copy the front allocation 131 | copy_bytes(new_ptr, orig_ptr, fr_alloc.first); 132 | orig_ptr += fr_alloc.first; 133 | new_ptr += fr_alloc.first; 134 | 135 | // Set new allocation 136 | new_ptr += vbyte::encode(new_ptr, len + sizeof(value_type)); 137 | copy_bytes(new_ptr, key.begin, len); 138 | new_ptr += len; 139 | *reinterpret_cast(new_ptr) = static_cast(0); 140 | 141 | // Copy the back allocation 142 | copy_bytes(new_ptr + sizeof(value_type), orig_ptr, fr_alloc.second); 143 | 144 | // Overwrite 145 | ptrs_[chunk_id] = std::move(new_unique); 146 | 147 | return reinterpret_cast(new_ptr); 148 | } 149 | 150 | template 151 | void expand(const T& pos_map) { 152 | this_type new_ls(bit_tools::ceil_log2(ptrs_.size() * ChunkSize 
* 2)); 153 | 154 | for (uint64_t pos = 0; pos < pos_map.size(); ++pos) { 155 | auto [chunk_id, pos_in_chunk] = decompose_value(pos); 156 | uint64_t new_pos = pos_map[pos]; 157 | if (new_pos != UINT64_MAX) { 158 | auto orig_slice = get_slice_(chunk_id, pos_in_chunk); 159 | if (!orig_slice.empty()) { 160 | auto [new_chunk_id, new_pos_in_chunk] = decompose_value(new_pos); 161 | new_ls.set_slice_(new_chunk_id, new_pos_in_chunk, orig_slice); 162 | } 163 | } 164 | if (pos_in_chunk == ChunkSize - 1) { 165 | ptrs_[chunk_id].reset(); 166 | } 167 | } 168 | 169 | new_ls.size_ = size_; 170 | #ifdef POPLAR_EXTRA_STATS 171 | new_ls.max_length_ = max_length_; 172 | new_ls.sum_length_ = sum_length_; 173 | #endif 174 | new_ls.label_bytes_ = label_bytes_; 175 | *this = std::move(new_ls); 176 | } 177 | 178 | uint64_t size() const { 179 | return size_; 180 | } 181 | uint64_t num_ptrs() const { 182 | return ptrs_.size(); 183 | } 184 | uint64_t alloc_bytes() const { 185 | uint64_t bytes = 0; 186 | bytes += ptrs_.capacity() * sizeof(std::unique_ptr); 187 | bytes += chunks_.capacity() * sizeof(chunk_type); 188 | bytes += label_bytes_; 189 | return bytes; 190 | } 191 | 192 | void show_stats(std::ostream& os, int n = 0) const { 193 | auto indent = get_indent(n); 194 | show_stat(os, indent, "name", "compact_bonsai_nlm"); 195 | show_stat(os, indent, "size", size()); 196 | show_stat(os, indent, "num_ptrs", num_ptrs()); 197 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 198 | #ifdef POPLAR_EXTRA_STATS 199 | show_stat(os, indent, "max_length", max_length_); 200 | show_stat(os, indent, "ave_length", double(sum_length_) / size()); 201 | #endif 202 | show_stat(os, indent, "chunk_size", ChunkSize); 203 | } 204 | 205 | compact_bonsai_nlm(const compact_bonsai_nlm&) = delete; 206 | compact_bonsai_nlm& operator=(const compact_bonsai_nlm&) = delete; 207 | 208 | compact_bonsai_nlm(compact_bonsai_nlm&&) noexcept = default; 209 | compact_bonsai_nlm& operator=(compact_bonsai_nlm&&) noexcept = 
default; 210 | 211 | private: 212 | std::vector> ptrs_; 213 | std::vector chunks_; 214 | uint64_t size_ = 0; 215 | uint64_t label_bytes_ = 0; 216 | 217 | #ifdef POPLAR_EXTRA_STATS 218 | uint64_t max_length_ = 0; 219 | uint64_t sum_length_ = 0; 220 | #endif 221 | 222 | std::pair get_allocs_(uint64_t chunk_id, uint64_t pos_in_chunk) { 223 | assert(bit_tools::get_bit(chunks_[chunk_id], pos_in_chunk)); 224 | 225 | const uint8_t* ptr = ptrs_[chunk_id].get(); 226 | assert(ptr != nullptr); 227 | 228 | // -1 means the difference of the above bit_tools::set_bit 229 | const uint64_t num = bit_tools::popcnt(chunks_[chunk_id]) - 1; 230 | const uint64_t offset = bit_tools::popcnt(chunks_[chunk_id], pos_in_chunk); 231 | 232 | uint64_t front_alloc = 0, back_alloc = 0; 233 | 234 | for (uint64_t i = 0; i < num; ++i) { 235 | uint64_t len = 0; 236 | len += vbyte::decode(ptr, len); 237 | if (i < offset) { 238 | front_alloc += len; 239 | } else { 240 | back_alloc += len; 241 | } 242 | ptr += len; 243 | } 244 | 245 | return {front_alloc, back_alloc}; 246 | } 247 | 248 | char_range get_slice_(uint64_t chunk_id, uint64_t pos_in_chunk) const { 249 | if (!bit_tools::get_bit(chunks_[chunk_id], pos_in_chunk)) { 250 | // pos indicates a step node 251 | return {nullptr, nullptr}; 252 | } 253 | 254 | const uint8_t* ptr = ptrs_[chunk_id].get(); 255 | assert(ptr != nullptr); 256 | 257 | const uint64_t offset = bit_tools::popcnt(chunks_[chunk_id], pos_in_chunk); 258 | 259 | // Proceeds the target position 260 | uint64_t len = 0; 261 | for (uint64_t i = 0; i < offset; ++i) { 262 | ptr += vbyte::decode(ptr, len); 263 | ptr += len; 264 | } 265 | 266 | uint64_t vsize = vbyte::decode(ptr, len); 267 | return {ptr, ptr + (vsize + len)}; 268 | } 269 | 270 | void set_slice_(uint64_t chunk_id, uint64_t pos_in_chunk, char_range new_slice) { 271 | assert(!bit_tools::get_bit(chunks_[chunk_id], pos_in_chunk)); 272 | 273 | bit_tools::set_bit(chunks_[chunk_id], pos_in_chunk); 274 | const uint8_t* ptr = 
ptrs_[chunk_id].get(); 275 | 276 | if (ptr == nullptr) { 277 | // First association in the group 278 | ptrs_[chunk_id] = std::make_unique(new_slice.length()); 279 | copy_bytes(ptrs_[chunk_id].get(), new_slice.begin, new_slice.length()); 280 | return; 281 | } 282 | 283 | // Second and subsequent association in the group 284 | auto fr_alloc = get_allocs_(chunk_id, pos_in_chunk); 285 | auto new_unique = std::make_unique(fr_alloc.first + new_slice.length() + fr_alloc.second); 286 | 287 | uint8_t* new_ptr = new_unique.get(); 288 | 289 | // Copy the front allocation 290 | copy_bytes(new_ptr, ptr, fr_alloc.first); 291 | ptr += fr_alloc.first; 292 | new_ptr += fr_alloc.first; 293 | 294 | // Set new label 295 | copy_bytes(new_ptr, new_slice.begin, new_slice.length()); 296 | new_ptr += new_slice.length(); 297 | 298 | // Copy back 299 | copy_bytes(new_ptr, ptr, fr_alloc.second); 300 | ptrs_[chunk_id] = std::move(new_unique); 301 | } 302 | }; 303 | 304 | } // namespace poplar 305 | 306 | #endif // POPLAR_TRIE_COMPACT_BONSAI_NLM_HPP 307 | -------------------------------------------------------------------------------- /include/poplar/compact_bonsai_trie.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_BONSAI_TRIE_HPP 25 | #define POPLAR_TRIE_COMPACT_BONSAI_TRIE_HPP 26 | 27 | #include "bijective_hash.hpp" 28 | #include "bit_vector.hpp" 29 | #include "compact_hash_table.hpp" 30 | #include "compact_vector.hpp" 31 | #include "standard_hash_table.hpp" 32 | 33 | namespace poplar { 34 | 35 | template , 36 | class AuxMap = standard_hash_table<>, class Hasher = bijective_hash::split_mix_hasher> 37 | class compact_bonsai_trie { 38 | static_assert(0 < MaxFactor and MaxFactor < 100); 39 | static_assert(0 < Dsp1Bits and Dsp1Bits < 64); 40 | 41 | public: 42 | using this_type = compact_bonsai_trie; 43 | using aux_cht_type = AuxCht; 44 | using aux_map_type = AuxMap; 45 | 46 | static constexpr uint64_t nil_id = UINT64_MAX; 47 | static constexpr uint32_t min_capa_bits = 16; 48 | 49 | static constexpr uint32_t dsp1_bits = Dsp1Bits; 50 | static constexpr uint64_t dsp1_mask = (1ULL << dsp1_bits) - 1; 51 | static constexpr uint32_t dsp2_bits = aux_cht_type::val_bits; 52 | static constexpr uint32_t dsp2_mask = aux_cht_type::val_mask; 53 | 54 | static constexpr auto trie_type_id = trie_type_ids::BONSAI_TRIE; 55 | 56 | public: 57 | compact_bonsai_trie() = default; 58 | 59 | compact_bonsai_trie(uint32_t capa_bits, uint32_t symb_bits, uint32_t cht_capa_bits = 0) { 60 | capa_size_ = size_p2{std::max(min_capa_bits, capa_bits)}; 61 | symb_size_ = size_p2{symb_bits}; 62 | 63 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 64 | 
65 | hasher_ = Hasher{capa_size_.bits() + symb_size_.bits()}; 66 | table_ = compact_vector{capa_size_.size(), symb_size_.bits() + dsp1_bits}; 67 | aux_cht_ = aux_cht_type{capa_size_.bits(), cht_capa_bits}; 68 | } 69 | 70 | ~compact_bonsai_trie() = default; 71 | 72 | uint64_t get_root() const { 73 | assert(size_ != 0); 74 | return 0; 75 | } 76 | 77 | void add_root() { 78 | assert(size_ == 0); 79 | size_ = 1; 80 | } 81 | 82 | uint64_t find_child(uint64_t node_id, uint64_t symb) const { 83 | assert(node_id < capa_size_.size()); 84 | assert(symb < symb_size_.size()); 85 | 86 | if (size_ == 0) { 87 | return nil_id; 88 | } 89 | 90 | auto [quo, mod] = decompose_(hasher_.hash(make_key_(node_id, symb))); 91 | 92 | for (uint64_t i = mod, cnt = 1;; i = right_(i), ++cnt) { 93 | if (i == get_root()) { 94 | // because the root's dsp value is zero though it is defined 95 | continue; 96 | } 97 | 98 | if (compare_dsp_(i, 0)) { 99 | // this slot is empty 100 | return nil_id; 101 | } 102 | 103 | if (compare_dsp_(i, cnt) and quo == get_quo_(i)) { 104 | return i; 105 | } 106 | } 107 | } 108 | 109 | bool add_child(uint64_t& node_id, uint64_t symb) { 110 | assert(node_id < capa_size_.size()); 111 | assert(symb < symb_size_.size()); 112 | 113 | auto [quo, mod] = decompose_(hasher_.hash(make_key_(node_id, symb))); 114 | 115 | for (uint64_t i = mod, cnt = 1;; i = right_(i), ++cnt) { 116 | // because the root's dsp value is zero though it is defined 117 | if (i == get_root()) { 118 | continue; 119 | } 120 | 121 | if (compare_dsp_(i, 0)) { 122 | // this slot is empty 123 | if (size_ == max_size_) { 124 | return false; // needs to expand 125 | } 126 | 127 | update_slot_(i, quo, cnt); 128 | 129 | ++size_; 130 | node_id = i; 131 | 132 | return true; 133 | } 134 | 135 | if (compare_dsp_(i, cnt) and quo == get_quo_(i)) { 136 | node_id = i; 137 | return false; // already stored 138 | } 139 | } 140 | } 141 | 142 | std::pair get_parent_and_symb(uint64_t node_id) const { 143 | assert(node_id < 
capa_size_.size()); 144 | 145 | if (compare_dsp_(node_id, 0)) { 146 | // root or not exist 147 | return {nil_id, 0}; 148 | } 149 | 150 | uint64_t dist = get_dsp_(node_id) - 1; 151 | uint64_t init_id = dist <= node_id ? node_id - dist : table_.size() - (dist - node_id); 152 | uint64_t key = hasher_.hash_inv(get_quo_(node_id) << capa_size_.bits() | init_id); 153 | 154 | // Returns pair (parent, label) 155 | return std::make_pair(key >> symb_size_.bits(), key & symb_size_.mask()); 156 | } 157 | 158 | class node_map { 159 | public: 160 | node_map() = default; 161 | 162 | node_map(compact_vector&& map_high, compact_vector&& map_low, bit_vector&& done_flags) 163 | : map_high_(std::move(map_high)), map_low_(std::move(map_low)), done_flags_(std::move(done_flags)) {} 164 | 165 | ~node_map() = default; 166 | 167 | uint64_t operator[](uint64_t i) const { 168 | if (!done_flags_[i]) { 169 | return UINT64_MAX; 170 | } 171 | if (map_high_.size() == 0) { 172 | return map_low_[i]; 173 | } else { 174 | return map_low_[i] | (map_high_[i] << map_low_.width()); 175 | } 176 | } 177 | 178 | uint64_t size() const { 179 | return map_low_.size(); 180 | } 181 | 182 | node_map(const node_map&) = delete; 183 | node_map& operator=(const node_map&) = delete; 184 | 185 | node_map(node_map&&) noexcept = default; 186 | node_map& operator=(node_map&&) noexcept = default; 187 | 188 | private: 189 | compact_vector map_high_; 190 | compact_vector map_low_; 191 | bit_vector done_flags_; 192 | }; 193 | 194 | bool needs_to_expand() const { 195 | return max_size() <= size(); 196 | } 197 | 198 | node_map expand() { 199 | // this_type new_ht{capa_bits() + 1, symb_size_.bits(), aux_cht_.capa_bits()}; 200 | this_type new_ht{capa_bits() + 1, symb_size_.bits()}; 201 | new_ht.add_root(); 202 | 203 | #ifdef POPLAR_EXTRA_STATS 204 | new_ht.num_resize_ = num_resize_ + 1; 205 | #endif 206 | 207 | bit_vector done_flags(capa_size()); 208 | done_flags.set(get_root()); 209 | 210 | size_p2 low_size{table_.width()}; 211 | 
compact_vector map_high; 212 | 213 | if (low_size.bits() < new_ht.capa_bits()) { 214 | map_high = compact_vector(capa_size(), new_ht.capa_bits() - low_size.bits()); 215 | } 216 | 217 | std::vector> path; 218 | path.reserve(256); 219 | 220 | auto get_mapping = [&](uint64_t i) -> uint64_t { 221 | if (map_high.size() == 0) { 222 | return table_[i]; 223 | } else { 224 | return table_[i] | (map_high[i] << low_size.bits()); 225 | } 226 | }; 227 | 228 | auto set_mapping = [&](uint64_t i, uint64_t v) { 229 | if (map_high.size() == 0) { 230 | table_.set(i, v); 231 | } else { 232 | table_.set(i, v & low_size.mask()); 233 | map_high.set(i, v >> low_size.bits()); 234 | } 235 | }; 236 | 237 | // 0 is root 238 | for (uint64_t i = 1; i < table_.size(); ++i) { 239 | if (done_flags[i] or compare_dsp_(i, 0)) { 240 | // skip already processed or empty elements 241 | continue; 242 | } 243 | 244 | path.clear(); 245 | uint64_t node_id = i; 246 | 247 | do { 248 | auto [parent, label] = get_parent_and_symb(node_id); 249 | assert(parent != nil_id); 250 | path.emplace_back(std::make_pair(node_id, label)); 251 | node_id = parent; 252 | } while (!done_flags[node_id]); 253 | 254 | uint64_t new_node_id = get_mapping(node_id); 255 | 256 | for (auto rit = std::rbegin(path); rit != std::rend(path); ++rit) { 257 | new_ht.add_child(new_node_id, rit->second); 258 | set_mapping(rit->first, new_node_id); 259 | done_flags.set(rit->first); 260 | } 261 | } 262 | 263 | node_map node_map{std::move(map_high), std::move(table_), std::move(done_flags)}; 264 | std::swap(*this, new_ht); 265 | 266 | return node_map; 267 | } 268 | 269 | uint64_t size() const { 270 | return size_; 271 | } 272 | uint64_t max_size() const { 273 | return max_size_; 274 | } 275 | uint64_t capa_size() const { 276 | return capa_size_.size(); 277 | } 278 | uint32_t capa_bits() const { 279 | return capa_size_.bits(); 280 | } 281 | uint64_t symb_size() const { 282 | return symb_size_.size(); 283 | } 284 | uint32_t symb_bits() const { 285 | 
return symb_size_.bits(); 286 | } 287 | #ifdef POPLAR_EXTRA_STATS 288 | uint64_t num_resize() const { 289 | return num_resize_; 290 | } 291 | #endif 292 | uint64_t alloc_bytes() const { 293 | uint64_t bytes = 0; 294 | bytes += table_.alloc_bytes(); 295 | bytes += aux_cht_.alloc_bytes(); 296 | bytes += aux_map_.alloc_bytes(); 297 | return bytes; 298 | } 299 | 300 | void show_stats(std::ostream& os, int n = 0) const { 301 | auto indent = get_indent(n); 302 | show_stat(os, indent, "name", "compact_hash_trie"); 303 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 304 | show_stat(os, indent, "max_factor", MaxFactor); 305 | show_stat(os, indent, "size", size()); 306 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 307 | show_stat(os, indent, "capa_bits", capa_bits()); 308 | show_stat(os, indent, "symb_bits", symb_bits()); 309 | show_stat(os, indent, "dsp1st_bits", dsp1_bits); 310 | show_stat(os, indent, "dsp2nd_bits", dsp2_bits); 311 | #ifdef POPLAR_EXTRA_STATS 312 | show_stat(os, indent, "rate_dsp1st", double(num_dsps_[0]) / size()); 313 | show_stat(os, indent, "rate_dsp2nd", double(num_dsps_[1]) / size()); 314 | show_stat(os, indent, "rate_dsp3rd", double(num_dsps_[2]) / size()); 315 | show_stat(os, indent, "num_resize", num_resize_); 316 | #endif 317 | show_member(os, indent, "hasher_"); 318 | hasher_.show_stats(os, n + 1); 319 | show_member(os, indent, "aux_cht_"); 320 | aux_cht_.show_stats(os, n + 1); 321 | show_member(os, indent, "aux_map_"); 322 | aux_map_.show_stats(os, n + 1); 323 | } 324 | 325 | compact_bonsai_trie(const compact_bonsai_trie&) = delete; 326 | compact_bonsai_trie& operator=(const compact_bonsai_trie&) = delete; 327 | 328 | compact_bonsai_trie(compact_bonsai_trie&&) noexcept = default; 329 | compact_bonsai_trie& operator=(compact_bonsai_trie&&) noexcept = default; 330 | 331 | private: 332 | Hasher hasher_; 333 | compact_vector table_; 334 | aux_cht_type aux_cht_; // 2nd dsp 335 | aux_map_type aux_map_; // 3rd dsp 336 | 
uint64_t size_ = 0; // # of registered nodes 337 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 338 | size_p2 capa_size_; 339 | size_p2 symb_size_; 340 | #ifdef POPLAR_EXTRA_STATS 341 | uint64_t num_resize_ = 0; 342 | uint64_t num_dsps_[3] = {}; 343 | #endif 344 | 345 | uint64_t make_key_(uint64_t node_id, uint64_t symb) const { 346 | return (node_id << symb_size_.bits()) | symb; 347 | } 348 | std::pair decompose_(uint64_t x) const { 349 | return {x >> capa_size_.bits(), x & capa_size_.mask()}; 350 | } 351 | uint64_t right_(uint64_t slot_id) const { 352 | return (slot_id + 1) & capa_size_.mask(); 353 | } 354 | 355 | uint64_t get_quo_(uint64_t slot_id) const { 356 | return table_[slot_id] >> dsp1_bits; 357 | } 358 | uint64_t get_dsp_(uint64_t slot_id) const { 359 | uint64_t dsp = table_[slot_id] & dsp1_mask; 360 | if (dsp < dsp1_mask) { 361 | return dsp; 362 | } 363 | 364 | dsp = aux_cht_.get(slot_id); 365 | if (dsp != aux_cht_type::nil) { 366 | return dsp + dsp1_mask; 367 | } 368 | 369 | return aux_map_.get(slot_id); 370 | } 371 | 372 | bool compare_dsp_(uint64_t slot_id, uint64_t rhs) const { 373 | uint64_t lhs = table_[slot_id] & dsp1_mask; 374 | if (lhs < dsp1_mask) { 375 | return lhs == rhs; 376 | } 377 | if (rhs < dsp1_mask) { 378 | return false; 379 | } 380 | 381 | lhs = aux_cht_.get(slot_id); 382 | if (lhs != aux_cht_type::nil) { 383 | return lhs + dsp1_mask == rhs; 384 | } 385 | if (rhs < dsp1_mask + dsp2_mask) { 386 | return false; 387 | } 388 | 389 | auto val = aux_map_.get(slot_id); 390 | assert(val != aux_map_type::nil); 391 | return val == rhs; 392 | } 393 | 394 | void update_slot_(uint64_t slot_id, uint64_t quo, uint64_t dsp) { 395 | assert(table_[slot_id] == 0); 396 | assert(quo < symb_size_.size()); 397 | 398 | uint64_t v = quo << dsp1_bits; 399 | 400 | if (dsp < dsp1_mask) { 401 | v |= dsp; 402 | } else { 403 | v |= dsp1_mask; 404 | uint64_t _dsp = dsp - dsp1_mask; 405 | if (_dsp < dsp2_mask) { 406 | aux_cht_.set(slot_id, _dsp); 407 | } 
else { 408 | aux_map_.set(slot_id, dsp); 409 | } 410 | } 411 | 412 | #ifdef POPLAR_EXTRA_STATS 413 | if (dsp < dsp1_mask) { 414 | ++num_dsps_[0]; 415 | } else if (dsp < dsp1_mask + dsp2_mask) { 416 | ++num_dsps_[1]; 417 | } else { 418 | ++num_dsps_[2]; 419 | } 420 | #endif 421 | 422 | table_.set(slot_id, v); 423 | } 424 | }; 425 | 426 | } // namespace poplar 427 | 428 | #endif // POPLAR_TRIE_COMPACT_BONSAI_TRIE_HPP 429 | -------------------------------------------------------------------------------- /include/poplar/compact_fkhash_nlm.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_FKHASH_NLM_HPP 25 | #define POPLAR_TRIE_COMPACT_FKHASH_NLM_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include "vbyte.hpp" 32 | 33 | namespace poplar { 34 | 35 | template 36 | class compact_fkhash_nlm { 37 | public: 38 | using this_type = compact_fkhash_nlm; 39 | using value_type = Value; 40 | using chunk_type = typename chunk_type_traits::type; 41 | 42 | static constexpr auto trie_type_id = trie_type_ids::FKHASH_TRIE; 43 | 44 | public: 45 | compact_fkhash_nlm() = default; 46 | 47 | explicit compact_fkhash_nlm(uint32_t capa_bits) { 48 | chunk_ptrs_.reserve((1ULL << capa_bits) / ChunkSize); 49 | chunk_buf_.reserve(1ULL << 10); 50 | } 51 | 52 | ~compact_fkhash_nlm() = default; 53 | 54 | std::pair compare(uint64_t pos, const char_range& key) const { 55 | assert(pos < size_); 56 | 57 | const uint8_t* char_ptr = nullptr; 58 | auto [chunk_id, pos_in_chunk] = decompose_value(pos); 59 | 60 | if (chunk_id < chunk_ptrs_.size()) { 61 | char_ptr = chunk_ptrs_[chunk_id].get(); 62 | } else { 63 | assert(chunk_id == chunk_ptrs_.size()); 64 | char_ptr = chunk_buf_.data(); 65 | } 66 | 67 | uint64_t alloc = 0; 68 | for (uint64_t i = 0; i < pos_in_chunk; ++i) { 69 | char_ptr += vbyte::decode(char_ptr, alloc); 70 | char_ptr += alloc; 71 | } 72 | char_ptr += vbyte::decode(char_ptr, alloc); 73 | 74 | if (key.empty()) { 75 | return {reinterpret_cast(char_ptr), 0}; 76 | } 77 | 78 | assert(sizeof(value_type) <= alloc); 79 | 80 | uint64_t length = alloc - sizeof(value_type); 81 | for (uint64_t i = 0; i < length; ++i) { 82 | if (key[i] != char_ptr[i]) { 83 | return {nullptr, i}; 84 | } 85 | } 86 | 87 | if (key[length] != '\0') { 88 | return {nullptr, length}; 89 | } 90 | 91 | // +1 considers the terminator '\0' 92 | return {reinterpret_cast(char_ptr + length), length + 1}; 93 | }; 94 | 95 | value_type* append(const char_range& key) { 96 | auto [chunk_id, pos_in_chunk] = decompose_value(size_++); 97 | if (chunk_id != 0 && pos_in_chunk 
== 0) { 98 | release_buf_(); 99 | } 100 | 101 | #ifdef POPLAR_EXTRA_STATS 102 | max_length_ = std::max(max_length_, key.length()); 103 | sum_length_ += key.length(); 104 | #endif 105 | 106 | uint64_t length = key.empty() ? 0 : key.length() - 1; 107 | vbyte::append(chunk_buf_, length + sizeof(value_type)); 108 | std::copy(key.begin, key.begin + length, std::back_inserter(chunk_buf_)); 109 | for (size_t i = 0; i < sizeof(value_type); ++i) { 110 | chunk_buf_.emplace_back('\0'); 111 | } 112 | 113 | return reinterpret_cast(chunk_buf_.data() + chunk_buf_.size() - sizeof(value_type)); 114 | } 115 | 116 | // Associate a dummy label 117 | void append_dummy() { 118 | auto [chunk_id, pos_in_chunk] = decompose_value(size_++); 119 | if (chunk_id != 0 && pos_in_chunk == 0) { 120 | release_buf_(); 121 | } 122 | 123 | vbyte::append(chunk_buf_, 0); 124 | } 125 | 126 | uint64_t size() const { 127 | return size_; 128 | } 129 | uint64_t num_ptrs() const { 130 | return chunk_ptrs_.size(); 131 | } 132 | uint64_t alloc_bytes() const { 133 | uint64_t bytes = 0; 134 | bytes += chunk_ptrs_.capacity() * sizeof(std::unique_ptr); 135 | bytes += chunk_buf_.capacity(); 136 | bytes += label_bytes_; 137 | return bytes; 138 | } 139 | 140 | void show_stats(std::ostream& os, int n = 0) const { 141 | auto indent = get_indent(n); 142 | show_stat(os, indent, "name", "compact_fkhash_nlm"); 143 | show_stat(os, indent, "size", size()); 144 | show_stat(os, indent, "num_ptrs", num_ptrs()); 145 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 146 | #ifdef POPLAR_EXTRA_STATS 147 | show_stat(os, indent, "max_length", max_length_); 148 | show_stat(os, indent, "ave_length", double(sum_length_) / size()); 149 | #endif 150 | show_stat(os, indent, "chunk_size", ChunkSize); 151 | } 152 | 153 | compact_fkhash_nlm(const compact_fkhash_nlm&) = delete; 154 | compact_fkhash_nlm& operator=(const compact_fkhash_nlm&) = delete; 155 | 156 | compact_fkhash_nlm(compact_fkhash_nlm&&) noexcept = default; 157 | 
compact_fkhash_nlm& operator=(compact_fkhash_nlm&&) noexcept = default; 158 | 159 | private: 160 | std::vector> chunk_ptrs_; 161 | std::vector chunk_buf_; // for the last chunk 162 | uint64_t size_ = 0; 163 | uint64_t label_bytes_ = 0; 164 | 165 | #ifdef POPLAR_EXTRA_STATS 166 | uint64_t max_length_ = 0; 167 | uint64_t sum_length_ = 0; 168 | #endif 169 | 170 | void release_buf_() { 171 | label_bytes_ += chunk_buf_.size(); 172 | auto new_uptr = std::make_unique(chunk_buf_.size()); 173 | std::copy(chunk_buf_.begin(), chunk_buf_.end(), new_uptr.get()); 174 | chunk_ptrs_.emplace_back(std::move(new_uptr)); 175 | chunk_buf_.clear(); 176 | } 177 | }; 178 | 179 | } // namespace poplar 180 | 181 | #endif // POPLAR_TRIE_COMPACT_FKHASH_NLM_HPP 182 | -------------------------------------------------------------------------------- /include/poplar/compact_fkhash_trie.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_FKHASH_TRIE_HPP 25 | #define POPLAR_TRIE_COMPACT_FKHASH_TRIE_HPP 26 | 27 | #include "bijective_hash.hpp" 28 | #include "bit_vector.hpp" 29 | #include "compact_hash_table.hpp" 30 | #include "compact_vector.hpp" 31 | #include "standard_hash_table.hpp" 32 | 33 | namespace poplar { 34 | 35 | template , 36 | class AuxMap = standard_hash_table<>, class Hasher = bijective_hash::split_mix_hasher> 37 | class compact_fkhash_trie { 38 | static_assert(0 < MaxFactor and MaxFactor < 100); 39 | static_assert(0 < Dsp1Bits and Dsp1Bits < 64); 40 | 41 | public: 42 | using this_type = compact_fkhash_trie; 43 | using aux_cht_type = AuxCht; 44 | using aux_map_type = AuxMap; 45 | 46 | static constexpr uint64_t nil_id = UINT64_MAX; 47 | static constexpr uint32_t min_capa_bits = 16; 48 | 49 | static constexpr uint32_t dsp1_bits = Dsp1Bits; 50 | static constexpr uint64_t dsp1_mask = (1ULL << dsp1_bits) - 1; 51 | static constexpr uint32_t dsp2_bits = aux_cht_type::val_bits; 52 | static constexpr uint32_t dsp2_mask = aux_cht_type::val_mask; 53 | 54 | static constexpr auto trie_type_id = trie_type_ids::FKHASH_TRIE; 55 | 56 | public: 57 | compact_fkhash_trie() = default; 58 | 59 | compact_fkhash_trie(uint32_t capa_bits, uint32_t symb_bits, uint32_t cht_capa_bits = 0) { 60 | capa_size_ = size_p2{std::max(min_capa_bits, capa_bits)}; 61 | symb_size_ = size_p2{symb_bits}; 62 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 63 | hasher_ = Hasher{capa_size_.bits() + symb_size_.bits()}; 64 | table_ = compact_vector{capa_size_.size(), symb_size_.bits() + dsp1_bits}; 65 | aux_cht_ = aux_cht_type{capa_size_.bits(), cht_capa_bits}; 66 | ids_ = 
compact_vector{capa_size_.size(), capa_size_.bits(), capa_size_.mask()}; 67 | } 68 | 69 | ~compact_fkhash_trie() = default; 70 | 71 | // The root ID is assigned but its slot does not exist in the table 72 | uint64_t get_root() const { 73 | assert(size_ != 0); 74 | return 0; 75 | } 76 | 77 | void add_root() { 78 | assert(size_ == 0); 79 | size_ = 1; 80 | } 81 | 82 | uint64_t find_child(uint64_t node_id, uint64_t symb) const { 83 | if (size_ == 0) { 84 | return nil_id; 85 | } 86 | 87 | auto [quo, mod] = decompose_(hasher_.hash(make_key_(node_id, symb))); 88 | 89 | for (uint64_t i = mod, cnt = 0;; i = right_(i), ++cnt) { 90 | uint64_t child_id = ids_[i]; 91 | 92 | if (child_id == capa_size_.mask()) { 93 | // encounter an empty slot 94 | return nil_id; 95 | } 96 | 97 | if (compare_dsp_(i, cnt) and quo == get_quo_(i)) { 98 | return child_id; 99 | } 100 | } 101 | } 102 | 103 | bool add_child(uint64_t& node_id, uint64_t symb) { 104 | assert(node_id < capa_size_.size()); 105 | assert(symb < symb_size_.size()); 106 | 107 | if (max_size() <= size()) { 108 | expand_(); 109 | } 110 | 111 | auto [quo, mod] = decompose_(hasher_.hash(make_key_(node_id, symb))); 112 | 113 | for (uint64_t i = mod, cnt = 0;; i = right_(i), ++cnt) { 114 | uint64_t child_id = ids_[i]; 115 | 116 | if (child_id == capa_size_.mask()) { 117 | // encounter an empty slot 118 | update_slot_(i, quo, cnt, size_); 119 | node_id = size_++; 120 | return true; 121 | } 122 | 123 | if (compare_dsp_(i, cnt) and quo == get_quo_(i)) { 124 | node_id = child_id; 125 | return false; // already stored 126 | } 127 | } 128 | } 129 | 130 | bool needs_to_expand() const { 131 | return max_size() <= size(); 132 | } 133 | 134 | uint64_t size() const { 135 | return size_; 136 | } 137 | uint64_t max_size() const { 138 | return max_size_; 139 | } 140 | uint64_t capa_size() const { 141 | return capa_size_.size(); 142 | } 143 | uint32_t capa_bits() const { 144 | return capa_size_.bits(); 145 | } 146 | uint64_t symb_size() const { 147 
| return symb_size_.size(); 148 | } 149 | uint32_t symb_bits() const { 150 | return symb_size_.bits(); 151 | } 152 | #ifdef POPLAR_EXTRA_STATS 153 | uint64_t num_resize() const { 154 | return num_resize_; 155 | } 156 | #endif 157 | uint64_t alloc_bytes() const { 158 | uint64_t bytes = 0; 159 | bytes += table_.alloc_bytes(); 160 | bytes += aux_cht_.alloc_bytes(); 161 | bytes += aux_map_.alloc_bytes(); 162 | bytes += ids_.alloc_bytes(); 163 | return bytes; 164 | } 165 | 166 | void show_stats(std::ostream& os, int n = 0) const { 167 | auto indent = get_indent(n); 168 | show_stat(os, indent, "name", "compact_fkhash_trie"); 169 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 170 | show_stat(os, indent, "max_factor", MaxFactor); 171 | show_stat(os, indent, "size", size()); 172 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 173 | show_stat(os, indent, "capa_bits", capa_bits()); 174 | show_stat(os, indent, "symb_bits", symb_bits()); 175 | show_stat(os, indent, "dsp1st_bits", dsp1_bits); 176 | show_stat(os, indent, "dsp2nd_bits", dsp2_bits); 177 | #ifdef POPLAR_EXTRA_STATS 178 | show_stat(os, indent, "rate_dsp1st", double(num_dsps_[0]) / size()); 179 | show_stat(os, indent, "rate_dsp2nd", double(num_dsps_[1]) / size()); 180 | show_stat(os, indent, "rate_dsp3rd", double(num_dsps_[2]) / size()); 181 | show_stat(os, indent, "num_resize", num_resize_); 182 | #endif 183 | show_member(os, indent, "hasher_"); 184 | hasher_.show_stats(os, n + 1); 185 | show_member(os, indent, "aux_cht_"); 186 | aux_cht_.show_stats(os, n + 1); 187 | show_member(os, indent, "aux_map_"); 188 | aux_map_.show_stats(os, n + 1); 189 | } 190 | 191 | compact_fkhash_trie(const compact_fkhash_trie&) = delete; 192 | compact_fkhash_trie& operator=(const compact_fkhash_trie&) = delete; 193 | 194 | compact_fkhash_trie(compact_fkhash_trie&&) noexcept = default; 195 | compact_fkhash_trie& operator=(compact_fkhash_trie&&) noexcept = default; 196 | 197 | private: 198 | Hasher hasher_; 
199 | compact_vector table_; 200 | aux_cht_type aux_cht_; // 2nd dsp 201 | aux_map_type aux_map_; // 3rd dsp 202 | compact_vector ids_; 203 | uint64_t size_ = 0; // # of registered nodes 204 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 205 | size_p2 capa_size_; 206 | size_p2 symb_size_; 207 | #ifdef POPLAR_EXTRA_STATS 208 | uint64_t num_resize_ = 0; 209 | uint64_t num_dsps_[3] = {}; 210 | #endif 211 | 212 | uint64_t make_key_(uint64_t node_id, uint64_t symb) const { 213 | return (node_id << symb_size_.bits()) | symb; 214 | } 215 | std::pair decompose_(uint64_t x) const { 216 | return {x >> capa_size_.bits(), x & capa_size_.mask()}; 217 | } 218 | uint64_t right_(uint64_t slot_id) const { 219 | return (slot_id + 1) & capa_size_.mask(); 220 | } 221 | 222 | uint64_t get_quo_(uint64_t slot_id) const { 223 | return table_[slot_id] >> dsp1_bits; 224 | } 225 | uint64_t get_dsp_(uint64_t slot_id) const { 226 | uint64_t dsp = table_[slot_id] & dsp1_mask; 227 | if (dsp < dsp1_mask) { 228 | return dsp; 229 | } 230 | 231 | dsp = aux_cht_.get(slot_id); 232 | if (dsp != aux_cht_type::nil) { 233 | return dsp + dsp1_mask; 234 | } 235 | 236 | return aux_map_.get(slot_id); 237 | } 238 | 239 | bool compare_dsp_(uint64_t slot_id, uint64_t rhs) const { 240 | uint64_t lhs = table_[slot_id] & dsp1_mask; 241 | if (lhs < dsp1_mask) { 242 | return lhs == rhs; 243 | } 244 | if (rhs < dsp1_mask) { 245 | return false; 246 | } 247 | 248 | lhs = aux_cht_.get(slot_id); 249 | if (lhs != aux_cht_type::nil) { 250 | return lhs + dsp1_mask == rhs; 251 | } 252 | if (rhs < dsp1_mask + dsp2_mask) { 253 | return false; 254 | } 255 | 256 | auto val = aux_map_.get(slot_id); 257 | assert(val != aux_map_type::nil); 258 | return val == rhs; 259 | } 260 | 261 | void update_slot_(uint64_t slot_id, uint64_t quo, uint64_t dsp, uint64_t node_id) { 262 | assert(table_[slot_id] == 0); 263 | assert(quo < symb_size_.size()); 264 | 265 | uint64_t v = quo << dsp1_bits; 266 | 267 | if (dsp < dsp1_mask) { 268 | v 
|= dsp; 269 | } else { 270 | v |= dsp1_mask; 271 | uint64_t _dsp = dsp - dsp1_mask; 272 | if (_dsp < dsp2_mask) { 273 | aux_cht_.set(slot_id, _dsp); 274 | } else { 275 | aux_map_.set(slot_id, dsp); 276 | } 277 | } 278 | 279 | #ifdef POPLAR_EXTRA_STATS 280 | if (dsp < dsp1_mask) { 281 | ++num_dsps_[0]; 282 | } else if (dsp < dsp1_mask + dsp2_mask) { 283 | ++num_dsps_[1]; 284 | } else { 285 | ++num_dsps_[2]; 286 | } 287 | #endif 288 | 289 | table_.set(slot_id, v); 290 | ids_.set(slot_id, node_id); 291 | } 292 | 293 | void expand_() { 294 | this_type new_ht{capa_bits() + 1, symb_size_.bits()}; 295 | #ifdef POPLAR_EXTRA_STATS 296 | new_ht.num_resize_ = num_resize_ + 1; 297 | #endif 298 | 299 | for (uint64_t i = 0; i < capa_size_.size(); ++i) { 300 | uint64_t node_id = ids_[i]; 301 | 302 | if (node_id == capa_size_.mask()) { 303 | // encounter an empty slot 304 | continue; 305 | } 306 | 307 | uint64_t dist = get_dsp_(i); 308 | uint64_t init_id = dist <= i ? i - dist : table_.size() - (dist - i); 309 | uint64_t key = hasher_.hash_inv(get_quo_(i) << capa_size_.bits() | init_id); 310 | 311 | auto [quo, mod] = new_ht.decompose_(new_ht.hasher_.hash(key)); 312 | 313 | for (uint64_t new_i = mod, cnt = 0;; new_i = new_ht.right_(new_i), ++cnt) { 314 | if (new_ht.ids_[new_i] == new_ht.capa_size_.mask()) { 315 | // encounter an empty slot 316 | new_ht.update_slot_(new_i, quo, cnt, node_id); 317 | break; 318 | } 319 | } 320 | } 321 | 322 | new_ht.size_ = size_; 323 | std::swap(*this, new_ht); 324 | } 325 | }; 326 | 327 | } // namespace poplar 328 | 329 | #endif // POPLAR_TRIE_COMPACT_FKHASH_TRIE_HPP 330 | -------------------------------------------------------------------------------- /include/poplar/compact_hash_table.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this 
software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_HASH_TABLE_HPP 25 | #define POPLAR_TRIE_COMPACT_HASH_TABLE_HPP 26 | 27 | #include "bijective_hash.hpp" 28 | #include "bit_tools.hpp" 29 | #include "compact_vector.hpp" 30 | #include "exception.hpp" 31 | 32 | namespace poplar { 33 | 34 | template 35 | class compact_hash_table { 36 | static_assert(0 < MaxFactor and MaxFactor < 100); 37 | 38 | public: 39 | using this_type = compact_hash_table; 40 | 41 | static constexpr uint32_t min_capa_bits = 12; 42 | static constexpr uint32_t val_bits = ValBits; 43 | static constexpr uint64_t val_mask = (1ULL << ValBits) - 1; 44 | static constexpr uint64_t nil = UINT64_MAX; 45 | 46 | public: 47 | compact_hash_table() = default; 48 | 49 | explicit compact_hash_table(uint32_t univ_bits, uint32_t capa_bits = min_capa_bits) { 50 | univ_size_ = size_p2{univ_bits}; 51 | capa_size_ = size_p2{std::max(min_capa_bits, capa_bits)}; 52 | 53 | assert(capa_size_.bits() <= univ_size_.bits()); 54 | 55 | quo_size_ = size_p2{univ_size_.bits() - capa_size_.bits()}; 56 | quo_shift_ = 2 + val_bits; 57 | quo_invmask_ = ~(quo_size_.mask() << quo_shift_); 58 | 59 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 60 | 61 | hasher_ = Hasher{univ_size_.bits()}; 62 | table_ = compact_vector{capa_size_.size(), quo_size_.bits() + val_bits + 2, (val_mask << 2) | 1ULL}; 63 | } 64 | 65 | ~compact_hash_table() = default; 66 | 67 | uint64_t get(uint64_t key) const { 68 | assert(key < univ_size_.size()); 69 | 70 | auto [quo, mod] = decompose_(hasher_.hash(key)); 71 | 72 | if (!get_vbit_(mod)) { 73 | return nil; 74 | } 75 | 76 | uint64_t slot_id = find_ass_cbit_(mod); 77 | if (slot_id == UINT64_MAX) { 78 | return nil; 79 | } 80 | 81 | if (!find_item_(slot_id, quo)) { 82 | return nil; 83 | } 84 | return get_val_(slot_id); 85 | } 86 | 87 | bool set(uint64_t key, uint64_t val) { 88 | assert(key < univ_size_.size()); 89 | assert(val < val_mask); 90 | 91 | if (max_size_ <= size_) { 92 | // expand 93 | this_type 
new_cht{univ_size_.bits(), capa_size_.bits() + 1}; 94 | #ifdef POPLAR_EXTRA_STATS 95 | new_cht.num_resize_ = num_resize_ + 1; 96 | #endif 97 | clone(new_cht); 98 | *this = std::move(new_cht); 99 | } 100 | 101 | auto [quo, mod] = decompose_(hasher_.hash(key)); 102 | 103 | if (is_vacant_(mod)) { 104 | // without collision 105 | update_slot_(mod, quo, val, true, true); 106 | ++size_; 107 | return true; 108 | } 109 | 110 | uint64_t empty_id = 0; 111 | uint64_t slot_id = find_ass_cbit_(mod, empty_id); 112 | 113 | if (!get_vbit_(mod)) { // initial insertion in the group 114 | // create a new collision group 115 | if (slot_id != UINT64_MAX) { // require to displace existing groups? 116 | do { 117 | slot_id = right_(slot_id); 118 | } while (!get_cbit_(slot_id)); 119 | 120 | slot_id = left_(slot_id); // rightmost slot of the group 121 | 122 | while (empty_id != slot_id) { 123 | empty_id = copy_from_right_(empty_id); 124 | } 125 | } else { 126 | // not inside other collision groups 127 | } 128 | set_vbit_(mod, true); 129 | set_cbit_(empty_id, true); 130 | } else { 131 | // collision group already exists 132 | if (find_item_(slot_id, quo)) { // already registered? 
133 | set_val_(slot_id, val); // update 134 | return false; 135 | } 136 | 137 | slot_id = left_(slot_id); // rightmost of the group 138 | 139 | // displace existing groups for creating an empty slot 140 | while (empty_id != slot_id) { 141 | empty_id = copy_from_right_(empty_id); 142 | } 143 | set_cbit_(empty_id, false); 144 | } 145 | 146 | set_quo_(empty_id, quo); 147 | set_val_(empty_id, val); 148 | 149 | ++size_; 150 | 151 | return true; 152 | } 153 | 154 | void clone(this_type& new_cht) const { 155 | set_mapper mapper; 156 | clone(new_cht, mapper); 157 | } 158 | template 159 | void clone(this_type& new_cht, const SetMapper& mapper) const { 160 | POPLAR_THROW_IF(new_cht.size() != 0, "new_cht must be empty."); 161 | POPLAR_THROW_IF(new_cht.max_size() < size(), "this->size() <= new_cht.max_size() must hold."); 162 | 163 | // Find the first vacant slot 164 | uint64_t i = 0; 165 | while (!is_vacant_(i)) { 166 | i = right_(i); 167 | } 168 | 169 | const uint64_t beg = i; 170 | i = right_(i); // skip the vacant 171 | 172 | for (bool completed = false; !completed;) { 173 | // Find the leftmost of some collision groups 174 | while (is_vacant_(i)) { 175 | i = right_(i); 176 | if (i == beg) { 177 | completed = true; 178 | } 179 | } 180 | 181 | assert(get_cbit_(i)); 182 | uint64_t init_id = i; 183 | 184 | do { 185 | // Find the rightmost of the collision group 186 | while (!get_vbit_(init_id)) { 187 | init_id = right_(init_id); 188 | } 189 | 190 | do { 191 | assert(!is_vacant_(i)); 192 | 193 | uint64_t key = hasher_.hash_inv((get_quo_(i) << capa_size_.bits()) | init_id); 194 | uint64_t val = get_val_(i); 195 | // new_cht.set(key, val); 196 | mapper(new_cht, key, val); 197 | 198 | i = right_(i); 199 | if (i == beg) { 200 | completed = true; 201 | } 202 | } while (!get_cbit_(i)); 203 | 204 | init_id = right_(init_id); 205 | } while (i != init_id); 206 | } 207 | 208 | assert(size() == new_cht.size()); 209 | } 210 | 211 | uint64_t size() const { 212 | return size_; 213 | } 214 | 
uint64_t max_size() const { 215 | return max_size_; 216 | } 217 | uint64_t univ_size() const { 218 | return univ_size_.size(); 219 | } 220 | uint32_t univ_bits() const { 221 | return univ_size_.bits(); 222 | } 223 | uint64_t capa_size() const { 224 | return capa_size_.size(); 225 | } 226 | uint32_t capa_bits() const { 227 | return capa_size_.bits(); 228 | } 229 | uint64_t alloc_bytes() const { 230 | return table_.alloc_bytes(); 231 | } 232 | 233 | void show_stats(std::ostream& os, int n = 0) const { 234 | auto indent = get_indent(n); 235 | show_stat(os, indent, "name", "compact_hash_table"); 236 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 237 | show_stat(os, indent, "max_factor", MaxFactor); 238 | show_stat(os, indent, "size", size()); 239 | show_stat(os, indent, "capa_size", capa_size()); 240 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 241 | #ifdef POPLAR_EXTRA_STATS 242 | show_stat(os, indent, "num_resize", num_resize_); 243 | #endif 244 | show_member(os, indent, "hasher_"); 245 | hasher_.show_stats(os, n + 1); 246 | } 247 | 248 | compact_hash_table(const compact_hash_table&) = delete; 249 | compact_hash_table& operator=(const compact_hash_table&) = delete; 250 | 251 | compact_hash_table(compact_hash_table&&) noexcept = default; 252 | compact_hash_table& operator=(compact_hash_table&&) noexcept = default; 253 | 254 | private: 255 | Hasher hasher_; 256 | compact_vector table_; 257 | uint64_t size_ = 0; // # of registered nodes 258 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 259 | size_p2 univ_size_; 260 | size_p2 capa_size_; 261 | size_p2 quo_size_; 262 | uint64_t quo_shift_ = 0; 263 | uint64_t quo_invmask_ = 0; // For setter 264 | #ifdef POPLAR_EXTRA_STATS 265 | uint64_t num_resize_ = 0; 266 | #endif 267 | 268 | struct set_mapper { 269 | void operator()(this_type& new_cht, uint64_t key, uint64_t val) const { 270 | new_cht.set(key, val); 271 | } 272 | }; 273 | 274 | uint64_t find_ass_cbit_(uint64_t slot_id) const { 
275 | uint64_t dummy = 0; 276 | return find_ass_cbit_(slot_id, dummy); 277 | } 278 | 279 | uint64_t find_ass_cbit_(uint64_t slot_id, uint64_t& empty_id) const { 280 | uint64_t num_vbits = 0; 281 | do { 282 | if (get_vbit_(slot_id)) { 283 | ++num_vbits; 284 | } 285 | slot_id = left_(slot_id); 286 | } while (!is_vacant_(slot_id)); 287 | 288 | empty_id = slot_id; 289 | 290 | if (num_vbits == 0) { 291 | return UINT64_MAX; 292 | } 293 | 294 | uint64_t num_cbits = 0; 295 | while (num_cbits < num_vbits) { 296 | slot_id = right_(slot_id); 297 | if (get_cbit_(slot_id)) { 298 | ++num_cbits; 299 | } 300 | } 301 | 302 | return slot_id; 303 | } 304 | 305 | bool find_item_(uint64_t& slot_id, uint64_t quo) const { 306 | do { 307 | if (get_quo_(slot_id) == quo) { 308 | return true; 309 | } 310 | slot_id = right_(slot_id); 311 | } while (!get_cbit_(slot_id)); 312 | return false; 313 | } 314 | 315 | std::pair decompose_(uint64_t x) const { 316 | return {x >> capa_size_.bits(), x & capa_size_.mask()}; 317 | } 318 | 319 | uint64_t left_(uint64_t slot_id) const { 320 | return (slot_id - 1) & capa_size_.mask(); 321 | } 322 | uint64_t right_(uint64_t slot_id) const { 323 | return (slot_id + 1) & capa_size_.mask(); 324 | } 325 | bool is_vacant_(uint64_t slot_id) const { 326 | return get_val_(slot_id) == val_mask; 327 | } 328 | 329 | uint64_t get_quo_(uint64_t slot_id) const { 330 | return table_.get(slot_id) >> quo_shift_; 331 | } 332 | uint64_t get_val_(uint64_t slot_id) const { 333 | return (table_.get(slot_id) >> 2) & val_mask; 334 | } 335 | bool get_vbit_(uint64_t slot_id) const { 336 | return (table_.get(slot_id) & 2) == 2; 337 | } 338 | bool get_cbit_(uint64_t slot_id) const { 339 | return (table_.get(slot_id) & 1) == 1; 340 | } 341 | 342 | void set_quo_(uint64_t slot_id, uint64_t quo) { 343 | assert(quo < quo_size_.size()); 344 | table_.set(slot_id, (table_.get(slot_id) & quo_invmask_) | (quo << quo_shift_)); 345 | } 346 | void set_val_(uint64_t slot_id, uint64_t val) { 347 | 
assert(val <= val_mask); 348 | table_.set(slot_id, (table_.get(slot_id) & ~(val_mask << 2)) | (val << 2)); 349 | } 350 | void set_vbit_(uint64_t slot_id, bool bit) { 351 | table_.set(slot_id, (table_.get(slot_id) & ~2ULL) | (bit << 1)); 352 | } 353 | void set_cbit_(uint64_t slot_id, bool bit) { 354 | table_.set(slot_id, (table_.get(slot_id) & ~1ULL) | bit); 355 | } 356 | 357 | // Copies the slot from the right one except virgin bit information. 358 | uint64_t copy_from_right_(uint64_t slot_id) { 359 | uint64_t _slot_id = right_(slot_id); 360 | table_.set(slot_id, (table_.get(_slot_id) & ~2ULL) | (get_vbit_(slot_id) << 1)); 361 | return _slot_id; 362 | } 363 | 364 | void update_slot_(uint64_t slot_id, uint64_t quo, uint64_t val, bool vbit, bool cbit) { 365 | assert(quo < quo_size_.size()); 366 | assert(val <= val_mask); 367 | table_.set(slot_id, (quo << quo_shift_) | (val << 2) | (vbit << 1) | cbit); 368 | } 369 | }; 370 | 371 | } // namespace poplar 372 | 373 | #endif // POPLAR_TRIE_COMPACT_HASH_TABLE_HPP 374 | -------------------------------------------------------------------------------- /include/poplar/compact_vector.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_COMPACT_VECTOR_HPP 25 | #define POPLAR_TRIE_COMPACT_VECTOR_HPP 26 | 27 | #include 28 | 29 | #include "bit_tools.hpp" 30 | #include "exception.hpp" 31 | 32 | namespace poplar { 33 | 34 | class compact_vector { 35 | public: 36 | compact_vector() = default; 37 | 38 | compact_vector(uint64_t size, uint32_t width) { 39 | POPLAR_THROW_IF(64 <= width, "width overflow."); 40 | 41 | size_ = size; 42 | mask_ = (1ULL << width) - 1; 43 | width_ = width; 44 | chunks_.resize(bit_tools::words_for(size_ * width_), 0); 45 | } 46 | 47 | compact_vector(uint64_t size, uint32_t width, uint64_t init) : compact_vector{size, width} { 48 | for (uint64_t i = 0; i < size; ++i) { 49 | set(i, init); 50 | } 51 | } 52 | 53 | ~compact_vector() = default; 54 | 55 | void resize(uint64_t size) { 56 | size_ = size; 57 | chunks_.resize(bit_tools::words_for(size_ * width_)); 58 | } 59 | 60 | uint64_t operator[](uint64_t i) const { 61 | return get(i); 62 | } 63 | 64 | uint64_t get(uint64_t i) const { 65 | assert(i < size_); 66 | 67 | auto [quo, mod] = decompose_value<64>(i * width_); 68 | 69 | if (mod + width_ <= 64) { 70 | return (chunks_[quo] >> mod) & mask_; 71 | } else { 72 | return ((chunks_[quo] >> mod) | (chunks_[quo + 1] << (64 - mod))) & mask_; 73 | } 74 | } 75 | 76 | void set(uint64_t i, uint64_t v) { 77 | assert(i < size_); 78 | assert(v <= mask_); 79 | 80 | auto [quo, mod] = decompose_value<64>(i * width_); 81 | 82 | chunks_[quo] &= ~(mask_ << mod); 83 
| chunks_[quo] |= (v & mask_) << mod; 84 | 85 | if (64 < mod + width_) { 86 | const uint64_t diff = 64 - mod; 87 | chunks_[quo + 1] &= ~(mask_ >> diff); 88 | chunks_[quo + 1] |= (v & mask_) >> diff; 89 | } 90 | } 91 | 92 | uint64_t size() const { 93 | return size_; 94 | } 95 | uint32_t width() const { 96 | return width_; 97 | } 98 | uint64_t alloc_bytes() const { 99 | return chunks_.capacity() * sizeof(uint64_t); 100 | } 101 | 102 | compact_vector(const compact_vector&) = delete; 103 | compact_vector& operator=(const compact_vector&) = delete; 104 | 105 | compact_vector(compact_vector&&) noexcept = default; 106 | compact_vector& operator=(compact_vector&&) noexcept = default; 107 | 108 | private: 109 | std::vector chunks_; 110 | uint64_t size_ = 0; 111 | uint64_t mask_ = 0; 112 | uint64_t width_ = 0; 113 | }; 114 | 115 | } // namespace poplar 116 | 117 | #endif // POPLAR_TRIE_COMPACT_VECTOR_HPP 118 | -------------------------------------------------------------------------------- /include/poplar/exception.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
namespace poplar {

// Exception type thrown by the library.
// Only the pointer is stored, not a copy of the text, so `msg` must outlive
// the exception object (the POPLAR_THROW macros always pass string literals).
class exception : public std::exception {
  public:
    explicit exception(const char* msg) noexcept : msg_{msg} {}
    ~exception() noexcept override = default;

    // Returns the message pointer given at construction.
    const char* what() const noexcept override {
        return msg_;
    }

  private:
    const char* msg_;
};

// POPLAR_THROW(msg): throws poplar::exception with "<file>:<line>:<msg>".
// POPLAR_THROW_IF(cond, msg): does the same only when `cond` is true; it is an
// expression, so it can be used wherever a statement or expression is allowed.
// `msg` must be a string literal because it is concatenated at compile time.
#define POPLAR_TO_STR_(n) #n
#define POPLAR_TO_STR(n) POPLAR_TO_STR_(n)
#define POPLAR_THROW(msg) throw poplar::exception(__FILE__ ":" POPLAR_TO_STR(__LINE__) ":" msg)
#define POPLAR_THROW_IF(cond, msg) (void)((!(cond)) || (POPLAR_THROW(msg), 0))

}  // namespace poplar
namespace poplar::hash {

// 64-bit mixing function taken from
// http://xoroshiro.di.unimi.it/splitmix64.c
struct vigna_hasher {
    vigna_hasher() = default;

    explicit vigna_hasher(uint64_t seed) : seed_(seed) {}

    // Seedless variant: mixes x with the three xor-shift/multiply rounds.
    static uint64_t hash(uint64_t x) {
        x ^= x >> 30;
        x *= 0xbf58476d1ce4e5b9ULL;
        x ^= x >> 27;
        x *= 0x94d049bb133111ebULL;
        x ^= x >> 31;
        return x;
    }

    // Seeded variant: adds the seed to x and then applies the same rounds.
    uint64_t operator()(uint64_t x) const {
        return hash(x + seed_);
    }

  private:
    uint64_t seed_ = 0x9e3779b97f4a7c15ULL;
};

}  // namespace poplar::hash
"Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_MAP_HPP 25 | #define POPLAR_TRIE_MAP_HPP 26 | 27 | #include 28 | #include 29 | 30 | #include "bit_tools.hpp" 31 | #include "exception.hpp" 32 | 33 | namespace poplar { 34 | 35 | // This class implements an updatable associative array whose keys are strings. 36 | // The data structure is based on a dynamic path-decomposed trie described in the following paper, 37 | // - "Dynamic Path-Decomposed Tries" available at https://arxiv.org/abs/1906.06015. 38 | template 39 | class map { 40 | static_assert(Trie::trie_type_id == NLM::trie_type_id); 41 | 42 | public: 43 | using this_type = map; 44 | using trie_type = Trie; 45 | using value_type = typename NLM::value_type; 46 | 47 | static constexpr auto trie_type_id = Trie::trie_type_id; 48 | static constexpr uint32_t min_capa_bits = Trie::min_capa_bits; 49 | 50 | public: 51 | // Generic constructor. 52 | map() = default; 53 | 54 | // Class constructor. Initially allocates the hash table of length 55 | // 2**capa_bits. 
56 | explicit map(uint32_t capa_bits, uint64_t lambda = 32) { 57 | POPLAR_THROW_IF(!is_power2(lambda), "lambda must be a power of 2."); 58 | 59 | is_ready_ = true; 60 | lambda_ = lambda; 61 | hash_trie_ = Trie{capa_bits, 8 + bit_tools::ceil_log2(lambda_)}; 62 | label_store_ = NLM{hash_trie_.capa_bits()}; 63 | codes_.fill(UINT8_MAX); 64 | codes_[0] = static_cast(num_codes_++); // terminator 65 | } 66 | 67 | // Generic destructor. 68 | ~map() = default; 69 | 70 | // Searches the given key and returns the value pointer if registered; 71 | // otherwise returns nullptr. 72 | const value_type* find(const std::string& key) const { 73 | return find(make_char_range(key)); 74 | } 75 | const value_type* find(char_range key) const { 76 | POPLAR_THROW_IF(key.empty(), "key must be a non-empty string."); 77 | POPLAR_THROW_IF(*(key.end - 1) != '\0', "The last character of key must be the null terminator."); 78 | 79 | if (!is_ready_ or hash_trie_.size() == 0) { 80 | return nullptr; 81 | } 82 | 83 | auto node_id = hash_trie_.get_root(); 84 | 85 | while (!key.empty()) { 86 | auto [vptr, match] = label_store_.compare(node_id, key); 87 | if (vptr != nullptr) { 88 | return vptr; 89 | } 90 | 91 | key.begin += match; 92 | 93 | while (lambda_ <= match) { 94 | node_id = hash_trie_.find_child(node_id, step_symb); 95 | if (node_id == nil_id) { 96 | return nullptr; 97 | } 98 | match -= lambda_; 99 | } 100 | 101 | if (codes_[*key.begin] == UINT8_MAX) { 102 | // Detecting an useless character 103 | return nullptr; 104 | } 105 | 106 | node_id = hash_trie_.find_child(node_id, make_symb_(*key.begin, match)); 107 | if (node_id == nil_id) { 108 | return nullptr; 109 | } 110 | 111 | ++key.begin; 112 | } 113 | 114 | return label_store_.compare(node_id, key).first; 115 | } 116 | 117 | // Inserts the given key and returns the value pointer. 
118 | value_type* update(const std::string& key) { 119 | return update(make_char_range(key)); 120 | } 121 | value_type* update(char_range key) { 122 | POPLAR_THROW_IF(key.empty(), "key must be a non-empty string."); 123 | POPLAR_THROW_IF(*(key.end - 1) != '\0', "The last character of key must be the null terminator."); 124 | 125 | if (hash_trie_.size() == 0) { 126 | if (!is_ready_) { 127 | *this = this_type{0}; 128 | } 129 | // The first insertion 130 | ++size_; 131 | hash_trie_.add_root(); 132 | 133 | if constexpr (trie_type_id == trie_type_ids::FKHASH_TRIE) { 134 | // assert(hash_trie_.get_root() == label_store_.size()); 135 | return label_store_.append(key); 136 | } 137 | if constexpr (trie_type_id == trie_type_ids::BONSAI_TRIE) { 138 | return label_store_.insert(hash_trie_.get_root(), key); 139 | } 140 | // should not come 141 | assert(false); 142 | } 143 | 144 | auto node_id = hash_trie_.get_root(); 145 | 146 | while (!key.empty()) { 147 | auto [vptr, match] = label_store_.compare(node_id, key); 148 | if (vptr != nullptr) { 149 | return const_cast(vptr); 150 | } 151 | 152 | key.begin += match; 153 | 154 | while (lambda_ <= match) { 155 | if (hash_trie_.add_child(node_id, step_symb)) { 156 | expand_if_needed_(node_id); 157 | #ifdef POPLAR_EXTRA_STATS 158 | ++num_steps_; 159 | #endif 160 | if constexpr (trie_type_id == trie_type_ids::FKHASH_TRIE) { 161 | assert(node_id == label_store_.size()); 162 | label_store_.append_dummy(); 163 | } 164 | } 165 | match -= lambda_; 166 | } 167 | 168 | if (codes_[*key.begin] == UINT8_MAX) { 169 | // Update table 170 | codes_[*key.begin] = static_cast(num_codes_++); 171 | POPLAR_THROW_IF(UINT8_MAX == num_codes_, ""); 172 | } 173 | 174 | if (hash_trie_.add_child(node_id, make_symb_(*key.begin, match))) { 175 | expand_if_needed_(node_id); 176 | ++key.begin; 177 | ++size_; 178 | 179 | if constexpr (trie_type_id == trie_type_ids::FKHASH_TRIE) { 180 | assert(node_id == label_store_.size()); 181 | return label_store_.append(key); 182 
| } 183 | if constexpr (trie_type_id == trie_type_ids::BONSAI_TRIE) { 184 | return label_store_.insert(node_id, key); 185 | } 186 | // should not come 187 | assert(false); 188 | } 189 | 190 | ++key.begin; 191 | } 192 | 193 | auto vptr = label_store_.compare(node_id, key).first; 194 | return vptr ? const_cast(vptr) : nullptr; 195 | } 196 | 197 | // Gets the number of registered keys. 198 | uint64_t size() const { 199 | return size_; 200 | } 201 | // Gets the capacity of the hash table. 202 | uint64_t capa_size() const { 203 | return hash_trie_.capa_size(); 204 | } 205 | #ifdef POPLAR_EXTRA_STATS 206 | double rate_steps() const { 207 | return double(num_steps_) / size_; 208 | } 209 | uint64_t num_resize() const { 210 | return hash_trie_.num_resize(); 211 | } 212 | #endif 213 | uint64_t alloc_bytes() const { 214 | uint64_t bytes = 0; 215 | bytes += hash_trie_.alloc_bytes(); 216 | bytes += label_store_.alloc_bytes(); 217 | bytes += codes_.size(); 218 | return bytes; 219 | } 220 | 221 | void show_stats(std::ostream& os, int n = 0) const { 222 | auto indent = get_indent(n); 223 | show_stat(os, indent, "name", "map"); 224 | show_stat(os, indent, "lambda", lambda_); 225 | show_stat(os, indent, "size", size()); 226 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 227 | #ifdef POPLAR_EXTRA_STATS 228 | show_stat(os, indent, "rate_steps", rate_steps()); 229 | #endif 230 | show_member(os, indent, "hash_trie_"); 231 | hash_trie_.show_stats(os, n + 1); 232 | show_member(os, indent, "label_store_"); 233 | label_store_.show_stats(os, n + 1); 234 | } 235 | 236 | map(const map&) = delete; 237 | map& operator=(const map&) = delete; 238 | 239 | map(map&&) noexcept = default; 240 | map& operator=(map&&) noexcept = default; 241 | 242 | private: 243 | static constexpr uint64_t nil_id = Trie::nil_id; 244 | static constexpr uint64_t step_symb = UINT8_MAX; // (UINT8_MAX, 0) 245 | 246 | bool is_ready_ = false; 247 | uint64_t lambda_ = 32; 248 | 249 | Trie hash_trie_; 250 | NLM 
label_store_; 251 | std::array codes_ = {}; 252 | uint32_t num_codes_ = 0; 253 | uint64_t size_ = 0; 254 | #ifdef POPLAR_EXTRA_STATS 255 | uint64_t num_steps_ = 0; 256 | #endif 257 | 258 | uint64_t make_symb_(uint8_t c, uint64_t match) const { 259 | assert(codes_[c] != UINT8_MAX); 260 | return static_cast(codes_[c]) | (match << 8); 261 | } 262 | 263 | void expand_if_needed_(uint64_t& node_id) { 264 | if constexpr (trie_type_id == trie_type_ids::BONSAI_TRIE) { 265 | if (!hash_trie_.needs_to_expand()) { 266 | return; 267 | } 268 | auto node_map = hash_trie_.expand(); 269 | node_id = node_map[node_id]; 270 | label_store_.expand(node_map); 271 | } 272 | } 273 | }; 274 | 275 | } // namespace poplar 276 | 277 | #endif // POPLAR_TRIE_MAP_HPP 278 | -------------------------------------------------------------------------------- /include/poplar/plain_bonsai_nlm.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_PLAIN_BONSAI_NLM_HPP 25 | #define POPLAR_TRIE_PLAIN_BONSAI_NLM_HPP 26 | 27 | #include 28 | #include 29 | 30 | #include "basics.hpp" 31 | #include "compact_vector.hpp" 32 | 33 | namespace poplar { 34 | 35 | template 36 | class plain_bonsai_nlm { 37 | public: 38 | using value_type = Value; 39 | 40 | static constexpr auto trie_type_id = trie_type_ids::BONSAI_TRIE; 41 | 42 | public: 43 | plain_bonsai_nlm() = default; 44 | 45 | explicit plain_bonsai_nlm(uint32_t capa_bits) : ptrs_(1ULL << capa_bits) {} 46 | 47 | ~plain_bonsai_nlm() = default; 48 | 49 | std::pair compare(uint64_t pos, const char_range& key) const { 50 | assert(pos < ptrs_.size()); 51 | assert(ptrs_[pos]); 52 | 53 | const uint8_t* ptr = ptrs_[pos].get(); 54 | 55 | if (key.empty()) { 56 | return {reinterpret_cast(ptr), 0}; 57 | } 58 | 59 | for (uint64_t i = 0; i < key.length(); ++i) { 60 | if (key[i] != ptr[i]) { 61 | return {nullptr, i}; 62 | } 63 | } 64 | 65 | return {reinterpret_cast(ptr + key.length()), key.length()}; 66 | } 67 | 68 | value_type* insert(uint64_t pos, const char_range& key) { 69 | assert(!ptrs_[pos]); 70 | 71 | ++size_; 72 | 73 | uint64_t length = key.length(); 74 | ptrs_[pos] = std::make_unique(length + sizeof(value_type)); 75 | auto ptr = ptrs_[pos].get(); 76 | copy_bytes(ptr, key.begin, length); 77 | 78 | label_bytes_ += length + sizeof(value_type); 79 | 80 | #ifdef POPLAR_EXTRA_STATS 81 | max_length_ = std::max(max_length_, length); 82 | sum_length_ += length; 83 | #endif 84 | 85 | auto ret = reinterpret_cast(ptr + length); 86 | *ret = static_cast(0); 87 | 88 | return ret; 89 | } 90 | 91 | template 92 | void expand(const T& pos_map) { 93 | std::vector> 
new_ptrs(ptrs_.size() * 2); 94 | for (uint64_t i = 0; i < pos_map.size(); ++i) { 95 | if (pos_map[i] != UINT64_MAX) { 96 | new_ptrs[pos_map[i]] = std::move(ptrs_[i]); 97 | } 98 | } 99 | ptrs_ = std::move(new_ptrs); 100 | } 101 | 102 | uint64_t size() const { 103 | return size_; 104 | } 105 | uint64_t num_ptrs() const { 106 | return ptrs_.size(); 107 | } 108 | uint64_t alloc_bytes() const { 109 | uint64_t bytes = 0; 110 | bytes += ptrs_.capacity() * sizeof(std::unique_ptr); 111 | bytes += label_bytes_; 112 | return bytes; 113 | } 114 | 115 | void show_stats(std::ostream& os, int n = 0) const { 116 | auto indent = get_indent(n); 117 | show_stat(os, indent, "name", "plain_bonsai_nlm"); 118 | show_stat(os, indent, "size", size()); 119 | show_stat(os, indent, "num_ptrs", num_ptrs()); 120 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 121 | #ifdef POPLAR_EXTRA_STATS 122 | show_stat(os, indent, "max_length", max_length_); 123 | show_stat(os, indent, "ave_length", double(sum_length_) / size()); 124 | #endif 125 | } 126 | 127 | plain_bonsai_nlm(const plain_bonsai_nlm&) = delete; 128 | plain_bonsai_nlm& operator=(const plain_bonsai_nlm&) = delete; 129 | 130 | plain_bonsai_nlm(plain_bonsai_nlm&&) noexcept = default; 131 | plain_bonsai_nlm& operator=(plain_bonsai_nlm&&) noexcept = default; 132 | 133 | private: 134 | std::vector> ptrs_; 135 | uint64_t size_ = 0; 136 | uint64_t label_bytes_ = 0; 137 | #ifdef POPLAR_EXTRA_STATS 138 | uint64_t max_length_ = 0; 139 | uint64_t sum_length_ = 0; 140 | #endif 141 | }; 142 | 143 | } // namespace poplar 144 | 145 | #endif // POPLAR_TRIE_PLAIN_BONSAI_NLM_HPP 146 | -------------------------------------------------------------------------------- /include/poplar/plain_bonsai_trie.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * 
of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_PLAIN_BONSAI_TRIE_HPP 25 | #define POPLAR_TRIE_PLAIN_BONSAI_TRIE_HPP 26 | 27 | #include "bit_tools.hpp" 28 | #include "bit_vector.hpp" 29 | #include "compact_vector.hpp" 30 | #include "hash.hpp" 31 | 32 | namespace poplar { 33 | 34 | template 35 | class plain_bonsai_trie { 36 | private: 37 | static_assert(0 < MaxFactor and MaxFactor < 100); 38 | 39 | public: 40 | static constexpr uint64_t nil_id = UINT64_MAX; 41 | static constexpr uint32_t min_capa_bits = 16; 42 | 43 | static constexpr auto trie_type_id = trie_type_ids::BONSAI_TRIE; 44 | 45 | public: 46 | plain_bonsai_trie() = default; 47 | 48 | plain_bonsai_trie(uint32_t capa_bits, uint32_t symb_bits) { 49 | capa_size_ = size_p2{std::max(min_capa_bits, capa_bits)}; 50 | symb_size_ = size_p2{symb_bits}; 51 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 52 | table_ = compact_vector{capa_size_.size(), capa_size_.bits() + symb_size_.bits()}; 53 | } 54 | 55 | ~plain_bonsai_trie() = default; 56 | 57 | uint64_t get_root() const { 58 | assert(size_ != 0); 59 | return 1; 60 | } 61 | 62 | void add_root() { 63 | assert(size_ == 0); 64 | size_ = 1; 65 | } 66 | 67 | uint64_t find_child(uint64_t node_id, uint64_t symb) const { 68 | assert(node_id < capa_size_.size()); 69 | assert(symb < symb_size_.size()); 70 | 71 | if (size_ == 0) { 72 | return nil_id; 73 | } 74 | 75 | uint64_t key = make_key_(node_id, symb); 76 | assert(key != 0); 77 | 78 | for (uint64_t i = Hasher::hash(key) & capa_size_.mask();; i = right_(i)) { 79 | if (i == 0) { 80 | // table_[0] is always empty so that table_[i] = 0 indicates to be empty. 
81 | continue; 82 | } 83 | if (i == get_root()) { 84 | continue; 85 | } 86 | if (table_[i] == 0) { 87 | // encounter an empty slot 88 | return nil_id; 89 | } 90 | if (table_[i] == key) { 91 | return i; 92 | } 93 | } 94 | } 95 | 96 | bool add_child(uint64_t& node_id, uint64_t symb) { 97 | assert(node_id < capa_size_.size()); 98 | assert(symb < symb_size_.size()); 99 | 100 | uint64_t key = make_key_(node_id, symb); 101 | assert(key != 0); 102 | 103 | for (uint64_t i = Hasher::hash(key) & capa_size_.mask();; i = right_(i)) { 104 | if (i == 0) { 105 | // table_[0] is always empty so that any table_[i] = 0 indicates to be empty. 106 | continue; 107 | } 108 | 109 | if (i == get_root()) { 110 | continue; 111 | } 112 | 113 | if (table_[i] == 0) { 114 | // this slot is empty 115 | if (size_ == max_size_) { 116 | return false; // needs to expand 117 | } 118 | 119 | table_.set(i, key); 120 | 121 | ++size_; 122 | node_id = i; 123 | 124 | return true; 125 | } 126 | 127 | if (table_[i] == key) { 128 | node_id = i; 129 | return false; // already stored 130 | } 131 | } 132 | } 133 | 134 | std::pair get_parent_and_symb(uint64_t node_id) const { 135 | assert(node_id < capa_size_.size()); 136 | 137 | uint64_t key = table_[node_id]; 138 | if (key == 0) { 139 | // root or not exist 140 | return {nil_id, 0}; 141 | } 142 | // Returns pair (parent, label) 143 | return std::make_pair(key >> symb_size_.bits(), key & symb_size_.mask()); 144 | }; 145 | 146 | class node_map { 147 | public: 148 | node_map() = default; 149 | 150 | node_map(compact_vector&& map, bit_vector&& done_flags) 151 | : map_{std::move(map)}, done_flags_{std::move(done_flags)} {} 152 | 153 | ~node_map() = default; 154 | 155 | uint64_t operator[](uint64_t i) const { 156 | return done_flags_[i] ? 
map_[i] : UINT64_MAX; 157 | } 158 | 159 | uint64_t size() const { 160 | return map_.size(); 161 | } 162 | 163 | node_map(const node_map&) = delete; 164 | node_map& operator=(const node_map&) = delete; 165 | 166 | node_map(node_map&& rhs) noexcept = default; 167 | node_map& operator=(node_map&& rhs) noexcept = default; 168 | 169 | private: 170 | compact_vector map_; 171 | bit_vector done_flags_; 172 | }; 173 | 174 | bool needs_to_expand() const { 175 | return max_size() <= size(); 176 | } 177 | 178 | node_map expand() { 179 | plain_bonsai_trie new_ht{capa_bits() + 1, symb_size_.bits()}; 180 | new_ht.add_root(); 181 | 182 | #ifdef POPLAR_EXTRA_STATS 183 | new_ht.num_resize_ = num_resize_ + 1; 184 | #endif 185 | 186 | bit_vector done_flags(capa_size()); 187 | done_flags.set(get_root()); 188 | 189 | table_.set(get_root(), new_ht.get_root()); 190 | 191 | std::vector> path; 192 | path.reserve(256); 193 | 194 | // 0 is empty, 1 is root 195 | for (uint64_t i = 2; i < table_.size(); ++i) { 196 | if (done_flags[i] || table_[i] == 0) { 197 | // skip already processed or empty elements 198 | continue; 199 | } 200 | 201 | path.clear(); 202 | uint64_t node_id = i; 203 | 204 | do { 205 | auto [parent, label] = get_parent_and_symb(node_id); 206 | assert(parent != nil_id); 207 | path.emplace_back(std::make_pair(node_id, label)); 208 | node_id = parent; 209 | } while (!done_flags[node_id]); 210 | 211 | uint64_t new_node_id = table_[node_id]; 212 | 213 | for (auto rit = std::rbegin(path); rit != std::rend(path); ++rit) { 214 | new_ht.add_child(new_node_id, rit->second); 215 | table_.set(rit->first, new_node_id); 216 | done_flags.set(rit->first); 217 | } 218 | } 219 | 220 | node_map node_map{std::move(table_), std::move(done_flags)}; 221 | std::swap(*this, new_ht); 222 | 223 | return node_map; 224 | } 225 | 226 | // # of registerd nodes 227 | uint64_t size() const { 228 | return size_; 229 | } 230 | uint64_t max_size() const { 231 | return max_size_; 232 | } 233 | uint64_t capa_size() 
const { 234 | return capa_size_.size(); 235 | } 236 | uint32_t capa_bits() const { 237 | return capa_size_.bits(); 238 | } 239 | uint64_t symb_size() const { 240 | return symb_size_.size(); 241 | } 242 | uint32_t symb_bits() const { 243 | return symb_size_.bits(); 244 | } 245 | #ifdef POPLAR_EXTRA_STATS 246 | uint64_t num_resize() const { 247 | return num_resize_; 248 | } 249 | #endif 250 | uint64_t alloc_bytes() const { 251 | return table_.alloc_bytes(); 252 | } 253 | 254 | void show_stats(std::ostream& os, int n = 0) const { 255 | auto indent = get_indent(n); 256 | show_stat(os, indent, "name", "plain_bonsai_trie"); 257 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 258 | show_stat(os, indent, "max_factor", MaxFactor); 259 | show_stat(os, indent, "size", size()); 260 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 261 | show_stat(os, indent, "capa_bits", capa_bits()); 262 | show_stat(os, indent, "symb_bits", symb_bits()); 263 | #ifdef POPLAR_EXTRA_STATS 264 | show_stat(os, indent, "num_resize", num_resize_); 265 | #endif 266 | } 267 | 268 | plain_bonsai_trie(const plain_bonsai_trie&) = delete; 269 | plain_bonsai_trie& operator=(const plain_bonsai_trie&) = delete; 270 | 271 | plain_bonsai_trie(plain_bonsai_trie&&) noexcept = default; 272 | plain_bonsai_trie& operator=(plain_bonsai_trie&&) noexcept = default; 273 | 274 | private: 275 | compact_vector table_; 276 | uint64_t size_ = 0; // # of registered nodes 277 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 278 | size_p2 capa_size_; 279 | size_p2 symb_size_; 280 | #ifdef POPLAR_EXTRA_STATS 281 | uint64_t num_resize_ = 0; 282 | #endif 283 | 284 | uint64_t make_key_(uint64_t node_id, uint64_t symb) const { 285 | return (node_id << symb_size_.bits()) | symb; 286 | } 287 | uint64_t right_(uint64_t slot_id) const { 288 | return (slot_id + 1) & capa_size_.mask(); 289 | } 290 | }; 291 | 292 | } // namespace poplar 293 | 294 | #endif // POPLAR_TRIE_PLAIN_BONSAI_TRIE_HPP 295 | 
-------------------------------------------------------------------------------- /include/poplar/plain_fkhash_nlm.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_PLAIN_FKHASH_NLM_HPP 25 | #define POPLAR_TRIE_PLAIN_FKHASH_NLM_HPP 26 | 27 | #include 28 | 29 | #include "basics.hpp" 30 | #include "exception.hpp" 31 | 32 | namespace poplar { 33 | 34 | template 35 | class plain_fkhash_nlm { 36 | public: 37 | using value_type = Value; 38 | 39 | static constexpr auto trie_type_id = trie_type_ids::FKHASH_TRIE; 40 | 41 | public: 42 | plain_fkhash_nlm() = default; 43 | 44 | explicit plain_fkhash_nlm(uint32_t capa_bits) { 45 | ptrs_.reserve(1ULL << capa_bits); 46 | } 47 | 48 | ~plain_fkhash_nlm() = default; 49 | 50 | std::pair compare(uint64_t pos, const char_range& key) const { 51 | assert(pos < ptrs_.size()); 52 | assert(ptrs_[pos]); 53 | 54 | const uint8_t* ptr = ptrs_[pos].get(); 55 | 56 | if (key.empty()) { 57 | return {reinterpret_cast(ptr), 0}; 58 | } 59 | 60 | for (uint64_t i = 0; i < key.length(); ++i) { 61 | if (key[i] != ptr[i]) { 62 | return {nullptr, i}; 63 | } 64 | } 65 | 66 | return {reinterpret_cast(ptr + key.length()), key.length()}; 67 | } 68 | 69 | value_type* append(const char_range& key) { 70 | uint64_t length = key.length(); 71 | ptrs_.emplace_back(std::make_unique(length + sizeof(value_type))); 72 | label_bytes_ += length + sizeof(value_type); 73 | 74 | auto ptr = ptrs_.back().get(); 75 | copy_bytes(ptr, key.begin, length); 76 | 77 | #ifdef POPLAR_EXTRA_STATS 78 | max_length_ = std::max(max_length_, length); 79 | sum_length_ += length; 80 | #endif 81 | 82 | auto ret = reinterpret_cast(ptr + length); 83 | *ret = static_cast(0); 84 | 85 | return ret; 86 | } 87 | 88 | void append_dummy() { 89 | ptrs_.emplace_back(nullptr); 90 | } 91 | 92 | uint64_t size() const { 93 | return ptrs_.size(); 94 | } 95 | uint64_t alloc_bytes() const { 96 | uint64_t bytes = 0; 97 | bytes += ptrs_.capacity() * sizeof(std::unique_ptr); 98 | bytes += label_bytes_; 99 | return bytes; 100 | } 101 | 102 | void show_stats(std::ostream& os, int n = 0) const { 103 | auto indent = get_indent(n); 104 | show_stat(os, 
indent, "name", "plain_fkhash_nlm"); 105 | show_stat(os, indent, "size", size()); 106 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 107 | #ifdef POPLAR_EXTRA_STATS 108 | show_stat(os, indent, "max_length", max_length_); 109 | show_stat(os, indent, "ave_length", double(sum_length_) / size()); 110 | #endif 111 | } 112 | 113 | plain_fkhash_nlm(const plain_fkhash_nlm&) = delete; 114 | plain_fkhash_nlm& operator=(const plain_fkhash_nlm&) = delete; 115 | 116 | plain_fkhash_nlm(plain_fkhash_nlm&&) noexcept = default; 117 | plain_fkhash_nlm& operator=(plain_fkhash_nlm&&) noexcept = default; 118 | 119 | private: 120 | std::vector> ptrs_; 121 | uint64_t label_bytes_ = 0; 122 | #ifdef POPLAR_EXTRA_STATS 123 | uint64_t max_length_ = 0; 124 | uint64_t sum_length_ = 0; 125 | #endif 126 | }; 127 | 128 | } // namespace poplar 129 | 130 | #endif // POPLAR_TRIE_PLAIN_FKHASH_NLM_HPP 131 | -------------------------------------------------------------------------------- /include/poplar/plain_fkhash_trie.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_PLAIN_FKHASH_TRIE_HPP 25 | #define POPLAR_TRIE_PLAIN_FKHASH_TRIE_HPP 26 | 27 | #include 28 | 29 | #include "bit_tools.hpp" 30 | #include "bit_vector.hpp" 31 | #include "compact_vector.hpp" 32 | #include "hash.hpp" 33 | 34 | namespace poplar { 35 | 36 | // The node IDs are arranged incrementally 37 | template 38 | class plain_fkhash_trie { 39 | static_assert(0 < MaxFactor and MaxFactor < 100); 40 | 41 | public: 42 | using this_type = plain_fkhash_trie; 43 | 44 | static constexpr uint64_t nil_id = UINT64_MAX; 45 | static constexpr uint32_t min_capa_bits = 16; 46 | 47 | static constexpr auto trie_type_id = trie_type_ids::FKHASH_TRIE; 48 | 49 | public: 50 | plain_fkhash_trie() = default; 51 | 52 | plain_fkhash_trie(uint32_t capa_bits, uint32_t symb_bits) { 53 | capa_size_ = size_p2{std::max(min_capa_bits, capa_bits)}; 54 | symb_size_ = size_p2{symb_bits}; 55 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 56 | table_ = compact_vector{capa_size_.size(), capa_size_.bits() + symb_size_.bits()}; 57 | ids_ = compact_vector{capa_size_.size(), capa_size_.bits()}; 58 | } 59 | 60 | ~plain_fkhash_trie() = default; 61 | 62 | // The root ID is assigned but its slot does not exist in the table 63 | uint64_t get_root() const { 64 | assert(size_ != 0); 65 | return 0; 66 | } 67 | 68 | void add_root() { 69 | assert(size_ == 0); 70 | size_ = 1; 71 | } 72 | 73 | uint64_t find_child(uint64_t node_id, uint64_t symb) const { 74 | 
assert(node_id < capa_size_.size()); 75 | assert(symb < symb_size_.size()); 76 | 77 | if (size_ == 0) { 78 | return nil_id; 79 | } 80 | 81 | uint64_t key = make_key_(node_id, symb); 82 | 83 | for (uint64_t i = init_id_(key);; i = right_(i)) { 84 | uint64_t child_id = ids_[i]; 85 | 86 | if (child_id == 0) { // empty? 87 | return nil_id; 88 | } 89 | if (table_[i] == key) { 90 | return child_id; 91 | } 92 | } 93 | } 94 | 95 | bool add_child(uint64_t& node_id, uint64_t symb) { 96 | assert(node_id < capa_size_.size()); 97 | assert(symb < symb_size_.size()); 98 | 99 | if (max_size() <= size()) { 100 | expand_(); 101 | } 102 | 103 | uint64_t key = make_key_(node_id, symb); 104 | assert(key != 0); 105 | 106 | for (uint64_t i = init_id_(key);; i = right_(i)) { 107 | uint64_t child_id = ids_[i]; 108 | 109 | if (child_id == 0) { // empty? 110 | node_id = size_++; // new child_id 111 | assert(node_id != 0); 112 | 113 | table_.set(i, key); 114 | ids_.set(i, node_id); 115 | 116 | return true; 117 | } 118 | 119 | if (table_[i] == key) { 120 | node_id = child_id; 121 | return false; // already stored 122 | } 123 | } 124 | } 125 | 126 | // # of registerd nodes 127 | uint64_t size() const { 128 | return size_; 129 | } 130 | uint64_t max_size() const { 131 | return max_size_; 132 | } 133 | uint64_t capa_size() const { 134 | return capa_size_.size(); 135 | } 136 | uint32_t capa_bits() const { 137 | return capa_size_.bits(); 138 | } 139 | uint64_t symb_size() const { 140 | return symb_size_.size(); 141 | } 142 | uint32_t symb_bits() const { 143 | return symb_size_.bits(); 144 | } 145 | #ifdef POPLAR_EXTRA_STATS 146 | uint64_t num_resize() const { 147 | return num_resize_; 148 | } 149 | #endif 150 | uint64_t alloc_bytes() const { 151 | uint64_t bytes = 0; 152 | bytes += table_.alloc_bytes(); 153 | bytes += ids_.alloc_bytes(); 154 | return bytes; 155 | } 156 | 157 | void show_stats(std::ostream& os, int n = 0) const { 158 | auto indent = get_indent(n); 159 | show_stat(os, indent, "name", 
"plain_fkhash_trie"); 160 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 161 | show_stat(os, indent, "max_factor", MaxFactor); 162 | show_stat(os, indent, "size", size()); 163 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 164 | show_stat(os, indent, "capa_bits", capa_bits()); 165 | show_stat(os, indent, "symb_bits", symb_bits()); 166 | #ifdef POPLAR_EXTRA_STATS 167 | show_stat(os, indent, "num_resize", num_resize_); 168 | #endif 169 | } 170 | 171 | plain_fkhash_trie(const plain_fkhash_trie&) = delete; 172 | plain_fkhash_trie& operator=(const plain_fkhash_trie&) = delete; 173 | 174 | plain_fkhash_trie(plain_fkhash_trie&&) noexcept = default; 175 | plain_fkhash_trie& operator=(plain_fkhash_trie&&) noexcept = default; 176 | 177 | private: 178 | compact_vector table_; 179 | compact_vector ids_; 180 | uint64_t size_ = 0; // # of registered nodes 181 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 182 | size_p2 capa_size_; 183 | size_p2 symb_size_; 184 | #ifdef POPLAR_EXTRA_STATS 185 | uint64_t num_resize_ = 0; 186 | #endif 187 | 188 | uint64_t make_key_(uint64_t node_id, uint64_t symb) const { 189 | return (node_id << symb_size_.bits()) | symb; 190 | } 191 | uint64_t init_id_(uint64_t key) const { 192 | return Hasher::hash(key) & capa_size_.mask(); 193 | } 194 | uint64_t right_(uint64_t slot_id) const { 195 | return (slot_id + 1) & capa_size_.mask(); 196 | } 197 | 198 | void expand_() { 199 | this_type new_ht{capa_bits() + 1, symb_bits()}; 200 | #ifdef POPLAR_EXTRA_STATS 201 | new_ht.num_resize_ = num_resize_ + 1; 202 | #endif 203 | 204 | for (uint64_t i = 0; i < capa_size_.size(); ++i) { 205 | uint64_t child_id = ids_[i]; 206 | if (child_id == 0) { // empty? 207 | continue; 208 | } 209 | 210 | uint64_t key = table_[i]; 211 | assert(key != 0); 212 | 213 | for (uint64_t new_i = new_ht.init_id_(key);; new_i = new_ht.right_(new_i)) { 214 | if (new_ht.ids_[new_i] == 0) { // empty? 
215 | new_ht.table_.set(new_i, key); 216 | new_ht.ids_.set(new_i, child_id); 217 | break; 218 | } 219 | } 220 | } 221 | 222 | new_ht.size_ = size_; 223 | *this = std::move(new_ht); 224 | } 225 | }; 226 | 227 | } // namespace poplar 228 | 229 | #endif // POPLAR_TRIE_PLAIN_FKHASH_TRIE_HPP 230 | -------------------------------------------------------------------------------- /include/poplar/poplar_config.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_CONFIG_HPP 25 | #define POPLAR_TRIE_CONFIG_HPP 26 | 27 | /* #undef POPLAR_EXTRA_STATS */ 28 | 29 | #endif // POPLAR_TRIE_CONFIG_HPP 30 | -------------------------------------------------------------------------------- /include/poplar/standard_hash_table.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_STANDARD_HASH_TABLE_HPP 25 | #define POPLAR_STANDARD_HASH_TABLE_HPP 26 | 27 | #include "exception.hpp" 28 | #include "hash.hpp" 29 | 30 | namespace poplar { 31 | 32 | template 33 | class standard_hash_table { 34 | static_assert(0 < MaxFactor and MaxFactor < 100); 35 | 36 | public: 37 | using this_type = standard_hash_table; 38 | 39 | static constexpr uint32_t min_capa_bits = 6; 40 | static constexpr uint64_t nil = UINT64_MAX; 41 | 42 | public: 43 | standard_hash_table() = default; 44 | 45 | explicit standard_hash_table(uint32_t capa_bits) { 46 | capa_size_ = size_p2(std::max(min_capa_bits, capa_bits)); 47 | max_size_ = static_cast(capa_size_.size() * MaxFactor / 100.0); 48 | table_.resize(capa_size_.size()); 49 | } 50 | 51 | ~standard_hash_table() = default; 52 | 53 | uint64_t get(uint64_t key) const { 54 | if (table_.empty()) { 55 | return nil; 56 | } 57 | 58 | for (uint64_t i = init_id_(key);; i = right_(i)) { 59 | if (table_[i].key == UINT64_MAX) { 60 | return nil; 61 | } 62 | if (table_[i].key == key) { 63 | return table_[i].val; 64 | } 65 | } 66 | } 67 | 68 | bool set(uint64_t key, uint64_t val) { 69 | if (table_.empty()) { 70 | *this = this_type(min_capa_bits); 71 | } 72 | 73 | if (max_size_ <= size_) { 74 | expand_(); 75 | } 76 | 77 | for (uint64_t i = init_id_(key);; i = right_(i)) { 78 | if (table_[i].key == UINT64_MAX) { 79 | table_[i] = {key, val}; 80 | ++size_; 81 | return true; 82 | } 83 | if (table_[i].key == key) { 84 | table_[i].val = val; 85 | return false; 86 | } 87 | } 88 | } 89 | 90 | uint64_t size() const { 91 | return size_; 92 | } 93 | uint64_t max_size() const { 94 | return max_size_; 95 | } 96 | uint64_t capa_size() const { 97 | return capa_size_.size(); 98 | } 99 | uint32_t capa_bits() const { 100 | return capa_size_.bits(); 101 | } 102 | uint64_t alloc_bytes() const { 103 | return table_.capacity() * sizeof(slot_type); 104 | } 105 | 106 | void show_stats(std::ostream& os, int n = 0) const { 107 | auto indent = 
get_indent(n); 108 | show_stat(os, indent, "name", "standard_hash_table"); 109 | show_stat(os, indent, "factor", double(size()) / capa_size() * 100); 110 | show_stat(os, indent, "max_factor", MaxFactor); 111 | show_stat(os, indent, "size", size()); 112 | show_stat(os, indent, "capa_size", capa_size()); 113 | show_stat(os, indent, "alloc_bytes", alloc_bytes()); 114 | #ifdef POPLAR_EXTRA_STATS 115 | show_stat(os, indent, "num_resize", num_resize_); 116 | #endif 117 | } 118 | 119 | standard_hash_table(const standard_hash_table&) = delete; 120 | standard_hash_table& operator=(const standard_hash_table&) = delete; 121 | 122 | standard_hash_table(standard_hash_table&&) noexcept = default; 123 | standard_hash_table& operator=(standard_hash_table&&) noexcept = default; 124 | 125 | private: 126 | struct slot_type { 127 | uint64_t key = UINT64_MAX; 128 | uint64_t val = 0; 129 | }; 130 | 131 | std::vector table_; 132 | uint64_t size_ = 0; // # of registered nodes 133 | uint64_t max_size_ = 0; // MaxFactor% of the capacity 134 | size_p2 capa_size_; 135 | #ifdef POPLAR_EXTRA_STATS 136 | uint64_t num_resize_ = 0; 137 | #endif 138 | 139 | uint64_t init_id_(uint64_t key) const { 140 | return Hasher::hash(key) & capa_size_.mask(); 141 | } 142 | uint64_t right_(uint64_t slot_id) const { 143 | return (slot_id + 1) & capa_size_.mask(); 144 | } 145 | 146 | void expand_() { 147 | this_type new_ht{capa_size_.bits() + 1}; 148 | #ifdef POPLAR_EXTRA_STATS 149 | new_ht.num_resize_ = num_resize_ + 1; 150 | #endif 151 | 152 | for (uint64_t i = 0; i < table_.size(); ++i) { 153 | if (table_[i].key != UINT64_MAX) { 154 | new_ht.set(table_[i].key, table_[i].val); 155 | } 156 | } 157 | 158 | assert(size() == new_ht.size()); 159 | *this = std::move(new_ht); 160 | } 161 | }; 162 | 163 | } // namespace poplar 164 | 165 | #endif // POPLAR_STANDARD_HASH_TABLE_HPP 166 | -------------------------------------------------------------------------------- /include/poplar/vbyte.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #ifndef POPLAR_TRIE_VBYTE_HPP 25 | #define POPLAR_TRIE_VBYTE_HPP 26 | 27 | #include 28 | 29 | #include "basics.hpp" 30 | 31 | namespace poplar::vbyte { 32 | 33 | inline uint64_t size(uint64_t val) { 34 | uint64_t n = 1; 35 | while (127ULL < val) { 36 | ++n; 37 | val >>= 7; 38 | } 39 | return n; 40 | } 41 | 42 | inline uint64_t append(std::vector& vec, uint64_t val) { 43 | uint64_t size = vec.size(); 44 | while (127ULL < val) { 45 | vec.emplace_back(static_cast((val & 127ULL) | 0x80ULL)); 46 | val >>= 7; 47 | } 48 | vec.emplace_back(static_cast(val & 127ULL)); 49 | return vec.size() - size; 50 | } 51 | 52 | inline uint64_t encode(uint8_t* codes, uint64_t val) { 53 | uint64_t i = 0; 54 | while (127ULL < val) { 55 | codes[i++] = static_cast((val & 127ULL) | 0x80ULL); 56 | val >>= 7; 57 | } 58 | codes[i++] = static_cast(val & 127ULL); 59 | return i; 60 | } 61 | 62 | inline uint64_t decode(const uint8_t* codes, uint64_t& val) { 63 | val = 0; 64 | uint64_t i = 0, shift = 0; 65 | while ((codes[i] & 0x80) != 0) { 66 | val |= (codes[i++] & 127ULL) << shift; 67 | shift += 7; 68 | } 69 | val |= (codes[i++] & 127ULL) << shift; 70 | return i; 71 | } 72 | 73 | } // namespace poplar::vbyte 74 | 75 | #endif // POPLAR_TRIE_VBYTE_HPP 76 | -------------------------------------------------------------------------------- /poplar_config.hpp.in: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 
13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #ifndef POPLAR_TRIE_CONFIG_HPP 25 | #define POPLAR_TRIE_CONFIG_HPP 26 | 27 | #cmakedefine POPLAR_EXTRA_STATS 28 | 29 | #endif // POPLAR_TRIE_CONFIG_HPP -------------------------------------------------------------------------------- /sample/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(sample sample.cpp) 2 | target_link_libraries(sample) 3 | -------------------------------------------------------------------------------- /sample/sample.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | 27 | int main() { 28 | std::vector keys = {"Aoba", "Yun", "Hajime", "Hihumi", "Kou", "Rin", "Hazuki", "Umiko", "Nene"}; 29 | const auto num_keys = static_cast(keys.size()); 30 | 31 | poplar::plain_bonsai_map map; 32 | 33 | try { 34 | for (int i = 0; i < num_keys; ++i) { 35 | int* ptr = map.update(keys[i]); 36 | *ptr = i + 1; 37 | } 38 | for (int i = 0; i < num_keys; ++i) { 39 | const int* ptr = map.find(keys[i]); 40 | if (ptr == nullptr or *ptr != i + 1) { 41 | return 1; 42 | } 43 | std::cout << keys[i] << ": " << *ptr << std::endl; 44 | } 45 | { 46 | const int* ptr = map.find("Hotaru"); 47 | if (ptr != nullptr) { 48 | return 1; 49 | } 50 | std::cout << "Hotaru: " << -1 << std::endl; 51 | } 52 | } catch (const poplar::exception& ex) { 53 | std::cerr << ex.what() << std::endl; 54 | return 1; 55 | } 56 | 57 | std::cout << "#keys = " << map.size() << std::endl; 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Download and unpack googletest at configure time 2 | configure_file(CMakeLists.txt.in 3 | ${CMAKE_BINARY_DIR}/googletest-download/CMakeLists.txt) 4 | execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 
5 | RESULT_VARIABLE result 6 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) 7 | if (result) 8 | message(FATAL_ERROR "CMake step for googletest failed: ${result}") 9 | endif () 10 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 11 | RESULT_VARIABLE result 12 | WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) 13 | if (result) 14 | message(FATAL_ERROR "Build step for googletest failed: ${result}") 15 | endif () 16 | 17 | # Prevent overriding the parent project's compiler/linker 18 | # settings on Windows 19 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 20 | 21 | # Add googletest directly to our build. This defines 22 | # the gtest and gtest_main targets. 23 | add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src 24 | ${CMAKE_BINARY_DIR}/googletest-build 25 | EXCLUDE_FROM_ALL) 26 | 27 | # The gtest/gtest_main targets carry header search path 28 | # dependencies automatically when using CMake 2.8.11 or 29 | # later. Otherwise we have to add them here ourselves. 
30 | if (CMAKE_VERSION VERSION_LESS 2.8.11) 31 | include_directories("${gtest_SOURCE_DIR}/include") 32 | endif () 33 | 34 | file(GLOB TEST_SOURCES *_test.cpp) 35 | foreach (TEST_SOURCE ${TEST_SOURCES}) 36 | get_filename_component(TEST_SOURCE_NAME ${TEST_SOURCE} NAME_WE) 37 | add_executable(${TEST_SOURCE_NAME} ${TEST_SOURCE}) 38 | target_link_libraries(${TEST_SOURCE_NAME} gtest_main) 39 | add_test(run_${TEST_SOURCE_NAME} ${TEST_SOURCE_NAME}) 40 | endforeach () 41 | 42 | configure_file(words.txt ${CMAKE_CURRENT_BINARY_DIR}/words.txt COPYONLY) 43 | -------------------------------------------------------------------------------- /test/CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.2) 2 | 3 | project(googletest-download NONE) 4 | 5 | include(ExternalProject) 6 | ExternalProject_Add(googletest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG release-1.8.1 9 | SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" 10 | BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 | TEST_COMMAND "" 15 | ) -------------------------------------------------------------------------------- /test/bijective_hash_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice 
shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | 28 | #include "test_common.hpp" 29 | 30 | namespace { 31 | 32 | using namespace poplar; 33 | using namespace poplar::test; 34 | 35 | constexpr uint64_t N = 1ULL << 10; 36 | 37 | template 38 | void check_bijection(uint32_t univ_bits) { 39 | Hasher h{univ_bits}; 40 | 41 | if (h.size() <= N) { 42 | for (uint64_t x = 0; x < h.size(); ++x) { 43 | ASSERT_EQ(x, h.hash_inv(h.hash(x))); 44 | } 45 | } else { 46 | std::random_device rnd; 47 | for (uint64_t i = 0; i < N; ++i) { 48 | uint64_t x = rnd() % N; 49 | ASSERT_EQ(x, h.hash_inv(h.hash(x))); 50 | } 51 | } 52 | } 53 | 54 | template 55 | class bijective_hash_test : public ::testing::Test {}; 56 | 57 | using bijective_hash_types = ::testing::Types; 58 | 59 | TYPED_TEST_CASE(bijective_hash_test, bijective_hash_types); 60 | 61 | TYPED_TEST(bijective_hash_test, Tiny) { 62 | for (uint32_t i = 1; i < 64; ++i) { 63 | check_bijection(i); 64 | } 65 | } 66 | 67 | } // namespace 68 | -------------------------------------------------------------------------------- /test/bit_vector_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the 
"Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | 30 | #include "test_common.hpp" 31 | 32 | namespace { 33 | 34 | using namespace poplar; 35 | using namespace poplar::test; 36 | 37 | constexpr uint64_t N = 10000; 38 | 39 | TEST(bit_vector_test, Tiny) { 40 | std::vector orig; 41 | bit_vector bv; 42 | 43 | { 44 | std::random_device rnd; 45 | for (uint64_t i = 0; i < N; ++i) { 46 | uint64_t x = rnd() & UINT32_MAX; 47 | orig.push_back(x); 48 | bv.append_bits(x, bit_tools::ceil_log2(x)); 49 | } 50 | } 51 | 52 | uint64_t pos = 0; 53 | for (uint64_t i = 0; i < N; ++i) { 54 | uint64_t len = bit_tools::ceil_log2(orig[i]); 55 | ASSERT_EQ(orig[i], bv.get_bits(pos, len)); 56 | pos += len; 57 | } 58 | } 59 | 60 | } // namespace -------------------------------------------------------------------------------- /test/compact_hash_table_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 
Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #include "test_common.hpp" 30 | 31 | namespace { 32 | 33 | using namespace poplar; 34 | using namespace poplar::test; 35 | 36 | constexpr uint32_t VAL_BITS = 16; 37 | constexpr uint64_t VAL_MASK = (1ULL << VAL_BITS) - 1; 38 | 39 | std::map create_map(uint32_t univ_bits, uint64_t size) { 40 | std::map m; 41 | std::random_device rnd; 42 | 43 | uint64_t univ_mask = (1ULL << univ_bits) - 1; 44 | 45 | while (m.size() < size) { 46 | uint64_t key = rnd() & univ_mask; 47 | uint64_t val = rnd() & VAL_MASK; 48 | if (val == VAL_MASK) { 49 | val = 0; 50 | } 51 | m.insert(std::make_pair(key, val)); 52 | }; 53 | 54 | return m; 55 | }; 56 | 57 | TEST(compact_hash_table_test, Tiny) { 58 | const uint64_t univ_bits = 14; 59 | const uint64_t capa_bits = 8; 60 | const uint64_t size = 1ULL << (univ_bits - 1); 61 | 62 | auto m = create_map(univ_bits, size); 63 | compact_hash_table cht(univ_bits, capa_bits); 64 | 65 | for (auto item : m) { 66 | cht.set(item.first, item.second); 67 | } 68 | 69 | for (auto item : m) { 70 | ASSERT_EQ(cht.get(item.first), item.second); 71 | } 72 | } 73 | 74 | } // namespace 75 | -------------------------------------------------------------------------------- /test/hash_trie_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 
all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | 27 | #include "test_common.hpp" 28 | 29 | namespace { 30 | 31 | using namespace poplar; 32 | using namespace poplar::test; 33 | 34 | template 35 | void insert_keys(Trie& ht, const std::vector& keys, std::vector& ids) { 36 | ASSERT_FALSE(keys.empty()); 37 | 38 | ids.resize(ht.capa_size(), UINT64_MAX); 39 | 40 | ht.add_root(); 41 | auto num_nodes = ht.size(); 42 | 43 | if constexpr (Trie::trie_type_id == trie_type_ids::FKHASH_TRIE) { 44 | ASSERT_EQ(ht.get_root(), 0); 45 | } 46 | 47 | for (uint64_t i = 0; i < keys.size(); ++i) { 48 | auto node_id = ht.get_root(); 49 | 50 | for (auto c : keys[i]) { 51 | if (ht.add_child(node_id, static_cast(c))) { 52 | if constexpr (Trie::trie_type_id == trie_type_ids::FKHASH_TRIE) { 53 | ASSERT_EQ(node_id, num_nodes); 54 | } 55 | 56 | ++num_nodes; 57 | 58 | if constexpr (Trie::trie_type_id == trie_type_ids::BONSAI_TRIE) { 59 | if (!ht.needs_to_expand()) { 60 | continue; 61 | } 62 | auto node_map = ht.expand(); 63 | node_id = node_map[node_id]; 64 | std::vector new_ids(ht.capa_size(), UINT64_MAX); 65 | for (uint64_t j = 0; j < node_map.size(); ++j) { 66 | if (node_map[j] != UINT64_MAX) { 67 | new_ids[node_map[j]] = ids[j]; 68 | } 69 | } 70 | ids = std::move(new_ids); 71 | } else { 72 | if (ids.size() < ht.capa_size()) { 73 | ids.resize(ht.capa_size()); 74 | } 75 | } 76 | } 77 | } 78 | 79 | ids[node_id] = i; 80 | } 81 | 82 | 
ASSERT_EQ(num_nodes, ht.size()); 83 | } 84 | 85 | template 86 | void search_keys(const Trie& ht, const std::vector& keys, const std::vector& ids) { 87 | ASSERT_FALSE(keys.empty()); 88 | 89 | for (uint64_t i = 0; i < keys.size(); ++i) { 90 | auto node_id = ht.get_root(); 91 | for (auto c : keys[i]) { 92 | node_id = ht.find_child(node_id, static_cast(c)); 93 | ASSERT_NE(node_id, Trie::nil_id); 94 | } 95 | 96 | ASSERT_EQ(i, ids[node_id]); 97 | } 98 | } 99 | 100 | template 101 | void restore_keys(const Trie& ht, const std::vector& keys, const std::vector& ids) { 102 | ASSERT_FALSE(keys.empty()); 103 | 104 | if constexpr (Trie::trie_type_id == trie_type_ids::BONSAI_TRIE) { 105 | std::string restore; 106 | 107 | for (uint64_t i = 0; i < ids.size(); ++i) { 108 | if (ids[i] == UINT64_MAX) { 109 | continue; 110 | } 111 | 112 | restore.clear(); 113 | 114 | uint64_t node_id = i; 115 | while (node_id != ht.get_root()) { 116 | auto ps = ht.get_parent_and_symb(node_id); 117 | ASSERT_NE(ps.first, Trie::nil_id); 118 | node_id = ps.first; 119 | restore += static_cast(ps.second); 120 | } 121 | 122 | std::reverse(restore.begin(), restore.end()); 123 | ASSERT_EQ(restore, keys[ids[i]]); 124 | } 125 | } 126 | } 127 | 128 | template 129 | class hash_trie_test : public ::testing::Test {}; 130 | 131 | using hash_trie_types = 132 | ::testing::Types, plain_bonsai_trie<>, compact_fkhash_trie<>, compact_bonsai_trie<>>; 133 | 134 | TYPED_TEST_CASE(hash_trie_test, hash_trie_types); 135 | 136 | TYPED_TEST(hash_trie_test, tiny) { 137 | TypeParam ht{0, 8}; 138 | auto keys = make_tiny_keys(); 139 | std::vector ids; 140 | insert_keys(ht, keys, ids); 141 | search_keys(ht, keys, ids); 142 | restore_keys(ht, keys, ids); 143 | } 144 | 145 | TYPED_TEST(hash_trie_test, words) { 146 | TypeParam ht{20, 8}; 147 | auto keys = load_keys("words.txt"); 148 | std::vector ids; 149 | insert_keys(ht, keys, ids); 150 | search_keys(ht, keys, ids); 151 | restore_keys(ht, keys, ids); 152 | } 153 | 154 | 
TYPED_TEST(hash_trie_test, words_ex) { 155 | TypeParam ht{0, 8}; 156 | auto keys = load_keys("words.txt"); 157 | std::vector ids; 158 | insert_keys(ht, keys, ids); 159 | search_keys(ht, keys, ids); 160 | restore_keys(ht, keys, ids); 161 | } 162 | 163 | } // namespace 164 | -------------------------------------------------------------------------------- /test/map_test.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | #include 25 | #include 26 | 27 | #include "test_common.hpp" 28 | 29 | namespace { 30 | 31 | using namespace poplar; 32 | using namespace poplar::test; 33 | 34 | using value_type = uint64_t; 35 | 36 | template 37 | void insert_keys(Map& map, const std::vector& keys) { 38 | ASSERT_FALSE(keys.empty()); 39 | 40 | uint64_t num_keys = 0; 41 | for (uint64_t i = 0; i < keys.size(); i += 2) { 42 | auto ptr = map.update(make_char_range(keys[i])); 43 | ASSERT_EQ(*ptr, 0); 44 | *ptr = i; 45 | ++num_keys; 46 | } 47 | 48 | ASSERT_EQ(map.size(), num_keys); 49 | } 50 | 51 | template 52 | void search_keys(Map& map, const std::vector& keys) { 53 | ASSERT_FALSE(keys.empty()); 54 | 55 | for (uint64_t i = 0; i < keys.size(); i += 2) { 56 | auto ptr = map.find(make_char_range(keys[i])); 57 | ASSERT_NE(ptr, nullptr); 58 | ASSERT_EQ(*ptr, i); 59 | } 60 | 61 | for (uint64_t i = 0; i < keys.size(); i += 2) { 62 | auto ptr = map.update(make_char_range(keys[i])); 63 | ASSERT_NE(ptr, nullptr); 64 | ASSERT_EQ(*ptr, i); 65 | } 66 | 67 | for (uint64_t i = 1; i < keys.size(); i += 2) { 68 | auto ptr = map.find(make_char_range(keys[i])); 69 | ASSERT_EQ(ptr, nullptr); 70 | } 71 | } 72 | 73 | // clang-format off 74 | using map_types = ::testing::Types, 75 | compact_bonsai_map, 76 | plain_fkhash_map, 77 | compact_fkhash_map 78 | >; 79 | // clang-format on 80 | 81 | template 82 | class map_test : public ::testing::Test {}; 83 | 84 | TYPED_TEST_CASE(map_test, map_types); 85 | 86 | TYPED_TEST(map_test, Tiny) { 87 | TypeParam map; 88 | auto keys = make_tiny_keys(); 89 | insert_keys(map, keys); 90 | search_keys(map, keys); 91 | } 92 | 93 | TYPED_TEST(map_test, Words) { 94 | TypeParam map; 95 | auto keys = load_keys("words.txt"); 96 | insert_keys(map, keys); 97 | search_keys(map, keys); 98 | } 99 | 100 | } // namespace 101 | -------------------------------------------------------------------------------- /test/test_common.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2018–2019 Shunsuke Kanda 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
/* 23 | */
#ifndef POPLAR_TRIE_TEST_COMMON_HPP
#define POPLAR_TRIE_TEST_COMMON_HPP

// NOTE(review): the original listed five includes whose header names were lost
// in extraction. The headers this file itself needs are restored below; recover
// any others from upstream.
#include <fstream>
#include <string>
#include <utility>
#include <vector>

namespace poplar::test {

/**
 * Returns a fixed set of 13 short keys built around the stems "tr-", "den-"
 * and "def-".
 */
inline std::vector<std::string> make_tiny_keys() {
    return {"trie",   "denying", "defies", "defy", "tries",  "defying", "defied",
            "denied", "trying",  "deny",   "try",  "denies", "tried"};
}

/**
 * Reads a text file into a vector with one entry per line.
 *
 * @param filename  path of the file to read; may be null
 * @return the lines in file order, or an empty vector when filename is null or
 *         the file cannot be opened
 */
inline std::vector<std::string> load_keys(const char* filename) {
    std::vector<std::string> keys;

    // Constructing an ifstream from a null path is not safe, so treat it like
    // an unreadable file.
    if (filename == nullptr) {
        return keys;
    }

    std::ifstream ifs{filename};
    if (!ifs) {
        return keys;
    }

    for (std::string line; std::getline(ifs, line);) {
        // getline overwrites `line` on the next pass, so it can be moved from.
        keys.push_back(std::move(line));
    }
    return keys;
}

}  // namespace poplar::test

#endif // POPLAR_TRIE_TEST_COMMON_HPP 57 | --------------------------------------------------------------------------------