├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README-CN.md ├── README.md ├── benchmark ├── benchmark.h └── billion.cc ├── images ├── build.png ├── shd.png └── throughput.png ├── include ├── bbf.h ├── shd.h └── utils.h ├── src ├── bbf.cc ├── build.cc ├── common.h ├── hash.cc ├── internal.h ├── pipeline.h ├── search.cc ├── shd.cc └── utils.cc └── test ├── test.cc ├── test.h ├── test_bbf.cc ├── test_shd.cc └── test_utils.cc /.gitignore: -------------------------------------------------------------------------------- 1 | build/* -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | #===============================================================================
2 | # Skew Hash and Displace Algorithm.
3 | # Copyright (C) 2020 Ruan Kunliang
4 | #
5 | # This library is free software; you can redistribute it and/or modify it under
6 | # the terms of the GNU Lesser General Public License as published by the Free
7 | # Software Foundation; either version 2.1 of the License, or (at your option)
8 | # any later version.
9 | #
10 | # This library is distributed in the hope that it will be useful, but WITHOUT
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 | # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 | # details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this library; if not, see <https://www.gnu.org/licenses/>.
17 | #===============================================================================
18 | 
19 | cmake_minimum_required(VERSION 3.10)
20 | project(fastSHD LANGUAGES CXX)
21 | 
22 | # C++17 is mandatory; REQUIRED stops CMake from silently falling back to an
23 | # older standard when the compiler cannot provide it.
24 | set(CMAKE_CXX_STANDARD 17)
25 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
26 | 
27 | # The automatic build-tree RPATH is skipped; executables get "." instead.
28 | # Append to the linker flags (do not overwrite) so user-supplied flags survive.
29 | set(CMAKE_SKIP_BUILD_RPATH TRUE)
30 | string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--rpath=.")
31 | 
32 | # ON: x86_64 builds are compiled with -march=skylake. OFF: no -march is added.
33 | option(MODERN_CPU_ONLY "build for modern CPU only" ON)
34 | 
35 | # Flags stay in CMAKE_CXX_FLAGS on purpose: the repeated "--param <x>" pairs
36 | # below would be de-duplicated by add_compile_options()/target_compile_options()
37 | # on CMake 3.10, which has no "SHELL:" prefix yet.
38 | set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -Wall")
39 | string(APPEND CMAKE_CXX_FLAGS " -fno-unroll-loops -fno-stack-protector")
40 | if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND MODERN_CPU_ONLY)
41 |   message(STATUS "NOTICE: build for modern CPU not older than skylake")
42 |   string(APPEND CMAKE_CXX_FLAGS " -march=skylake")
43 | endif()
44 | 
45 | # GCC-only tuning parameters.
46 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
47 |   string(APPEND CMAKE_CXX_FLAGS
48 |     " --param case-values-threshold=3"
49 |     " --param max-inline-insns-size=64"
50 |     " --param large-function-insns=5000"
51 |     " --param large-function-growth=200"
52 |     " --param large-unit-insns=30000"
53 |     " --param inline-unit-growth=100"
54 |   )
55 | endif()
56 | 
57 | find_package(Threads REQUIRED)
58 | 
59 | # Core shared library. Sources are listed explicitly so that adding or removing
60 | # a file shows up in this file and triggers a re-configure.
61 | add_library(shd SHARED
62 |   src/bbf.cc
63 |   src/build.cc
64 |   src/hash.cc
65 |   src/search.cc
66 |   src/shd.cc
67 |   src/utils.cc
68 | )
69 | # PUBLIC: everything that links shd also needs its headers and the thread library.
70 | target_include_directories(shd PUBLIC "${PROJECT_SOURCE_DIR}/include")
71 | target_link_libraries(shd PUBLIC Threads::Threads)
72 | 
73 | # Unit tests; gtest is still linked by plain library name, as before.
74 | add_executable(shd-test
75 |   test/test.cc
76 |   test/test_bbf.cc
77 |   test/test_shd.cc
78 |   test/test_utils.cc
79 | )
80 | target_link_libraries(shd-test PRIVATE shd gtest Threads::Threads)
81 | 
82 | # Benchmark; gflags is still linked by plain library name, as before.
83 | add_executable(bench-billion benchmark/billion.cc)
84 | target_link_libraries(bench-billion PRIVATE shd gflags Threads::Threads)
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 
39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 
72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 
103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. 
For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 
174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 
205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. 
A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. 
As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. 
A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. 
You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. 
Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. 
Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. 
For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 
457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | 474 | Copyright (C) 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 489 | USA 490 | 491 | Also add information on how to contact you by electronic and paper mail. 492 | 493 | You should also get your employer (if you work as a programmer) or your 494 | school, if any, to sign a "copyright disclaimer" for the library, if 495 | necessary. 
Here is a sample; alter the names: 496 | 497 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 498 | library `Frob' (a library for tweaking knobs) written by James Random 499 | Hacker. 500 | 501 | <signature of Ty Coon>, 1 April 1990 502 | Ty Coon, President of Vice 503 | 504 | That's all there is to it! 505 | -------------------------------------------------------------------------------- /README-CN.md: -------------------------------------------------------------------------------- 1 | # Skew Hash and Displace 2 | 3 | ## 算法描述 4 | 5 | 本算法在[CHD](http://cmph.sourceforge.net/chd.html)算法的基础上做了改进,使用倾斜Hash作为一级Hash函数,令头部桶中元素天然比尾部桶多。 6 | 7 | ![](images/shd.png) 8 | 在一级Hash环节,倾斜Hash比CHD使用的均匀Hash更合理,因此SHD算法可以以更高的密度达到和CHD算法接近的性能。 9 | 10 | ![](images/build.png) 11 | 十亿数据的构建耗时在十几秒水平,稍快于fastCHD(见本项目的chd分支)。 12 | 13 | ![](images/throughput.png) 14 | 和fastCHD(见本项目的chd分支)一样,可提供单机亿级读取QPS。 15 | 16 | ## 关键特性 17 | * 极小的空间开销(每项3.9比特) 18 | * 惊人的读取性能 19 | * 快速生成,失误率极低 20 | * 在线不可写 21 | * 要求CPU支持小端非对齐内存访问(X86、ARM、RISC-V等) 22 | 23 | ## 其他解决方案 24 | * [极速版](https://github.com/PeterRK/SSHT) 25 | * [可写版](https://github.com/PeterRK/estuary) 26 | 27 | --- 28 | [【中文】](README-CN.md) [【英文】](README.md) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skew Hash and Displace 2 | 3 | ## Algorithm Description 4 | 5 | This algorithm improves on [CHD](http://cmph.sourceforge.net/chd.html) by using a skew hash as the first-level hash function, so that head buckets naturally hold more elements than tail buckets. 6 | 7 | ![](images/shd.png) 8 | Because a skew hash is more reasonable than a uniform hash for first-level hashing, SHD can achieve higher density than CHD with almost the same performance. 9 | 10 | ![](images/build.png) 11 | Building a dictionary from one billion items takes about a dozen seconds, a little faster than fastCHD (which can be found in the chd branch). 
12 | 13 | ![](images/throughput.png) 14 | It can provide sub-billion level QPS on a single machine, just like fastCHD (which can be found in the chd branch). 15 | 16 | ## Key Features 17 | * extremely low space overhead (3.9 bits per item) 18 | * amazing read performance 19 | * fast build with an extremely low false failure rate 20 | * no online writing 21 | * requires a CPU that supports little-endian unaligned memory access (X86, ARM, RISC-V...) 22 | 23 | ## Other Solutions 24 | * [faster reading](https://github.com/PeterRK/SSHT) 25 | * [online writable](https://github.com/PeterRK/estuary) 26 | 27 | --- 28 | [【Chinese】](README-CN.md) [【English】](README.md) 29 | -------------------------------------------------------------------------------- /benchmark/benchmark.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #pragma once 20 | 21 | #include 22 | #include "../test/test.h" 23 | 24 | /* xorshift128+ style 64-bit pseudo-random generator used by the benchmarks. The 128-bit state _s is filled at construction with four draws from std::random_device; operator() advances the state in place and returns the next value. NOTE(review): the cast target in the constructor was lost in this copy (text between angle brackets is missing) - presumably a pointer to a 32-bit unsigned type, confirm against the upstream file. */ class XorShift128Plus final { 25 | public: 26 | XorShift128Plus() { 27 | std::random_device rd; 28 | for (unsigned i = 0; i < 4; i++) { 29 | reinterpret_cast(_s)[i] = rd(); 30 | } 31 | } 32 | /* One generator step: shift/xor mix of the two state words (shift amounts 23, 17, 26); the result is the sum of the new second word and the old second word. */ uint64_t operator()() noexcept { 33 | uint64_t x = _s[0]; 34 | const uint64_t y = _s[1]; 35 | _s[0] = y; 36 | x ^= x << 23U; 37 | _s[1] = x ^ y ^ (x >> 17U) ^ (y >> 26U); 38 | return _s[1] + y; 39 | } 40 | private: 41 | uint64_t _s[2]; 42 | }; -------------------------------------------------------------------------------- /benchmark/billion.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "benchmark.h" 31 | 32 | /* Command-line flags: dictionary file, worker-thread count, build-vs-fetch mode, and whether the dictionary is loaded by copy. */ DEFINE_string(file, "bench.shd", "dict filename"); 33 | DEFINE_uint32(thread, 4, "number of worker threads"); 34 | DEFINE_bool(build, false, "build instead of fetching"); 35 | DEFINE_bool(copy, false, "load by copy"); 36 | 37 | /* Number of items in the benchmark dictionary: 2^30. */ static constexpr size_t BILLION = 1UL << 30U; 38 | 39 | /* Build benchmark: writes a BILLION-item dictionary to FLAGS_file, feeding BuildDict with FLAGS_thread input pieces that together cover BILLION items, and prints the elapsed time of the BuildDict call followed by "s". Returns 0 on success, -1 when the output file cannot be created, 2 when BuildDict fails. */ static int BenchBuild() { 40 | shd::FileWriter output(FLAGS_file.c_str()); 41 | if (!output) { 42 | std::cout << "fail to create output file" << std::endl; 43 | return -1; 44 | } 45 | const size_t piece = BILLION/FLAGS_thread; 46 | const size_t remain = BILLION%FLAGS_thread; 47 | std::vector> input; 48 | input.reserve(FLAGS_thread); 49 | size_t off = 0; 50 | for (unsigned i = 0; i < FLAGS_thread; i++) { 51 | /* NOTE(review): the next statement is truncated in this copy (text between angle brackets was lost); presumably it picks the piece size and appends a reader for [off, off+sz) - restore from the upstream file. */ auto sz = i(off, sz)); 53 | off += sz; 54 | } 55 | 56 | shd::g_trace_build_time = true; 57 | 58 | auto start = std::chrono::steady_clock::now(); 59 | auto ret = BuildDict(input, output); 60 | if (ret != shd::BUILD_STATUS_OK) { 61 | std::cout << "fail to build: " << ret << std::endl; 62 | return 2; 63 | } 64 | auto end = std::chrono::steady_clock::now(); 65 | 66 | std::cout << std::chrono::duration_cast(end - start).count() << "s" << std::endl; 67 | return 0; 68 | } 69 | 70 | /* Fetch benchmark: loads the dictionary from FLAGS_file, then runs FLAGS_thread workers; each worker performs `loop` rounds of `batch` random keys (drawn modulo BILLION) and times only the batch_fetch call. Prints the summed per-thread throughput (mqps) and the mean time per operation (ns/op). Returns 0 on success, -1 when loading fails, 1 when the dictionary does not hold exactly BILLION items. */ static int BenchFetch() { 71 | shd::PerfectHashtable dict(FLAGS_file, FLAGS_copy ? 
shd::PerfectHashtable::COPY_DATA : shd::PerfectHashtable::MAP_FETCH); 72 | if (!dict) { 73 | std::cout << "fail to load: " << FLAGS_file << std::endl; 74 | return -1; 75 | } 76 | if (dict.item() != BILLION) { 77 | std::cout << "need billion dict" << std::endl; 78 | return 1; 79 | } 80 | 81 | const unsigned n = FLAGS_thread; 82 | constexpr unsigned batch = 5000; 83 | constexpr unsigned loop = 1000; 84 | 85 | std::vector workers; 86 | workers.reserve(n); 87 | /* one accumulated-time slot per worker; each worker writes only its own slot */ std::vector results(n); 88 | 89 | for (unsigned i = 0; i < n; i++) { 90 | workers.emplace_back([&dict](uint64_t* res){ 91 | std::vector key_vec(batch); 92 | auto out = std::make_unique(EmbeddingGenerator::VALUE_SIZE*batch); 93 | 94 | /* each worker owns its generator, so no state is shared between threads */ XorShift128Plus rnd; 95 | uint64_t sum_ns = 0; 96 | for (unsigned i = 0; i < loop; i++) { 97 | for (unsigned j = 0; j < batch; j++) { 98 | key_vec[j] = rnd()%BILLION; 99 | } 100 | auto start = std::chrono::steady_clock::now(); 101 | dict.batch_fetch(batch, (const uint8_t*)key_vec.data(), out.get()); 102 | auto end = std::chrono::steady_clock::now(); 103 | sum_ns += std::chrono::duration_cast(end - start).count(); 104 | } 105 | *res = sum_ns; 106 | }, &results[i]); 107 | } 108 | for (auto& t : workers) { 109 | t.join(); 110 | } 111 | 112 | uint64_t qps = 0; 113 | uint64_t ns = 0; 114 | for (auto x : results) { 115 | /* per-thread throughput = operations * 1e9 / accumulated time, summed over threads; NOTE(review): divides by x, which is zero only if a worker measured no elapsed time */ qps += (loop*batch)*1000000000ULL/x; 116 | ns += x; 117 | } 118 | ns /= n*(uint64_t)loop*(uint64_t)batch; 119 | 120 | std::cout << (qps/1000000U) << " mqps with " << n << " threads" << std::endl; 121 | std::cout << ns << " ns/op" << std::endl; 122 | return 0; 123 | } 124 | 125 | 126 | /* Entry point: parses the flags, replaces a zero or too-large FLAGS_thread with the processor count reported by get_nprocs() (at least 1), then runs the build or the fetch benchmark depending on FLAGS_build. */ int main(int argc, char* argv[]) { 127 | google::ParseCommandLineFlags(&argc, &argv, true); 128 | 129 | auto cpus = get_nprocs(); 130 | if (cpus <= 0) cpus = 1; 131 | if (FLAGS_thread == 0 || FLAGS_thread > cpus) { 132 | FLAGS_thread = cpus; 133 | } 134 | 135 | if (FLAGS_build) { 136 | return BenchBuild(); 137 | } else { 138 | return BenchFetch(); 139 | } 140 | } 141 | 
-------------------------------------------------------------------------------- /images/build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterRK/fastCHD/73df3082d8a300c6b0b89be8c06af414e7278073/images/build.png -------------------------------------------------------------------------------- /images/shd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterRK/fastCHD/73df3082d8a300c6b0b89be8c06af414e7278073/images/shd.png -------------------------------------------------------------------------------- /images/throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterRK/fastCHD/73df3082d8a300c6b0b89be8c06af414e7278073/images/throughput.png -------------------------------------------------------------------------------- /include/bbf.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Block Bloom Filter with 3.5% false positive rate 3 | // Copyright (C) 2025 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #pragma once 20 | #ifndef BLOCK_BLOOM_FILTER_H_ 21 | #define BLOCK_BLOOM_FILTER_H_ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "utils.h" 27 | 28 | namespace bbf { 29 | 30 | using ::shd::IDataWriter; 31 | using ::shd::MemBlock; 32 | using ::shd::Divisor; 33 | 34 | /* Block Bloom filter. The backing memory starts with one 64-bit word that item() reads as the item count, followed by the filter area whose size in bytes is reported by capacity(). operator! is true when the backing memory is smaller than two 64-bit words, i.e. the filter is unusable. The three constructors create an empty filter, load one from a file, or fill a buffer of `size` bytes through the `load` callback. */ class BloomFilter { 35 | public: 36 | explicit BloomFilter(size_t capacity); 37 | explicit BloomFilter(const std::string& path); 38 | BloomFilter(size_t size, const std::function& load); 39 | bool operator!() const noexcept { return m_mem.size() < sizeof(uint64_t)*2; } 40 | 41 | /* NOTE(review): item() and capacity() do not check validity; item() dereferences the start of the backing memory and capacity() subtracts from its size, so callers should test operator! first. */ size_t item() const noexcept { 42 | return *reinterpret_cast(m_mem.addr()); 43 | } 44 | size_t capacity() const noexcept { 45 | return m_mem.size() - sizeof(uint64_t); 46 | } 47 | 48 | /* Writes the whole backing memory (counter word plus filter area) to `out`; returns false without writing when the filter is invalid. */ bool dump(IDataWriter& out) { 49 | if (!*this) { 50 | return false; 51 | } 52 | return out.write(m_mem.addr(), m_mem.size()); 53 | } 54 | 55 | /* Single-key membership test and insertion; `len` is the key length in bytes. */ bool test(const uint8_t* key, unsigned len) const noexcept; 56 | bool set(const uint8_t* key, unsigned len) const noexcept; 57 | 58 | /* Batched variants: `keys` holds `batch` keys of `key_len` bytes each, stored back to back. */ unsigned batch_test(unsigned batch, unsigned key_len, 59 | const uint8_t* __restrict__ keys, bool* __restrict__ out) const noexcept; 60 | 61 | void batch_set(unsigned batch, unsigned key_len, const uint8_t* keys) const noexcept; 62 | 63 | private: 64 | MemBlock m_mem; 65 | /* number of 64-bit blocks in the filter area, wrapped in a Divisor for the modulo taken on every lookup */ Divisor m_block; 66 | }; 67 | 68 | } // bbf 69 | #endif // BLOCK_BLOOM_FILTER_H_ -------------------------------------------------------------------------------- /include/shd.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 
3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #pragma once 20 | #ifndef SHD_H_ 21 | #define SHD_H_ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include "utils.h" 30 | 31 | namespace shd { 32 | 33 | /* Size limits: keys are at most UINT8_MAX bytes, inline values at most UINT16_MAX bytes. */ static constexpr size_t MAX_KEY_LEN = UINT8_MAX; 34 | static constexpr size_t MAX_INLINE_VALUE_LEN = UINT16_MAX; 35 | static constexpr unsigned MAX_VALUE_LEN_BIT = 35U; //7x 36 | /* NOTE(review): the rest of this declaration and the following ones up to original line 50 (presumably BuildStatus, Retry, DEFAULT_RETRY and DataReaders, which are used below) are truncated in this copy - restore from the upstream file. */ static constexpr size_t MAX_VALUE_LEN = (1ULL<>; 51 | 52 | /* Builders: each reads records from `in`, writes the result to `out` and returns a BuildStatus; `retry` defaults to DEFAULT_RETRY. */ extern BuildStatus BuildIndex(const DataReaders& in, IDataWriter& out, Retry retry=DEFAULT_RETRY); 53 | 54 | //key should have fixed length 55 | //dynamic length key is not useful, just pad or use checksum instead 56 | extern BuildStatus BuildSet(const DataReaders& in, IDataWriter& out, Retry retry=DEFAULT_RETRY); 57 | 58 | //key & value should have fixed length 59 | //inline large value may consume a lot of memory 60 | extern BuildStatus BuildDict(const DataReaders& in, IDataWriter& out, Retry retry=DEFAULT_RETRY); 61 | 62 | //key should have fixed length 63 | extern BuildStatus BuildDictWithVariedValue(const DataReaders& in, IDataWriter& out, Retry retry=DEFAULT_RETRY); 64 | 65 | /* Global switch defined in build.cc (false by default); presumably enables timing output during builds - confirm in build.cc. */ extern bool 
g_trace_build_time; 66 | 67 | 68 | /* Loaded dictionary. It is constructed either from a file path with a LoadPolicy or from a buffer of `size` bytes filled by the `load` callback; operator! is true when no view could be created. type(), key_len(), val_len() and item() expose the cached header fields. NOTE(review): the meaning of each LoadPolicy value is implemented outside this header. */ class PerfectHashtable { 69 | public: 70 | enum LoadPolicy {MAP_ONLY, MAP_FETCH, MAP_OCCUPY, COPY_DATA}; 71 | explicit PerfectHashtable(const std::string& path, LoadPolicy load_policy=MAP_ONLY); 72 | PerfectHashtable(size_t size, const std::function& load); 73 | bool operator!() const noexcept { return m_view == nullptr; } 74 | 75 | /* Kind of dictionary; ILLEGAL_TYPE is the value held before a successful load. */ enum Type : uint8_t { 76 | INDEX_ONLY = 0, 77 | KEY_SET = 1, 78 | KV_INLINE = 2, 79 | KV_SEPARATED = 3, 80 | ILLEGAL_TYPE = 0xff 81 | }; 82 | Type type() const noexcept { return m_type; } 83 | uint8_t key_len() const noexcept { return m_key_len; } 84 | uint16_t val_len() const noexcept { return m_val_len; } 85 | size_t item() const noexcept { return m_item; } 86 | 87 | /* Position lookups for one key or a batch of keys of `key_len` bytes each. */ size_t locate(const uint8_t* key, uint8_t key_len) const noexcept; 88 | void batch_locate(unsigned batch, const uint8_t* __restrict__ keys, 89 | uint8_t key_len, uint64_t* __restrict__ out); 90 | 91 | //KEY_SET, KV_INLINE or KV_SEPARATED 92 | //key is found when output slice is valid 93 | Slice search(const uint8_t* key) const noexcept; 94 | 95 | //KEY_SET or KV_INLINE 96 | //keys == out is OK 97 | unsigned batch_search(unsigned batch, const uint8_t* const keys[], const uint8_t* out[], 98 | const PerfectHashtable* patch=nullptr) const noexcept; 99 | 100 | //only KV_INLINE, if dft_val == nullptr, do nothing when miss 101 | unsigned batch_fetch(unsigned batch, const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, 102 | const uint8_t* __restrict__ dft_val=nullptr, 103 | const PerfectHashtable* patch=nullptr) const noexcept; 104 | 105 | unsigned batch_try_fetch(unsigned batch, const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, 106 | unsigned* __restrict__ miss, const PerfectHashtable* patch=nullptr) const noexcept; 107 | 108 | BuildStatus derive(const DataReaders& in, IDataWriter& out, Retry retry=DEFAULT_RETRY) const; 109 | 110 | private: 111 | MemMap m_res; 112 | MemBlock m_mem; 113 | std::unique_ptr m_view; 114 | Type 
m_type = ILLEGAL_TYPE; 115 | uint8_t m_key_len = 0; 116 | uint16_t m_val_len = 0; 117 | size_t m_item = 0; 118 | 119 | void _post_init() noexcept; 120 | }; 121 | 122 | } //shd 123 | #endif //SHD_H_ 124 | -------------------------------------------------------------------------------- /include/utils.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #pragma once 20 | #ifndef SHD_UTILS_H_ 21 | #define SHD_UTILS_H_ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace shd { 30 | 31 | /* Move-only owner of a byte buffer. The size and a one-bit m_mmap flag share one word; the flag presumably marks memory that must be released by unmapping - confirm in utils.cc. operator! is true when no buffer is held. */ class MemBlock final { 32 | public: 33 | MemBlock() noexcept 34 | : m_addr(nullptr), m_size(0), m_mmap(0) 35 | {} 36 | ~MemBlock() noexcept; 37 | explicit MemBlock(size_t size) noexcept; 38 | 39 | MemBlock(MemBlock&& other) noexcept 40 | : m_addr(other.m_addr), m_size(other.m_size), m_mmap(other.m_mmap) { 41 | other.m_addr = nullptr; 42 | other.m_size = 0; 43 | other.m_mmap = 0; 44 | } 45 | /* move assignment: destroy the current contents, then move-construct in place */ MemBlock& operator=(MemBlock&& other) noexcept { 46 | if (&other != this) { 47 | this->~MemBlock(); 48 | new(this)MemBlock(std::move(other)); 49 | } 50 | return *this; 51 | } 52 | 53 | size_t size() const noexcept { return m_size; } 54 | uint8_t* addr() const noexcept { return m_addr; } 55 | uint8_t* end() const noexcept { return m_addr + m_size; } 56 | bool operator!() const noexcept { return m_addr == nullptr; } 57 | 58 | static MemBlock LoadFile(const char* path) noexcept; 59 | private: 60 | MemBlock(const MemBlock&) noexcept = delete; 61 | MemBlock& operator=(const MemBlock&) noexcept = delete; 62 | uint8_t* m_addr; 63 | size_t m_size : (sizeof(size_t) * 8 - 1); 64 | size_t m_mmap : 1; 65 | }; 66 | 67 | 68 | /* Move-only, read-only view of the contents of the file at `path`, opened with one of the Policy values (their effect is implemented in utils.cc). operator! is true when nothing is held. */ class MemMap final { 69 | public: 70 | MemMap() noexcept = default; 71 | ~MemMap() noexcept; 72 | 73 | enum Policy {MAP_ONLY, FETCH, OCCUPY}; 74 | explicit MemMap(const char* path, Policy policy=MAP_ONLY) noexcept; 75 | 76 | MemMap(MemMap&& other) noexcept 77 | : m_addr(other.m_addr), m_size(other.m_size) { 78 | other.m_addr = nullptr; 79 | other.m_size = 0; 80 | } 81 | MemMap& operator=(MemMap&& other) noexcept { 82 | if (&other != this) { 83 | this->~MemMap(); 84 | new(this)MemMap(std::move(other)); 85 | } 86 | return *this; 87 | } 88 | 89 | size_t size() const noexcept { return m_size; } 90 | const 
uint8_t* addr() const noexcept { return m_addr; } 91 | const uint8_t* end() const noexcept { return m_addr + m_size; } 92 | bool operator!() const noexcept { return m_addr == nullptr; } 93 | private: 94 | MemMap(const MemMap&) noexcept = delete; 95 | MemMap& operator=(const MemMap&) noexcept = delete; 96 | uint8_t* m_addr = nullptr; 97 | size_t m_size = 0; 98 | }; 99 | 100 | 101 | /* Pluggable log sink: Bind() installs a new instance and returns the previous one; Printf() is the static entry point used for logging. */ class Logger { 102 | public: 103 | virtual ~Logger() = default; 104 | virtual void printf(const char* format, va_list args) = 0; 105 | static void Printf(const char* format, ...); 106 | static Logger* Bind(Logger* logger) noexcept { 107 | auto old = s_instance; 108 | s_instance = logger; 109 | return old; 110 | } 111 | private: 112 | static Logger* s_instance; 113 | }; 114 | 115 | /* Non-owning pointer + length; valid() is true when the pointer is non-null. */ struct Slice { 116 | const uint8_t* ptr = nullptr; 117 | size_t len = 0; 118 | bool valid() const noexcept { return ptr != nullptr; } 119 | }; 120 | 121 | /* One key/value pair as handed out by IDataReader::read. */ struct Record { 122 | Slice key; 123 | Slice val; 124 | }; 125 | 126 | /* Input interface for the builders: reset() rewinds, total() reports the record count, read() returns the next record. */ struct IDataReader { 127 | virtual void reset() = 0; 128 | virtual size_t total() = 0; 129 | virtual Record read(bool key_only) = 0; 130 | virtual ~IDataReader() noexcept = default; 131 | }; 132 | 133 | /* Output interface for the builders; operator! reports an unusable writer. */ struct IDataWriter { 134 | virtual bool operator!() const noexcept = 0; 135 | virtual bool flush() = 0; 136 | virtual bool write(const void* data, size_t n) = 0; 137 | virtual ~IDataWriter() noexcept = default; 138 | }; 139 | 140 | /* Buffered IDataWriter backed by a file descriptor: m_buf is the staging buffer (BUFSZ bytes), m_off the offset into it, m_fd the descriptor (-1 when closed). */ class FileWriter : public IDataWriter { 141 | public: 142 | FileWriter() = default; 143 | explicit FileWriter(const char* path); 144 | virtual ~FileWriter() noexcept; 145 | 146 | /* Move constructor: takes over the buffer, the buffer offset and the descriptor together, so bytes staged before the move stay accounted for; the source is left closed with an empty offset. Members are initialized in declaration order (m_buf, m_off, m_fd). */ FileWriter(FileWriter&& other) noexcept 147 | : m_buf(std::move(other.m_buf)), m_off(other.m_off), m_fd(other.m_fd) { 148 | other.m_off = 0; other.m_fd = -1; 149 | } 150 | FileWriter& operator=(FileWriter&& other) noexcept { 151 | if (&other != this) { 152 | this->~FileWriter(); 153 | new(this)FileWriter(std::move(other)); 154 | } 155 | return *this; 156 | } 157 | 158 | bool operator!() const noexcept override; 159 | bool 
flush() noexcept override; 160 | bool write(const void* data, size_t n) noexcept override; 161 | 162 | private: 163 | static constexpr size_t BUFSZ = 8192; 164 | std::unique_ptr m_buf; 165 | unsigned m_off = 0; 166 | int m_fd = -1; 167 | bool _flush() noexcept; 168 | bool _write(const void* data, size_t n) noexcept; 169 | }; 170 | 171 | #ifdef SHD_PACK_SIZE 172 | #pragma pack(SHD_PACK_SIZE) 173 | #endif 174 | 175 | //Modified Robison 176 | /* Division by a runtime-constant divisor. _init() precomputes a multiplier (m_fac), a shift (m_sft) and an additive term (m_tip, or the m_ab flag selecting between m_fac and the dividend when SHD_PACK_SIZE is not defined), so div() is a widening multiply, an add and a shift. With DISABLE_SOFT_DIVIDE the plain / and % operators are used instead. NOTE(review): template parameter lists were lost in this copy. */ template 177 | class DivisorMR { 178 | private: 179 | static_assert(std::is_same::value || std::is_same::value 180 | || std::is_same::value || std::is_same::value); 181 | Word m_val = 0; 182 | #ifndef DISABLE_SOFT_DIVIDE 183 | Word m_fac = 0; 184 | #ifdef SHD_PACK_SIZE 185 | Word m_tip = 0; 186 | unsigned m_sft = 0; 187 | #else 188 | uint8_t m_sft = 0; 189 | bool m_ab = false; 190 | #endif 191 | using DoubleWord = typename std::conditional::value, uint16_t, 192 | typename std::conditional::value, uint32_t, 193 | typename std::conditional::value, uint64_t, __uint128_t>::type>::type>::type; 194 | static constexpr unsigned BITWIDTH = sizeof(Word)*8; 195 | #endif 196 | 197 | protected: 198 | void _init(Word n) noexcept { 199 | m_val = n; 200 | #ifndef DISABLE_SOFT_DIVIDE 201 | m_fac = 0; 202 | m_sft = 0; 203 | #ifdef SHD_PACK_SIZE 204 | m_tip = 0; 205 | #else 206 | m_ab = false; 207 | #endif 208 | if (n == 0) { 209 | return; 210 | } 211 | m_sft = BITWIDTH - 1; 212 | constexpr Word one = 1; 213 | auto m = one << m_sft; 214 | /* find the largest power of two m that does not exceed n; m_sft ends as its exponent */ for (; m > n; m >>= 1U) { 215 | m_sft--; 216 | } 217 | constexpr Word zero = 0; 218 | m_fac = ~zero; 219 | #ifdef SHD_PACK_SIZE 220 | m_tip = ~zero; 221 | #endif 222 | /* n is a power of two: the all-ones multiplier set above is kept */ if (m == n) { 223 | return; 224 | } 225 | m_fac = (((DoubleWord)m) << BITWIDTH) / n; 226 | Word r = m_fac * n + n; 227 | #ifdef SHD_PACK_SIZE 228 | if (r <= m) { 229 | m_fac += 1; 230 | m_tip = 0; 231 | } else { 232 | m_tip = m_fac; 233 | } 234 | #else 235 | if (r <= m) { 236 | m_ab = true; 237 | } 238 | #endif 239 | #endif 240 | } 241 | 242 | 
public: 243 | Word value() const noexcept { return m_val; } 244 | 245 | Word div(Word m) const noexcept { 246 | #ifdef DISABLE_SOFT_DIVIDE 247 | return m / m_val; 248 | #else 249 | #ifdef SHD_PACK_SIZE 250 | auto t = m_tip; 251 | #else 252 | auto t = m_fac; 253 | if (m_ab) { 254 | t = m; 255 | } 256 | #endif 257 | return (m_fac * (DoubleWord)m + t) >> (BITWIDTH + m_sft); 258 | #endif 259 | } 260 | 261 | Word mod(Word m) const noexcept { 262 | #ifdef DISABLE_SOFT_DIVIDE 263 | return m % m_val; 264 | #else 265 | return m - m_val * div(m); 266 | #endif 267 | } 268 | }; 269 | 270 | //Lemire-Kaser-Kurz 271 | /* Division and remainder via a double-width reciprocal: _init() stores m_fac = (all-ones double word) / n + 1, or 0 when n is 0. div() takes the high bits of m * m_fac and returns m unchanged when m_fac is 0; mod() is computed directly from the low bits of m * m_fac. */ template 272 | class DivisorLKK { 273 | private: 274 | static_assert(std::is_same::value || std::is_same::value 275 | || std::is_same::value); 276 | Word m_val = 0; 277 | #ifndef DISABLE_SOFT_DIVIDE 278 | static constexpr unsigned BITWIDTH = sizeof(Word)*8; 279 | using DoubleWord = typename std::conditional::value, uint16_t, 280 | typename std::conditional::value, uint32_t, uint64_t>::type>::type; 281 | using QuaterWord = typename std::conditional::value, uint32_t, 282 | typename std::conditional::value, uint64_t, __uint128_t>::type>::type; 283 | DoubleWord m_fac = 0; 284 | #endif 285 | protected: 286 | void _init(Word n) noexcept { 287 | m_val = n; 288 | #ifndef DISABLE_SOFT_DIVIDE 289 | if (n == 0) { 290 | m_fac = 0; 291 | } else { 292 | constexpr DoubleWord zero = 0; 293 | m_fac = (DoubleWord)~zero / n + 1; 294 | } 295 | #endif 296 | } 297 | 298 | public: 299 | Word value() const noexcept { return m_val; } 300 | 301 | Word div(Word m) const noexcept { 302 | #ifdef DISABLE_SOFT_DIVIDE 303 | return m / m_val; 304 | #else 305 | Word q = (m * (QuaterWord)m_fac) >> (BITWIDTH * 2); 306 | if (m_fac == 0) { 307 | q = m; 308 | } 309 | return q; 310 | #endif 311 | } 312 | 313 | Word mod(Word m) const noexcept { 314 | #ifdef DISABLE_SOFT_DIVIDE 315 | return m % m_val; 316 | #else 317 | return ((QuaterWord)m_val * (DoubleWord)(m * m_fac)) >> (BITWIDTH * 2); 318 | #endif 319 
| } 320 | }; 321 | 322 | 323 | /* Public divisor type: the generic form derives from DivisorLKK; the explicit specialization below derives from DivisorMR and takes a uint64_t (presumably the 64-bit case - the template arguments were lost in this copy). Both can be constructed from or assigned a plain divisor value. */ template 324 | struct Divisor : public DivisorLKK { 325 | Divisor() noexcept = default; 326 | explicit Divisor(Word n) noexcept { this->_init(n); } 327 | Divisor& operator=(Word n) noexcept { 328 | this->_init(n); 329 | return *this; 330 | } 331 | }; 332 | 333 | template <> 334 | struct Divisor : public DivisorMR { 335 | Divisor() noexcept = default; 336 | explicit Divisor(uint64_t n) noexcept { this->_init(n); } 337 | Divisor& operator=(uint64_t n) noexcept { 338 | this->_init(n); 339 | return *this; 340 | } 341 | }; 342 | 343 | #ifdef SHD_PACK_SIZE 344 | #pragma pack() 345 | #endif 346 | 347 | /* Operator sugar: m / d and m % d forward to d.div(m) and d.mod(m). */ template 348 | static inline Word operator/(Word m, const Divisor& d) noexcept { 349 | return d.div(m); 350 | } 351 | 352 | template 353 | static inline Word operator%(Word m, const Divisor& d) noexcept { 354 | return d.mod(m); 355 | } 356 | 357 | } //shd 358 | #endif //SHD_UTILS_H_ 359 | -------------------------------------------------------------------------------- /src/bbf.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Block Bloom Filter with 3.5% false positive rate 3 | // Copyright (C) 2025 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include "common.h" 21 | #include "bbf.h" 22 | #include "pipeline.h" 23 | 24 | namespace bbf { 25 | 26 | /* Result of hashing one key: blk is the index of the 64-bit block to touch, mask the bits to test or set inside it. */ struct Step { 27 | uint64_t blk; 28 | uint64_t mask; 29 | }; 30 | 31 | /* Hashes the key to 128 bits. The high half modulo the block count selects the block; five 6-bit fields of the low half (bit offsets 24, 18, 12, 6 and 0) each select one bit of the 64-bit mask. Fields may coincide, so the mask has between one and five bits set. */ static FORCE_INLINE Step Calc(const Divisor& block, const uint8_t *key, unsigned len) noexcept { 32 | auto code = ::shd::HashTo128(key, len); 33 | auto a = 1ULL << ((code.l >> 24) & 63); 34 | auto b = 1ULL << ((code.l >> 18) & 63); 35 | auto c = 1ULL << ((code.l >> 12) & 63); 36 | auto d = 1ULL << ((code.l >> 6) & 63); 37 | auto e = 1ULL << (code.l & 63); 38 | return {code.h % block, (a | b) | (c | d) | e}; 39 | } 40 | 41 | /* Returns true when every mask bit of the key is set in its block (the block area starts one 64-bit word into the backing memory). */ bool BloomFilter::test(const uint8_t *key, unsigned len) const noexcept { 42 | auto s = Calc(m_block, key, len); 43 | auto space = reinterpret_cast(m_mem.addr()+sizeof(uint64_t)); 44 | return (space[s.blk] & s.mask) == s.mask; 45 | } 46 | 47 | /* Inserts the key. Returns false and changes nothing when all of its mask bits were already set; otherwise sets them, increments the item counter stored in the first word of the backing memory and returns true. */ bool BloomFilter::set(const uint8_t *key, unsigned len) const noexcept { 48 | auto s = Calc(m_block, key, len); 49 | auto space = reinterpret_cast(m_mem.addr()+sizeof(uint64_t)); 50 | auto& item = *reinterpret_cast(m_mem.addr()); 51 | if ((space[s.blk] & s.mask) == s.mask) { 52 | return false; 53 | } 54 | space[s.blk] |= s.mask; 55 | item++; 56 | return true; 57 | } 58 | 59 | /* Creates an empty filter: the block count is capacity rounded up to a multiple of 8 bytes, divided by 8; the memory (counter word plus blocks) is zero-filled. The filter stays invalid when capacity is 0 or the allocation fails. */ BloomFilter::BloomFilter(size_t capacity) { 60 | if (capacity == 0) { 61 | return; 62 | } 63 | m_block = (capacity+7) / sizeof(uint64_t); 64 | auto size = sizeof(uint64_t) + m_block.value() * sizeof(uint64_t); 65 | m_mem = MemBlock(size); 66 | if (!m_mem) { 67 | return; 68 | } 69 | memset(m_mem.addr(), 0, m_mem.size()); 70 | } 71 | 72 | /* Loads a filter from a file. The file is rejected (filter stays invalid) when it cannot be loaded, is smaller than two 64-bit words, or its size is not a multiple of 8 bytes. */ BloomFilter::BloomFilter(const std::string& path) { 73 | auto mem = MemBlock::LoadFile(path.c_str()); 74 | if (!mem) { 75 | return; 76 | } 77 | if (mem.size() < sizeof(uint64_t)*2 || mem.size() % sizeof(uint64_t) != 0) { 78 | return; 79 | } 80 | m_block = (mem.size()-sizeof(uint64_t)) / sizeof(uint64_t); 81 | m_mem = 
std::move(mem); 82 | } 83 | 84 | /* Creates a filter from `size` bytes produced by the `load` callback, which receives the destination buffer and reports success; the same size checks as for the file constructor apply. */ BloomFilter::BloomFilter(size_t size, const std::function& load) { 85 | if (size < sizeof(uint64_t)*2 || size % sizeof(uint64_t) != 0) { 86 | return; 87 | } 88 | auto mem = MemBlock(size); 89 | if (!mem || !load(mem.addr())) { 90 | return; 91 | } 92 | m_block = (size-sizeof(uint64_t)) / sizeof(uint64_t); 93 | m_mem = std::move(mem); 94 | } 95 | 96 | /* Tests `batch` keys of key_len bytes stored back to back in `keys`; writes one result per key to `out` and returns the number of hits. The first Pipeline stage hashes a key and prefetches its block, the second stage reads the block (Pipeline itself is defined in pipeline.h). */ unsigned BloomFilter::batch_test(unsigned batch, unsigned key_len, 97 | const uint8_t* __restrict__ keys, bool* __restrict__ out) const noexcept { 98 | auto space = reinterpret_cast(m_mem.addr()+sizeof(uint64_t)); 99 | unsigned hit = 0; 100 | Pipeline<15>(batch, 101 | [this, space, &keys, key_len](unsigned i)->Step { 102 | auto s = Calc(m_block, keys+i*key_len, key_len); 103 | PrefetchForNext(&space[s.blk]); 104 | return s; 105 | }, 106 | [space, &out, &hit](Step& s, unsigned i) { 107 | out[i] = (space[s.blk] & s.mask) == s.mask; 108 | hit += out[i]; 109 | } 110 | ); 111 | return hit; 112 | } 113 | 114 | /* Inserts `batch` keys of key_len bytes stored back to back in `keys`, using the same two-stage pipeline; the item counter grows by one for every key whose mask bits were not all set beforehand. */ void BloomFilter::batch_set(unsigned batch, unsigned key_len, const uint8_t* keys) const noexcept { 115 | auto space = reinterpret_cast(m_mem.addr()+sizeof(uint64_t)); 116 | auto& item = *reinterpret_cast(m_mem.addr()); 117 | Pipeline<15>(batch, 118 | [this, space, &keys, key_len](unsigned i)->Step { 119 | auto s = Calc(m_block, keys+i*key_len, key_len); 120 | PrefetchForNext(&space[s.blk]); 121 | return s; 122 | }, 123 | [space, &item](Step& s, unsigned) { 124 | item += (space[s.blk] & s.mask) != s.mask; 125 | space[s.blk] |= s.mask; 126 | } 127 | ); 128 | } 129 | 130 | } // bbf -------------------------------------------------------------------------------- /src/build.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 
3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "internal.h" 32 | 33 | namespace shd { 34 | 35 | bool g_trace_build_time = false; 36 | /* Elapsed time between two steady_clock points: the tick count of the duration_cast result divided by 1000.0. NOTE(review): the cast target was lost in this copy - presumably milliseconds, which would make the result seconds; confirm. */ static double DurationS(const std::chrono::steady_clock::time_point& start, const std::chrono::steady_clock::time_point& end) { 37 | return std::chrono::duration_cast(end - start).count() / 1000.0; 38 | } 39 | 40 | /* Exception types used inside the builder; what() returns a fixed message. */ struct BuildException : public std::exception { 41 | const char* what() const noexcept override; 42 | }; 43 | const char* BuildException::what() const noexcept { 44 | return "build exception"; 45 | } 46 | 47 | struct InternalException : public std::exception { 48 | const char *what() const noexcept override; 49 | }; 50 | 51 | const char *InternalException::what() const noexcept { 52 | return "this should never occur"; 53 | } 54 | 55 | /* Always-on invariant check: throws InternalException when the condition is false. */ static FORCE_INLINE void Assert(bool condition) { 56 | if (UNLIKELY(!condition)) { 57 | throw InternalException(); 58 | } 59 | } 60 | 61 | /* Declares a MemBlock named `mem` of `size` bytes and throws std::bad_alloc when the allocation failed. */ #define ALLOC_MEM_BLOCK(mem, size) \ 62 | MemBlock mem(size); \ 63 | if (!mem) { \ 64 | throw 
std::bad_alloc(); \ 65 | } 66 | 67 | /* Sorts ids in place (by the low 64 bits, then the high 32 bits) and returns true when two adjacent ids compare equal, i.e. the array contains a duplicate id. */ static bool HasConflict(V96 ids[], uint32_t cnt) { 68 | std::sort(ids, ids+cnt, [](const V96& a, const V96& b)->bool{ 69 | V96X ax, bx; 70 | ax.v = a; 71 | bx.v = b; 72 | if (ax.u.l64 < bx.u.l64) { 73 | return true; 74 | } else if (ax.u.l64 > bx.u.l64) { 75 | return false; 76 | } else { 77 | return ax.u.h32 < bx.u.h32; 78 | } 79 | }); 80 | for (uint32_t i = 1; i < cnt; i++) { 81 | if (ids[i] == ids[i-1]) { 82 | return true; 83 | } 84 | } 85 | return false; 86 | } 87 | 88 | /* Placement attempt for a bucket with more than MINI_BATCH ids. Tries up to n seeds starting at sd8: for each seed every id is hashed to a bit position (L2Hash modulo range) and the ids are claimed in mini-batches with prefetching. On success returns true with the bits left set and sd8 holding the working seed. On a collision the bits claimed for that seed are cleared again, sd8 is incremented and the next seed is tried; returns false after n failed seeds. */ static bool TryToMapLarge(V96 ids[], uint32_t cnt, uint8_t& sd8, uint8_t bitmap[], const Divisor& range, unsigned n) { 89 | /* claims the positions of up to MINI_BATCH ids; on the first already-set bit it rolls back the bits it set itself and returns false */ auto mini_batch_mapping = [bitmap,range](uint8_t sd8, V96 ids[], unsigned n)->bool { 90 | assert(n <= MINI_BATCH); 91 | uint64_t pos[MINI_BATCH]; 92 | for (unsigned i = 0; i < n; i++) { 93 | pos[i] = L2Hash(ids[i], sd8) % range; 94 | PrefetchBit(bitmap, pos[i]); 95 | } 96 | for (unsigned i = 0; i < n; i++) { 97 | if (!TestAndSetBit(bitmap, pos[i])) { 98 | for (unsigned j = 0; j < i; j++) { 99 | ClearBit(bitmap, pos[j]); 100 | } 101 | return false; 102 | } 103 | } 104 | return true; 105 | }; 106 | 107 | assert(cnt > MINI_BATCH); 108 | while (n-- != 0) { 109 | auto tail = ids; 110 | auto remain = cnt; 111 | do { 112 | if (!mini_batch_mapping(sd8, tail, MINI_BATCH)) { 113 | goto retry; 114 | } 115 | tail += MINI_BATCH; 116 | remain -= MINI_BATCH; 117 | } while (remain > MINI_BATCH); 118 | if (mini_batch_mapping(sd8, tail, remain)) { 119 | return true; 120 | } 121 | retry: 122 | /* undo the mini-batches that were fully claimed with this seed (the failing one already rolled itself back) */ for (auto p = ids; p < tail; p++) { 123 | ClearBit(bitmap, L2Hash(*p, sd8) % range); 124 | } 125 | sd8++; 126 | } 127 | return false; 128 | } 129 | 130 | /* Placement attempt for a bucket with at most MINI_BATCH ids. Positions for several consecutive seeds are computed and prefetched together (as many seeds as fit into MINI_BATCH positions), then each seed is tried in turn. On success returns true with sd8 holding the working seed; every failed seed rolls back its own bits and increments sd8. Returns false after n failed seeds. */ static bool TryToMapSmall(V96 ids[], uint32_t cnt, uint8_t& sd8, uint8_t bitmap[], const Divisor& range, unsigned n) { 131 | assert(cnt <= MINI_BATCH); 132 | for (unsigned m = 0; m < n; ) { 133 | uint64_t pos[MINI_BATCH]; 134 | auto sd8x = sd8; 135 | for (unsigned i = m, off = 0; i < n && off+cnt <= MINI_BATCH; i++) 
{ 136 | for (unsigned j = 0; j < cnt; j++) { 137 | auto t = L2Hash(ids[j], sd8x) % range; 138 | PrefetchBit(bitmap, t); 139 | pos[off++] = t; 140 | } 141 | sd8x++; 142 | } 143 | for (unsigned off = 0; m < n && off+cnt <= MINI_BATCH; m++) { 144 | for (unsigned j = 0; j < cnt; j++) { 145 | if (!TestAndSetBit(bitmap, pos[off+j])) { 146 | for (unsigned k = 0; k < j; k++) { 147 | ClearBit(bitmap, pos[off+k]); 148 | } 149 | goto retry; 150 | } 151 | } 152 | return true; 153 | retry: 154 | off += cnt; 155 | sd8++; 156 | } 157 | } 158 | return false; 159 | } 160 | 161 | /* Finds a seed that places all ids of one bucket on free bits and returns {seed, status}. At most 256 seeds are tried. Buckets with several ids first get a limited number of tries (56 for large, 96 for small buckets); if those fail, the ids are checked for duplicates (BUILD_STATUS_CONFLICT) before the remaining tries are spent. A single id is tried against consecutive seeds in prefetched mini-batches. BUILD_STATUS_OUT_OF_CHANCE is returned when no seed works. NOTE(review): the tuple's template arguments were lost in this copy. */ static std::tuple 162 | Mapping(V96 ids[], uint32_t cnt, uint8_t sd8, uint8_t bitmap[], const Divisor& range) { 163 | /* single-id case: tries n consecutive seeds; sd8 (captured by reference) is advanced once per failed seed and holds the winning seed when true is returned */ auto mini_batch_try = [bitmap,range,&sd8](V96 id, unsigned n)->bool { 164 | assert(n <= MINI_BATCH); 165 | uint64_t pos[MINI_BATCH]; 166 | auto sd8x = sd8; 167 | for (unsigned i = 0; i < n; i++) { 168 | pos[i] = L2Hash(id, sd8x++) % range; 169 | PrefetchBit(bitmap, pos[i]); 170 | } 171 | for (unsigned i = 0; i < n; i++) { 172 | if (TestAndSetBit(bitmap, pos[i])) { 173 | return true; 174 | } 175 | sd8++; 176 | } 177 | return false; 178 | }; 179 | 180 | constexpr unsigned TOTAL_TRIES = 256; 181 | if (cnt > MINI_BATCH) { 182 | constexpr unsigned FIRST_TRIES = 56; 183 | constexpr unsigned SECOND_TRIES = TOTAL_TRIES - FIRST_TRIES; 184 | if (TryToMapLarge(ids, cnt, sd8, bitmap, range, FIRST_TRIES)) { 185 | return {sd8, BUILD_STATUS_OK}; 186 | } 187 | if (HasConflict(ids, cnt)) { 188 | return {sd8, BUILD_STATUS_CONFLICT}; 189 | } 190 | if (TryToMapLarge(ids, cnt, sd8, bitmap, range, SECOND_TRIES)) { 191 | return {sd8, BUILD_STATUS_OK}; 192 | } 193 | } else if (cnt != 1) { 194 | constexpr unsigned FIRST_TRIES = 96; 195 | constexpr unsigned SECOND_TRIES = TOTAL_TRIES - FIRST_TRIES; 196 | if (TryToMapSmall(ids, cnt, sd8, bitmap, range, FIRST_TRIES)) { 197 | return {sd8, BUILD_STATUS_OK}; 198 | } 199 | if (HasConflict(ids, cnt)) { 200 | return {sd8, BUILD_STATUS_CONFLICT}; 201 | } 202 | if 
(TryToMapSmall(ids, cnt, sd8, bitmap, range, SECOND_TRIES)) { 203 | return {sd8, BUILD_STATUS_OK}; 204 | } 205 | } else { 206 | auto remain = TOTAL_TRIES; 207 | for (; remain > MINI_BATCH; remain -= MINI_BATCH) { 208 | if (mini_batch_try(ids[0], MINI_BATCH)) { 209 | return {sd8, BUILD_STATUS_OK}; 210 | } 211 | } 212 | if (mini_batch_try(ids[0], remain)) { 213 | return {sd8, BUILD_STATUS_OK}; 214 | } 215 | } 216 | return {sd8, BUILD_STATUS_OUT_OF_CHANCE}; 217 | } 218 | 219 | /* One piece of the index under construction: `size` plus owned `cells` and `sections` arrays (their element types were lost in this copy). */ struct IndexPiece { 220 | uint32_t size = 0; 221 | std::unique_ptr cells; 222 | std::unique_ptr sections; 223 | }; 224 | 225 | /* Returns a 64-bit seed assembled from two std::random_device draws (first draw in the upper 32 bits). */ static uint64_t GetSeed() { 226 | std::random_device rd; 227 | return (static_cast(rd()) << 32U) | static_cast(rd()); 228 | } 229 | 230 | /* Sums total() over all input readers. */ static size_t SumInputSize(const DataReaders& in) { 231 | size_t total = 0; 232 | for (auto& reader : in) { 233 | total += reader->total(); 234 | } 235 | return total; 236 | } 237 | 238 | /* In-place partitioning of ids into `parts` groups by hash(id). offset(p) is a mutable cursor to the next unsettled slot of part p and border(p) the end of that part. Elements already in the right part are skipped; a misplaced element is carried along its displacement cycle, swapping through the target parts until an element belonging to part p comes back. With prefetch enabled, a slot four positions ahead is prefetched on every fourth index. */ template 239 | static FORCE_INLINE void Shuffle(V96 ids[], uint32_t parts, 240 | const Hash& hash, const Offset& offset, const Border& border, bool prefetch=true) { 241 | auto prefetch4 = [ids, prefetch](size_t k) { 242 | if (prefetch && (k & 3UL) == 0) { 243 | PrefetchForFuture(&ids[k+4]); 244 | } 245 | }; 246 | for (uint32_t p = 0; p < parts; p++) { 247 | while (offset(p) < border(p)) { 248 | auto i = offset(p); 249 | auto q = hash(ids[i]); 250 | if (q == p) { 251 | offset(p)++; 252 | continue; 253 | } 254 | prefetch4(i); 255 | auto tmp = ids[i]; 256 | do { 257 | size_t j; 258 | uint32_t qx; 259 | /* skip slots of part q that already hold an element of part q */ do { 260 | j = offset(q)++; 261 | qx = hash(ids[j]); 262 | } while (qx == q); 263 | q = qx; 264 | prefetch4(j); 265 | std::swap(tmp, ids[j]); 266 | } while (q != p); 267 | offset(p)++; 268 | ids[i] = tmp; 269 | } 270 | } 271 | } 272 | 273 | template 274 | static FORCE_INLINE void Shuffle(V96 ids[], V96 shadow[], SizeT total, const Offset& offset) { 275 | if (total < MINI_BATCH*2) { 276 | for (SizeT i = 0; i < total; i++) { 277 | shadow[offset(ids[i])++] = 
ids[i]; 278 | } 279 | } else { 280 | static_assert((MINI_BATCH&(MINI_BATCH-1)) == 0); 281 | constexpr size_t batch = MINI_BATCH; 282 | constexpr size_t mask = MINI_BATCH-1; 283 | 284 | struct { 285 | SizeT* poff; 286 | V96* pout; 287 | } state[batch]; 288 | 289 | for (size_t i = 0; i < batch; i++) { 290 | auto& c = state[i]; 291 | c.poff = &offset(ids[i]); 292 | PrefetchForNext(c.poff); 293 | } 294 | for (size_t i = batch; i < batch*2; i++) { 295 | auto& c = state[i & mask]; 296 | c.pout = &shadow[(*c.poff)++]; 297 | PrefetchForNext(c.pout); 298 | c.poff = &offset(ids[i]); 299 | PrefetchForNext(c.poff); 300 | } 301 | for (size_t i = batch*2; i < total; i++) { 302 | auto& c = state[i & mask]; 303 | *c.pout = ids[i-batch*2]; 304 | c.pout = &shadow[(*c.poff)++]; 305 | PrefetchForNext(c.pout); 306 | c.poff = &offset(ids[i]); 307 | PrefetchForNext(c.poff); 308 | } 309 | for (size_t i = total; i < total+batch; i++) { 310 | auto& c = state[i & mask]; 311 | *c.pout = ids[i-batch*2]; 312 | c.pout = &shadow[(*c.poff)++]; 313 | PrefetchForNext(c.pout); 314 | } 315 | for (size_t i = total+batch; i < total+batch*2; i++) { 316 | auto& c = state[i & mask]; 317 | *c.pout = ids[i-batch*2]; 318 | } 319 | } 320 | } 321 | 322 | template 323 | static FORCE_INLINE SizeT Counting(V96 ids[], SizeT total, const Slot& slot, bool prefetch=true) { 324 | SizeT max = 0; 325 | if (!prefetch || total < MINI_BATCH) { 326 | for (SizeT i = 0; i < total; i++) { 327 | auto tmp = ++slot(ids[i]); 328 | if (tmp > max) max = tmp; 329 | } 330 | } else { 331 | static_assert((MINI_BATCH&(MINI_BATCH-1)) == 0); 332 | constexpr size_t batch = MINI_BATCH; 333 | constexpr size_t mask = MINI_BATCH-1; 334 | typedef SizeT* Pointer; 335 | Pointer pcnt[batch]; 336 | for (size_t i = 0; i < batch; i++) { 337 | pcnt[i] = &slot(ids[i]); 338 | PrefetchForNext(pcnt[i]); 339 | } 340 | for (size_t i = batch; i < total; i++) { 341 | auto j = i & mask; 342 | auto tmp = ++(*pcnt[j]); 343 | if (tmp > max) max = tmp; 344 | pcnt[j] 
= &slot(ids[i]); 345 | PrefetchForNext(pcnt[j]); 346 | } 347 | for (size_t i = total; i < total+batch; i++) { 348 | auto j = i & mask; 349 | auto tmp = ++(*pcnt[j]); 350 | if (tmp > max) max = tmp; 351 | } 352 | } 353 | return max; 354 | } 355 | 356 | struct L1Mark { 357 | uint32_t val; 358 | uint32_t idx; 359 | }; 360 | 361 | static NOINLINE uint32_t L1SortMarking(V96 ids[], uint32_t total, L1Mark table[], uint32_t tbsz, 362 | const Divisor& l1bd) { 363 | for (uint32_t i = 0; i < tbsz; i++) { 364 | table[i] = {0, i}; 365 | } 366 | return Counting(ids, total, 367 | [l1bd, table](const V96& id)->uint32_t& { 368 | return table[SkewMap(L1Hash(id), l1bd)].val; 369 | }); 370 | } 371 | 372 | static NOINLINE L1Mark* L1SortReorder(uint32_t max, unsigned n, L1Mark table[], L1Mark temp[]) { 373 | Assert(n > 0); 374 | uint32_t memo[256]; 375 | for (unsigned sft = 0; sft < 32U && (1U<> sft) & 0xffU]++; 379 | } 380 | uint32_t off = 0; 381 | for (auto& m : memo) { 382 | auto next = off + m; 383 | m = off; 384 | off = next; 385 | } 386 | for (uint32_t i = 0; i < n; i++) { 387 | auto j = memo[(table[i].val >> sft) & 0xffU]++; 388 | temp[j] = table[i]; 389 | } 390 | std::swap(table, temp); 391 | } 392 | 393 | uint32_t off = 0; 394 | for (int64_t i = n - 1; i >= 0; i--) { 395 | auto cnt = table[i].val; 396 | auto& rg = temp[table[i].idx]; 397 | rg.idx = off; 398 | off += cnt; 399 | rg.val = off; 400 | } 401 | return temp; 402 | } 403 | 404 | static NOINLINE void L1SortShuffle(V96 ids[], uint32_t l1sz, const Divisor& l1bd, L1Mark range[]) { 405 | Shuffle(ids, l1sz, 406 | [l1bd](const V96& id)->uint32_t { 407 | return SkewMap(L1Hash(id), l1bd); 408 | }, 409 | [range](uint32_t i)->uint32_t& { 410 | return range[i].idx; 411 | }, 412 | [range](uint32_t i)->uint32_t { 413 | return range[i].val; 414 | }, 415 | false); 416 | } 417 | 418 | static NOINLINE void L1SortShuffle(V96 ids[], V96 shadow[], uint32_t total, 419 | const Divisor& l1bd, L1Mark range[]) { 420 | Shuffle(ids, shadow, total, 
421 | [l1bd, range](const V96& id)->uint32_t& { 422 | return range[SkewMap(L1Hash(id), l1bd)].idx; 423 | }); 424 | } 425 | 426 | static V96* L1Sort(V96 ids[], V96 shadow[], uint32_t total, 427 | uint32_t l1sz, const Divisor& l1bd) { 428 | ALLOC_MEM_BLOCK(mem, ((size_t)l1sz) * sizeof(L1Mark) * 2) 429 | auto table = (L1Mark*)mem.addr(); 430 | 431 | auto max = L1SortMarking(ids, total, table, l1sz, l1bd); 432 | if (max > std::min(l1sz+16U, (uint32_t)UINT16_MAX)) { 433 | return nullptr; 434 | } 435 | auto range = L1SortReorder(max, l1sz, table, table+l1sz); 436 | if (shadow == nullptr) { 437 | L1SortShuffle(ids, l1sz, l1bd, range); 438 | return ids; 439 | } else { 440 | L1SortShuffle(ids, shadow, total, l1bd, range); 441 | return shadow; 442 | } 443 | } 444 | 445 | static NOINLINE BuildStatus Build(V96 ids[], V96 shadow[], IndexPiece& out) { 446 | const uint32_t l1sz = L1Size(out.size); 447 | const Divisor l1bd(L1Band(out.size)); 448 | const Divisor l2sz(L2Size(out.size)); 449 | 450 | ids = L1Sort(ids, shadow, out.size, l1sz, l1bd); 451 | if (ids == nullptr) { 452 | return BUILD_STATUS_CONFLICT; 453 | }; 454 | 455 | const auto bitmap_size = BitmapSize(out.size); 456 | auto bitmap = std::make_unique(bitmap_size); 457 | memset(bitmap.get(), 0, bitmap_size); 458 | auto cells = std::make_unique(l1sz); 459 | 460 | uint8_t magic = 0; 461 | 462 | auto last = SkewMap(L1Hash(ids[0]), l1bd); 463 | uint32_t begin = 0; 464 | for (uint32_t i = 1; i < out.size; i++) { 465 | auto curr = SkewMap(L1Hash(ids[i]), l1bd); 466 | if (curr != last) { 467 | auto [sd8, status] = Mapping(ids+begin, i-begin, magic--, bitmap.get(), l2sz); 468 | if (status != BUILD_STATUS_OK) { 469 | return status; 470 | } 471 | cells[last] = sd8; 472 | last = curr; 473 | begin = i; 474 | } 475 | } 476 | auto [sd8, status] = Mapping(ids+begin, out.size-begin, magic, bitmap.get(), l2sz); 477 | if (status != BUILD_STATUS_OK) { 478 | return status; 479 | } 480 | cells[last] = sd8; 481 | 482 | out.cells = 
std::move(cells); 483 | const auto sec_sz = SectionSize(out.size); 484 | out.sections = std::make_unique(sec_sz); 485 | auto b32 = (const uint32_t*)bitmap.get(); 486 | uint32_t step = 0; 487 | for (uint32_t i = 0; i < sec_sz; i++) { 488 | auto& sec = out.sections[i]; 489 | sec.step = step; 490 | step += PopCount32(b32[0]); 491 | auto b64 = (const uint64_t*)(b32 + 1); 492 | step += PopCount64(b64[0]) + PopCount64(b64[1]) + PopCount64(b64[2]); 493 | sec.b32[0] = b32[0]; 494 | sec.b32[1] = b32[1]; 495 | sec.b32[2] = b32[2]; 496 | sec.b32[3] = b32[3]; 497 | sec.b32[4] = b32[4]; 498 | sec.b32[5] = b32[5]; 499 | sec.b32[6] = b32[6]; 500 | b32 += 7; 501 | } 502 | Assert(step == out.size); 503 | return BUILD_STATUS_OK; 504 | } 505 | 506 | static BuildStatus Build(V96 ids[], V96 shadow[], std::vector& out) { 507 | std::vector threads; 508 | threads.reserve(out.size()); 509 | std::vector part_status(out.size()); 510 | 511 | size_t off = 0; 512 | for (unsigned i = 0; i < out.size(); i++) { 513 | threads.emplace_back([](V96 ids[], V96 shadow[], IndexPiece* piece, BuildStatus* status) { 514 | *status = Build(ids, shadow, *piece); 515 | }, ids+off, shadow!=nullptr? 
shadow+off : nullptr, &out[i], &part_status[i]); 516 | off += out[i].size; 517 | } 518 | for (auto& t : threads) { 519 | t.join(); 520 | } 521 | BuildStatus status = BUILD_STATUS_OK; 522 | for (auto part : part_status) { 523 | if (part == BUILD_STATUS_CONFLICT) { 524 | status = BUILD_STATUS_CONFLICT; 525 | } else if (part == BUILD_STATUS_OUT_OF_CHANCE && status != BUILD_STATUS_CONFLICT) { 526 | status = BUILD_STATUS_OUT_OF_CHANCE; 527 | } 528 | } 529 | return status; 530 | } 531 | 532 | static BuildStatus Build(V96 ids[], V96 shadow[], std::vector& shuffle, std::vector& out) { 533 | const uint32_t n = shuffle.size(); 534 | Assert(n > 1 && n <= MAX_SEGMENT); 535 | const Divisor l0sz(n); 536 | out.clear(); 537 | out.resize(n); 538 | for (unsigned i = 0; i < n; i++) { 539 | if (shuffle[i] == 0 || shuffle[i] > UINT32_MAX) { 540 | return BUILD_STATUS_BAD_INPUT; 541 | } 542 | out[i].size = shuffle[i]; 543 | } 544 | 545 | auto spot1 = std::chrono::steady_clock::now(); 546 | if (shadow == nullptr) { 547 | size_t off = 0; 548 | auto border = std::make_unique(n); 549 | for (unsigned i = 0; i < n; i++) { 550 | shuffle[i] = off; 551 | PrefetchForFuture(&ids[off]); 552 | off += out[i].size; 553 | border[i] = off; 554 | } 555 | Shuffle(ids, n, 556 | [l0sz](const V96& id)->uint16_t { 557 | return L0Hash(id) % l0sz; 558 | }, 559 | [&shuffle](uint16_t i)->size_t& { 560 | return shuffle[i]; 561 | }, 562 | [&border](uint16_t i)->size_t { 563 | return border[i]; 564 | }); 565 | } else { 566 | size_t total = 0; 567 | size_t min = std::numeric_limits::max(); 568 | for (unsigned i = 0; i < n; i++) { 569 | shuffle[i] = total; 570 | auto sz = out[i].size; 571 | total += sz; 572 | if (sz < min) { 573 | min = sz; 574 | } 575 | } 576 | #ifdef NDEBUG 577 | auto heads = min >> 20U; 578 | #else 579 | auto heads = min >> 5U; 580 | #endif 581 | if (heads <= 1) { 582 | Shuffle(ids, shadow, total, 583 | [l0sz, &shuffle](const V96& id)->size_t& { 584 | return shuffle[L0Hash(id) % l0sz]; 585 | }); 586 
| } else { //multi-head shuffle 587 | if (heads > n) { 588 | heads = n; 589 | } 590 | struct Range { 591 | size_t off; 592 | size_t end; 593 | }; 594 | std::vector> ctx(heads); 595 | for (unsigned i = 0; i < heads; i++) { 596 | ctx[i] = std::make_unique(n); 597 | } 598 | for (unsigned j = 0; j < n; j++) { 599 | const auto piece = out[j].size / heads; 600 | const auto remain = out[j].size % heads; 601 | size_t off = shuffle[j]; 602 | for (unsigned i = 0; i < heads; i++) { 603 | const auto part = i threads; 609 | threads.reserve(heads); 610 | const auto piece = total / heads; 611 | const auto remain = total % heads; 612 | size_t off = 0; 613 | for (unsigned i = 0; i < heads; i++) { 614 | const auto part = i(l0sz.value()); 617 | for (unsigned j = 0; j < l0sz.value(); j++) { 618 | idx[j] = self; 619 | } 620 | for (size_t i = 0; i < cnt; i++) { 621 | auto p = L0Hash(ids[i]) % l0sz; 622 | auto& k = idx[p]; 623 | for (unsigned j = 0; j < ctx.size(); j++) { 624 | auto& range = ctx[k][p]; 625 | auto off = AddRelaxed(range.off, 1UL); 626 | if (LIKELY(off < range.end)) { 627 | shadow[off] = ids[i]; 628 | break; 629 | } 630 | k = (k+1) % ctx.size(); 631 | } 632 | } 633 | }, i, ids+off, part); 634 | off += part; 635 | } 636 | for (auto& t : threads) { 637 | t.join(); 638 | } 639 | } 640 | std::swap(ids, shadow); 641 | } 642 | auto spot2 = std::chrono::steady_clock::now(); 643 | auto status = Build(ids, shadow, out); 644 | auto spot3 = std::chrono::steady_clock::now(); 645 | if (g_trace_build_time) { 646 | Logger::Printf("partition: %.3fs\n", DurationS(spot1, spot2)); 647 | Logger::Printf("build: %.3fs\n", DurationS(spot2, spot3)); 648 | } 649 | return status; 650 | } 651 | 652 | static BuildStatus Build(bool use_extra_mem, uint32_t seed, const DataReaders& in, std::vector& out) { 653 | const auto total = SumInputSize(in); 654 | Assert(!in.empty() && total > 0); 655 | 656 | ALLOC_MEM_BLOCK(mem, total*sizeof(V96)*(use_extra_mem?2U:1U)) 657 | auto ids = (V96*)mem.addr(); 658 | 
auto shadow = use_extra_mem? ids + total : nullptr; 659 | 660 | const uint32_t n = in.size(); 661 | #ifdef NDEBUG 662 | if (n == 1 || total < 8192U * n) { 663 | #else 664 | if (n == 1 || total < 32U * n) { 665 | #endif 666 | if (total > UINT32_MAX) { 667 | return BUILD_STATUS_BAD_INPUT; 668 | } 669 | auto spot1 = std::chrono::steady_clock::now(); 670 | auto p = ids; 671 | for (auto& reader : in) { 672 | reader->reset(); 673 | auto cnt = reader->total(); 674 | for (size_t i = 0; i < cnt; i++) { 675 | auto key = reader->read(true).key; 676 | if (key.ptr == nullptr || key.len == 0 || key.len > MAX_KEY_LEN) { 677 | return BUILD_STATUS_BAD_INPUT; 678 | } 679 | *p++ = GenID(seed, key.ptr, key.len); 680 | } 681 | } 682 | out.resize(1); 683 | out.front().size = total; 684 | auto spot2 = std::chrono::steady_clock::now(); 685 | auto status = Build(ids, shadow, out.front()); 686 | auto spot3 = std::chrono::steady_clock::now(); 687 | if (g_trace_build_time) { 688 | Logger::Printf("gen-id: %.3fs\n", DurationS(spot1, spot2)); 689 | Logger::Printf("build: %.3fs\n", DurationS(spot2, spot3)); 690 | } 691 | return status; 692 | } 693 | 694 | auto spot4 = std::chrono::steady_clock::now(); 695 | const Divisor l0sz(n); 696 | 697 | std::vector threads; 698 | threads.reserve(n); 699 | std::vector shuffle(n, 0); 700 | 701 | bool fail = false; 702 | size_t off = 0; 703 | for (auto& reader : in) { 704 | reader->reset(); 705 | threads.emplace_back([seed, &fail, &shuffle, l0sz](IDataReader* reader, V96 ids[]) { 706 | auto cnt = reader->total(); 707 | const uint32_t n = shuffle.size(); 708 | std::vector temp(n, 0); 709 | for (size_t j = 0; j < cnt; j++) { 710 | auto key = reader->read(true).key; 711 | if (key.ptr == nullptr || key.len == 0 || key.len > MAX_KEY_LEN) { 712 | fail = true; 713 | return; 714 | } 715 | ids[j] = GenID(seed, key.ptr, key.len); 716 | temp[L0Hash(ids[j])%l0sz]++; 717 | } 718 | for (unsigned j = 0; j < n; j++) { 719 | AddRelaxed(shuffle[j], temp[j]); 720 | } 721 | }, 
reader.get(), ids+off); 722 | off += reader->total(); 723 | } 724 | for (auto& t : threads) { 725 | t.join(); 726 | } 727 | if (fail) { 728 | return BUILD_STATUS_BAD_INPUT; 729 | } 730 | auto spot5 = std::chrono::steady_clock::now(); 731 | if (g_trace_build_time) { 732 | Logger::Printf("gen-id: %.3fs\n", DurationS(spot4, spot5)); 733 | } 734 | return Build(ids, shadow, shuffle, out); 735 | } 736 | 737 | /* Writes the index to `out` in this order: the header, one 4-byte size per piece, every piece's cells, zero padding up to the next 32-byte boundary, then every piece's sections. Returns false when there are no pieces or when any write fails. */ static bool DumpIndex(IDataWriter& out, const Header& header, const std::vector& pieces) { 738 | std::vector items(pieces.size()); 739 | for (unsigned i = 0; i < pieces.size(); i++) { 740 | items[i] = pieces[i].size; 741 | } 742 | if (items.empty() 743 | || !out.write(&header, sizeof(header)) 744 | || !out.write(items.data(), items.size()*4U) 745 | ) return false; 746 | 747 | auto size = sizeof(Header) + items.size()*4U; 748 | for (auto& res : pieces) { 749 | auto sz = L1Size(res.size); 750 | if (!out.write(res.cells.get(), sz)) { 751 | return false; 752 | } 753 | size += sz; 754 | } 755 | const uint64_t zeros[4] = {0,0,0,0}; 756 | const auto unaligned = size; 757 | /* the mask must be as wide as size: ~31U is a 32-bit value that would zero-extend and clear bits 32..63, dropping the padding once size reaches 4 GiB */ size = (size+31U)&(~(size_t)31U); 758 | if (size > unaligned && !out.write(zeros, size-unaligned)) { 759 | return false; 760 | } 761 | for (auto& res : pieces) { 762 | auto sz = SectionSize(res.size) * (size_t)sizeof(BitmapSection); 763 | if (!out.write(res.sections.get(), sz)) { 764 | return false; 765 | } 766 | size += sz; 767 | } 768 | return true; 769 | } 770 | 771 | struct BasicInfo { 772 | Type type; 773 | uint8_t key_len; 774 | uint16_t val_len; 775 | }; 776 | 777 | std::unique_ptr CreateIndexView(const BasicInfo& info, uint32_t seed, const std::vector& pieces) { 778 | Assert(!pieces.empty()); 779 | auto view = std::make_unique(sizeof(PackView) + sizeof(SegmentView) * pieces.size()); 780 | auto index = (PackView*)view.get(); 781 | *index = PackView{}; 782 | index->key_len = info.key_len; 783 | index->val_len = info.val_len; 784 | index->line_size = info.key_len + (uint32_t)info.val_len; 785 | index->seed 
= seed; 786 | index->l0sz = pieces.size(); 787 | uint64_t off = 0; 788 | for (unsigned i = 0; i < pieces.size(); i++) { 789 | index->segments[i] = SegmentView{}; 790 | index->segments[i].l1bd = L1Band(pieces[i].size); 791 | index->segments[i].l2sz = L2Size(pieces[i].size); 792 | index->segments[i].sections = pieces[i].sections.get(); 793 | index->segments[i].cells = pieces[i].cells.get(); 794 | index->segments[i].offset = off; 795 | off += pieces[i].size; 796 | } 797 | return view; 798 | } 799 | 800 | /* Builds the index for `in` and writes it to `out`. Rejects an empty reader list, more than MAX_SEGMENT readers, or zero total items. Build is attempted with a fresh GetSeed() value each round until it reports BUILD_STATUS_OK; a conflict consumes both retry.conflict and retry.total, an out-of-chance result consumes retry.total only, and any other status is returned as is. */ static BuildStatus BuildAndDump(const DataReaders& in, IDataWriter& out, const BasicInfo& info, Retry retry, 801 | const std::function& fill) { 802 | const size_t total = SumInputSize(in); 803 | if (in.empty() || in.size() > MAX_SEGMENT || total == 0) { 804 | return BUILD_STATUS_BAD_INPUT; 805 | } 806 | Header header; 807 | header.type = info.type; 808 | header.key_len = info.key_len; 809 | header.val_len = info.val_len; 810 | header.item = total; 811 | header.item_high = total >> 32U; 812 | 813 | const bool use_extra_mem = info.key_len + (uint32_t)info.val_len > sizeof(V96)*2+4; 814 | 815 | std::vector pieces; 816 | for (bool done = false; !done; ) { 817 | header.seed = GetSeed(); 818 | const auto status = Build(use_extra_mem, header.seed, in, pieces); 819 | switch (status) { 820 | case BUILD_STATUS_OK: 821 | done = true; 822 | break; 823 | case BUILD_STATUS_CONFLICT: 824 | if (retry.conflict-- == 0) { 825 | return status; 826 | } /* deliberate: a conflict also counts against the total retry budget below */ [[fallthrough]]; 827 | case BUILD_STATUS_OUT_OF_CHANCE: 828 | if (retry.total-- == 0) { 829 | return status; 830 | } 831 | Logger::Printf(status==BUILD_STATUS_CONFLICT? 
"conflict, retry\n" : "failed, retry\n"); 832 | break; 833 | default: 834 | return status; 835 | } 836 | } 837 | header.seg_cnt = pieces.size(); 838 | if (!DumpIndex(out, header, pieces)) { 839 | return BUILD_STATUS_FAIL_TO_OUTPUT; 840 | } 841 | if (fill != nullptr) { 842 | auto index = CreateIndexView(info, header.seed, pieces); 843 | assert(index != nullptr); 844 | return fill(*(PackView*)index.get(), in, out); 845 | } 846 | return BUILD_STATUS_OK; 847 | } 848 | 849 | static FORCE_INLINE uint8_t* FindLine(uint8_t* space, const PackView& index, const uint8_t* key) { 850 | const auto pos = CalcPos(index, key, index.key_len); 851 | return space + pos*index.line_size; 852 | } 853 | 854 | static bool FillKeyValue(const PackView& index, IDataReader& reader, uint8_t* space) { 855 | Assert(index.key_len != 0); 856 | const auto total = reader.total(); 857 | auto fill_line = [&index](const Record& rec, uint8_t* line)->bool { 858 | Assert(rec.key.len == index.key_len); 859 | Assign(line, rec.key.ptr, index.key_len); 860 | if (index.val_len != 0) { 861 | if (rec.val.ptr == nullptr || rec.val.len != index.val_len) { 862 | return false; 863 | } 864 | memcpy(line+index.key_len, rec.val.ptr, index.val_len); 865 | } 866 | return true; 867 | }; 868 | reader.reset(); 869 | if (index.line_size <= DOUBLE_COPY_LINE_SIZE_LIMIT) { 870 | try { 871 | BatchDataMapping(index, space, total, 872 | [&reader, &fill_line, &index](uint8_t* buf) { 873 | auto rec = reader.read(index.val_len==0); 874 | if (rec.key.len != index.key_len || !fill_line(rec, buf)) { 875 | throw BuildException(); 876 | } 877 | }); 878 | } catch (const BuildException&) { 879 | return false; 880 | } 881 | } else { 882 | for (size_t i = 0; i < total; i++) { 883 | auto rec = reader.read(index.val_len==0); 884 | if (rec.key.len != index.key_len 885 | || !fill_line(rec, FindLine(space, index, rec.key.ptr))) { 886 | return false; 887 | } 888 | } 889 | } 890 | return true; 891 | } 892 | 893 | static BuildStatus 
FillInlineKeyValue(const PackView& index, const DataReaders& in, IDataWriter& out) { 894 | const auto total = SumInputSize(in); 895 | Assert(!in.empty() && total > 0); 896 | ALLOC_MEM_BLOCK(space, total*index.line_size); 897 | 898 | auto spot1 = std::chrono::steady_clock::now(); 899 | if (in.size() == 1 || total < 4096U * in.size()) { 900 | for (auto& reader : in) { 901 | if (!FillKeyValue(index, *reader, space.addr())) { 902 | return BUILD_STATUS_BAD_INPUT; 903 | } 904 | } 905 | } else { 906 | std::vector threads; 907 | threads.reserve(in.size()); 908 | bool fail = false; 909 | for (auto& reader : in) { 910 | threads.emplace_back([&fail, &space, &index](IDataReader* reader) { 911 | if (!FillKeyValue(index, *reader, space.addr())) { 912 | fail = true; 913 | } 914 | }, reader.get()); 915 | } 916 | for (auto& t : threads) { 917 | t.join(); 918 | } 919 | if (fail) { 920 | return BUILD_STATUS_BAD_INPUT; 921 | } 922 | } 923 | auto spot2 = std::chrono::steady_clock::now(); 924 | if (!out.write(space.addr(), space.size())) { 925 | return BUILD_STATUS_FAIL_TO_OUTPUT; 926 | } 927 | auto spot3 = std::chrono::steady_clock::now(); 928 | if (g_trace_build_time) { 929 | Logger::Printf("fill: %.3fs\n", DurationS(spot1, spot2)); 930 | Logger::Printf("dump: %.3fs\n", DurationS(spot2, spot3)); 931 | } 932 | return BUILD_STATUS_OK; 933 | } 934 | 935 | static unsigned VarIntSize(size_t n) { 936 | unsigned cnt = 1; 937 | while ((n & ~0x7fULL) != 0) { 938 | n >>= 7U; 939 | cnt++; 940 | } 941 | return cnt; 942 | } 943 | static bool WriteVarInt(size_t n, IDataWriter& out) { 944 | uint8_t buf[10]; 945 | unsigned w = 0; 946 | while ((n & ~0x7fULL) != 0) { 947 | buf[w++] = 0x80ULL | (n & 0x7fULL); 948 | n >>= 7U; 949 | } 950 | buf[w++] = n; 951 | return out.write(buf, w); 952 | } 953 | 954 | static BuildStatus FillSeparatedKeyValue(const PackView& index, const DataReaders& in, IDataWriter& out) { 955 | const auto total = SumInputSize(in); 956 | Assert(total> 0 && index.key_len != 0 && 
index.line_size == index.key_len + OFFSET_FIELD_SIZE); 957 | ALLOC_MEM_BLOCK(space, total*index.line_size) 958 | 959 | const auto key_len = index.key_len; 960 | size_t offset = 0; 961 | auto fill_line = [key_len, &offset](const Record& rec, uint8_t* line)->bool { 962 | Assign(line, rec.key.ptr, key_len); 963 | if (offset > MAX_OFFSET) { 964 | return false; 965 | } 966 | WriteOffsetField(line+key_len, offset); 967 | if (rec.val.len > MAX_VALUE_LEN || (rec.val.len != 0 && rec.val.ptr == nullptr)) { 968 | return false; 969 | } 970 | offset += VarIntSize(rec.val.len) + rec.val.len; 971 | return true; 972 | }; 973 | 974 | auto spot1 = std::chrono::steady_clock::now(); 975 | for (auto& reader : in) { 976 | reader->reset(); 977 | auto cnt = reader->total(); 978 | if (index.line_size <= DOUBLE_COPY_LINE_SIZE_LIMIT) { 979 | try { 980 | BatchDataMapping(index, space.addr(), cnt, 981 | [&reader, &fill_line, key_len](uint8_t* buf) { 982 | auto rec = reader->read(false); 983 | if (rec.key.len != key_len || !fill_line(rec, buf)) { 984 | throw BuildException(); 985 | } 986 | }); 987 | } catch (const BuildException&) { 988 | return offset > MAX_OFFSET? BUILD_STATUS_FAIL_TO_OUTPUT : BUILD_STATUS_BAD_INPUT; 989 | } 990 | } else { 991 | for (size_t i = 0; i < cnt; i++) { 992 | auto rec = reader->read(false); 993 | if (rec.key.len != key_len 994 | || !fill_line(rec, FindLine(space.addr(), index, rec.key.ptr))) { 995 | return offset > MAX_OFFSET? 
BUILD_STATUS_FAIL_TO_OUTPUT : BUILD_STATUS_BAD_INPUT; 996 | } 997 | } 998 | } 999 | } 1000 | auto spot2 = std::chrono::steady_clock::now(); 1001 | if (!out.write(space.addr(), space.size())) { 1002 | return BUILD_STATUS_FAIL_TO_OUTPUT; 1003 | } 1004 | space = MemBlock{}; 1005 | auto spot3 = std::chrono::steady_clock::now(); 1006 | 1007 | for (auto& reader : in) { 1008 | reader->reset(); 1009 | auto cnt = reader->total(); 1010 | for (size_t i = 0; i < cnt; i++) { 1011 | auto val = reader->read(false).val; 1012 | if (!WriteVarInt(val.len, out) || 1013 | (val.len != 0 && !out.write(val.ptr, val.len))) { 1014 | return BUILD_STATUS_FAIL_TO_OUTPUT; 1015 | } 1016 | } 1017 | } 1018 | auto spot4 = std::chrono::steady_clock::now(); 1019 | if (g_trace_build_time) { 1020 | Logger::Printf("fill index: %.3fs\n", DurationS(spot1, spot2)); 1021 | Logger::Printf("dump index: %.3fs\n", DurationS(spot2, spot3)); 1022 | Logger::Printf("dump value: %.3fs\n", DurationS(spot3, spot4)); 1023 | } 1024 | return BUILD_STATUS_OK; 1025 | } 1026 | 1027 | BuildStatus BuildIndex(const DataReaders& in, IDataWriter& out, Retry retry) { 1028 | return BuildAndDump(in, out, {Type::INDEX_ONLY, 0, 0}, retry, nullptr); 1029 | } 1030 | 1031 | static bool DetectKeyValueLen(const DataReaders& in, uint8_t& key_len, uint16_t* val_len) { 1032 | for (auto& reader : in) { 1033 | if (reader->total() == 0) { 1034 | continue; 1035 | } 1036 | auto rec = reader->read(val_len == nullptr); 1037 | if (rec.key.ptr == nullptr || rec.key.len == 0 || rec.key.len > MAX_KEY_LEN) { 1038 | return false; 1039 | } 1040 | key_len = rec.key.len; 1041 | if (val_len != nullptr) { 1042 | if (rec.val.ptr == nullptr || rec.val.len == 0 || rec.val.len > MAX_INLINE_VALUE_LEN) { 1043 | return false; 1044 | } 1045 | *val_len = rec.val.len; 1046 | } 1047 | reader->reset(); 1048 | return true; 1049 | } 1050 | return false; 1051 | } 1052 | 1053 | BuildStatus BuildSet(const DataReaders& in, IDataWriter& out, Retry retry) { 1054 | uint8_t 
key_len; 1055 | if (!DetectKeyValueLen(in, key_len, nullptr)) { 1056 | return BUILD_STATUS_BAD_INPUT; 1057 | } 1058 | return BuildAndDump(in, out, {Type::KEY_SET, key_len, 0}, retry, 1059 | [](const PackView& index, const DataReaders& in, IDataWriter& out)->BuildStatus { 1060 | return FillInlineKeyValue(index, in, out); 1061 | }); 1062 | } 1063 | 1064 | BuildStatus BuildDict(const DataReaders& in, IDataWriter& out, Retry retry) { 1065 | uint8_t key_len; 1066 | uint16_t val_len; 1067 | if (!DetectKeyValueLen(in, key_len, &val_len)) { 1068 | return BUILD_STATUS_BAD_INPUT; 1069 | } 1070 | return BuildAndDump(in, out, {Type::KV_INLINE, key_len, val_len}, retry, 1071 | [](const PackView& index, const DataReaders& in, IDataWriter& out)->BuildStatus { 1072 | return FillInlineKeyValue(index, in, out); 1073 | }); 1074 | } 1075 | 1076 | BuildStatus BuildDictWithVariedValue(const DataReaders& in, IDataWriter& out, Retry retry) { 1077 | uint8_t key_len; 1078 | if (!DetectKeyValueLen(in, key_len, nullptr)) { 1079 | return BUILD_STATUS_BAD_INPUT; 1080 | } 1081 | return BuildAndDump(in, out, {Type::KV_SEPARATED, key_len, OFFSET_FIELD_SIZE}, retry, 1082 | [](const PackView& index, const DataReaders& in, IDataWriter& out)->BuildStatus { 1083 | return FillSeparatedKeyValue(index, in, out); 1084 | }); 1085 | } 1086 | 1087 | 1088 | struct Shard { 1089 | size_t begin; 1090 | size_t end; 1091 | size_t valid; 1092 | }; 1093 | 1094 | class RebuildReader : public IDataReader { 1095 | public: 1096 | explicit RebuildReader(const std::shared_ptr& dirty, 1097 | const Shard& shard, const PackView& base, IDataReader& patch) 1098 | : m_dirty(dirty), m_shard(shard),m_base(base), m_patch(patch), m_pos(shard.begin) { 1099 | assert(base.type != Type::INDEX_ONLY && dirty != nullptr); 1100 | m_patch.reset(); 1101 | } 1102 | 1103 | void reset() override { 1104 | m_pos = m_shard.begin; 1105 | m_patch.reset(); 1106 | } 1107 | size_t total() override { 1108 | return m_shard.valid + m_patch.total(); 1109 | 
} 1110 | Record read(bool key_only) override { 1111 | while (m_pos < m_shard.end) { 1112 | if (TestBit(m_dirty->addr(), m_pos)) { 1113 | m_pos++; 1114 | continue; 1115 | } 1116 | auto line = m_base.content + (m_pos++)*m_base.line_size; 1117 | Record out; 1118 | out.key = {line, m_base.key_len}; 1119 | if (!key_only) { 1120 | auto field = line + m_base.key_len; 1121 | if (m_base.type != Type::KV_SEPARATED) { 1122 | out.val = {field, m_base.val_len}; 1123 | } else { 1124 | out.val = SeparatedValue(m_base.extend+ReadOffsetField(field), m_base.space_end); 1125 | } 1126 | } 1127 | return out; 1128 | } 1129 | return m_patch.read(key_only); 1130 | } 1131 | 1132 | private: 1133 | const std::shared_ptr m_dirty; 1134 | const Shard m_shard; 1135 | const PackView& m_base; 1136 | IDataReader& m_patch; 1137 | size_t m_pos = 0; 1138 | }; 1139 | 1140 | static DataReaders PrepareForRebuild(const PackView& base, const DataReaders& in) { 1141 | DataReaders out; 1142 | if (base.type == Type::INDEX_ONLY || in.empty() || in.size() > MAX_SEGMENT || base.item < in.size()) { 1143 | return out; 1144 | } 1145 | auto dirty = std::make_shared((base.item+7U)/8U); 1146 | if (!*dirty) throw std::bad_alloc(); 1147 | memset(dirty->addr(), 0, dirty->size()); 1148 | 1149 | if (in.size() == 1) { 1150 | Shard shard; 1151 | shard.begin = 0; 1152 | shard.end = base.item; 1153 | shard.valid = base.item; 1154 | auto reader = in.front().get(); 1155 | reader->reset(); 1156 | try { 1157 | BatchFindPos(base, reader->total(), 1158 | [reader, &base](uint8_t *buf) { 1159 | auto key = reader->read(true).key; 1160 | if (key.ptr == nullptr || key.len != base.key_len) { 1161 | throw BuildException(); 1162 | } 1163 | Assign(buf, key.ptr, base.key_len); 1164 | }, 1165 | [&shard, &base, &dirty](uint64_t pos) { 1166 | if (pos < base.item) { 1167 | if (!TestAndSetBit(dirty->addr(), pos)) { 1168 | throw BuildException(); 1169 | } 1170 | shard.valid--; 1171 | } 1172 | }, dirty->addr()); 1173 | } catch (const 
BuildException&) { 1174 | return {}; 1175 | } 1176 | out.emplace_back(new RebuildReader(dirty, shard, base, *reader)); 1177 | return out; 1178 | } 1179 | 1180 | std::vector shards(in.size()); 1181 | const auto piece = base.item / in.size(); 1182 | const auto remain = base.item % in.size(); 1183 | size_t off = 0; 1184 | for (unsigned i = 0; i < shards.size(); i++) { 1185 | shards[i].begin = off; 1186 | shards[i].valid = i threads; 1192 | threads.reserve(in.size()); 1193 | bool fail = false; 1194 | for (auto& reader : in) { 1195 | reader->reset(); 1196 | threads.emplace_back([&base, &shards, &fail, dirty](IDataReader* reader) { 1197 | std::vector temp(shards.size(), 0); 1198 | try { 1199 | BatchFindPos(base, reader->total(), 1200 | [reader, &base](uint8_t *buf){ 1201 | auto key = reader->read(true).key; 1202 | if (key.ptr == nullptr || key.len != base.key_len) { 1203 | throw BuildException(); 1204 | } 1205 | Assign(buf, key.ptr, base.key_len); 1206 | }, 1207 | [&shards, &temp, &base, &dirty](uint64_t pos) { 1208 | if (pos < base.item) { 1209 | unsigned a = 0; 1210 | unsigned b = shards.size(); 1211 | while (a < b) { 1212 | auto m = (a + b) / 2; 1213 | if (pos < shards[m].end) { 1214 | b = m; 1215 | } else { 1216 | a = m + 1; 1217 | } 1218 | } 1219 | temp[a]++; 1220 | if (!AtomicTestAndSetBit(dirty->addr(), pos)) { 1221 | throw BuildException(); 1222 | } 1223 | } 1224 | }, dirty->addr()); 1225 | } catch (const BuildException&) { 1226 | fail = true; 1227 | return; 1228 | } 1229 | for (unsigned j = 0; j < shards.size(); j++) { 1230 | SubRelaxed(shards[j].valid, temp[j]); 1231 | } 1232 | }, reader.get()); 1233 | } 1234 | for (auto& t : threads) { 1235 | t.join(); 1236 | } 1237 | if (fail) { 1238 | return {}; 1239 | } 1240 | 1241 | out.reserve(in.size()); 1242 | for (unsigned i = 0; i < in.size(); i++) { 1243 | out.emplace_back(new RebuildReader(dirty, shards[i], base, *in[i])); 1244 | } 1245 | return out; 1246 | } 1247 | 1248 | BuildStatus Rebuild(const PackView& base, 
const DataReaders& in, IDataWriter& out, Retry retry) { 1249 | auto spot1 = std::chrono::steady_clock::now(); 1250 | auto input = PrepareForRebuild(base, in); 1251 | if (input.empty()) { 1252 | return BUILD_STATUS_BAD_INPUT; 1253 | } 1254 | auto spot2 = std::chrono::steady_clock::now(); 1255 | if (g_trace_build_time) { 1256 | Logger::Printf("prepare: %lds\n", DurationS(spot1, spot2)); 1257 | } 1258 | switch (base.type) { 1259 | case Type::KEY_SET: 1260 | return BuildSet(input, out, retry); 1261 | case Type::KV_INLINE: 1262 | return BuildDict(input, out, retry); 1263 | case Type::KV_SEPARATED: 1264 | return BuildDictWithVariedValue(input, out, retry); 1265 | default: 1266 | return BUILD_STATUS_BAD_INPUT; 1267 | } 1268 | } 1269 | 1270 | } //shd 1271 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
//==============================================================================

#pragma once
#ifndef SHD_COMMON_H_
#define SHD_COMMON_H_

// Fixed-width integer types used below (uint8_t, uint64_t).
#include <cstdint>

namespace shd {

// 128-bit value stored as two 64-bit words: l is the low word, h the high word.
struct V128 {
	uint64_t l;
	uint64_t h;
};

// Hashes the len bytes starting at msg into 128 bits, mixing in seed.
// Defined in hash.cc.
extern V128 HashTo128(const uint8_t* msg, uint8_t len, uint64_t seed=0);

} // shd

// GCC/Clang function attributes.
#define FORCE_INLINE inline __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))

// Branch-prediction hints. The condition is normalised with !! so that any
// truthy value (including pointers and integers other than 1) matches the
// expected constant; both macros therefore evaluate to 0 or 1.
#define LIKELY(exp) __builtin_expect(!!(exp),1)
#define UNLIKELY(exp) __builtin_expect(!!(exp),0)

// The hashing and on-disk layout code reads multi-byte fields directly from
// byte buffers, so only little-endian targets are supported.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error "little endian only"
#endif

// Prefetch for reading with the highest temporal-locality hint (3).
static FORCE_INLINE void PrefetchForNext(const void* ptr) {
	__builtin_prefetch(ptr, 0, 3);
}

// Prefetch for reading with the lowest temporal-locality hint (0).
static FORCE_INLINE void PrefetchForFuture(const void* ptr) {
	__builtin_prefetch(ptr, 0, 0);
}

// Prefetch for writing with a low temporal-locality hint (1).
static FORCE_INLINE void PrefetchForWrite(const void* ptr) {
	__builtin_prefetch(ptr, 1, 1);
}

#endif // SHD_COMMON_H_
// --------------------------------------------------------------------------------
// /src/hash.cc:
// --------------------------------------------------------------------------------
//==============================================================================
// Skew Hash and Displace Algorithm.
// Copyright (C) 2020 Ruan Kunliang
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 2.1 of the License, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #include "internal.h" 20 | 21 | namespace shd { 22 | 23 | static FORCE_INLINE uint64_t Rot64(uint64_t x, unsigned k) { 24 | return (x << k) | (x >> (64U - k)); 25 | } 26 | 27 | static FORCE_INLINE void Mix(uint64_t& h0, uint64_t& h1, uint64_t& h2, uint64_t& h3) { 28 | h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; 29 | h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; 30 | h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; 31 | h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; 32 | h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; 33 | h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; 34 | h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; 35 | h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; 36 | h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; 37 | h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; 38 | h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; 39 | h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; 40 | } 41 | 42 | static FORCE_INLINE void End(uint64_t& h0, uint64_t& h1, uint64_t& h2, uint64_t& h3) { 43 | h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; 44 | h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; 45 | h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; 46 | h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; 47 | h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; 48 | h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; 49 | h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; 50 | h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; 51 | h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; 52 | h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; 53 | h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; 54 | } 55 | 56 | //SpookyHash 57 | V128 HashTo128(const uint8_t* msg, uint8_t len, uint64_t seed) { 58 | constexpr uint64_t magic = 0xdeadbeefdeadbeefULL; 59 | 60 | uint64_t a = seed; 61 | uint64_t b = seed; 62 | uint64_t c = magic; 63 | uint64_t d = magic; 64 | 65 | for (auto end = msg + (len&~0x1fU); msg < end; 
msg += 32) { 66 | auto x = (const uint64_t*)msg; 67 | c += x[0]; 68 | d += x[1]; 69 | Mix(a, b, c, d); 70 | a += x[2]; 71 | b += x[3]; 72 | } 73 | 74 | if (len & 0x10U) { 75 | auto x = (const uint64_t*)msg; 76 | c += x[0]; 77 | d += x[1]; 78 | Mix(a, b, c, d); 79 | msg += 16; 80 | } 81 | 82 | d += ((uint64_t)len) << 56U; 83 | switch (len & 0xfU) { 84 | case 15: 85 | d += ((uint64_t)msg[14]) << 48U; 86 | case 14: 87 | d += ((uint64_t)msg[13]) << 40U; 88 | case 13: 89 | d += ((uint64_t)msg[12]) << 32U; 90 | case 12: 91 | d += *(uint32_t*)(msg+8); 92 | c += *(uint64_t*)msg; 93 | break; 94 | case 11: 95 | d += ((uint64_t)msg[10]) << 16U; 96 | case 10: 97 | d += ((uint64_t)msg[9]) << 8U; 98 | case 9: 99 | d += (uint64_t)msg[8]; 100 | case 8: 101 | c += *(uint64_t*)msg; 102 | break; 103 | case 7: 104 | c += ((uint64_t)msg[6]) << 48U; 105 | case 6: 106 | c += ((uint64_t)msg[5]) << 40U; 107 | case 5: 108 | c += ((uint64_t)msg[4]) << 32U; 109 | case 4: 110 | c += *(uint32_t*)msg; 111 | break; 112 | case 3: 113 | c += ((uint64_t)msg[2]) << 16U; 114 | case 2: 115 | c += ((uint64_t)msg[1]) << 8U; 116 | case 1: 117 | c += (uint64_t)msg[0]; 118 | break; 119 | case 0: 120 | c += magic; 121 | d += magic; 122 | } 123 | End(a, b, c, d); 124 | 125 | return {a, b}; 126 | } 127 | 128 | } //shd -------------------------------------------------------------------------------- /src/internal.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 
9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #pragma once 20 | #ifndef SHD_INTERNAL_H_ 21 | #define SHD_INTERNAL_H_ 22 | 23 | #include 24 | #include 25 | //#define SHD_PACK_SIZE 4 26 | #include 27 | #include "common.h" 28 | 29 | namespace shd { 30 | 31 | struct V96 { 32 | uint32_t u[3]; 33 | }; 34 | union V128X { 35 | V128 v; 36 | struct { 37 | V96 l96; 38 | uint32_t h32; 39 | } u; 40 | }; 41 | union V96X { 42 | V96 v; 43 | struct { 44 | uint64_t l64; 45 | uint32_t h32; 46 | } u; 47 | }; 48 | 49 | static FORCE_INLINE bool operator==(const V96& a, const V96& b) { 50 | V96X ax{.v = a}; 51 | V96X bx{.v = b}; 52 | return ax.u.l64 == bx.u.l64 && ax.u.h32 == bx.u.h32; 53 | } 54 | 55 | static FORCE_INLINE V96 GenID(uint32_t seed, const uint8_t* key, uint8_t len) { 56 | V128X tmp{.v = HashTo128(key, len, seed)}; 57 | return tmp.u.l96; 58 | } 59 | static FORCE_INLINE uint16_t L0Hash(const V96& id) { 60 | return id.u[0]; 61 | } 62 | static FORCE_INLINE uint32_t L1Hash(const V96& id) { 63 | return id.u[1]; 64 | } 65 | static FORCE_INLINE uint64_t L2Hash(const V96& id, uint8_t sd8) { 66 | const uint32_t seed = (sd8+1U) * 0xff00ffU; //{sd8, ~sd8, sd8, ~sd8} 67 | V128X tmp{ .u = {id, seed} }; 68 | return tmp.v.l ^ tmp.v.h; 69 | } 70 | 71 | static FORCE_INLINE unsigned PopCount32(uint32_t x) { 72 | static_assert(sizeof(int)==sizeof(uint32_t)); 73 | return __builtin_popcount(x); 74 | } 75 | static FORCE_INLINE unsigned PopCount64(uint64_t x) { 76 | static_assert(sizeof(long long)==sizeof(uint64_t)); 77 | return 
__builtin_popcountll(x); 78 | } 79 | 80 | template 81 | T FORCE_INLINE AddRelaxed(T& tgt, T val) { 82 | return __atomic_fetch_add(&tgt, val, __ATOMIC_RELAXED); 83 | } 84 | 85 | template 86 | T FORCE_INLINE SubRelaxed(T& tgt, T val) { 87 | return __atomic_fetch_sub(&tgt, val, __ATOMIC_RELAXED); 88 | } 89 | 90 | static FORCE_INLINE bool TestAndSetBit(uint8_t bitmap[], size_t pos) { 91 | auto& b = bitmap[pos>>3U]; 92 | const uint8_t m = 1U << (pos&7U); 93 | if (b & m) { 94 | return false; 95 | } 96 | b |= m; 97 | return true; 98 | } 99 | static FORCE_INLINE bool AtomicTestAndSetBit(uint8_t bitmap[], uint64_t pos) { 100 | auto& b = bitmap[pos>>3U]; 101 | const uint8_t m = 1U << (pos&7U); 102 | while (true) { 103 | auto b0 = __atomic_load_n(&b, __ATOMIC_ACQUIRE); 104 | if (b0 & m) { 105 | return false; 106 | } 107 | auto b1 = b0 | m; 108 | if (__atomic_compare_exchange_n(&b, &b0, b1, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 109 | return true; 110 | } 111 | } 112 | } 113 | 114 | static FORCE_INLINE void PrefetchBit(const uint8_t bitmap[], size_t pos) { 115 | PrefetchForNext(&bitmap[pos>>3U]); 116 | } 117 | 118 | static FORCE_INLINE bool TestBit(const uint8_t bitmap[], size_t pos) { 119 | return (bitmap[pos>>3U] & (1U<<(pos&7U))) != 0; 120 | } 121 | 122 | static FORCE_INLINE void SetBit(uint8_t bitmap[], size_t pos) { 123 | bitmap[pos>>3U] |= (1U<<(pos&7U)); 124 | } 125 | 126 | static FORCE_INLINE void ClearBit(uint8_t bitmap[], size_t pos) { 127 | bitmap[pos>>3U] &= ~(1U<<(pos&7U)); 128 | } 129 | 130 | static constexpr uint64_t L1H_MAX = 0x7fffffff; 131 | static constexpr uint32_t L1CELL = 5; 132 | static constexpr uint64_t L1TIP = L1H_MAX / L1CELL; 133 | 134 | static FORCE_INLINE constexpr uint32_t L1Size(uint32_t item) { 135 | return ((uint64_t)item+(L1CELL-1))/L1CELL; 136 | } 137 | 138 | static FORCE_INLINE constexpr uint64_t L1Band(uint32_t item) { 139 | auto l1sz = L1Size(item); 140 | return (L1H_MAX*(L1H_MAX+L1TIP) + (l1sz-1)) / l1sz; 141 | } 142 | 143 | static 
FORCE_INLINE uint32_t SkewMap(uint32_t code, const Divisor& band) { 144 | uint64_t x = code & L1H_MAX; 145 | return x*(x+L1TIP) / band; 146 | } 147 | 148 | static FORCE_INLINE constexpr uint64_t L2Size(uint32_t item) { 149 | return ((uint64_t)item)*2U | 1U; //up to odd 150 | } 151 | 152 | struct BitmapSection { 153 | uint32_t b32[7]; 154 | uint32_t step; 155 | }; 156 | static constexpr unsigned BITMAP_SECTION_SIZE = 28U * 8U; 157 | static FORCE_INLINE constexpr uint32_t SectionSize(uint32_t item) { 158 | return (L2Size(item) + (BITMAP_SECTION_SIZE-1)) / BITMAP_SECTION_SIZE; 159 | } 160 | static FORCE_INLINE constexpr uint32_t BitmapSize(uint32_t item) { 161 | return SectionSize(item) * (BITMAP_SECTION_SIZE/8U); 162 | } 163 | 164 | //optimize for common short cases 165 | static FORCE_INLINE bool Equal(const uint8_t* a, const uint8_t* b, uint8_t len) { 166 | if (len == sizeof(uint64_t)) { 167 | return *(const uint64_t*)a == *(const uint64_t*)b; 168 | } else if (len == sizeof(uint32_t)) { 169 | return *(const uint32_t*)a == *(const uint32_t*)b; 170 | } else { 171 | return memcmp(a, b, len) == 0; 172 | } 173 | } 174 | static FORCE_INLINE void Assign(uint8_t* dest, const uint8_t* src, uint8_t len) { 175 | if (len == sizeof(uint64_t)) { 176 | *(uint64_t*)dest = *(const uint64_t*)src; 177 | } else if (len == sizeof(uint32_t)) { 178 | *(uint32_t*)dest = *(const uint32_t*)src; 179 | } else { 180 | memcpy(dest, src, len); 181 | } 182 | } 183 | 184 | static constexpr uint32_t SHD_MAGIC = 0x4448537f; 185 | 186 | static constexpr uint32_t OFFSET_FIELD_SIZE = 6; 187 | static constexpr uint64_t MAX_OFFSET = (1ULL<<(OFFSET_FIELD_SIZE*8U))-1; 188 | 189 | static FORCE_INLINE size_t ReadOffsetField(const uint8_t* field) { 190 | return (((uint64_t)*(uint16_t*)(field+4))<<32U) | *(uint32_t*)field; 191 | } 192 | 193 | static FORCE_INLINE void WriteOffsetField(uint8_t* field, size_t offset) { 194 | *(uint32_t*)field = offset; 195 | *(uint16_t*)(field+4) = offset>>32U; 196 | } 197 | 198 | 
199 | using Type = PerfectHashtable::Type; 200 | 201 | struct Header { 202 | uint32_t magic = SHD_MAGIC; 203 | uint8_t type = Type::INDEX_ONLY; 204 | uint8_t key_len = 0; 205 | uint16_t val_len = 0; 206 | uint32_t seed = 0; 207 | uint32_t item = 0; 208 | uint16_t item_high = 0; 209 | uint16_t seg_cnt = 0; 210 | //uint32_t parts[seg_cnt] = 0; 211 | 212 | // uint8_t cells[] 213 | // 32B align 214 | // BitmapSection sections[] 215 | 216 | // key_val[item] or key_off[item] sizeof(key_off)-key_len is val_len 217 | // separated_value[], dynamic length, length mark is embedded 218 | }; 219 | 220 | struct SegmentView { 221 | const uint8_t* cells = nullptr; 222 | const BitmapSection* sections = nullptr; 223 | Divisor l1bd; 224 | Divisor l2sz; 225 | uint64_t offset = 0; //item offset 226 | }; 227 | 228 | struct PackView { 229 | Type type = Type::INDEX_ONLY; 230 | uint8_t key_len = 0; 231 | uint16_t val_len = 0; 232 | uint32_t line_size = 0; //key_len+val_len 233 | uint32_t seed = 0; 234 | Divisor l0sz; 235 | uint64_t item = 0; 236 | const uint8_t* content = nullptr; 237 | const uint8_t* extend = nullptr; 238 | const uint8_t* space_end = nullptr; 239 | SegmentView segments[0]; 240 | }; 241 | 242 | extern std::unique_ptr CreatePackView(const uint8_t* addr, size_t size); 243 | extern Slice SeparatedValue(const uint8_t* pt, const uint8_t* end); 244 | 245 | extern uint64_t CalcPos(const PackView& index, const uint8_t* key, uint8_t key_len); 246 | 247 | static constexpr unsigned MINI_BATCH = 32; 248 | static constexpr unsigned DOUBLE_COPY_LINE_SIZE_LIMIT = 160; 249 | 250 | extern void BatchDataMapping(const PackView& index, uint8_t* space, size_t batch, 251 | const std::function& reader); 252 | extern void BatchFindPos(const PackView& pack, size_t batch, const std::function& reader, 253 | const std::function& output, const uint8_t* bitmap); 254 | 255 | extern void BatchLocate(const PackView& index, unsigned batch, const uint8_t* __restrict__ keys, 256 | uint8_t key_len, uint64_t* 
__restrict__ out); 257 | extern unsigned BatchSearch(const PackView& pack, unsigned batch, const uint8_t* const keys[], const uint8_t* out[]); 258 | extern unsigned BatchFetch(const PackView& pack, const uint8_t* __restrict__ dft_val, unsigned batch, 259 | const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, unsigned* __restrict__ miss); 260 | extern unsigned BatchSearch(const PackView& base, const PackView& patch, unsigned batch, 261 | const uint8_t* const keys[], const uint8_t* out[]); 262 | extern unsigned BatchFetch(const PackView& base, const PackView& patch, const uint8_t* __restrict__ dft_val, 263 | unsigned batch, const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, 264 | unsigned* __restrict__ miss); 265 | 266 | extern BuildStatus Rebuild(const PackView& base, const DataReaders& in, IDataWriter& out, Retry retry); 267 | 268 | } //shd 269 | #endif //SHD_INTERNAL_H_ 270 | -------------------------------------------------------------------------------- /src/pipeline.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
//==============================================================================

#pragma once
#ifndef SHD_PIPELINE_H_
#define SHD_PIPELINE_H_

// The Pipeline overloads below are generated code. The generator is kept here
// for reference; calling GenCode(depth) prints the overload with `depth` stages.
/* ========================================================================
#include <iostream>

void GenCode(unsigned depth) {
	if (depth < 2) {
		return;
	}
	std::cout << "template <unsigned Bubble";
	for (unsigned i = 1; i <= depth; i++) {
		std::cout << ", typename P" << i;
	}
	std::cout << ">\nstatic inline __attribute__((always_inline)) void\nPipeline(size_t n";
	for (unsigned i = 1; i <= depth; i++) {
		std::cout << ", const P" << i << "& p" << i;
	}
	std::cout << ") {\n\tusing S1 = std::invoke_result_t<const P1&, size_t>;\n";
	for (unsigned i = 2; i < depth; i++) {
		std::cout << "\tusing S" << i << " = std::invoke_result_t<const P" << i << "&, S" << (i-1) << "&, size_t>;\n";
	}
	std::cout << "\tconstexpr unsigned M = Bubble + 1;\n"
			<< "\tif (n < M*" << (depth-1) << ") {\n"
			<< "\t\tunion {\n\t\t\t";
	for (unsigned i = 1; i < depth; i++) {
		std::cout << "S" << i << " s" << i << "; ";
	}
	std::cout << "\n\t\t} ctx[M*" << (depth-1) << "-1];\n"
			<< "\t\tfor (size_t i = 0; i < n; i++) ctx[i].s1 = p1(i);\n";
	for (unsigned i = 2; i < depth; i++) {
		std::cout << "\t\tfor (size_t i = 0; i < n; i++) ctx[i].s" << i
				<< " = p" << i << "(ctx[i].s" << (i-1) << ", i);\n";
	}
	std::cout << "\t\tfor (size_t i = 0; i < n; i++) p" << depth << "(ctx[i].s" << (depth-1) << ", i);\n"
			<< "\t\treturn;\n"
			<< "\t}\n";
	for (unsigned i = 1; i < depth; i++) {
		std::cout << "\tS" << i << " s" << i << "[M];\n";
	}
	for (unsigned i = 1; i < depth; i++) {
		std::cout << "\tfor (unsigned j = 0; j < M; j++) {\n";
		for (unsigned j = i; j > 1; j--) {
			std::cout << "\t\ts" << j << "[j] = p" << j << "(s" << (j-1) << "[j], M*" << (i-j) << "+j);\n";
		}
		std::cout << "\t\ts1[j] = p1(M*" << (i-1) << "+j);\n"
				<< "\t}\n";
	}
	std::cout << "\tunsigned k = 0;\n"
			<< "\tfor (size_t i = M*" << (depth-1) << "; i < n; i++) {\n"
			<< "\t\tp" << depth << "(s" << (depth-1) << "[k], i-M*" << (depth-1) << ");\n";
	for (unsigned i = depth-1; i > 1; i--) {
		std::cout << "\t\ts" << i << "[k] = p" << i << "(s" << (i-1) << "[k], i-M*" << (i-1) << ");\n";
	}
	std::cout << "\t\ts1[k] = p1(i);\n"
			<< "\t\tif (++k >= M) k = 0;\n"
			<< "\t}\n";
	for (unsigned i = 1; i < depth; i++) {
		std::cout << "\tfor (unsigned j = 0; j < M; j++) {\n"
				<< "\t\tp" << depth << "(s" << (depth-1) << "[k], n-M*" << (depth-i) << "+j);\n";
		for (unsigned j = depth-1; j > i; j--) {
			std::cout << "\t\ts" << j << "[k] = p" << j << "(s" << (j-1) << "[k], n-M*" << (j-i) << "+j);\n";
		}
		std::cout << "\t\tif (++k >= M) k = 0;\n"
				<< "\t}\n";
	}
	std::cout << "}\n" << std::endl;
}
======================================================================== */

#include <cstddef>
#include <type_traits>

// Software pipeline over n items.
//
// Stage 1 is called as p1(index) and returns S1. Each middle stage k is called
// as pk(S(k-1)&, index) and returns Sk. The last stage is called the same way
// and its result is ignored. For every item the stages run in order, and the
// last stage sees items in increasing index order.
//
// Consecutive stages of the same item are executed M = Bubble + 1 iterations
// apart, with the stages of other items interleaved in between, so that work
// started by one stage (e.g. a prefetch) has time to finish before the next
// stage of that item runs. Bubble is the explicit template argument:
// Pipeline<8>(n, p1, p2, p3).
//
// When n is too small to fill the pipeline (n < M * (stages - 1)) the stages
// are simply run one after another over all items.
//
// Stage result types are kept in plain arrays and in a union, so they must be
// default-constructible, assignable and trivial enough to be union members.

// Two-stage pipeline.
template <unsigned Bubble, typename P1, typename P2>
static inline __attribute__((always_inline)) void
Pipeline(size_t n, const P1& p1, const P2& p2) {
	using S1 = std::invoke_result_t<const P1&, size_t>;
	constexpr unsigned M = Bubble + 1;
	if (n < M*1) {
		union {
			S1 s1;
		} ctx[M*1-1];
		for (size_t i = 0; i < n; i++) ctx[i].s1 = p1(i);
		for (size_t i = 0; i < n; i++) p2(ctx[i].s1, i);
		return;
	}
	S1 s1[M];
	for (unsigned j = 0; j < M; j++) {
		s1[j] = p1(M*0+j);
	}
	unsigned k = 0;
	for (size_t i = M*1; i < n; i++) {
		p2(s1[k], i-M*1);
		s1[k] = p1(i);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p2(s1[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
}

// Three-stage pipeline.
template <unsigned Bubble, typename P1, typename P2, typename P3>
static inline __attribute__((always_inline)) void
Pipeline(size_t n, const P1& p1, const P2& p2, const P3& p3) {
	using S1 = std::invoke_result_t<const P1&, size_t>;
	using S2 = std::invoke_result_t<const P2&, S1&, size_t>;
	constexpr unsigned M = Bubble + 1;
	if (n < M*2) {
		union {
			S1 s1; S2 s2;
		} ctx[M*2-1];
		for (size_t i = 0; i < n; i++) ctx[i].s1 = p1(i);
		for (size_t i = 0; i < n; i++) ctx[i].s2 = p2(ctx[i].s1, i);
		for (size_t i = 0; i < n; i++) p3(ctx[i].s2, i);
		return;
	}
	S1 s1[M];
	S2 s2[M];
	for (unsigned j = 0; j < M; j++) {
		s1[j] = p1(M*0+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s2[j] = p2(s1[j], M*0+j);
		s1[j] = p1(M*1+j);
	}
	unsigned k = 0;
	for (size_t i = M*2; i < n; i++) {
		p3(s2[k], i-M*2);
		s2[k] = p2(s1[k], i-M*1);
		s1[k] = p1(i);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p3(s2[k], n-M*2+j);
		s2[k] = p2(s1[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p3(s2[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
}

// Four-stage pipeline.
template <unsigned Bubble, typename P1, typename P2, typename P3, typename P4>
static inline __attribute__((always_inline)) void
Pipeline(size_t n, const P1& p1, const P2& p2, const P3& p3, const P4& p4) {
	using S1 = std::invoke_result_t<const P1&, size_t>;
	using S2 = std::invoke_result_t<const P2&, S1&, size_t>;
	using S3 = std::invoke_result_t<const P3&, S2&, size_t>;
	constexpr unsigned M = Bubble + 1;
	if (n < M*3) {
		union {
			S1 s1; S2 s2; S3 s3;
		} ctx[M*3-1];
		for (size_t i = 0; i < n; i++) ctx[i].s1 = p1(i);
		for (size_t i = 0; i < n; i++) ctx[i].s2 = p2(ctx[i].s1, i);
		for (size_t i = 0; i < n; i++) ctx[i].s3 = p3(ctx[i].s2, i);
		for (size_t i = 0; i < n; i++) p4(ctx[i].s3, i);
		return;
	}
	S1 s1[M];
	S2 s2[M];
	S3 s3[M];
	for (unsigned j = 0; j < M; j++) {
		s1[j] = p1(M*0+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s2[j] = p2(s1[j], M*0+j);
		s1[j] = p1(M*1+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s3[j] = p3(s2[j], M*0+j);
		s2[j] = p2(s1[j], M*1+j);
		s1[j] = p1(M*2+j);
	}
	unsigned k = 0;
	for (size_t i = M*3; i < n; i++) {
		p4(s3[k], i-M*3);
		s3[k] = p3(s2[k], i-M*2);
		s2[k] = p2(s1[k], i-M*1);
		s1[k] = p1(i);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p4(s3[k], n-M*3+j);
		s3[k] = p3(s2[k], n-M*2+j);
		s2[k] = p2(s1[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p4(s3[k], n-M*2+j);
		s3[k] = p3(s2[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p4(s3[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
}

// Seven-stage pipeline.
template <unsigned Bubble, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7>
static inline __attribute__((always_inline)) void
Pipeline(size_t n, const P1& p1, const P2& p2, const P3& p3, const P4& p4, const P5& p5, const P6& p6, const P7& p7) {
	using S1 = std::invoke_result_t<const P1&, size_t>;
	using S2 = std::invoke_result_t<const P2&, S1&, size_t>;
	using S3 = std::invoke_result_t<const P3&, S2&, size_t>;
	using S4 = std::invoke_result_t<const P4&, S3&, size_t>;
	using S5 = std::invoke_result_t<const P5&, S4&, size_t>;
	using S6 = std::invoke_result_t<const P6&, S5&, size_t>;
	constexpr unsigned M = Bubble + 1;
	if (n < M*6) {
		union {
			S1 s1; S2 s2; S3 s3; S4 s4; S5 s5; S6 s6;
		} ctx[M*6-1];
		for (size_t i = 0; i < n; i++) ctx[i].s1 = p1(i);
		for (size_t i = 0; i < n; i++) ctx[i].s2 = p2(ctx[i].s1, i);
		for (size_t i = 0; i < n; i++) ctx[i].s3 = p3(ctx[i].s2, i);
		for (size_t i = 0; i < n; i++) ctx[i].s4 = p4(ctx[i].s3, i);
		for (size_t i = 0; i < n; i++) ctx[i].s5 = p5(ctx[i].s4, i);
		for (size_t i = 0; i < n; i++) ctx[i].s6 = p6(ctx[i].s5, i);
		for (size_t i = 0; i < n; i++) p7(ctx[i].s6, i);
		return;
	}
	S1 s1[M];
	S2 s2[M];
	S3 s3[M];
	S4 s4[M];
	S5 s5[M];
	S6 s6[M];
	for (unsigned j = 0; j < M; j++) {
		s1[j] = p1(M*0+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s2[j] = p2(s1[j], M*0+j);
		s1[j] = p1(M*1+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s3[j] = p3(s2[j], M*0+j);
		s2[j] = p2(s1[j], M*1+j);
		s1[j] = p1(M*2+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s4[j] = p4(s3[j], M*0+j);
		s3[j] = p3(s2[j], M*1+j);
		s2[j] = p2(s1[j], M*2+j);
		s1[j] = p1(M*3+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s5[j] = p5(s4[j], M*0+j);
		s4[j] = p4(s3[j], M*1+j);
		s3[j] = p3(s2[j], M*2+j);
		s2[j] = p2(s1[j], M*3+j);
		s1[j] = p1(M*4+j);
	}
	for (unsigned j = 0; j < M; j++) {
		s6[j] = p6(s5[j], M*0+j);
		s5[j] = p5(s4[j], M*1+j);
		s4[j] = p4(s3[j], M*2+j);
		s3[j] = p3(s2[j], M*3+j);
		s2[j] = p2(s1[j], M*4+j);
		s1[j] = p1(M*5+j);
	}
	unsigned k = 0;
	for (size_t i = M*6; i < n; i++) {
		p7(s6[k], i-M*6);
		s6[k] = p6(s5[k], i-M*5);
		s5[k] = p5(s4[k], i-M*4);
		s4[k] = p4(s3[k], i-M*3);
		s3[k] = p3(s2[k], i-M*2);
		s2[k] = p2(s1[k], i-M*1);
		s1[k] = p1(i);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*6+j);
		s6[k] = p6(s5[k], n-M*5+j);
		s5[k] = p5(s4[k], n-M*4+j);
		s4[k] = p4(s3[k], n-M*3+j);
		s3[k] = p3(s2[k], n-M*2+j);
		s2[k] = p2(s1[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*5+j);
		s6[k] = p6(s5[k], n-M*4+j);
		s5[k] = p5(s4[k], n-M*3+j);
		s4[k] = p4(s3[k], n-M*2+j);
		s3[k] = p3(s2[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*4+j);
		s6[k] = p6(s5[k], n-M*3+j);
		s5[k] = p5(s4[k], n-M*2+j);
		s4[k] = p4(s3[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*3+j);
		s6[k] = p6(s5[k], n-M*2+j);
		s5[k] = p5(s4[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*2+j);
		s6[k] = p6(s5[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
	for (unsigned j = 0; j < M; j++) {
		p7(s6[k], n-M*1+j);
		if (++k >= M) k = 0;
	}
}
#endif //SHD_PIPELINE_H_
-------------------------------------------------------------------------------- /src/search.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include "internal.h" 23 | #include "pipeline.h" 24 | 25 | namespace shd { 26 | 27 | struct Step1 { 28 | const SegmentView* seg; 29 | V96 id; 30 | uint32_t l1pos; 31 | }; 32 | 33 | struct Step2 { 34 | const SegmentView* seg; 35 | uint32_t section; 36 | uint8_t bit_off; 37 | }; 38 | 39 | struct Step3 { 40 | const uint8_t* line; 41 | }; 42 | 43 | static FORCE_INLINE Step1 Calc1(const PackView& index, const uint8_t* key, uint8_t key_len) { 44 | Step1 out; 45 | out.id = GenID(index.seed, key, key_len); 46 | out.seg = &index.segments[L0Hash(out.id) % index.l0sz]; 47 | out.l1pos = SkewMap(L1Hash(out.id), out.seg->l1bd); 48 | return out; 49 | } 50 | 51 | static FORCE_INLINE Step1 Process1(const PackView& index, const uint8_t* key, uint8_t key_len) { 52 | Step1 out = Calc1(index, key, key_len); 53 | PrefetchForNext(&out.seg->cells[out.l1pos]); 54 | return out; 55 | } 56 | 57 | static FORCE_INLINE Step1 Process1(const PackView& pack, const uint8_t* key) { 58 | return Process1(pack, key, pack.key_len); 59 | } 60 | 61 | static FORCE_INLINE Step2 Calc2(const Step1& in) { 62 | Step2 out; 63 | out.seg = in.seg; 64 | const auto bit_pos = L2Hash(in.id, in.seg->cells[in.l1pos]) % in.seg->l2sz; 65 | out.section = bit_pos / BITMAP_SECTION_SIZE; 66 | out.bit_off = bit_pos % BITMAP_SECTION_SIZE; 67 | return out; 68 | } 69 | 70 | static FORCE_INLINE Step2 Process2(const Step1& in) { 71 | Step2 out = Calc2(in); 72 | PrefetchForNext(&out.seg->sections[out.section]); 73 | return out; 74 | } 75 | 76 | static FORCE_INLINE uint64_t CalcPos(const Step2& in) { 77 | auto& section = in.seg->sections[in.section]; 78 | uint32_t cnt = section.step; //step is the last field of section 79 | auto v = (const uint64_t*)section.b32; 80 | const uint64_t mask = (1LL << (in.bit_off & 63U)) - 1U; 81 | switch (in.bit_off >> 6U) { 82 | case 3: cnt += PopCount64(*v++); 83 | 
case 2: cnt += PopCount64(*v++); 84 | case 1: cnt += PopCount64(*v++); 85 | case 0: cnt += PopCount64(*v & mask); 86 | } 87 | return in.seg->offset + cnt; 88 | } 89 | 90 | uint64_t CalcPos(const PackView& index, const uint8_t* key, uint8_t key_len) { 91 | return CalcPos(Calc2(Calc1(index, key, key_len))); 92 | } 93 | 94 | #ifndef CACHE_BLOCK_SIZE 95 | #define CACHE_BLOCK_SIZE 64U 96 | #endif 97 | static_assert(CACHE_BLOCK_SIZE >= 64U && (CACHE_BLOCK_SIZE&(CACHE_BLOCK_SIZE-1)) == 0); 98 | 99 | static FORCE_INLINE Step3 Process3(const PackView& pack, const Step2& in, bool fetch_val=false) { 100 | const auto pos = CalcPos(in); 101 | Step3 out; 102 | if (LIKELY(pos < pack.item)) { 103 | out.line = pack.content + pos*pack.line_size; 104 | PrefetchForNext(out.line); 105 | auto off = (uintptr_t)out.line & (CACHE_BLOCK_SIZE-1); 106 | auto blk = (const void*)(((uintptr_t)out.line & ~(uintptr_t)(CACHE_BLOCK_SIZE-1)) + CACHE_BLOCK_SIZE); 107 | if (off + pack.key_len > CACHE_BLOCK_SIZE) { 108 | PrefetchForNext(blk); 109 | } else if (fetch_val && off + pack.line_size > CACHE_BLOCK_SIZE) { 110 | PrefetchForFuture(blk); 111 | } 112 | } else { 113 | out.line = nullptr; 114 | } 115 | return out; 116 | } 117 | 118 | void BatchLocate(const PackView& index, unsigned batch, const uint8_t* __restrict__ keys, 119 | uint8_t key_len, uint64_t* __restrict__ out) { 120 | Pipeline<8>(batch, 121 | [&index, keys, key_len](unsigned i) -> Step1 { 122 | return Process1(index, keys+i*key_len, key_len); 123 | }, 124 | [](const Step1& in, unsigned) -> Step2 { 125 | return Process2(in); 126 | }, 127 | [&out](const Step2& in, unsigned i) { 128 | out[i] = CalcPos(in); 129 | } 130 | ); 131 | } 132 | 133 | unsigned BatchSearch(const PackView& pack, unsigned batch, const uint8_t* const keys[], const uint8_t* out[]) { 134 | if (pack.type != Type::KV_INLINE && pack.type != Type::KEY_SET) { 135 | return 0; 136 | } 137 | unsigned hit = 0; 138 | Pipeline<7>(batch, 139 | [&pack, keys](unsigned i) -> Step1 { 140 
| return Process1(pack, keys[i]); 141 | }, 142 | [](const Step1& in, unsigned) -> Step2 { 143 | return Process2(in); 144 | }, 145 | [&pack](const Step2& in, unsigned) -> Step3 { 146 | return Process3(pack, in); 147 | }, 148 | [&pack, &hit, keys, out](const Step3& in, unsigned i) { 149 | if (LIKELY(in.line != nullptr) && Equal(keys[i], in.line, pack.key_len)) { 150 | hit++; 151 | out[i] = in.line + pack.key_len; 152 | } else { 153 | out[i] = nullptr; 154 | } 155 | } 156 | ); 157 | return hit; 158 | } 159 | 160 | /* Copies the value of every key found in a KV_INLINE pack to data + i*val_len and returns the number of hits (0 for any other pack type). Keys are read back to back from `keys`, key_len bytes each. For a key that is not found: dft_val is copied when it is non-null; otherwise the index i is appended through `miss` when that is non-null; in both remaining cases the data slot of that key is left untouched. */ unsigned BatchFetch(const PackView& pack, const uint8_t* __restrict__ dft_val, unsigned batch, 161 | const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, unsigned* __restrict__ miss) { 162 | if (pack.type != Type::KV_INLINE) { 163 | return 0; 164 | } 165 | unsigned hit = 0; 166 | Pipeline<6>(batch, 167 | [&pack, keys](unsigned i) -> Step1 { 168 | auto key = keys+i*pack.key_len; 169 | return Process1(pack, key); 170 | }, 171 | [](const Step1& in, unsigned) -> Step2 { 172 | return Process2(in); 173 | }, 174 | [&pack](const Step2& in, unsigned) -> Step3 { 175 | return Process3(pack, in, true); 176 | }, 177 | [&pack, &hit, keys, data, dft_val, &miss](const Step3& in, unsigned i) { 178 | auto key = keys + i*pack.key_len; 179 | auto out = data + i*pack.val_len; 180 | auto src = in.line + pack.key_len; 181 | if (LIKELY(in.line != nullptr) && Equal(key, in.line, pack.key_len)) { 182 | hit++; 183 | } else if (dft_val != nullptr) { 184 | src = dft_val; 185 | } else if (miss != nullptr) { 186 | /* Record the miss and stop here: src does not point at a matching record (it is derived from a null or non-matching line), so it must not be copied. */ *miss++ = i; return; 187 | } else { 188 | return; 189 | } 190 | memcpy(out, src, pack.val_len); 191 | } 192 | ); 193 | return hit; 194 | } 195 | 196 | /* Intermediate result of the two-level lookups below: v is the already resolved value pointer when non-null; otherwise s carries the pending stage state of the fallback lookup. */ template 197 | struct Relay { 198 | const uint8_t* v; 199 | T s; 200 | }; 201 | 202 | using Step4 = Relay; 203 | using Step5 = Relay; 204 | using Step6 = Relay; 205 | 206 | unsigned BatchSearch(const PackView& base, const PackView& patch, 207 | unsigned batch, const uint8_t* const keys[], const uint8_t* out[]) { 208 | if
((base.type != Type::KV_INLINE && base.type != Type::KEY_SET) 209 | || base.type != patch.type || base.key_len != patch.key_len) { 210 | return 0; 211 | } 212 | 213 | unsigned hit = 0; 214 | /* Stages 1-3 look the key up in patch; stages 4-6 repeat the lookup in base only for keys that patch did not contain. */ Pipeline<4>(batch, 215 | [&patch, keys](unsigned i) -> Step1 { 216 | return Process1(patch, keys[i]); 217 | }, 218 | [](const Step1& in, unsigned) -> Step2 { 219 | return Process2(in); 220 | }, 221 | [&patch](const Step2& in, unsigned) -> Step3 { 222 | return Process3(patch, in); 223 | }, 224 | /* patch hit: carry the value pointer forward with an empty stage state; patch miss: start the base lookup */ [&base, &patch, keys](const Step3& in, unsigned i) -> Step4 { 225 | if (LIKELY(in.line != nullptr) && Equal(keys[i], in.line, patch.key_len)) { 226 | return {in.line + patch.key_len, Step1{}}; 227 | } else { 228 | return {nullptr, Process1(base, keys[i])}; 229 | } 230 | }, 231 | [](const Step4& in, unsigned) -> Step5 { 232 | if (in.v != nullptr) { 233 | return {in.v, Step2{}}; 234 | } else { 235 | return {nullptr, Process2(in.s)}; 236 | } 237 | }, 238 | [&base](const Step5& in, unsigned) -> Step6 { 239 | if (in.v != nullptr) { 240 | return {in.v, Step3{}}; 241 | } else { 242 | return {nullptr, Process3(base, in.s)}; 243 | } 244 | }, 245 | /* final stage: a value resolved from patch wins; otherwise the key is compared against the base record */ [&base, &hit, keys, out](const Step6& in, unsigned i) { 246 | if (in.v != nullptr) { 247 | hit++; 248 | out[i] = in.v; 249 | } else if (LIKELY(in.s.line != nullptr) && Equal(keys[i], in.s.line, base.key_len)) { 250 | hit++; 251 | out[i] = in.s.line + base.key_len; 252 | } else { 253 | out[i] = nullptr; 254 | } 255 | } 256 | ); 257 | return hit; 258 | } 259 | 260 | /* Two-level variant of BatchFetch: every key is looked up in patch first and in base only when patch does not contain it. Returns 0 immediately unless both packs are KV_INLINE with equal key_len and equal val_len. */ unsigned BatchFetch(const PackView& base, const PackView& patch, const uint8_t* __restrict__ dft_val, unsigned batch, 261 | const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, unsigned* __restrict__ miss) { 262 | if (base.type != Type::KV_INLINE || base.type != patch.type 263 | || base.key_len != patch.key_len || base.val_len != patch.val_len) { 264 | return 0; 265 | } 266 | 267 | unsigned hit = 0; 268 | Pipeline<3>(batch, 269 | [&patch, keys](unsigned i) -> Step1 { 270 | auto key =
keys+i*patch.key_len; 271 | return Process1(patch, key); 272 | }, 273 | [](const Step1& in, unsigned) -> Step2 { 274 | return Process2(in); 275 | }, 276 | [&patch](const Step2& in, unsigned) -> Step3 { 277 | return Process3(patch, in, true); 278 | }, 279 | /* patch hit: carry the value pointer forward; patch miss: start the lookup in base */ [&base, &patch, keys](const Step3& in, unsigned i) -> Step4 { 280 | auto key = keys + i*base.key_len; 281 | if (LIKELY(in.line != nullptr) && Equal(key, in.line, patch.key_len)) { 282 | return {in.line + patch.key_len, Step1{}}; 283 | } else { 284 | return {nullptr, Process1(base, key)}; 285 | } 286 | }, 287 | [](const Step4& in, unsigned) -> Step5 { 288 | if (in.v != nullptr) { 289 | return {in.v, Step2{}}; 290 | } else { 291 | return {nullptr, Process2(in.s)}; 292 | } 293 | }, 294 | [&base](const Step5& in, unsigned) -> Step6 { 295 | if (in.v != nullptr) { 296 | return {in.v, Step3{}}; 297 | } else { 298 | return {nullptr, Process3(base, in.s, true)}; 299 | } 300 | }, 301 | /* final stage: pick the source of the value (patch, base, or dft_val) and copy it into the caller's slot */ [&base, &hit, keys, data, dft_val, &miss](const Step6& in, unsigned i) { 302 | auto key = keys + i*base.key_len; 303 | auto out = data + i*base.val_len; 304 | auto src = in.v; 305 | if (src != nullptr) { 306 | hit++; 307 | } else if (LIKELY(in.s.line != nullptr) && Equal(key, in.s.line, base.key_len)) { 308 | hit++; 309 | src = in.s.line + base.key_len; 310 | } else if (dft_val != nullptr) { 311 | src = dft_val; 312 | } else if (miss != nullptr) { 313 | /* Record the miss and stop here: src is still nullptr on this path, so falling through to memcpy would read from a null pointer. */ *miss++ = i; return; 314 | } else { 315 | return; 316 | } 317 | memcpy(out, src, base.val_len); 318 | } 319 | ); 320 | return hit; 321 | } 322 | 323 | /* Number of keys handled per round by the windowed batch routines below. */ static constexpr unsigned WINDOW_SIZE = 32; 324 | 325 | /* Reports the position of `batch` keys supplied one at a time by `reader`: for each key, `output` is called with its position when the stored key matches and with UINT64_MAX otherwise. Does nothing for INDEX_ONLY packs. When `bitmap` is non-null, the bit for each position is prefetched with PrefetchBit. */ void BatchFindPos(const PackView& pack, size_t batch, const std::function& reader, 326 | const std::function& output, const uint8_t* bitmap) { 327 | if (pack.type == Type::INDEX_ONLY) return; 328 | auto buf = std::make_unique(WINDOW_SIZE*pack.key_len); 329 | 330 | /* per-slot state; the same storage is reused by the successive stages of one window */ union { 331 | Step1 s1; 332 | Step2 s2; 333 | struct { 334 | uint64_t pos; 335 | const uint8_t* line; 336 | } s3; 337 | }
state[WINDOW_SIZE]; 338 | 339 | for (size_t i = 0; i < batch; i += WINDOW_SIZE) { 340 | auto m = std::min(static_cast(WINDOW_SIZE), batch-i); 341 | /* stage 1: read the keys of this window and start hashing */ for (unsigned j = 0; j < m; j++) { 342 | auto key = buf.get() + j * pack.key_len; 343 | reader(key); 344 | state[j].s1 = Process1(pack, key); 345 | } 346 | for (unsigned j = 0; j < m; j++) { 347 | state[j].s2 = Process2(state[j].s1); 348 | } 349 | /* stage 3: resolve the record address and prefetch it */ for (unsigned j = 0; j < m; j++) { 350 | auto pos = CalcPos(state[j].s2); 351 | /* A key that is not part of the pack can map to a position that is not below pack.item (Process3 and PerfectHashtable::search guard against the same case). Treat it as a miss at run time instead of relying on assert, which is compiled out with NDEBUG and would let the record address point past the content area. */ if (pos >= pack.item) { state[j].s3 = {UINT64_MAX, nullptr}; continue; } 352 | auto line = pack.content + pos*pack.line_size; 353 | PrefetchForNext(line); 354 | if (bitmap != nullptr) { 355 | PrefetchBit(bitmap,pos); 356 | } 357 | state[j].s3 = {pos, line}; 358 | } 359 | /* stage 4: compare keys and report the position or UINT64_MAX */ for (unsigned j = 0; j < m; j++) { 360 | auto key = buf.get() + j * pack.key_len; 361 | auto& s = state[j].s3; 362 | if (s.line != nullptr && Equal(key, s.line, pack.key_len)) { 363 | output(s.pos); 364 | } else { 365 | output(UINT64_MAX); 366 | } 367 | } 368 | } 369 | } 370 | 371 | /* Copies `batch` records, supplied one at a time by `reader`, into `space` at the slot given by the position of each record's key (slot size index.line_size). Records are processed in windows of WINDOW_SIZE, with the destination prefetched for writing before the copy. */ void BatchDataMapping(const PackView& index, uint8_t* space, size_t batch, const std::function& reader) { 372 | auto buf = std::make_unique(WINDOW_SIZE*index.line_size); 373 | 374 | union { 375 | Step1 s1; 376 | Step2 s2; 377 | struct { 378 | uint8_t* line; 379 | } s3; 380 | } state[WINDOW_SIZE]; 381 | 382 | for (size_t i = 0; i < batch; i += WINDOW_SIZE) { 383 | auto m = std::min(static_cast(WINDOW_SIZE), batch - i); 384 | for (unsigned j = 0; j < m; j++) { 385 | auto line = buf.get() + j * index.line_size; 386 | reader(line); 387 | state[j].s1 = Process1(index, line); 388 | } 389 | for (unsigned j = 0; j < m; j++) { 390 | state[j].s2 = Process2(state[j].s1); 391 | } 392 | for (unsigned j = 0; j < m; j++) { 393 | auto line = space + CalcPos(state[j].s2)*index.line_size; 394 | PrefetchForWrite(line); 395 | auto off = (uintptr_t)line & (CACHE_BLOCK_SIZE-1); 396 | auto blk = (const void*)(((uintptr_t)line & ~(uintptr_t)(CACHE_BLOCK_SIZE-1)) + CACHE_BLOCK_SIZE); 397 | if (off + index.line_size > CACHE_BLOCK_SIZE) { 398 |
PrefetchForWrite(blk); 399 | } 400 | state[j].s3.line = line; 401 | } 402 | for (unsigned j = 0; j < m; j++) { 403 | auto line = buf.get() + j * index.line_size; 404 | auto& s = state[j].s3; 405 | memcpy(s.line, line, index.line_size); 406 | } 407 | } 408 | } 409 | 410 | } //shd 411 | -------------------------------------------------------------------------------- /src/shd.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include "internal.h" 22 | #include "shd.h" 23 | 24 | 25 | namespace shd { 26 | 27 | /* Validates the serialized table found at addr (size bytes) and builds a view of it. The returned buffer holds one PackView followed by seg_cnt SegmentView entries whose pointers refer into addr, so addr has to stay valid for as long as the view is used. Returns nullptr when the magic or type is wrong, a length field is invalid, or any section does not fit into size. */ std::unique_ptr CreatePackView(const uint8_t* addr, size_t size) { 28 | size_t addr_off = sizeof(Header); 29 | if (size < addr_off) return nullptr; 30 | 31 | auto header = (const Header*)addr; 32 | if (header->magic != SHD_MAGIC) { 33 | return nullptr; 34 | } 35 | /* the cases have no break before INDEX_ONLY: each type is also subject to the checks of the types listed after it */ switch (header->type) { 36 | case PerfectHashtable::KV_SEPARATED: if (header->val_len != OFFSET_FIELD_SIZE) return nullptr; 37 | case PerfectHashtable::KV_INLINE: if (header->val_len == 0) return nullptr; 38 | case PerfectHashtable::KEY_SET: if (header->key_len == 0) return nullptr; 39 | case PerfectHashtable::INDEX_ONLY: break; 40 | default: return nullptr; 41 | } 42 | 43 | if (header->seg_cnt == 0 || header->seg_cnt > MAX_SEGMENT) { 44 | return nullptr; 45 | } 46 | /* one 32-bit entry per segment follows the header; the loop below sums these into total_item */ const auto parts = (const uint32_t*)(addr + addr_off); 47 | addr_off += header->seg_cnt*4U; 48 | if (size < addr_off) return nullptr; 49 | 50 | auto view = std::make_unique(sizeof(PackView) + sizeof(SegmentView) * header->seg_cnt); 51 | auto index = (PackView*)view.get(); 52 | *index = PackView{}; 53 | index->type = (Type)header->type; 54 | index->key_len = header->key_len; 55 | index->val_len = header->val_len; 56 | index->line_size = ((uint32_t)index->key_len) + index->val_len; 57 | index->seed = header->seed; 58 | index->l0sz = header->seg_cnt; 59 | /* the item count is stored as two fields: item_high supplies the bits above the low 32 */ index->item = ((((uint64_t)header->item_high)<<32U) | header->item); 60 | 61 | uint64_t total_item = 0; 62 | /* lay out the per-segment cell areas back to back; offset is the running item count before the segment */ for (unsigned i = 0; i < header->seg_cnt; i++) { 63 | index->segments[i] = SegmentView{}; 64 | index->segments[i].l1bd = L1Band(parts[i]); 65 | index->segments[i].l2sz = L2Size(parts[i]); 66 | index->segments[i].offset = total_item; 67 | total_item += parts[i]; 68 | index->segments[i].cells = addr + addr_off; 69 | addr_off += L1Size(parts[i]); 70 | if (size < addr_off) return
nullptr; 71 | } 72 | if (total_item != index->item) { 73 | return nullptr; 74 | } 75 | /* Round the offset up to a multiple of 32. The mask is built in size_t: ~31U is a 32-bit value, and AND-ing with it would clear the upper 32 bits of addr_off once the preceding sections exceed 4 GiB. */ addr_off = (addr_off+31U)&(~(size_t)31U); 76 | if (size < addr_off) return nullptr; 77 | for (unsigned i = 0; i < header->seg_cnt; i++) { 78 | index->segments[i].sections = (const BitmapSection*)(addr + addr_off); 79 | addr_off += SectionSize(parts[i]) * (size_t)sizeof(BitmapSection); 80 | if (size < addr_off) return nullptr; 81 | } 82 | 83 | /* every type except INDEX_ONLY stores total_item records of line_size bytes; KV_SEPARATED additionally has an extend area right after them */ if (header->type != PerfectHashtable::INDEX_ONLY) { 84 | index->content = addr + addr_off; 85 | addr_off += index->line_size * total_item; 86 | if (size < addr_off) return nullptr; 87 | if (header->type == PerfectHashtable::KV_SEPARATED) { 88 | index->extend = addr + addr_off; 89 | if (size < addr_off + total_item*2U) return nullptr; 90 | } 91 | } 92 | index->space_end = addr + size; 93 | 94 | return view; 95 | } 96 | 97 | /* Loads a table from a file. COPY_DATA reads the file into a MemBlock; every other policy maps the file, with MAP_FETCH and MAP_OCCUPY selecting MemMap::FETCH and MemMap::OCCUPY. If loading or validation fails, the constructor returns early without storing a view and without calling _post_init. */ PerfectHashtable::PerfectHashtable(const std::string& path, LoadPolicy load_policy) { 98 | if (load_policy == COPY_DATA) { 99 | auto mem = MemBlock::LoadFile(path.c_str()); 100 | if (!mem) { 101 | return; 102 | } 103 | auto view = CreatePackView(mem.addr(), mem.size()); 104 | if (view == nullptr) { 105 | return; 106 | } 107 | m_mem = std::move(mem); 108 | m_view = std::move(view); 109 | } else { 110 | MemMap::Policy policy = MemMap::MAP_ONLY; 111 | if (load_policy == MAP_FETCH) { 112 | policy = MemMap::FETCH; 113 | } else if (load_policy == MAP_OCCUPY) { 114 | policy = MemMap::OCCUPY; 115 | } 116 | MemMap res(path.c_str(), policy); 117 | if (!res) { 118 | return; 119 | } 120 | auto view = CreatePackView(res.addr(), res.size()); 121 | if (view == nullptr) { 122 | return; 123 | } 124 | m_res = std::move(res); 125 | m_view = std::move(view); 126 | } 127 | _post_init(); 128 | } 129 | 130 | /* Copies type, key length, value length and item count from the view into the object's members; the value length is reported as 0 for KV_SEPARATED. Only called after m_view has been set. */ void PerfectHashtable::_post_init() noexcept { 131 | auto index = (const PackView*)m_view.get(); 132 | m_type = index->type; 133 | m_key_len = index->key_len; 134 | if (index->type == KV_SEPARATED) { 135 | m_val_len = 0; 136 | } else { 137 |
m_val_len = index->val_len; 138 | } 139 | m_item = index->item; 140 | } 141 | 142 | /* Builds a table from memory filled by a callback: allocates a MemBlock of `size` bytes, lets `load` fill it, then validates the content. If allocation, loading or validation fails, the constructor returns early without storing a view. */ PerfectHashtable::PerfectHashtable(size_t size, const std::function& load) { 143 | auto mem = MemBlock(size); 144 | if (!mem || !load(mem.addr())) { 145 | return; 146 | } 147 | auto view = CreatePackView(mem.addr(), mem.size()); 148 | if (view == nullptr) { 149 | return; 150 | } 151 | m_mem = std::move(mem); 152 | m_view = std::move(view); 153 | _post_init(); 154 | } 155 | 156 | /* Returns the position computed for one key, or 0 when no view is loaded, key is null or key_len is 0. */ size_t PerfectHashtable::locate(const uint8_t* key, uint8_t key_len) const noexcept { 157 | auto index = (const PackView*)m_view.get(); 158 | if (UNLIKELY(index == nullptr || key == nullptr || key_len == 0)) { 159 | return 0; 160 | } 161 | return CalcPos(*index, key, key_len); 162 | } 163 | 164 | /* Batch form of locate; see BatchLocate. Returns without writing to out when no view is loaded, keys is null, key_len is 0, or (for types other than INDEX_ONLY) key_len differs from the table's key length. */ void PerfectHashtable::batch_locate(unsigned batch, const uint8_t* __restrict__ keys, 165 | uint8_t key_len, uint64_t* __restrict__ out) { 166 | auto index = (const PackView*)m_view.get(); 167 | if (UNLIKELY(index == nullptr || keys == nullptr || key_len == 0 168 | || (index->type != INDEX_ONLY && key_len != index->key_len))) { 169 | return; 170 | } 171 | return BatchLocate(*index, batch, keys, key_len, out); 172 | } 173 | 174 | /* Decodes a length prefix stored 7 bits per byte, low-order group first, where a set high bit means another byte follows, and returns the slice of that many bytes right after the prefix. Returns a default-constructed Slice when the prefix or the value would extend past end, or when the prefix needs more than MAX_VALUE_LEN_BIT bits. */ Slice SeparatedValue(const uint8_t* pt, const uint8_t* end) { 175 | static_assert(MAX_VALUE_LEN_BIT % 7U == 0, "MAX_VALUE_LEN_BIT should be 7x"); 176 | 177 | uint64_t len = 0; 178 | for (unsigned sft = 0; sft < MAX_VALUE_LEN_BIT; sft += 7U) { 179 | if (pt >= end) { 180 | return {}; 181 | } 182 | uint8_t b = *pt++; 183 | if (b & 0x80U) { 184 | len |= static_cast(b & 0x7fU) << sft; 185 | } else { 186 | len |= static_cast(b) << sft; 187 | /* Compare against the bytes that remain (pt <= end holds here) instead of forming pt+len, which would be an out-of-range pointer and could wrap for a corrupt length. */ if (len > static_cast<uint64_t>(end - pt)) { 188 | return {}; 189 | } 190 | return {pt, len}; 191 | } 192 | } 193 | return {}; 194 | } 195 | 196 | /* Looks up one key (key_len bytes, taken from the table) and returns its value. Returns a default-constructed Slice when no view is loaded, key is null, the table is INDEX_ONLY, or the key is not found. */ Slice PerfectHashtable::search(const uint8_t* key) const noexcept { 197 | auto pack = (const PackView*)m_view.get(); 198 | if (UNLIKELY(pack == nullptr || key == nullptr || pack->type == INDEX_ONLY)) { 199 | return {}; 200 | } 201 | auto
pos = CalcPos(*pack, key, pack->key_len); 202 | auto line = pack->content + pos*pack->line_size; 203 | /* the bounds test comes first, so Equal never reads a record whose position is out of range */ if (UNLIKELY(pos >= pack->item) || !Equal(line, key, pack->key_len)) { 204 | return {}; 205 | } 206 | auto field = line + pack->key_len; 207 | if (pack->type != KV_SEPARATED) { 208 | return {field, pack->val_len}; 209 | } 210 | /* KV_SEPARATED: the field after the key holds an offset into the extend area, where the length-prefixed value is stored */ return SeparatedValue(pack->extend+ReadOffsetField(field), pack->space_end); 211 | } 212 | 213 | /* Batch lookup by key pointer; see BatchSearch. When patch is non-null, each key is looked up in patch first and in this table only on a miss. Returns the number of hits, or 0 when this table or the patch has no view loaded or keys/out is null. */ unsigned PerfectHashtable::batch_search(unsigned batch, const uint8_t* const keys[], const uint8_t* out[], 214 | const PerfectHashtable* patch) const noexcept { 215 | auto base = (const PackView*)m_view.get(); 216 | if (base == nullptr || keys == nullptr || out == nullptr) { 217 | return 0; 218 | } 219 | if (patch == nullptr) { 220 | return BatchSearch(*base, batch, keys, out); 221 | } else { 222 | auto delta = (const PackView*)patch->m_view.get(); 223 | if (delta == nullptr) { 224 | return 0; 225 | } 226 | return BatchSearch(*base, *delta, batch, keys, out); 227 | } 228 | } 229 | 230 | /* Batch value copy; see BatchFetch. dft_val is forwarded as the default value and no miss list is requested (nullptr is passed). When patch is non-null it is consulted before this table. Returns the number of hits, or 0 when this table or the patch has no view loaded or keys/data is null. */ unsigned PerfectHashtable::batch_fetch(unsigned batch, const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, 231 | const uint8_t* __restrict__ dft_val, const PerfectHashtable* patch) const noexcept { 232 | auto base = (const PackView*)m_view.get(); 233 | if (base == nullptr || keys == nullptr || data == nullptr) { 234 | return 0; 235 | } 236 | if (patch == nullptr) { 237 | return BatchFetch(*base, dft_val, batch, keys, data, nullptr); 238 | } else { 239 | auto delta = (const PackView*)patch->m_view.get(); 240 | if (delta == nullptr) { 241 | return 0; 242 | } 243 | return BatchFetch(*base, *delta, dft_val, batch, keys, data, nullptr); 244 | } 245 | } 246 | 247 | /* Like batch_fetch, but without a default value: `miss` is forwarded to BatchFetch so that the indexes of keys that are not found can be recorded. */ unsigned PerfectHashtable::batch_try_fetch(unsigned batch, const uint8_t* __restrict__ keys, uint8_t* __restrict__ data, 248 | unsigned* __restrict__ miss, const PerfectHashtable* patch) const noexcept { 249 | auto base = (const PackView*)m_view.get(); 250 | if (base == nullptr || keys == nullptr
|| data == nullptr) { 251 | return 0; 252 | } 253 | if (patch == nullptr) { 254 | return BatchFetch(*base, nullptr, batch, keys, data, miss); 255 | } else { 256 | auto delta = (const PackView*)patch->m_view.get(); 257 | if (delta == nullptr) { 258 | return 0; 259 | } 260 | return BatchFetch(*base, *delta, nullptr, batch, keys, data, miss); 261 | } 262 | } 263 | 264 | /* Forwards to Rebuild with this table as the base, the given readers as input and `out` as destination. Returns BUILD_STATUS_BAD_INPUT when no view is loaded or the table is INDEX_ONLY. */ BuildStatus PerfectHashtable::derive(const DataReaders& in, IDataWriter& out, Retry retry) const { 265 | auto base = (const PackView*)m_view.get(); 266 | if (base == nullptr || base->type == INDEX_ONLY) { 267 | return BUILD_STATUS_BAD_INPUT; 268 | } 269 | return Rebuild(*base, in, out, retry); 270 | } 271 | 272 | } //shd 273 | -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see .
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace shd { 30 | 31 | /* Logger installed by default: formats the message to stderr with vfprintf. */ struct DefaultLogger : public Logger { 32 | void printf(const char* format, va_list args) override; 33 | static DefaultLogger instance; 34 | }; 35 | void DefaultLogger::printf(const char *format, va_list args) { 36 | ::vfprintf(stderr, format, args); 37 | } 38 | DefaultLogger DefaultLogger::instance; 39 | Logger* Logger::s_instance = &DefaultLogger::instance; 40 | 41 | /* printf-style entry point: forwards the arguments to the installed logger; does nothing when s_instance is null. */ void Logger::Printf(const char* format, ... ) { 42 | if (s_instance != nullptr) { 43 | va_list args; 44 | va_start(args, format); 45 | s_instance->printf(format, args); 46 | va_end(args); 47 | } 48 | } 49 | 50 | /* Rounds n up to the next multiple of 0x200000 bytes (2 MiB). */ static inline constexpr size_t RoundUp(size_t n) { 51 | constexpr size_t m = 0x1fffff; 52 | return (n+m)&(~m); 53 | }; 54 | 55 | /* Allocates `size` bytes; the block stays empty for size 0 or when allocation fails. Requests of at least 0x4000000 bytes first try an anonymous mmap of the rounded-up size with MAP_HUGETLB, retrying without that flag when the failure is ENOMEM; if no mapping is obtained, or for smaller requests, malloc is used. m_mmap records which path succeeded so the destructor can release the memory the matching way. */ MemBlock::MemBlock(size_t size) noexcept : MemBlock() { 56 | if (size == 0) { 57 | return; 58 | } 59 | if (size >= 0x4000000) { 60 | auto round_up_size = RoundUp(size); 61 | void* addr = mmap(nullptr, round_up_size, PROT_READ | PROT_WRITE, 62 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); 63 | if (addr == MAP_FAILED && errno == ENOMEM) { 64 | addr = mmap(nullptr, round_up_size, PROT_READ | PROT_WRITE, 65 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 66 | } 67 | if (addr != MAP_FAILED) { 68 | m_addr = static_cast(addr); 69 | /* m_size keeps the requested size; the mapped length is RoundUp(size) */ m_size = size; 70 | m_mmap = 1; 71 | /* a failing madvise is only logged; the mapping is still used */ if (madvise(addr, round_up_size, MADV_DONTDUMP) != 0) { 72 | Logger::Printf("fail to madvise[%d]: %p | %lu\n", errno, addr, round_up_size); 73 | } 74 | return; 75 | } 76 | } 77 | m_addr = static_cast(malloc(size)); 78 | if (m_addr != nullptr) { 79 | m_size = size; 80 | } 81 | } 82 | 83 | /* Releases the block: munmap over the rounded-up length for mapped blocks, free otherwise. */ MemBlock::~MemBlock() noexcept { 84 | if (m_addr != nullptr) { 85 | if (m_mmap) { 86 | if (munmap(m_addr, RoundUp(m_size)) != 0) { 87 | Logger::Printf("fail to munmap[%d]: %p |
%lu\n", errno, m_addr, m_size); 88 | }; 89 | } else { 90 | free(m_addr); 91 | } 92 | } 93 | } 94 | 95 | /* Reads the whole file behind fd into a new MemBlock. Returns an empty block when fstat fails, the file size is not positive, allocation fails, or any pread returns something other than the requested count. NOTE(review): a short read is therefore treated as a failure rather than retried. */ static MemBlock LoadAll(int fd) noexcept { 96 | struct stat stat; 97 | if (fstat(fd, &stat) != 0 || stat.st_size <= 0) { 98 | return {}; 99 | } 100 | MemBlock out(stat.st_size); 101 | if (!out) { 102 | return {}; 103 | } 104 | auto data = out.addr(); 105 | auto remain = out.size(); 106 | constexpr size_t block = 16*1024*1024; 107 | size_t off = 0; 108 | /* read in 16 MiB pieces, asking the kernel to read the next piece ahead before the current one is read */ while (remain > block) { 109 | auto next = off + block; 110 | readahead(fd, next, block); 111 | if (pread(fd, data, block, off) != block) { 112 | return {}; 113 | } 114 | off = next; 115 | data += block; 116 | remain -= block; 117 | } 118 | if (pread(fd, data, remain, off) != remain) { 119 | return {}; 120 | } 121 | return out; 122 | } 123 | 124 | /* Opens path read-only and loads its content with LoadAll. Failures are logged and reported as an empty block. */ MemBlock MemBlock::LoadFile(const char* path) noexcept { 125 | int fd = open(path, O_RDONLY); 126 | if (fd < 0) { 127 | Logger::Printf("fail to open file: %s\n", path); 128 | return {}; 129 | } 130 | auto out = LoadAll(fd); 131 | close(fd); 132 | if (!out) { 133 | Logger::Printf("fail to read whole file: %s\n", path); 134 | } 135 | return out; 136 | } 137 | 138 | 139 | /* Maps the file at path read-only and private. Every policy except MAP_ONLY adds MAP_POPULATE; OCCUPY additionally adds MAP_LOCKED when the effective user id is 0. On any failure the constructor returns early and the address and size members are not assigned here. */ MemMap::MemMap(const char* path, Policy policy) noexcept { 140 | int fd = open(path, O_RDONLY); 141 | if (fd < 0) { 142 | Logger::Printf("fail to open file: %s\n", path); 143 | return; 144 | } 145 | struct stat stat; 146 | if (fstat(fd, &stat) != 0 || stat.st_size <= 0) { 147 | close(fd); 148 | return; 149 | } 150 | int flag = MAP_PRIVATE; 151 | if (policy != MAP_ONLY) { 152 | flag |= MAP_POPULATE; 153 | } 154 | if (policy == OCCUPY && geteuid() == 0) { 155 | flag |= MAP_LOCKED; 156 | } 157 | auto addr = mmap(nullptr, stat.st_size, PROT_READ, flag, fd, 0); 158 | /* the descriptor is closed right after mmap, whether or not the mapping succeeded */ close(fd); 159 | if (addr == MAP_FAILED) { 160 | return; 161 | } 162 | m_addr = static_cast(addr); 163 | m_size = stat.st_size; 164 | } 165 | 166 | /* Unmaps the file if a mapping exists; a failing munmap is only logged. */ MemMap::~MemMap() noexcept { 167 | if (m_addr != nullptr) { 168 | if (munmap(m_addr, m_size) != 0) {
169 | Logger::Printf("fail to munmap[%d]: %p | %lu\n", errno, m_addr, m_size); 170 | }; 171 | } 172 | } 173 | 174 | /* Creates or truncates the file at path (mode 0644) for writing and allocates the BUFSZ-byte buffer. When open fails the writer stays invalid, which operator! reports. */ FileWriter::FileWriter(const char* path) { 175 | m_fd = open(path, O_CREAT|O_TRUNC|O_WRONLY, 0644); 176 | if (m_fd < 0) { 177 | return; 178 | } 179 | m_buf = std::make_unique(BUFSZ); 180 | } 181 | /* Flushes buffered bytes and closes the descriptor. */ FileWriter::~FileWriter() noexcept { 182 | if (m_fd >= 0) { 183 | _flush(); 184 | /* a failed flush has already closed the descriptor and set m_fd to -1 inside _write, so close only a descriptor that is still open */ if (m_fd >= 0) ::close(m_fd); 185 | } 186 | } 187 | /* True when the writer has no usable descriptor. */ bool FileWriter::operator!() const noexcept { 188 | return m_fd < 0; 189 | } 190 | 191 | /* Writes n bytes straight to the descriptor in pieces of at most 16 MiB. If any write returns something other than the requested count, the descriptor is closed, m_fd becomes -1 and false is returned. */ bool FileWriter::_write(const void* data, size_t n) noexcept { 192 | constexpr size_t block = 16*1024*1024; 193 | while (n > block) { 194 | if (::write(m_fd, data, block) != block) { 195 | ::close(m_fd); 196 | m_fd = -1; 197 | return false; 198 | } 199 | n -= block; 200 | data = (uint8_t*)data + block; 201 | } 202 | if (::write(m_fd, data, n) != n) { 203 | ::close(m_fd); 204 | m_fd = -1; 205 | return false; 206 | } 207 | return true; 208 | } 209 | 210 | /* Writes out whatever is buffered; returns false when the writer is invalid or the write fails. */ bool FileWriter::flush() noexcept { 211 | if (m_fd < 0) { 212 | return false; 213 | } 214 | return _flush(); 215 | } 216 | /* Same as flush but without the validity check; the buffer is marked empty before the write is attempted. */ bool FileWriter::_flush() noexcept { 217 | if (m_off == 0) { 218 | return true; 219 | } 220 | auto n = m_off; 221 | m_off = 0; 222 | return _write(m_buf.get(), n); 223 | } 224 | 225 | /* Buffered write with three cases: data that fits is appended to the buffer; data that overflows the buffer by less than one buffer fills it, writes it out and keeps the remainder buffered; anything larger flushes the buffer and is written directly. Returns false when the writer is invalid or a write fails. */ bool FileWriter::write(const void* data, size_t n) noexcept { 226 | if (m_fd < 0) { 227 | return false; 228 | } 229 | if (m_off + n < BUFSZ) { 230 | memcpy(m_buf.get()+m_off, data, n); 231 | m_off += n; 232 | } else if (m_off + n < BUFSZ*2) { 233 | auto m = BUFSZ - m_off; 234 | memcpy(m_buf.get()+m_off, data, m); 235 | if (!_write(m_buf.get(), BUFSZ)) { 236 | return false; 237 | } 238 | m_off = n - m; 239 | memcpy(m_buf.get(), (const uint8_t*)data+m, m_off); 240 | } else { 241 | return _flush() && _write(data, n); 242 | } 243 | return true; 244 | } 245 | 246 | } //shd -------------------------------------------------------------------------------- /test/test.cc:
-------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #include 20 | 21 | /* Test runner entry point: hands the command-line arguments to GoogleTest and returns the result of running all registered tests. */ int main(int argc,char **argv){ 22 | testing::InitGoogleTest(&argc,argv); 23 | return RUN_ALL_TESTS(); 24 | } -------------------------------------------------------------------------------- /test/test.h: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version.
9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | /* Test data source with fixed-size values: the keys are consecutive 64-bit integers (the first read returns `begin`), and each value is VALUE_SIZE bytes made of four copies of (key ^ mask). total() reports the count passed to the constructor. The returned record points into the generator's own members, so it is only valid until the next read. */ class EmbeddingGenerator : public shd::IDataReader { 25 | public: 26 | static constexpr uint64_t MASK0 = 0xaaaaaaaaaaaaaaaaUL; 27 | static constexpr uint64_t MASK1 = 0x5555555555555555UL; 28 | explicit EmbeddingGenerator(uint64_t begin, uint64_t total, uint64_t mask=MASK0) 29 | : m_current(begin-1), m_begin(begin), m_total(total), m_mask(mask) 30 | {} 31 | EmbeddingGenerator(const EmbeddingGenerator&) = delete; 32 | EmbeddingGenerator& operator=(const EmbeddingGenerator&) = delete; 33 | 34 | /* rewinds so that the next read returns `begin` again */ void reset() override { 35 | m_current = m_begin-1; 36 | } 37 | size_t total() override { 38 | return m_total; 39 | } 40 | /* advances to the next key and regenerates the value; the bool argument is ignored */ shd::Record read(bool) override { 41 | m_current++; 42 | auto arr = (uint64_t*)m_val; 43 | arr[0] = m_current ^ m_mask; 44 | arr[1] = m_current ^ m_mask; 45 | arr[2] = m_current ^ m_mask; 46 | arr[3] = m_current ^ m_mask; 47 | return {{(const uint8_t*)&m_current, sizeof(uint64_t)}, {m_val, VALUE_SIZE}}; 48 | } 49 | static constexpr unsigned VALUE_SIZE = 32; //fp16 * 16 50 | 51 | private: 52 | uint64_t m_current; 53 | uint8_t m_val[VALUE_SIZE]; 54 | const uint64_t m_begin; 55 | const uint64_t m_total; 56 | const uint64_t m_mask; 57 | }; 58 | 59 | /* Test data source with variable-size values: the keys are consecutive 64-bit integers, and the value of a key has length (key + shift) truncated to 8 bits, with every byte equal to that length. */ class VariedValueGenerator : public shd::IDataReader { 60 | public: 61 | explicit VariedValueGenerator(uint64_t begin, uint64_t total, unsigned shift=5U) 62 | : m_current(begin-1), m_begin(begin), m_total(total), m_shift(shift) 63 | {} 64 |
VariedValueGenerator(const VariedValueGenerator&) = delete; 65 | VariedValueGenerator& operator=(const VariedValueGenerator&) = delete; 66 | 67 | void reset() override { 68 | m_current = m_begin-1; 69 | } 70 | size_t total() override { 71 | return m_total; 72 | } 73 | shd::Record read(bool) override { 74 | m_current++; 75 | const uint8_t len = m_current + m_shift; 76 | memset(m_val, len, len); 77 | return {{(const uint8_t*)&m_current, sizeof(uint64_t)}, {m_val, len}}; 78 | } 79 | 80 | private: 81 | uint64_t m_current; 82 | uint8_t m_val[UINT8_MAX]; 83 | const uint64_t m_begin; 84 | const uint64_t m_total; 85 | const unsigned m_shift; 86 | }; 87 | 88 | 89 | class FakeWriter : public shd::IDataWriter { 90 | public: 91 | bool operator!() const noexcept override; 92 | bool flush() noexcept override; 93 | bool write(const void*, size_t) noexcept override; 94 | }; 95 | 96 | bool FakeWriter::operator!() const noexcept { 97 | return false; 98 | } 99 | bool FakeWriter::flush() noexcept { 100 | return true; 101 | } 102 | bool FakeWriter::write(const void *, size_t) noexcept { 103 | return true; 104 | } -------------------------------------------------------------------------------- /test/test_bbf.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Block Bloom Filter with 3.5% false positive rate 3 | // Copyright (C) 2025 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | /* Covers single and batch insertion and lookup. The filter is created with 999 and is expected to report a capacity of 1000. Keys 0..499 are set one by one, keys 1000..1499 in one batch; afterwards the item count must lie in [990, 1000]. A batch test of the even numbers 0..998 must hit the first 250 entries (those below 500), must miss the last entry (998), and may report at most 260 hits in total. */ TEST(BBF, SetAndTest) { 25 | bbf::BloomFilter bf(999); 26 | ASSERT_FALSE(!bf); 27 | ASSERT_EQ(1000, bf.capacity()); 28 | 29 | for (unsigned i = 0; i < 500; i++) { 30 | ASSERT_TRUE(bf.set(reinterpret_cast(&i), sizeof(unsigned))); 31 | } 32 | ASSERT_EQ(500, bf.item()); 33 | std::vector keys(500); 34 | for (unsigned i = 0; i < 500; i++) { 35 | keys[i] = i + 1000; 36 | } 37 | bf.batch_set(keys.size(), sizeof(unsigned), reinterpret_cast(keys.data())); 38 | ASSERT_LE(bf.item(), 1000); 39 | ASSERT_GE(bf.item(), 990); 40 | 41 | for (unsigned i = 0; i < 500; i++) { 42 | ASSERT_TRUE(bf.test(reinterpret_cast(&i), sizeof(unsigned))); 43 | } 44 | 45 | for (unsigned i = 0; i < 500; i++) { 46 | keys[i] = i * 2; 47 | } 48 | std::vector result(keys.size()); 49 | unsigned hit = bf.batch_test(keys.size(), sizeof(unsigned), 50 | reinterpret_cast(keys.data()), 51 | reinterpret_cast(result.data())); 52 | for (unsigned i = 0; i < 250; i++) { 53 | ASSERT_TRUE(result[i]); 54 | } 55 | ASSERT_FALSE(result.back()); 56 | ASSERT_GE(hit, 250); 57 | ASSERT_LE(hit, 260); 58 | } 59 | 60 | /* Round trip through a file: a filter holding keys 0..499 is dumped to "tmp.bbf" in the working directory, loaded into a second filter, and every key must still test positive. The file is not removed afterwards. */ TEST(BBF, DumpAndLoad) { 61 | bbf::BloomFilter bf1(999); 62 | ASSERT_FALSE(!bf1); 63 | for (unsigned i = 0; i < 500; i++) { 64 | ASSERT_TRUE(bf1.set(reinterpret_cast(&i), sizeof(unsigned))); 65 | } 66 | 67 | const std::string filename = "tmp.bbf"; 68 | /* the inner scope destroys the writer before the file is read back */ { 69 | shd::FileWriter output(filename.c_str()); 70 | ASSERT_TRUE(bf1.dump(output)); 71 | } 72 | 73 | bbf::BloomFilter bf2(filename); 74 | ASSERT_FALSE(!bf2); 75 | for (unsigned i = 0; i < 500; i++) { 76 | ASSERT_TRUE(bf2.test(reinterpret_cast(&i), sizeof(unsigned))); 77 | } 78 | }
-------------------------------------------------------------------------------- /test/test_shd.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "test.h" 26 | 27 | static constexpr unsigned PIECE = 1000; 28 | 29 | template 30 | static shd::DataReaders CreateReaders(unsigned n, Tips tips) { 31 | shd::DataReaders out; 32 | out.reserve(n); 33 | for (unsigned i = 0; i < n; i++) { 34 | out.push_back(std::make_unique(i*PIECE, PIECE, tips)); 35 | } 36 | return out; 37 | } 38 | 39 | TEST(SHD, Build) { 40 | FakeWriter fake_output; 41 | 42 | shd::DataReaders fake_input; 43 | ASSERT_EQ(shd::BuildSet(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 44 | ASSERT_EQ(shd::BuildDict(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 45 | ASSERT_EQ(shd::BuildDictWithVariedValue(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 46 | 47 | fake_input.push_back(std::make_unique(0, 0)); 48 | ASSERT_EQ(shd::BuildSet(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 49 | ASSERT_EQ(shd::BuildDict(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 50 | ASSERT_EQ(shd::BuildDictWithVariedValue(fake_input, fake_output), shd::BUILD_STATUS_BAD_INPUT); 51 | 52 | fake_input.push_back(std::make_unique(0, 1)); 53 | ASSERT_EQ(shd::BuildSet(fake_input, fake_output), shd::BUILD_STATUS_OK); 54 | ASSERT_EQ(shd::BuildDict(fake_input, fake_output), shd::BUILD_STATUS_OK); 55 | ASSERT_EQ(shd::BuildDictWithVariedValue(fake_input, fake_output), shd::BUILD_STATUS_OK); 56 | 57 | auto emb_gen = CreateReaders(1, EmbeddingGenerator::MASK0); 58 | ASSERT_EQ(shd::BuildSet(emb_gen, fake_output), shd::BUILD_STATUS_OK); 59 | ASSERT_EQ(shd::BuildDict(emb_gen, fake_output), shd::BUILD_STATUS_OK); 60 | ASSERT_EQ(shd::BuildDictWithVariedValue(emb_gen, fake_output), shd::BUILD_STATUS_OK); 61 | 62 | auto var_gen = CreateReaders(1, 5U); 63 | ASSERT_EQ(shd::BuildDict(var_gen, fake_output), shd::BUILD_STATUS_BAD_INPUT); 64 | 
ASSERT_EQ(shd::BuildDictWithVariedValue(var_gen, fake_output), shd::BUILD_STATUS_OK); 65 | 66 | emb_gen = CreateReaders(3, EmbeddingGenerator::MASK0); 67 | ASSERT_EQ(shd::BuildSet(emb_gen, fake_output), shd::BUILD_STATUS_OK); 68 | ASSERT_EQ(shd::BuildDict(emb_gen, fake_output), shd::BUILD_STATUS_OK); 69 | 70 | var_gen = CreateReaders(3, 5U); 71 | ASSERT_EQ(shd::BuildDictWithVariedValue(var_gen, fake_output), shd::BUILD_STATUS_OK); 72 | } 73 | 74 | TEST(SHD, KeySet) { 75 | const std::string filename = "keyset.shd"; 76 | { 77 | shd::FileWriter output(filename.c_str()); 78 | auto input = CreateReaders(2, EmbeddingGenerator::MASK0); 79 | ASSERT_EQ(shd::BuildSet(input, output), shd::BUILD_STATUS_OK); 80 | } 81 | shd::PerfectHashtable dict(filename); 82 | ASSERT_FALSE(!dict); 83 | ASSERT_EQ(dict.type(), shd::PerfectHashtable::KEY_SET); 84 | ASSERT_EQ(dict.key_len(), sizeof(uint64_t)); 85 | ASSERT_EQ(dict.val_len(), 0); 86 | ASSERT_EQ(dict.item(), PIECE*2); 87 | 88 | union { 89 | uint64_t v; 90 | uint8_t p[8]; 91 | } tmp; 92 | for (unsigned i = 0; i < PIECE*2; i++) { 93 | tmp.v = i; 94 | auto val = dict.search(tmp.p); 95 | ASSERT_NE(val.ptr, nullptr); 96 | ASSERT_EQ(val.len, 0); 97 | } 98 | for (unsigned i = PIECE*2; i < PIECE*3; i++) { 99 | tmp.v = i; 100 | auto val = dict.search(tmp.p); 101 | ASSERT_EQ(val.ptr, nullptr); 102 | ASSERT_EQ(val.len, 0); 103 | } 104 | 105 | std::vector keys(PIECE*2); 106 | for (unsigned i = 0; i < PIECE; i++) { 107 | keys[i*2] = i; 108 | keys[i*2+1] = PIECE*2+i; 109 | } 110 | std::vector in(keys.size()); 111 | for (unsigned i = 0; i < keys.size(); i++) { 112 | in[i] = (const uint8_t*)&keys[i]; 113 | } 114 | std::vector out(keys.size()); 115 | 116 | ASSERT_EQ(dict.batch_search(keys.size(), in.data(), out.data()), PIECE); 117 | for (unsigned i = 0; i < PIECE; i++) { 118 | ASSERT_NE(out[i*2], nullptr); 119 | ASSERT_EQ(out[i*2+1], nullptr); 120 | } 121 | 122 | ASSERT_EQ(dict.batch_fetch(keys.size(), (const uint8_t*)keys.data(), 
(uint8_t*)out.data()), 0); 123 | } 124 | 125 | TEST(SHD, SmallSet) { 126 | shd::DataReaders input(1); 127 | const uint64_t shift = 9999; 128 | const unsigned limit = 16; 129 | std::vector keys(limit); 130 | for (unsigned i = 0; i < limit; i++) { 131 | keys[i] = shift + i; 132 | } 133 | std::vector in(keys.size()); 134 | for (unsigned i = 0; i < keys.size(); i++) { 135 | in[i] = (const uint8_t*)&keys[i]; 136 | } 137 | std::vector out(keys.size()); 138 | const std::string filename = "small.shd"; 139 | for (unsigned i = 1; i < limit; i++) { 140 | input[0] = std::make_unique(shift, i); 141 | { 142 | shd::FileWriter output(filename.c_str()); 143 | ASSERT_EQ(shd::BuildSet(input, output), shd::BUILD_STATUS_OK); 144 | } 145 | { 146 | shd::PerfectHashtable dict(filename); 147 | ASSERT_FALSE(!dict); 148 | for (auto& p : out) { 149 | p = nullptr; 150 | } 151 | ASSERT_EQ(dict.batch_search(keys.size(), in.data(), out.data()), i); 152 | for (unsigned j = 0; j < i; j++) { 153 | ASSERT_NE(out[j], nullptr); 154 | } 155 | for (unsigned j = i; j < limit; j++) { 156 | ASSERT_EQ(out[j], nullptr); 157 | } 158 | } 159 | } 160 | } 161 | 162 | TEST(SHD, InlinedDict) { 163 | const std::string filename = "dict.shd"; 164 | { 165 | shd::FileWriter output(filename.c_str()); 166 | auto input = CreateReaders(2, EmbeddingGenerator::MASK0); 167 | ASSERT_EQ(shd::BuildDict(input, output), shd::BUILD_STATUS_OK); 168 | } 169 | shd::PerfectHashtable dict(filename); 170 | ASSERT_FALSE(!dict); 171 | ASSERT_EQ(dict.type(), shd::PerfectHashtable::KV_INLINE); 172 | ASSERT_EQ(dict.key_len(), sizeof(uint64_t)); 173 | ASSERT_EQ(dict.val_len(), EmbeddingGenerator::VALUE_SIZE); 174 | ASSERT_EQ(dict.item(), PIECE*2); 175 | 176 | EmbeddingGenerator checker(PIECE, PIECE*2); 177 | 178 | std::vector keys(PIECE*2); 179 | 180 | for (unsigned i = 0; i < PIECE; i++) { 181 | auto rec = checker.read(false); 182 | auto val = dict.search(rec.key.ptr); 183 | ASSERT_NE(val.ptr, nullptr); 184 | ASSERT_NE(val.ptr, rec.val.ptr); 
185 | ASSERT_EQ(val.len, rec.val.len); 186 | ASSERT_EQ(memcmp(val.ptr, rec.val.ptr, rec.val.len), 0); 187 | auto key = *(const uint64_t*)rec.key.ptr; 188 | keys[i*2] = key; 189 | keys[i*2+1] = ~key; 190 | } 191 | for (unsigned i = 0; i < PIECE; i++) { 192 | auto rec = checker.read(false); 193 | auto val = dict.search(rec.key.ptr); 194 | ASSERT_EQ(val.ptr, nullptr); 195 | ASSERT_EQ(val.len, 0); 196 | } 197 | 198 | std::vector in(keys.size()); 199 | for (unsigned i = 0; i < keys.size(); i++) { 200 | in[i] = (const uint8_t*)&keys[i]; 201 | } 202 | std::vector out(keys.size()); 203 | auto buf_sz = (PIECE*2)*EmbeddingGenerator::VALUE_SIZE; 204 | auto buf = std::make_unique(buf_sz); 205 | memset(buf.get(), 0, buf_sz); 206 | auto dft_val = std::make_unique(EmbeddingGenerator::VALUE_SIZE); 207 | memset(dft_val.get(), 0x33, EmbeddingGenerator::VALUE_SIZE); 208 | 209 | ASSERT_EQ(dict.batch_search(keys.size(), in.data(), out.data()), PIECE); 210 | ASSERT_EQ(dict.batch_fetch(keys.size(), (const uint8_t*)keys.data(), buf.get(), dft_val.get()), PIECE); 211 | 212 | checker.reset(); 213 | auto line = buf.get(); 214 | for (unsigned i = 0; i < PIECE; i++) { 215 | auto val = checker.read(false).val; 216 | ASSERT_NE(out[i*2], nullptr); 217 | ASSERT_EQ(memcmp(out[i*2], val.ptr, val.len), 0); 218 | ASSERT_EQ(out[i*2+1], nullptr); 219 | ASSERT_EQ(memcmp(line, val.ptr, val.len), 0); 220 | ASSERT_EQ(memcmp(line+EmbeddingGenerator::VALUE_SIZE, dft_val.get(), val.len), 0); 221 | line += EmbeddingGenerator::VALUE_SIZE*2; 222 | } 223 | } 224 | 225 | TEST(SHD, VariedDict) { 226 | const std::string filename = "var-dict.shd"; 227 | { 228 | shd::FileWriter output(filename.c_str()); 229 | auto input = CreateReaders(2, 5U); 230 | ASSERT_EQ(shd::BuildDictWithVariedValue(input, output), shd::BUILD_STATUS_OK); 231 | } 232 | shd::PerfectHashtable dict(filename); 233 | ASSERT_FALSE(!dict); 234 | ASSERT_EQ(dict.type(), shd::PerfectHashtable::KV_SEPARATED); 235 | ASSERT_EQ(dict.key_len(), 
sizeof(uint64_t)); 236 | ASSERT_EQ(dict.item(), PIECE*2); 237 | 238 | VariedValueGenerator checker(0, PIECE*3); 239 | for (unsigned i = 0; i < PIECE*2; i++) { 240 | auto rec = checker.read(false); 241 | auto val = dict.search(rec.key.ptr); 242 | ASSERT_NE(val.ptr, nullptr); 243 | ASSERT_NE(val.ptr, rec.val.ptr); 244 | ASSERT_EQ(val.len, rec.val.len); 245 | ASSERT_EQ(memcmp(val.ptr, rec.val.ptr, rec.val.len), 0); 246 | } 247 | for (unsigned i = PIECE*2; i < PIECE*3; i++) { 248 | auto rec = checker.read(false); 249 | auto val = dict.search(rec.key.ptr); 250 | ASSERT_EQ(val.ptr, nullptr); 251 | ASSERT_EQ(val.len, 0); 252 | } 253 | 254 | auto junk = std::make_unique(256U); 255 | ASSERT_EQ(dict.batch_search(1, (const uint8_t**)junk.get(), (const uint8_t**)junk.get()), 0); 256 | ASSERT_EQ(dict.batch_fetch(1, junk.get(), junk.get()), 0); 257 | } 258 | 259 | TEST(SHD, FetchWithPatch) { 260 | const std::string base_filename = "base.shd"; 261 | const std::string patch_filename = "patch.shd"; 262 | { 263 | shd::FileWriter base_output(base_filename.c_str()); 264 | auto base_input = CreateReaders(2, EmbeddingGenerator::MASK1); 265 | ASSERT_EQ(shd::BuildDict(base_input, base_output), shd::BUILD_STATUS_OK); 266 | shd::FileWriter patch_output(patch_filename.c_str()); 267 | auto patch_input = CreateReaders(1, EmbeddingGenerator::MASK0); 268 | ASSERT_EQ(shd::BuildDict(patch_input, patch_output), shd::BUILD_STATUS_OK); 269 | } 270 | 271 | shd::PerfectHashtable base(base_filename); 272 | ASSERT_FALSE(!base); 273 | shd::PerfectHashtable patch(patch_filename); 274 | ASSERT_FALSE(!patch); 275 | 276 | std::vector keys(PIECE*2); 277 | for (unsigned i = 0; i < PIECE*2; i++) { 278 | keys[i] = i; 279 | } 280 | std::vector in(keys.size()); 281 | for (unsigned i = 0; i < keys.size(); i++) { 282 | in[i] = (const uint8_t*)&keys[i]; 283 | } 284 | std::vector out(keys.size()); 285 | auto buf_sz = (PIECE*2)*EmbeddingGenerator::VALUE_SIZE; 286 | auto buf = std::make_unique(buf_sz); 287 | 
memset(buf.get(), 0, buf_sz); 288 | 289 | ASSERT_EQ(base.batch_search(keys.size(), in.data(), out.data(), &patch), PIECE*2); 290 | ASSERT_EQ(base.batch_fetch(keys.size(), (const uint8_t*)keys.data(), buf.get(), nullptr, &patch), PIECE*2); 291 | 292 | EmbeddingGenerator checker0(0, PIECE, EmbeddingGenerator::MASK0); 293 | EmbeddingGenerator checker1(PIECE, PIECE*2, EmbeddingGenerator::MASK1); 294 | 295 | auto line0 = buf.get(); 296 | auto line1 = buf.get() + buf_sz/2; 297 | for (unsigned i = 0; i < PIECE; i++) { 298 | ASSERT_NE(out[i], nullptr); 299 | ASSERT_NE(out[PIECE+i], nullptr); 300 | auto val0 = checker0.read(false).val; 301 | auto val1 = checker1.read(false).val; 302 | ASSERT_EQ(memcmp(val0.ptr, line0, EmbeddingGenerator::VALUE_SIZE), 0); 303 | ASSERT_EQ(memcmp(val1.ptr, line1, EmbeddingGenerator::VALUE_SIZE), 0); 304 | ASSERT_NE(memcmp(line0, line1, EmbeddingGenerator::VALUE_SIZE), 0); 305 | line0 += EmbeddingGenerator::VALUE_SIZE; 306 | line1 += EmbeddingGenerator::VALUE_SIZE; 307 | } 308 | } 309 | 310 | TEST(SHD, RebuildInlinedDict) { 311 | std::string filename = "dict-old.shd"; 312 | { 313 | shd::FileWriter output(filename.c_str()); 314 | auto input = CreateReaders(3, EmbeddingGenerator::MASK1); 315 | ASSERT_EQ(shd::BuildDict(input, output), shd::BUILD_STATUS_OK); 316 | } 317 | { 318 | shd::PerfectHashtable dict(filename); 319 | ASSERT_FALSE(!dict); 320 | filename = "dict-new.shd"; 321 | shd::FileWriter output(filename.c_str()); 322 | auto input = CreateReaders(2, EmbeddingGenerator::MASK0); 323 | ASSERT_EQ(dict.derive(input, output), shd::BUILD_STATUS_OK); 324 | } 325 | 326 | shd::PerfectHashtable dict(filename); 327 | ASSERT_FALSE(!dict); 328 | 329 | std::vector keys(PIECE*2); 330 | for (unsigned i = 0; i < PIECE*2; i++) { 331 | keys[i] = i + PIECE; 332 | } 333 | std::vector in(keys.size()); 334 | for (unsigned i = 0; i < keys.size(); i++) { 335 | in[i] = (const uint8_t*)&keys[i]; 336 | } 337 | std::vector out(keys.size()); 338 | auto buf_sz = 
(PIECE*2)*EmbeddingGenerator::VALUE_SIZE; 339 | auto buf = std::make_unique(buf_sz); 340 | memset(buf.get(), 0, buf_sz); 341 | 342 | ASSERT_EQ(dict.batch_search(keys.size(), in.data(), out.data()), PIECE*2); 343 | ASSERT_EQ(dict.batch_fetch(keys.size(), (const uint8_t*)keys.data(), buf.get()), PIECE*2); 344 | 345 | EmbeddingGenerator checker0(PIECE, PIECE*2, EmbeddingGenerator::MASK0); 346 | EmbeddingGenerator checker1(PIECE*2, PIECE*3, EmbeddingGenerator::MASK1); 347 | 348 | auto line0 = buf.get(); 349 | auto line1 = buf.get() + buf_sz/2; 350 | for (unsigned i = 0; i < PIECE; i++) { 351 | ASSERT_NE(out[i], nullptr); 352 | ASSERT_NE(out[PIECE+i], nullptr); 353 | auto val0 = checker0.read(false).val; 354 | auto val1 = checker1.read(false).val; 355 | ASSERT_EQ(memcmp(val0.ptr, line0, EmbeddingGenerator::VALUE_SIZE), 0); 356 | ASSERT_EQ(memcmp(val1.ptr, line1, EmbeddingGenerator::VALUE_SIZE), 0); 357 | ASSERT_NE(memcmp(line0, line1, EmbeddingGenerator::VALUE_SIZE), 0); 358 | line0 += EmbeddingGenerator::VALUE_SIZE; 359 | line1 += EmbeddingGenerator::VALUE_SIZE; 360 | } 361 | } 362 | 363 | TEST(SHD, RebuildVariedDict) { 364 | std::string filename = "var-dict-old.shd"; 365 | { 366 | shd::FileWriter output(filename.c_str()); 367 | auto input = CreateReaders(2, 2U); 368 | ASSERT_EQ(shd::BuildDictWithVariedValue(input, output), shd::BUILD_STATUS_OK); 369 | } 370 | { 371 | shd::PerfectHashtable dict(filename); 372 | ASSERT_FALSE(!dict); 373 | filename = "var-dict-new.shd"; 374 | shd::FileWriter output(filename.c_str()); 375 | auto input = CreateReaders(1, 32U); 376 | ASSERT_EQ(dict.derive(input, output), shd::BUILD_STATUS_OK); 377 | } 378 | 379 | shd::PerfectHashtable dict(filename); 380 | ASSERT_FALSE(!dict); 381 | 382 | VariedValueGenerator checker0(0, PIECE, 32U); 383 | for (unsigned i = 0; i < PIECE; i++) { 384 | auto rec = checker0.read(false); 385 | auto val = dict.search(rec.key.ptr); 386 | ASSERT_NE(val.ptr, nullptr); 387 | ASSERT_EQ(val.len, rec.val.len); 388 | 
ASSERT_EQ(memcmp(val.ptr, rec.val.ptr, rec.val.len), 0); 389 | } 390 | 391 | VariedValueGenerator checker1(PIECE, PIECE*2, 2U); 392 | for (unsigned i = 0; i < PIECE; i++) { 393 | auto rec = checker1.read(false); 394 | auto val = dict.search(rec.key.ptr); 395 | ASSERT_NE(val.ptr, nullptr); 396 | ASSERT_EQ(val.len, rec.val.len); 397 | ASSERT_EQ(memcmp(val.ptr, rec.val.ptr, rec.val.len), 0); 398 | } 399 | } -------------------------------------------------------------------------------- /test/test_utils.cc: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Skew Hash and Displace Algorithm. 3 | // Copyright (C) 2020 Ruan Kunliang 4 | // 5 | // This library is free software; you can redistribute it and/or modify it under 6 | // the terms of the GNU Lesser General Public License as published by the Free 7 | // Software Foundation; either version 2.1 of the License, or (at your option) 8 | // any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, but WITHOUT 11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | // details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public License 16 | // along with the This Library; if not, see . 
17 | //============================================================================== 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | template 26 | void DoTestDivisor(Word n) { 27 | ASSERT_NE(n, 0); 28 | shd::Divisor d(n); 29 | std::mt19937_64 rand; 30 | 31 | auto test = [&d](Word m) { 32 | ASSERT_EQ(m / d, m / d.value()); 33 | ASSERT_EQ(m % d, m % d.value()); 34 | }; 35 | test(0); 36 | test(1); 37 | test(std::numeric_limits::max()); 38 | 39 | for (unsigned i = 0; i < 1000; i++) { 40 | Word m = rand(); 41 | test(m); 42 | } 43 | } 44 | 45 | template 46 | void TestDivisor() { 47 | DoTestDivisor(std::numeric_limits::max()); 48 | DoTestDivisor(std::numeric_limits::max()/2+1); 49 | DoTestDivisor(std::numeric_limits::max()/2); 50 | DoTestDivisor(17); 51 | DoTestDivisor(13); 52 | DoTestDivisor(11); 53 | DoTestDivisor(9); 54 | DoTestDivisor(7); 55 | DoTestDivisor(5); 56 | DoTestDivisor(3); 57 | DoTestDivisor(2); 58 | DoTestDivisor(1); 59 | } 60 | 61 | TEST(Divisor, Uint64) { 62 | TestDivisor(); 63 | } 64 | 65 | TEST(Divisor, Uint32) { 66 | TestDivisor(); 67 | } 68 | 69 | TEST(Divisor, Uint16) { 70 | TestDivisor(); 71 | } 72 | 73 | TEST(Divisor, Uint8) { 74 | TestDivisor(); 75 | } --------------------------------------------------------------------------------