├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bench └── sig_tree_bench.cpp ├── main.cpp ├── src ├── allocator.h ├── autovector.h ├── coding.h ├── kv_trans_trait.h ├── likely.h ├── page_size.h ├── sig_tree.h ├── sig_tree_impl.h ├── sig_tree_mop_impl.h ├── sig_tree_node_impl.h ├── sig_tree_rebuild_impl.h ├── sig_tree_visit_impl.h └── slice.h └── test └── sig_tree_test.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /.vscode/ 3 | /cmake-*/ 4 | .DS_Store -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(sig_tree) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Werror") 7 | 8 | add_executable(sig_tree main.cpp 9 | bench/sig_tree_bench.cpp 10 | src/allocator.h 11 | src/autovector.h 12 | src/coding.h 13 | src/kv_trans_trait.h 14 | src/likely.h 15 | src/page_size.h 16 | src/sig_tree.h 17 | src/sig_tree_impl.h 18 | src/sig_tree_mop_impl.h 19 | src/sig_tree_node_impl.h 20 | src/sig_tree_rebuild_impl.h 21 | src/sig_tree_visit_impl.h 22 | src/slice.h 23 | test/sig_tree_test.cpp) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 
13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. 
Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. 
Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>.
662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sig_tree 2 | Read ./bench/sig_tree_bench.cpp 3 | 4 | 1M 16B Keys Only Benchmark 5 | ``` 6 | SGT - Add took 941 milliseconds 7 | std::set - emplace took 1027 milliseconds 8 | std::unordered_set - emplace took 440 milliseconds 9 | SGT - Get took 664 milliseconds 10 | std::set - find took 1381 milliseconds 11 | std::unordered_set - find took 192 milliseconds 12 | ``` -------------------------------------------------------------------------------- /bench/sig_tree_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../src/sig_tree.h" 8 | #include "../src/sig_tree_impl.h" 9 | #include "../src/sig_tree_mop_impl.h" 10 | #include "../src/sig_tree_node_impl.h" 11 | #include "../src/sig_tree_rebuild_impl.h" 12 | #include "../src/sig_tree_visit_impl.h" 13 | 14 | namespace sgt::sig_tree_bench { 15 | // Number of string comparisons 16 | static unsigned int sig_tree_cmp_times = 0; 17 | static unsigned int std_set_cmp_times = 0; 18 | 19 | /* 20 | * Proxy class that translates a Token representing a KV in the SGT 21 | * back into its Key and Value; 22 | * roughly a one-way Token => KV mapping. 23 | * 24 | * This implementation does not distinguish Key from Value. 25 | */ 26 | class KVTrans { 27 | private: 28 | // In memory, a KV is represented as a C-style string 29 | const char * s_; 30 | 31 | public: 32 | explicit KVTrans(const char * s) : s_(s) {} 33 | 34 | public: 35 | bool operator==(const Slice & k) const { 36 | ++sig_tree_cmp_times; 37 | return strcmp(k.data(), s_) == 0; 38 | } 39 | 40 | Slice Key() const { 41 | return {s_}; 42 | } 43 | 44 | bool Get(const Slice & k, std::string * v) const { 45 | if (*this == k) { 46 | if (v != nullptr) { 47 | v->assign(s_); 48 | } 49 | return true; 50 | } 51 | return false; 52 | } 53 | 54 | public: 55 | static uint64_t Pack(size_t offset) { 56 | return offset + 1; 57 | } 58 | 59 | static
size_t Unpack(const uint64_t & rep) { 60 | return rep - 1; 61 | } 62 | 63 | static bool IsPacked(const uint64_t & rep) { 64 | return rep % 2 == 1; 65 | } 66 | 67 | static void * Base() { 68 | return nullptr; 69 | } 70 | }; 71 | 72 | /* 73 | * The Helper interface defines how KV Tokens are created and used 74 | */ 75 | class Helper final : public SignatureTreeTpl::Helper { 76 | public: 77 | ~Helper() override = default; 78 | 79 | public: 80 | // Return a Token for the KV to be stored 81 | uint64_t Add(const Slice & k, const Slice & v) override { 82 | // k.data() is trusted to return an externally owned C-style string 83 | // If ownership of the resource must be handed over, move/copy it here 84 | return reinterpret_cast(k.data()); 85 | } 86 | 87 | // Release resources; the KV is owned externally, so nothing needs to be done here 88 | void Del(KVTrans & trans) override {} 89 | 90 | // The offset obtained from Allocator.AllocatePage() must be packable into a Token 91 | // In other words, a Token (uint64_t by default) must be able to self-describe two kinds of data 92 | // union Token { 93 | // TokenByAdd a; 94 | // TokenByAllocatePage b; 95 | // } 96 | // 97 | // Add and AllocatePage always return even numbers 98 | // The latter becomes odd after +1; parity distinguishes the two 99 | uint64_t Pack(size_t offset) const override { 100 | return offset + 1; 101 | } 102 | 103 | size_t Unpack(const uint64_t & rep) const override { 104 | return rep - 1; 105 | } 106 | 107 | bool IsPacked(const uint64_t & rep) const override { 108 | return rep % 2 == 1; 109 | } 110 | 111 | KVTrans Trans(const uint64_t & rep) const override { 112 | // Token(rep) => KVTrans => KV 113 | return KVTrans(reinterpret_cast(static_cast(rep))); 114 | } 115 | }; 116 | 117 | /* 118 | * Memory allocator 119 | * 120 | * Allocating on a file-backed mmap yields an on-disk index; 121 | * plain malloc yields an in-memory index 122 | */ 123 | class AllocatorImpl final : public Allocator { 124 | public: 125 | std::unordered_set records_; 126 | 127 | public: 128 | // Free all allocated memory 129 | ~AllocatorImpl() override { 130 | for (uintptr_t record:records_) { 131 | free(reinterpret_cast(record)); 132 | } 133 | } 134 | 135 | public: 136 | // Base() + the offset returned by AllocatePage() = the real memory address 137 | // The rationale: with mmap, a re-mmap may be needed 138 | // The same offset always yields the same content, even though Base() may return a different value 139 | // For a purely in-memory index it suffices to return
0(nullptr) 即可 140 | void * Base() override { 141 | return nullptr; 142 | } 143 | 144 | // Allocate one page of memory of size kPageSize 145 | // With mmap, if the mapping must grow before the allocation can succeed, be sure to throw AllocatorFullException 146 | // SGT catches this exception, calls Grow(), and then recomputes memory addresses from Base() 147 | size_t AllocatePage() override { 148 | auto page = reinterpret_cast(malloc(kPageSize)); 149 | records_.emplace(page); 150 | return page; 151 | } 152 | 153 | // Free a memory page 154 | void FreePage(size_t offset) override { 155 | auto it = records_.find(offset); 156 | free(reinterpret_cast(*it)); 157 | records_.erase(it); 158 | } 159 | 160 | // Grow the backing storage 161 | void Grow() override {} 162 | }; 163 | 164 | #define TIME_START auto start = std::chrono::high_resolution_clock::now() 165 | #define TIME_END auto end = std::chrono::high_resolution_clock::now() 166 | #define PRINT_TIME(name) \ 167 | std::cout << name " took " << std::chrono::duration_cast(end - start).count() << " milliseconds" << std::endl 168 | 169 | void Run() { 170 | auto seed = std::random_device()(); 171 | std::cout << "sig_tree_bench_seed: " << seed << std::endl; 172 | 173 | std::default_random_engine engine(seed); 174 | std::uniform_int_distribution dist(1); 175 | 176 | // Randomly generate 1M 16-byte C-style strings 177 | std::vector src(1000000); 178 | for (auto & s:src) { 179 | s = static_cast(malloc(16)); 180 | for (size_t i = 0; i < 15; ++i) { 181 | s[i] = dist(engine); 182 | } 183 | s[15] = 0; 184 | } 185 | 186 | // Initialize the SGT 187 | Helper helper; 188 | AllocatorImpl allocator; 189 | SignatureTreeTpl tree(&helper, &allocator); 190 | 191 | // Initialize std::set 192 | struct cmp { 193 | bool operator()(const char * a, const char * b) const { 194 | ++std_set_cmp_times; 195 | return strcmp(a, b) < 0; 196 | } 197 | }; 198 | std::set set; 199 | 200 | // Initialize std::unordered_set 201 | struct hash { 202 | size_t operator()(const char * a) const { 203 | return SliceHasher()(Slice(a)); 204 | } 205 | }; 206 | struct equal { 207 | bool operator()(const char * a, const char * b) const { 208 | return strcmp(a, b) == 0; 209 | } 210 |
}; 211 | std::unordered_set unordered_set; 212 | 213 | // Add - begin 214 | { 215 | TIME_START; 216 | for (const auto & s:src) { 217 | tree.Add(reinterpret_cast(s), {}); 218 | } 219 | TIME_END; 220 | PRINT_TIME("SGT - Add"); 221 | } 222 | { 223 | TIME_START; 224 | for (const auto & s:src) { 225 | set.emplace(reinterpret_cast(s)); 226 | } 227 | TIME_END; 228 | PRINT_TIME("std::set - emplace"); 229 | } 230 | { 231 | TIME_START; 232 | for (const auto & s:src) { 233 | unordered_set.emplace(reinterpret_cast(s)); 234 | } 235 | TIME_END; 236 | PRINT_TIME("std::unordered_set - emplace"); 237 | } 238 | // Add - end 239 | // Get - begin 240 | { 241 | TIME_START; 242 | for (const auto & s:src) { 243 | tree.Get(reinterpret_cast(s), nullptr); 244 | } 245 | TIME_END; 246 | PRINT_TIME("SGT - Get"); 247 | } 248 | { 249 | TIME_START; 250 | for (const auto & s:src) { 251 | set.find(reinterpret_cast(s)); 252 | } 253 | TIME_END; 254 | PRINT_TIME("std::set - find"); 255 | } 256 | { 257 | TIME_START; 258 | for (const auto & s:src) { 259 | unordered_set.find(reinterpret_cast(s)); 260 | } 261 | TIME_END; 262 | PRINT_TIME("std::unordered_set - find"); 263 | } 264 | // Get - end 265 | 266 | // Statistics 267 | std::cout << "sig_tree_cmp_times: " << sig_tree_cmp_times << std::endl; 268 | std::cout << "sig_tree_mem_pages: " << allocator.records_.size() << std::endl; 269 | std::cout << "std_set_cmp_times : " << std_set_cmp_times << std::endl; 270 | 271 | { 272 | TIME_START; 273 | for (auto it = src.cbegin(); it != src.cend();) { 274 | std::array ss; 275 | ss[0] = reinterpret_cast(*it++); 276 | ss[1] = it != src.cend() ? reinterpret_cast(*it++) : ss[0]; 277 | ss[2] = it != src.cend() ? reinterpret_cast(*it++) : ss[1]; 278 | ss[3] = it != src.cend() ? reinterpret_cast(*it++) : ss[2]; 279 | ss[4] = it != src.cend() ? reinterpret_cast(*it++) : ss[3]; 280 | ss[5] = it != src.cend() ? reinterpret_cast(*it++) : ss[4]; 281 | ss[6] = it != src.cend() ? reinterpret_cast(*it++) : ss[5]; 282 | ss[7] = it != src.cend() ?
reinterpret_cast(*it++) : ss[6]; 283 | ss[8] = it != src.cend() ? reinterpret_cast(*it++) : ss[7]; 284 | ss[9] = it != src.cend() ? reinterpret_cast(*it++) : ss[8]; 285 | tree.MultiGetWithCallback<10>(ss.data()); 286 | } 287 | TIME_END; 288 | PRINT_TIME("SGT - MultiGetWithCallback<10>"); 289 | } 290 | { 291 | TIME_START; 292 | for (const auto & s:src) { 293 | tree.GetWithCallback(reinterpret_cast(s)); 294 | } 295 | TIME_END; 296 | PRINT_TIME("SGT - GetWithCallback"); 297 | } 298 | { 299 | TIME_START; 300 | tree.Compact(); 301 | TIME_END; 302 | PRINT_TIME("SGT - Compact"); 303 | } 304 | { 305 | Helper helper_rebuild; 306 | AllocatorImpl allocator_rebuild; 307 | SignatureTreeTpl tree_rebuild(&helper_rebuild, &allocator_rebuild); 308 | { 309 | TIME_START; 310 | tree.Rebuild(&tree_rebuild); 311 | TIME_END; 312 | PRINT_TIME("SGT - Rebuild"); 313 | } 314 | { 315 | TIME_START; 316 | tree_rebuild.VisitDel({}, [](auto) { 317 | return std::make_pair(true, true); 318 | }); 319 | TIME_END; 320 | PRINT_TIME("SGT - VisitDel"); 321 | assert(tree_rebuild.Size() == 0); 322 | } 323 | } 324 | { 325 | TIME_START; 326 | tree.Visit({}, [](auto) { return true; }); 327 | TIME_END; 328 | PRINT_TIME("SGT - Visit"); 329 | } 330 | { 331 | TIME_START; 332 | for (const auto & s:src) { 333 | tree.Del(reinterpret_cast(s)); 334 | } 335 | TIME_END; 336 | PRINT_TIME("SGT - Del"); 337 | } 338 | 339 | for (auto & s:src) { 340 | free(s); 341 | } 342 | } 343 | } -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace sgt { 4 | namespace sig_tree_test { 5 | void Run(); 6 | } 7 | namespace sig_tree_bench { 8 | void Run(); 9 | } 10 | } 11 | 12 | using namespace sgt; 13 | 14 | int main() { 15 | sig_tree_test::Run(); 16 | sig_tree_bench::Run(); 17 | std::cout << "Done."
<< std::endl; 18 | return 0; 19 | } -------------------------------------------------------------------------------- /src/allocator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_ALLOCATOR_H 3 | #define SIG_TREE_ALLOCATOR_H 4 | 5 | #include 6 | 7 | namespace sgt { 8 | class AllocatorFullException : public std::exception { 9 | public: 10 | const char * what() const noexcept override { 11 | return "no space for allocation"; 12 | } 13 | }; 14 | 15 | class Allocator { 16 | public: 17 | Allocator() = default; 18 | 19 | virtual ~Allocator() = default; 20 | 21 | public: 22 | virtual void * Base() = 0; 23 | 24 | // Throws AllocatorFullException if the allocation cannot be satisfied 25 | virtual size_t AllocatePage() = 0; 26 | 27 | virtual void FreePage(size_t offset) = 0; 28 | 29 | virtual void Grow() = 0; 30 | }; 31 | } 32 | 33 | #endif //SIG_TREE_ALLOCATOR_H 34 | -------------------------------------------------------------------------------- /src/autovector.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 | // This source code is licensed under both the GPLv2 (found in the 3 | // COPYING file in the root directory) and Apache 2.0 License 4 | // (found in the LICENSE.Apache file in the root directory). 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace rocksdb { 15 | 16 | // A vector that leverages pre-allocated stack-based array to achieve better 17 | // performance for array with small amount of items. 18 | // 19 | // The interface resembles that of vector, but with less features since we aim 20 | // to solve the problem that we have in hand, rather than implementing a 21 | // full-fledged generic container.
22 | // 23 | // Currently we don't support: 24 | // * reserve()/shrink_to_fit() 25 | // If used correctly, in most cases, people should not touch the 26 | // underlying vector at all. 27 | // * random insert()/erase(), please only use push_back()/pop_back(). 28 | // * No move/swap operations. Each autovector instance has a 29 | // stack-allocated array and if we want support move/swap operations, we 30 | // need to copy the arrays other than just swapping the pointers. In this 31 | // case we'll just explicitly forbid these operations since they may 32 | // lead users to make false assumption by thinking they are inexpensive 33 | // operations. 34 | // 35 | // Naming style of public methods almost follows that of the STL's. 36 | template 37 | class autovector { 38 | public: 39 | // General STL-style container member types. 40 | typedef T value_type; 41 | typedef typename std::vector::difference_type difference_type; 42 | typedef typename std::vector::size_type size_type; 43 | typedef value_type & reference; 44 | typedef const value_type & const_reference; 45 | typedef value_type * pointer; 46 | typedef const value_type * const_pointer; 47 | 48 | // This class is the base for regular/const iterator 49 | template 50 | class iterator_impl { 51 | public: 52 | // -- iterator traits 53 | typedef iterator_impl self_type; 54 | typedef TValueType value_type; 55 | typedef TValueType & reference; 56 | typedef TValueType * pointer; 57 | typedef typename TAutoVector::difference_type difference_type; 58 | typedef std::random_access_iterator_tag iterator_category; 59 | 60 | iterator_impl(TAutoVector * vect, size_t index) 61 | : vect_(vect), index_(index) {}; 62 | 63 | iterator_impl(const iterator_impl &) = default; 64 | 65 | ~iterator_impl() {} 66 | 67 | iterator_impl & operator=(const iterator_impl &) = default; 68 | 69 | // -- Advancement 70 | // ++iterator 71 | self_type & operator++() { 72 | ++index_; 73 | return *this; 74 | } 75 | 76 | // iterator++ 77 | self_type
operator++(int) { 78 | auto old = *this; 79 | ++index_; 80 | return old; 81 | } 82 | 83 | // --iterator 84 | self_type & operator--() { 85 | --index_; 86 | return *this; 87 | } 88 | 89 | // iterator-- 90 | self_type operator--(int) { 91 | auto old = *this; 92 | --index_; 93 | return old; 94 | } 95 | 96 | self_type operator-(difference_type len) const { 97 | return self_type(vect_, index_ - len); 98 | } 99 | 100 | difference_type operator-(const self_type & other) const { 101 | assert(vect_ == other.vect_); 102 | return index_ - other.index_; 103 | } 104 | 105 | self_type operator+(difference_type len) const { 106 | return self_type(vect_, index_ + len); 107 | } 108 | 109 | self_type & operator+=(difference_type len) { 110 | index_ += len; 111 | return *this; 112 | } 113 | 114 | self_type & operator-=(difference_type len) { 115 | index_ -= len; 116 | return *this; 117 | } 118 | 119 | // -- Reference 120 | reference operator*() const { 121 | assert(vect_->size() >= index_); 122 | return (*vect_)[index_]; 123 | } 124 | 125 | pointer operator->() const { 126 | assert(vect_->size() >= index_); 127 | return &(*vect_)[index_]; 128 | } 129 | 130 | reference operator[](difference_type len) const { 131 | return *(*this + len); 132 | } 133 | 134 | // -- Logical Operators 135 | bool operator==(const self_type & other) const { 136 | assert(vect_ == other.vect_); 137 | return index_ == other.index_; 138 | } 139 | 140 | bool operator!=(const self_type & other) const { return !(*this == other); } 141 | 142 | bool operator>(const self_type & other) const { 143 | assert(vect_ == other.vect_); 144 | return index_ > other.index_; 145 | } 146 | 147 | bool operator<(const self_type & other) const { 148 | assert(vect_ == other.vect_); 149 | return index_ < other.index_; 150 | } 151 | 152 | bool operator>=(const self_type & other) const { 153 | assert(vect_ == other.vect_); 154 | return index_ >= other.index_; 155 | } 156 | 157 | bool operator<=(const self_type & other) const { 158 |
assert(vect_ == other.vect_); 159 | return index_ <= other.index_; 160 | } 161 | 162 | private: 163 | TAutoVector * vect_ = nullptr; 164 | size_t index_ = 0; 165 | }; 166 | 167 | typedef iterator_impl iterator; 168 | typedef iterator_impl const_iterator; 169 | typedef std::reverse_iterator reverse_iterator; 170 | typedef std::reverse_iterator const_reverse_iterator; 171 | 172 | autovector() : values_(reinterpret_cast(buf_)) {} 173 | 174 | autovector(std::initializer_list init_list) 175 | : values_(reinterpret_cast(buf_)) { 176 | for (const T & item : init_list) { 177 | push_back(item); 178 | } 179 | } 180 | 181 | ~autovector() { clear(); } 182 | 183 | // -- Immutable operations 184 | // Indicate if all data resides in in-stack data structure. 185 | bool only_in_stack() const { 186 | // If no element was inserted at all, the vector's capacity will be `0`. 187 | return vect_.capacity() == 0; 188 | } 189 | 190 | size_type size() const { return num_stack_items_ + vect_.size(); } 191 | 192 | // resize does not guarantee anything about the contents of the newly 193 | // available elements 194 | void resize(size_type n) { 195 | if (n > kSize) { 196 | vect_.resize(n - kSize); 197 | while (num_stack_items_ < kSize) { 198 | new((void *) (&values_[num_stack_items_++])) value_type(); 199 | } 200 | num_stack_items_ = kSize; 201 | } else { 202 | vect_.clear(); 203 | while (num_stack_items_ < n) { 204 | new((void *) (&values_[num_stack_items_++])) value_type(); 205 | } 206 | while (num_stack_items_ > n) { 207 | values_[--num_stack_items_].~value_type(); 208 | } 209 | } 210 | } 211 | 212 | bool empty() const { return size() == 0; } 213 | 214 | const_reference operator[](size_type n) const { 215 | assert(n < size()); 216 | if (n < kSize) { 217 | return values_[n]; 218 | } 219 | return vect_[n - kSize]; 220 | } 221 | 222 | reference operator[](size_type n) { 223 | assert(n < size()); 224 | if (n < kSize) { 225 | return values_[n]; 226 | } 227 | return vect_[n - kSize]; 228 | } 229 |
230 | const_reference at(size_type n) const { 231 | assert(n < size()); 232 | return (*this)[n]; 233 | } 234 | 235 | reference at(size_type n) { 236 | assert(n < size()); 237 | return (*this)[n]; 238 | } 239 | 240 | reference front() { 241 | assert(!empty()); 242 | return *begin(); 243 | } 244 | 245 | const_reference front() const { 246 | assert(!empty()); 247 | return *begin(); 248 | } 249 | 250 | reference back() { 251 | assert(!empty()); 252 | return *(end() - 1); 253 | } 254 | 255 | const_reference back() const { 256 | assert(!empty()); 257 | return *(end() - 1); 258 | } 259 | 260 | // -- Mutable Operations 261 | void push_back(T && item) { 262 | if (num_stack_items_ < kSize) { 263 | new((void *) (&values_[num_stack_items_])) value_type(); 264 | values_[num_stack_items_++] = std::move(item); 265 | } else { 266 | vect_.push_back(std::move(item)); // item is a named rvalue reference (an lvalue): move it into the overflow vector instead of copying 267 | } 268 | } 269 | 270 | void push_back(const T & item) { 271 | if (num_stack_items_ < kSize) { 272 | new((void *) (&values_[num_stack_items_])) value_type(); 273 | values_[num_stack_items_++] = item; 274 | } else { 275 | vect_.push_back(item); 276 | } 277 | } 278 | 279 | template 280 | void emplace_back(Args && ...
args) { 281 | if (num_stack_items_ < kSize) { 282 | new((void *) (&values_[num_stack_items_++])) 283 | value_type(std::forward(args)...); 284 | } else { 285 | vect_.emplace_back(std::forward(args)...); 286 | } 287 | } 288 | 289 | void pop_back() { 290 | assert(!empty()); 291 | if (!vect_.empty()) { 292 | vect_.pop_back(); 293 | } else { 294 | values_[--num_stack_items_].~value_type(); 295 | } 296 | } 297 | 298 | void clear() { 299 | while (num_stack_items_ > 0) { 300 | values_[--num_stack_items_].~value_type(); 301 | } 302 | vect_.clear(); 303 | } 304 | 305 | // -- Copy and Assignment 306 | autovector & assign(const autovector & other); 307 | 308 | autovector(const autovector & other) { assign(other); } 309 | 310 | autovector & operator=(const autovector & other) { return assign(other); } 311 | 312 | // -- Iterator Operations 313 | iterator begin() { return iterator(this, 0); } 314 | 315 | const_iterator begin() const { return const_iterator(this, 0); } 316 | 317 | iterator end() { return iterator(this, this->size()); } 318 | 319 | const_iterator end() const { return const_iterator(this, this->size()); } 320 | 321 | reverse_iterator rbegin() { return reverse_iterator(end()); } 322 | 323 | const_reverse_iterator rbegin() const { 324 | return const_reverse_iterator(end()); 325 | } 326 | 327 | reverse_iterator rend() { return reverse_iterator(begin()); } 328 | 329 | const_reverse_iterator rend() const { 330 | return const_reverse_iterator(begin()); 331 | } 332 | 333 | private: 334 | size_type num_stack_items_ = 0; // current number of items 335 | 336 | alignas(alignof(value_type)) 337 | char buf_[kSize * sizeof(value_type)]; // the first `kSize` items 338 | 339 | pointer values_; 340 | // used only if there are more than `kSize` items.
341 | std::vector vect_; 342 | }; 343 | 344 | template 345 | autovector & autovector::assign(const autovector & other) { 346 | values_ = reinterpret_cast(buf_); 347 | // copy the internal vector 348 | vect_.assign(other.vect_.begin(), other.vect_.end()); 349 | 350 | // copy array 351 | num_stack_items_ = other.num_stack_items_; 352 | std::copy(other.values_, other.values_ + num_stack_items_, values_); 353 | 354 | return *this; 355 | } 356 | 357 | } // namespace rocksdb -------------------------------------------------------------------------------- /src/coding.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_CODING_H 3 | #define SIG_TREE_CODING_H 4 | 5 | #include 6 | 7 | namespace sgt { 8 | static_assert(sizeof(uint8_t) == sizeof(char)); 9 | 10 | inline char Uint8ToChar(uint8_t i) { 11 | return (char) i; 12 | } 13 | 14 | inline uint8_t CharToUint8(char c) { 15 | return (uint8_t) c; 16 | } 17 | } 18 | 19 | #endif //SIG_TREE_CODING_H 20 | -------------------------------------------------------------------------------- /src/kv_trans_trait.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_KV_TRANS_TRAIT_H 3 | #define SIG_TREE_KV_TRANS_TRAIT_H 4 | 5 | #include 6 | 7 | #include "slice.h" 8 | 9 | #define SGT_CHECK_EXPR(Name, Expr) \ 10 | template \ 11 | struct Name { \ 12 | private: \ 13 | template \ 14 | inline static constexpr std::false_type Check(...); \ 15 | \ 16 | template \ 17 | inline static constexpr decltype((Expr), std::true_type()) Check(int); \ 18 | \ 19 | public: \ 20 | enum { \ 21 | value = decltype(Name::Check(int()))::value \ 22 | }; \ 23 | } 24 | 25 | namespace sgt { 26 | template 27 | struct is_kv_trans { 28 | enum { 29 | value = 30 | std::is_same() == Slice()), bool>::value && 31 | std::is_same().Key()), Slice>::value && 32 | std::is_same().Get(Slice(), (std::string *) {})), bool>::value 33 | }; 34 | }; 35 |
36 | SGT_CHECK_EXPR(has_pack, T::Pack); 37 | 38 | SGT_CHECK_EXPR(has_unpack, T::Unpack); 39 | 40 | SGT_CHECK_EXPR(has_is_packed, T::IsPacked); 41 | 42 | SGT_CHECK_EXPR(has_base, T::Base); 43 | } 44 | 45 | #endif //SIG_TREE_KV_TRANS_TRAIT_H 46 | -------------------------------------------------------------------------------- /src/likely.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_LIKELY_H 3 | #define SIG_TREE_LIKELY_H 4 | 5 | #define SGT_LIKELY(x) (__builtin_expect((x), 1)) 6 | #define SGT_UNLIKELY(x) (__builtin_expect((x), 0)) 7 | 8 | #endif //SIG_TREE_LIKELY_H 9 | -------------------------------------------------------------------------------- /src/page_size.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_PAGE_SIZE_H 3 | #define SIG_TREE_PAGE_SIZE_H 4 | 5 | namespace sgt { 6 | constexpr unsigned int kPageSize = 4096; // 4KB 7 | } 8 | 9 | #endif //SIG_TREE_PAGE_SIZE_H 10 | -------------------------------------------------------------------------------- /src/sig_tree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_H 3 | #define SIG_TREE_SIG_TREE_H 4 | 5 | /* 6 | * Signature tree (SGT) 7 | * Author: Lin Cheng (林诚) 8 | * License: AGPL 9 | * Completed: spring 2018 10 | * 11 | * SGT cannot distinguish "abc\0\0" from "abc\0" 12 | * Workaround: use C-style strings, etc. 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "allocator.h" 21 | #include "kv_trans_trait.h" 22 | #include "likely.h" 23 | #include "page_size.h" 24 | 25 | namespace sgt { 26 | template< 27 | typename KV_TRANS, // KV_REP => K, V 28 | typename K_DIFF = uint16_t, 29 | typename KV_REP = uint64_t> 30 | class SignatureTreeTpl { 31 | static_assert(is_kv_trans::value); 32 | static_assert(!std::numeric_limits::is_signed); 33 | 34 | public: 35 | class Helper { 36 | public: 37 | Helper() = default; 38 | 39 | virtual ~Helper() =
default; 40 | 41 | public: 42 | virtual KV_REP Add(const Slice & k, const Slice & v) = 0; 43 | 44 | virtual void Del(KV_TRANS & trans) = 0; 45 | 46 | virtual KV_REP Pack(size_t offset) const = 0; 47 | 48 | virtual size_t Unpack(const KV_REP & rep) const = 0; 49 | 50 | virtual bool IsPacked(const KV_REP & rep) const = 0; 51 | 52 | virtual KV_TRANS Trans(const KV_REP & rep) const = 0; 53 | }; 54 | 55 | protected: 56 | Helper * const helper_; 57 | Allocator * const allocator_; 58 | void * base_; 59 | const size_t kRootOffset; 60 | 61 | public: 62 | SignatureTreeTpl(Helper * helper, Allocator * allocator); 63 | 64 | SignatureTreeTpl(Helper * helper, Allocator * allocator, size_t root_offset) 65 | : helper_(helper), 66 | allocator_(allocator), 67 | base_(allocator->Base()), 68 | kRootOffset(root_offset) {} 69 | 70 | SignatureTreeTpl(const SignatureTreeTpl &) = delete; 71 | 72 | SignatureTreeTpl & operator=(const SignatureTreeTpl &) = delete; 73 | 74 | public: 75 | bool Get(const Slice & k, std::string * v) const; 76 | 77 | // auto(* callback)(KV_REP * rep) 78 | template 79 | auto GetWithCallback(const Slice & k, 80 | CALLBACK && callback = {} /* [](KV_REP * rep) { return rep; } */); 81 | 82 | // auto(* callback)(std::array & reps) 83 | template 84 | auto MultiGetWithCallback(const Slice * ks, 85 | CALLBACK && callback = {} /* [](std::array & reps) { return reps; } */); 86 | 87 | size_t Size() const; 88 | 89 | size_t RootOffset() const { return kRootOffset; } 90 | 91 | // bool(* visitor)(const KV_REP & rep) 92 | template 93 | void Visit(const Slice & target, VISITOR && visitor, E && expected = {}) const { 94 | VisitGenericImpl(this, target, 95 | std::forward(visitor), 96 | std::forward(expected)); 97 | } 98 | 99 | // std::pair(* visitor)(KV_REP & rep) 100 | template 101 | void VisitDel(const Slice & target, VISITOR && visitor, E && expected = {}) { 102 | VisitGenericImpl(this, target, 103 | std::forward(visitor), 104 | std::forward(expected)); 105 | } 106 | 107 | // 
bool(* if_dup_callback)(KV_TRANS & trans, KV_REP & rep) 108 | template 109 | bool Add(const Slice & k, V && v, 110 | IF_DUP_CALLBACK && if_dup_callback = {}); 111 | 112 | bool Del(const Slice & k); 113 | 114 | void Compact(); 115 | 116 | void Rebuild(SignatureTreeTpl * dst) const; 117 | 118 | protected: 119 | enum { 120 | kPyramidBrickLength = 8 121 | }; 122 | 123 | inline static constexpr size_t PyramidBrickNum(size_t rank) { 124 | size_t num = 0; 125 | do { 126 | rank = (rank + kPyramidBrickLength - 1) / kPyramidBrickLength; 127 | num += rank; 128 | } while (rank > 1); 129 | return num; 130 | } 131 | 132 | inline static constexpr size_t CalcPyramidHeight(size_t rank) { 133 | size_t height = 0; 134 | do { 135 | rank = (rank + kPyramidBrickLength - 1) / kPyramidBrickLength; 136 | ++height; 137 | } while (rank > 1); 138 | return height; 139 | } 140 | 141 | inline static constexpr size_t PyramidHeight(size_t rank) { 142 | if constexpr (kPyramidBrickLength != 8) { 143 | return CalcPyramidHeight(rank); 144 | } else { 145 | if (SGT_UNLIKELY(rank <= 8)) { 146 | return 1; 147 | } else if (SGT_UNLIKELY(rank <= 64)) { 148 | return 2; 149 | } else { 150 | return 3; 151 | } 152 | } 153 | } 154 | 155 | template 156 | struct NodeTpl { 157 | struct Pyramid { 158 | enum { 159 | kBrickNum = PyramidBrickNum(RANK), 160 | kHeight = PyramidHeight(RANK) 161 | }; 162 | 163 | std::array vals_; 164 | std::array idxes_; 165 | 166 | static constexpr auto kAbsOffsets = []() { 167 | std::array arr{}; 168 | size_t i = 0; 169 | size_t offset = 0; 170 | size_t rank = RANK; 171 | do { 172 | arr[i++] = offset; 173 | rank = (rank + kPyramidBrickLength - 1) / kPyramidBrickLength; 174 | offset += rank; 175 | } while (rank > 1); 176 | return arr; 177 | }(); 178 | 179 | void Build(const K_DIFF * from, const K_DIFF * to, size_t rebuild_idx); 180 | 181 | size_t MinAt(const K_DIFF * from, const K_DIFF * to, 182 | K_DIFF * min_val = nullptr) const; 183 | 184 | size_t TrimLeft(const K_DIFF * cbegin, const 
K_DIFF * from, const K_DIFF * to, 185 | K_DIFF * min_val = nullptr); 186 | 187 | size_t TrimRight(const K_DIFF * cbegin, const K_DIFF * from, const K_DIFF * to, 188 | K_DIFF * min_val = nullptr); 189 | 190 | size_t CalcOffset(size_t level, size_t index, K_DIFF * min_val) const; 191 | }; 192 | 193 | union CacheEntry { 194 | std::array as_uint8_array; 195 | uint16_t as_uint16; 196 | }; 197 | typedef std::array Cache; 198 | 199 | std::array reps_; 200 | std::array diffs_; 201 | uint32_t size_ = 0; 202 | #ifndef SGT_NO_DENSE_INPUT_CACHE 203 | Cache cache_; 204 | #endif 205 | Pyramid pyramid_; 206 | }; 207 | 208 | template 209 | struct NodeRank { 210 | enum { 211 | kSize = sizeof(NodeTpl), 212 | value = kSize > kPageSize 213 | ? static_cast(NodeRank::value) 214 | : static_cast(NodeRank kPageSize)>::value) 215 | }; 216 | }; 217 | 218 | template 219 | struct NodeRank { 220 | enum { 221 | kSize = sizeof(NodeTpl), 222 | value = RANK 223 | }; 224 | }; 225 | 226 | typedef NodeTpl::value> Node; 227 | static_assert(std::is_standard_layout::value && 228 | std::is_trivially_copyable::value); 229 | 230 | struct Page { 231 | std::vector diffs; 232 | std::vector reps; 233 | }; 234 | 235 | protected: 236 | Node * OffsetToMemNode(size_t offset) const { 237 | return reinterpret_cast(reinterpret_cast(Base()) + offset); 238 | } 239 | 240 | static std::tuple 241 | FindBestMatch(const Node * node, const Slice & k); 242 | 243 | static std::tuple 244 | FindBestMatchImpl(const Node * node, const Slice & k); 245 | 246 | bool CombatInsert(const Slice & opponent, const Slice & k, KV_REP v, 247 | Node * hint, size_t hint_idx, bool hint_direct); 248 | 249 | void NodeSplit(Node * parent); 250 | 251 | void NodeMerge(Node * parent, size_t idx, bool direct, size_t parent_size, 252 | Node * child, size_t child_size); 253 | 254 | void NodeCompact(Node * node); 255 | 256 | Page RebuildHeadNode(const Node * node, SignatureTreeTpl * dst, 257 | std::vector * pool) const; 258 | 259 | Page 
RebuildInternalNode(const Node * node, 260 | const K_DIFF * cbegin, const K_DIFF * cend, const K_DIFF * min_it, 261 | typename Node::Pyramid & pyramid, bool direct, SignatureTreeTpl * dst, 262 | std::vector * pool) const; 263 | 264 | static Page RebuildLRPagesToTree(Page && l, Page && r, K_DIFF diff, SignatureTreeTpl * dst, 265 | std::vector * pool); 266 | 267 | static size_t RebuildPageToTree(const Page & page, SignatureTreeTpl * dst); 268 | 269 | static void RebuildPageToNode(const Page & page, Node * node); 270 | 271 | static void NodeInsert(Node * node, size_t insert_idx, bool insert_direct, 272 | bool direct, K_DIFF diff, const KV_REP & rep, size_t size); 273 | 274 | static void NodeRemove(Node * node, size_t idx, bool direct, size_t size); 275 | 276 | static void NodeBuild(Node * node, size_t rebuild_idx = 0); 277 | 278 | static size_t NodeSize(const Node * node); 279 | 280 | static bool IsNodeFull(const Node * node); 281 | 282 | static K_DIFF PackDiffAtAndShift(K_DIFF diff_at, uint8_t shift) { 283 | return (diff_at << 3) | (7 - shift); 284 | } 285 | 286 | static std::pair 287 | UnpackDiffAtAndShift(K_DIFF packed_diff) { 288 | return {packed_diff >> 3, (~packed_diff) & 0b111 /* 7 - (packed_diff & 0b111) */}; 289 | } 290 | 291 | template 292 | static void VisitGenericImpl(T self, const Slice & target, VISITOR && visitor, E && expected); 293 | 294 | protected: 295 | inline KV_REP Pack(size_t offset) const { 296 | if constexpr (has_pack::value) { 297 | return KV_TRANS::Pack(offset); 298 | } else { 299 | return helper_->Pack(offset); 300 | } 301 | } 302 | 303 | inline size_t Unpack(const KV_REP & rep) const { 304 | if constexpr (has_unpack::value) { 305 | return KV_TRANS::Unpack(rep); 306 | } else { 307 | return helper_->Unpack(rep); 308 | } 309 | } 310 | 311 | inline bool IsPacked(const KV_REP & rep) const { 312 | if constexpr (has_is_packed::value) { 313 | return KV_TRANS::IsPacked(rep); 314 | } else { 315 | return helper_->IsPacked(rep); 316 | } 317 | } 318 | 
319 | inline void * Base() const { 320 | if constexpr (has_base::value) { 321 | return KV_TRANS::Base(); 322 | } else { 323 | return base_; 324 | } 325 | } 326 | 327 | public: 328 | enum { 329 | kNodeRank = NodeRank<>::value, 330 | kNodeRepRank = kNodeRank + 1, 331 | kForward = false, 332 | kBackward = true, 333 | kMajorVersion = 1, 334 | kMinorVersion = 20, 335 | kMaxKeyLength = std::numeric_limits::max() >> 3 336 | }; 337 | 338 | static_assert(PyramidHeight(kNodeRank) == CalcPyramidHeight(kNodeRank)); 339 | }; 340 | } 341 | 342 | #endif //SIG_TREE_SIG_TREE_H 343 | -------------------------------------------------------------------------------- /src/sig_tree_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_IMPL_H 3 | #define SIG_TREE_SIG_TREE_IMPL_H 4 | 5 | #ifndef SGT_NO_MM_PREFETCH 6 | #include 7 | #endif 8 | 9 | #include 10 | 11 | #include "coding.h" 12 | #include "likely.h" 13 | #include "sig_tree.h" 14 | 15 | namespace sgt { 16 | template 17 | inline const T * SmartMinElem8(const T * from, const T * to, T * min_val); 18 | 19 | template 20 | SignatureTreeTpl:: 21 | SignatureTreeTpl(Helper * helper, Allocator * allocator) 22 | : SignatureTreeTpl(helper, allocator, allocator->AllocatePage()) { 23 | new(OffsetToMemNode(kRootOffset)) Node(); 24 | } 25 | 26 | template 27 | bool SignatureTreeTpl:: 28 | Get(const Slice & k, std::string * v) const { 29 | const Node * cursor = OffsetToMemNode(kRootOffset); 30 | if (SGT_UNLIKELY(NodeSize(cursor) == 0)) { 31 | return false; 32 | } 33 | 34 | while (true) { 35 | auto[idx, direct, _] = FindBestMatch(cursor, k); 36 | const auto & rep = cursor->reps_[idx + direct]; 37 | if (IsPacked(rep)) { 38 | cursor = OffsetToMemNode(Unpack(rep)); 39 | } else { 40 | const auto & trans = helper_->Trans(rep); 41 | return trans.Get(k, v); 42 | } 43 | } 44 | } 45 | 46 | template 47 | template 48 | auto SignatureTreeTpl:: 49 | GetWithCallback(const Slice & k, 
50 | CALLBACK && callback) { 51 | Node * cursor = OffsetToMemNode(kRootOffset); 52 | if (SGT_UNLIKELY(NodeSize(cursor) == 0)) { 53 | if constexpr (std::is_same::value) { 54 | return static_cast(nullptr); 55 | } else { 56 | return callback(static_cast(nullptr)); 57 | } 58 | } 59 | 60 | while (true) { 61 | auto[idx, direct, _] = FindBestMatch(cursor, k); 62 | auto & r = cursor->reps_[idx + direct]; 63 | if (IsPacked(r)) { 64 | cursor = OffsetToMemNode(Unpack(r)); 65 | } else { 66 | if constexpr (std::is_same::value) { 67 | return &r; 68 | } else { 69 | return callback(&r); 70 | } 71 | } 72 | } 73 | } 74 | 75 | template 76 | size_t SignatureTreeTpl:: 77 | Size() const { 78 | auto SizeSub = [this](size_t offset, auto && SizeSub) -> size_t { 79 | size_t cnt = 0; 80 | const Node * cursor = OffsetToMemNode(offset); 81 | for (size_t i = 0; i < NodeSize(cursor); ++i) { 82 | const auto & rep = cursor->reps_[i]; 83 | if (IsPacked(rep)) { 84 | cnt += SizeSub(Unpack(rep), SizeSub); 85 | } else { 86 | ++cnt; 87 | } 88 | } 89 | return cnt; 90 | }; 91 | return SizeSub(kRootOffset, SizeSub); 92 | } 93 | 94 | template 95 | template 96 | bool SignatureTreeTpl:: 97 | Add(const Slice & k, V && v, 98 | IF_DUP_CALLBACK && if_dup_callback) { 99 | assert(k.size() < kMaxKeyLength); 100 | Node * cursor = OffsetToMemNode(kRootOffset); 101 | if (SGT_UNLIKELY(NodeSize(cursor) == 0)) { 102 | if constexpr (std::is_convertible::value) { 103 | cursor->reps_[0] = v; 104 | } else { 105 | cursor->reps_[0] = helper_->Add(k, std::forward(v)); 106 | } 107 | cursor->size_ = 1; 108 | return true; 109 | } 110 | 111 | while (true) { 112 | auto[idx, direct, _] = FindBestMatch(cursor, k); 113 | auto & rep = cursor->reps_[idx + direct]; 114 | if (IsPacked(rep)) { 115 | cursor = OffsetToMemNode(Unpack(rep)); 116 | } else { 117 | auto && trans = helper_->Trans(rep); 118 | if (trans == k) { 119 | if constexpr (!std::is_same::value) { 120 | return if_dup_callback(trans, rep); 121 | } else { // cannot overwrite by 
default 122 | return false; 123 | } 124 | } else { // insert 125 | if constexpr (std::is_convertible::value) { 126 | return CombatInsert(trans.Key(), k, v, 127 | cursor, idx, direct); 128 | } else { 129 | return CombatInsert(trans.Key(), k, helper_->Add(k, std::forward(v)), 130 | cursor, idx, direct); 131 | } 132 | } 133 | } 134 | } 135 | } 136 | 137 | template 138 | bool SignatureTreeTpl:: 139 | Del(const Slice & k) { 140 | Node * cursor = OffsetToMemNode(kRootOffset); 141 | if (SGT_UNLIKELY(NodeSize(cursor) == 0)) { 142 | return false; 143 | } 144 | 145 | Node * parent = nullptr; 146 | size_t parent_idx{}; 147 | bool parent_direct{}; 148 | size_t parent_size{}; 149 | 150 | while (true) { 151 | auto[idx, direct, size] = FindBestMatch(cursor, k); 152 | const auto & rep = cursor->reps_[idx + direct]; 153 | if (IsPacked(rep)) { 154 | parent = cursor; 155 | parent_idx = idx; 156 | parent_direct = direct; 157 | parent_size = size; 158 | cursor = OffsetToMemNode(Unpack(rep)); 159 | } else { 160 | auto && trans = helper_->Trans(rep); 161 | if (trans == k) { 162 | helper_->Del(trans); 163 | NodeRemove(cursor, idx, direct, size--); 164 | if (parent != nullptr && parent->reps_.size() - parent_size + 1 >= size) { 165 | NodeMerge(parent, parent_idx, parent_direct, parent_size, 166 | cursor, size); 167 | } else if (KV_REP r; 168 | size == 1 && (r = cursor->reps_[0], IsPacked(r))) { 169 | assert(parent == nullptr); 170 | Node * child = OffsetToMemNode(Unpack(r)); 171 | NodeMerge(cursor, 0, false, 1, 172 | child, NodeSize(child)); 173 | } 174 | return true; 175 | } else { 176 | return false; 177 | } 178 | } 179 | } 180 | } 181 | 182 | template 183 | void SignatureTreeTpl:: 184 | Compact() { 185 | NodeCompact(OffsetToMemNode(kRootOffset)); 186 | } 187 | 188 | template 189 | std::tuple 190 | SignatureTreeTpl:: 191 | FindBestMatch(const Node * node, const Slice & k) { 192 | #ifndef SGT_NO_MM_PREFETCH 193 | _mm_prefetch(&node->size_, _MM_HINT_T0); 194 | auto p = 
reinterpret_cast(&node->diffs_); 195 | p -= reinterpret_cast(p) % 64; 196 | _mm_prefetch(p + 64 * 0, _MM_HINT_T2); 197 | _mm_prefetch(p + 64 * 1, _MM_HINT_T2); 198 | _mm_prefetch(p + 64 * 2, _MM_HINT_T2); 199 | _mm_prefetch(p + 64 * 3, _MM_HINT_T2); 200 | _mm_prefetch(p + 64 * 4, _MM_HINT_T2); 201 | #endif 202 | return FindBestMatchImpl(node, k); 203 | } 204 | 205 | template 206 | std::tuple 207 | SignatureTreeTpl:: 208 | FindBestMatchImpl(const Node * node, const Slice & k) { 209 | size_t size = NodeSize(node); 210 | if (SGT_UNLIKELY(size <= 1)) { 211 | return {0, false, size}; 212 | } 213 | 214 | #ifndef SGT_NO_DENSE_INPUT_CACHE 215 | uint8_t diff_a; 216 | uint8_t diff_b; 217 | unsigned int diff_m; 218 | unsigned int diff_n; 219 | typename Node::Pyramid pyramid; 220 | const K_DIFF * base; 221 | K_DIFF base_val; 222 | 223 | const K_DIFF * cbegin = node->diffs_.cbegin(); 224 | const K_DIFF * cend = &node->diffs_[size - 1]; 225 | 226 | K_DIFF min_val; 227 | const K_DIFF * min_it = cbegin + node->pyramid_.MinAt(cbegin, cend, &min_val); 228 | auto[diff_at, shift] = UnpackDiffAtAndShift(min_val); 229 | 230 | uint8_t crit_byte = k.size() > diff_at 231 | ? CharToUint8(k[diff_at]) 232 | : static_cast(0); 233 | unsigned int pos = (crit_byte & ((1 << (shift + 1)) - 1)); 234 | if (shift < 3) { 235 | ++diff_at; 236 | uint8_t remaining = 3 - shift; 237 | pos <<= remaining; 238 | pos |= ((k.size() > diff_at 239 | ? 
CharToUint8(k[diff_at]) 240 | : static_cast(0)) >> (8 - remaining)); 241 | } else if (shift > 3) { 242 | pos >>= (shift - 3); 243 | } 244 | 245 | auto direct = (pos >> 3); 246 | auto & entry = const_cast(node->cache_)[pos]; 247 | #define entry_as_ar entry.as_uint8_array 248 | #define entry_as_ui entry.as_uint16 249 | 250 | if (entry_as_ui > 1) { 251 | const K_DIFF * cb; 252 | const K_DIFF * ce; 253 | if (!direct) { // left 254 | ce = min_it - entry_as_ar[0]; 255 | cb = ce - entry_as_ar[1]; 256 | } else { // right 257 | cb = min_it + entry_as_ar[0]; 258 | ce = cb + entry_as_ar[1]; 259 | } 260 | 261 | if (entry_as_ar[1] == 9) { 262 | const auto it = &cb[8]; 263 | const auto val = cb[8]; 264 | min_it = SmartMinElem8(cb, it, &min_val); 265 | if (min_val > val) { 266 | min_val = val; 267 | min_it = it; 268 | } 269 | } else if (entry_as_ar[1] <= 8) { 270 | min_it = SmartMinElem8(cb, ce, &min_val); 271 | } else { 272 | pyramid = node->pyramid_; 273 | if (cb != cbegin) { 274 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cb, ce, &min_val); 275 | } 276 | if (ce != cend) { 277 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cb, ce, &min_val); 278 | } 279 | } 280 | 281 | cbegin = cb; 282 | cend = ce; 283 | } else if (auto it = min_it + direct; 284 | it == cbegin || // !direct && min_it - direct == cbegin 285 | it == cend) { // direct && min_it + direct == cend 286 | return {min_it - node->diffs_.cbegin(), direct, size}; 287 | } else { 288 | pyramid = node->pyramid_; 289 | 290 | if (entry_as_ui != 0) { 291 | goto search_skip; 292 | } 293 | diff_a = 1; 294 | diff_b = 0; 295 | base = min_it; 296 | base_val = min_val; 297 | if (!direct) { 298 | goto build_cache_left; 299 | } else { 300 | goto build_cache_right; 301 | } 302 | } 303 | 304 | while (true) { 305 | search: 306 | assert(min_it == std::min_element(cbegin, cend) && *min_it == min_val); 307 | std::tie(diff_at, shift) = UnpackDiffAtAndShift(min_val); 308 | 309 | // 
left or right? 310 | crit_byte = k.size() > diff_at 311 | ? CharToUint8(k[diff_at]) 312 | : static_cast(0); 313 | direct = ((crit_byte >> shift) & 1); 314 | search_skip: 315 | if (!direct) { // go left 316 | cend = min_it; 317 | if (cbegin == cend) { 318 | return {min_it - node->diffs_.cbegin(), direct, size}; 319 | } 320 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cbegin, cend, &min_val); 321 | } else { // go right 322 | cbegin = min_it + 1; 323 | if (cbegin == cend) { 324 | return {min_it - node->diffs_.cbegin(), direct, size}; 325 | } 326 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cbegin, cend, &min_val); 327 | } 328 | } 329 | 330 | while (true) { 331 | assert(min_it == std::min_element(cbegin, cend) && *min_it == min_val); 332 | std::tie(diff_at, shift) = UnpackDiffAtAndShift(min_val); 333 | 334 | // left or right? 335 | crit_byte = k.size() > diff_at 336 | ? CharToUint8(k[diff_at]) 337 | : static_cast(0); 338 | direct = ((crit_byte >> shift) & 1); 339 | if (!direct) { // go left 340 | build_cache_left: 341 | cend = min_it; 342 | if (cbegin == cend) { 343 | if ((diff_m = static_cast(base - (cend + 1) /* can be negative */)) <= UINT8_MAX) { 344 | diff_a = diff_m; 345 | diff_b = 1; 346 | } 347 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 348 | return {min_it - node->diffs_.cbegin(), direct, size}; 349 | } 350 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cbegin, cend, &min_val); 351 | } else { // go right 352 | cbegin = min_it + 1; 353 | if (cbegin == cend) { 354 | if ((diff_m = static_cast(base - cend)) <= UINT8_MAX) { 355 | diff_a = diff_m; 356 | diff_b = 1; 357 | } 358 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 359 | return {min_it - node->diffs_.cbegin(), direct, size}; 360 | } 361 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cbegin, cend, &min_val); 362 | } 363 | 364 | if ((diff_m = 
static_cast(base - cend)) <= UINT8_MAX && 365 | (diff_n = cend - cbegin) <= UINT8_MAX) { 366 | diff_a = diff_m; 367 | diff_b = diff_n; 368 | } 369 | if (min_val - base_val >= 4) { 370 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 371 | goto search; 372 | } 373 | } 374 | 375 | while (true) { 376 | assert(min_it == std::min_element(cbegin, cend) && *min_it == min_val); 377 | std::tie(diff_at, shift) = UnpackDiffAtAndShift(min_val); 378 | 379 | // left or right? 380 | crit_byte = k.size() > diff_at 381 | ? CharToUint8(k[diff_at]) 382 | : static_cast(0); 383 | direct = ((crit_byte >> shift) & 1); 384 | if (!direct) { // go left 385 | cend = min_it; 386 | if (cbegin == cend) { 387 | if ((diff_m = static_cast(cbegin - base)) <= UINT8_MAX) { 388 | diff_a = diff_m; 389 | diff_b = 1; 390 | } 391 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 392 | return {min_it - node->diffs_.cbegin(), direct, size}; 393 | } 394 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cbegin, cend, &min_val); 395 | } else { // go right 396 | build_cache_right: 397 | cbegin = min_it + 1; 398 | if (cbegin == cend) { 399 | if ((diff_m = static_cast(min_it /* cbegin - 1 */ - base)) <= UINT8_MAX) { 400 | diff_a = diff_m; 401 | diff_b = 1; 402 | } 403 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 404 | return {min_it - node->diffs_.cbegin(), direct, size}; 405 | } 406 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cbegin, cend, &min_val); 407 | } 408 | 409 | if ((diff_m = static_cast(cbegin - base)) <= UINT8_MAX && 410 | (diff_n = cend - cbegin) <= UINT8_MAX) { 411 | diff_a = diff_m; 412 | diff_b = diff_n; 413 | } 414 | if (min_val - base_val >= 4) { 415 | entry_as_ui = typename Node::CacheEntry{{diff_a, diff_b}}.as_uint16; 416 | goto search; 417 | } 418 | } 419 | #undef entry_as_ar 420 | #undef entry_as_ui 421 | #else 422 | const K_DIFF * cbegin = node->diffs_.cbegin(); 423 | 
const K_DIFF * cend = &node->diffs_[size - 1]; 424 | 425 | K_DIFF min_val; 426 | auto pyramid = node->pyramid_; 427 | const K_DIFF * min_it = cbegin + node->pyramid_.MinAt(cbegin, cend, &min_val); 428 | while (true) { 429 | assert(min_it == std::min_element(cbegin, cend) && *min_it == min_val); 430 | auto[diff_at, shift] = UnpackDiffAtAndShift(min_val); 431 | 432 | // left or right? 433 | uint8_t crit_byte = k.size() > diff_at 434 | ? CharToUint8(k[diff_at]) 435 | : static_cast(0); 436 | auto direct = ((crit_byte >> shift) & 1); 437 | if (!direct) { // go left 438 | cend = min_it; 439 | if (cbegin == cend) { 440 | return {min_it - node->diffs_.cbegin(), direct, size}; 441 | } 442 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cbegin, cend, &min_val); 443 | } else { // go right 444 | cbegin = min_it + 1; 445 | if (cbegin == cend) { 446 | return {min_it - node->diffs_.cbegin(), direct, size}; 447 | } 448 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cbegin, cend, &min_val); 449 | } 450 | } 451 | #endif 452 | } 453 | 454 | template 455 | bool SignatureTreeTpl:: 456 | CombatInsert(const Slice & opponent, const Slice & k, KV_REP v, 457 | Node * hint, size_t hint_idx, bool hint_direct) { 458 | K_DIFF diff_at = 0; 459 | char a, b; 460 | while ((a = opponent[diff_at]) == (b = k[diff_at])) { 461 | ++diff_at; 462 | } 463 | 464 | // __builtin_clz: returns the number of leading 0-bits in x, starting at the 465 | // most significant bit position if x is 0, the result is undefined 466 | uint8_t shift = (__builtin_clz(CharToUint8(a ^ b)) ^ 31); // bsr 467 | auto direct = ((CharToUint8(b) >> shift) & 1); 468 | 469 | K_DIFF packed_diff = PackDiffAtAndShift(diff_at, shift); 470 | Node * cursor = hint; 471 | restart: 472 | while (true) { 473 | size_t insert_idx; 474 | bool insert_direct; 475 | 476 | size_t cursor_size = NodeSize(cursor); 477 | if (cursor_size == 1 || (hint != nullptr && packed_diff > hint->diffs_[hint_idx])) { 
478 | insert_idx = hint_idx; 479 | insert_direct = hint_direct; 480 | hint = nullptr; 481 | } else { 482 | const K_DIFF * cbegin = cursor->diffs_.cbegin(); 483 | const K_DIFF * cend = &cursor->diffs_[cursor_size - 1]; 484 | 485 | K_DIFF exist_diff; 486 | auto pyramid = cursor->pyramid_; 487 | const K_DIFF * min_it = cbegin + cursor->pyramid_.MinAt(cbegin, cend, &exist_diff); 488 | while (true) { 489 | assert(min_it == std::min_element(cbegin, cend) && *min_it == exist_diff); 490 | if (exist_diff > packed_diff) { 491 | if (hint != nullptr) { 492 | hint = nullptr; 493 | cursor = OffsetToMemNode(kRootOffset); 494 | goto restart; 495 | } 496 | insert_idx = (!direct ? cbegin : (cend - 1)) - cursor->diffs_.cbegin(); 497 | insert_direct = direct; 498 | break; 499 | } 500 | hint = nullptr; 501 | 502 | auto[crit_diff_at, crit_shift] = UnpackDiffAtAndShift(exist_diff); 503 | uint8_t crit_byte = k.size() > crit_diff_at 504 | ? CharToUint8(k[crit_diff_at]) 505 | : static_cast(0); 506 | auto crit_direct = ((crit_byte >> crit_shift) & 1); 507 | if (!crit_direct) { 508 | cend = min_it; 509 | if (cbegin == cend) { 510 | insert_idx = min_it - cursor->diffs_.cbegin(); 511 | insert_direct = crit_direct; 512 | break; 513 | } 514 | min_it = cursor->diffs_.cbegin() + 515 | pyramid.TrimRight(cursor->diffs_.cbegin(), cbegin, cend, &exist_diff); 516 | } else { 517 | cbegin = min_it + 1; 518 | if (cbegin == cend) { 519 | insert_idx = min_it - cursor->diffs_.cbegin(); 520 | insert_direct = crit_direct; 521 | break; 522 | } 523 | min_it = cursor->diffs_.cbegin() + 524 | pyramid.TrimLeft(cursor->diffs_.cbegin(), cbegin, cend, &exist_diff); 525 | } 526 | } 527 | } 528 | 529 | const auto & rep = cursor->reps_[insert_idx + insert_direct]; 530 | if (cursor->diffs_[insert_idx] > packed_diff || !IsPacked(rep)) { 531 | if (IsNodeFull(cursor)) { 532 | try { 533 | NodeSplit(cursor); 534 | } catch (const AllocatorFullException &) { 535 | size_t offset = reinterpret_cast(cursor) - 536 | 
reinterpret_cast(base_); 537 | allocator_->Grow(); 538 | base_ = allocator_->Base(); 539 | cursor = OffsetToMemNode(offset); 540 | NodeSplit(cursor); 541 | } 542 | continue; 543 | } 544 | NodeInsert(cursor, insert_idx, insert_direct, 545 | direct, packed_diff, v, cursor_size); 546 | break; 547 | } 548 | cursor = OffsetToMemNode(Unpack(rep)); 549 | } 550 | return true; 551 | } 552 | 553 | #define add_gap(arr, idx, size) \ 554 | do { \ 555 | auto idx__ = (idx); \ 556 | auto size__ = (size); \ 557 | memmove(&arr[idx__ + 1], &arr[idx__], sizeof(arr[0]) * (size__ - idx__)); \ 558 | } while (false) 559 | 560 | #define del_gap(arr, idx, size) \ 561 | do { \ 562 | auto idx__ = (idx); \ 563 | auto size__ = (size); \ 564 | auto indx__ = (idx__ + 1); \ 565 | memmove(&arr[idx__], &arr[indx__], sizeof(arr[0]) * (size__ - indx__)); \ 566 | } while (false) 567 | 568 | #define add_gaps(arr, idx, size, n) \ 569 | do { \ 570 | auto idx__ = (idx); \ 571 | auto size__ = (size); \ 572 | auto n__ = (n); \ 573 | memmove(&arr[idx__ + n__], &arr[idx__], sizeof(arr[0]) * (size__ - idx__)); \ 574 | } while (false) 575 | 576 | #define del_gaps(arr, idx, size, n) \ 577 | do { \ 578 | auto idx__ = (idx); \ 579 | auto size__ = (size); \ 580 | auto n__ = (n); \ 581 | auto indx__ = (idx__ + n__); \ 582 | memmove(&arr[idx__], &arr[indx__], sizeof(arr[0]) * (size__ - indx__)); \ 583 | } while (false) 584 | 585 | #define cpy_part(dst, dst_idx, src, src_idx, n) \ 586 | do { \ 587 | auto dst_idx__ = (dst_idx); \ 588 | auto src_idx__ = (src_idx); \ 589 | auto n__ = (n); \ 590 | memcpy(&dst[dst_idx__], &src[src_idx__], sizeof(src[0]) * n__); \ 591 | } while (false) 592 | 593 | template 594 | void SignatureTreeTpl:: 595 | NodeSplit(Node * parent) { 596 | for (size_t i = 0; i < parent->reps_.size(); ++i) { 597 | const auto & rep = parent->reps_[i]; 598 | if (IsPacked(rep)) { 599 | Node * child = OffsetToMemNode(Unpack(rep)); 600 | if (!IsNodeFull(child)) { 601 | size_t child_size = NodeSize(child); 602 | 
603 | // left child or right child? 604 | if (i == 0 || 605 | (i != parent->reps_.size() - 1 && parent->diffs_[i - 1] < parent->diffs_[i])) { // left 606 | 607 | // how long? 608 | size_t j = i + 1; 609 | for (; j < parent->diffs_.size(); ++j) { 610 | if (parent->diffs_[j] < parent->diffs_[i]) { 611 | break; 612 | } 613 | } 614 | 615 | // enough space? 616 | size_t range = j - i; 617 | if (child_size + range <= child->reps_.size()) { // move to the tail 618 | size_t child_diff_size = child_size - 1; 619 | j = i + 1; 620 | 621 | cpy_part(child->diffs_, child_diff_size, parent->diffs_, i, range); 622 | cpy_part(child->reps_, child_size, parent->reps_, j, range); 623 | 624 | del_gaps(parent->diffs_, i, parent->diffs_.size(), range); 625 | del_gaps(parent->reps_, j, parent->reps_.size(), range); 626 | 627 | parent->size_ -= range; 628 | child->size_ += range; 629 | assert(NodeSize(parent) == parent->reps_.size() - range); 630 | assert(NodeSize(child) == child_size + range); 631 | NodeBuild(parent, i); 632 | NodeBuild(child, child_diff_size); 633 | return; 634 | } 635 | } else { // right 636 | 637 | size_t j = i - 1; 638 | while (j != 0) { 639 | if (parent->diffs_[j - 1] < parent->diffs_[i - 1]) { 640 | break; 641 | } 642 | --j; 643 | } 644 | 645 | size_t range = i - j; 646 | if (child_size + range <= child->reps_.size()) { // move to the head 647 | add_gaps(child->diffs_, 0, child_size - 1, range); 648 | add_gaps(child->reps_, 0, child_size, range); 649 | 650 | cpy_part(child->diffs_, 0, parent->diffs_, j, range); 651 | cpy_part(child->reps_, 0, parent->reps_, j, range); 652 | 653 | del_gaps(parent->diffs_, j, parent->diffs_.size(), range); 654 | del_gaps(parent->reps_, j, parent->reps_.size(), range); 655 | 656 | parent->size_ -= range; 657 | child->size_ += range; 658 | assert(NodeSize(parent) == parent->reps_.size() - range); 659 | assert(NodeSize(child) == child_size + range); 660 | NodeBuild(parent, j); 661 | NodeBuild(child); 662 | return; 663 | } 664 | } 665 | } 
666 | } 667 | } 668 | 669 | size_t offset = allocator_->AllocatePage(); // may throw AllocatorFullException 670 | Node * child = new(OffsetToMemNode(offset)) Node(); 671 | 672 | // find nearly half 673 | const K_DIFF * cbegin = parent->diffs_.cbegin(); 674 | const K_DIFF * cend = parent->diffs_.cend(); 675 | 676 | auto pyramid = parent->pyramid_; 677 | const K_DIFF * min_it = cbegin + parent->pyramid_.MinAt(cbegin, cend); 678 | while (true) { 679 | assert(min_it == std::min_element(cbegin, cend)); 680 | if (min_it - cbegin <= cend - min_it) { // go right 681 | cbegin = min_it + 1; 682 | if (static_cast(cend - cbegin) < parent->diffs_.size() / 2) { 683 | break; 684 | } 685 | min_it = parent->diffs_.cbegin() + 686 | pyramid.TrimLeft(parent->diffs_.cbegin(), cbegin, cend); 687 | } else { // go left 688 | cend = min_it; 689 | if (static_cast(cend - cbegin) < parent->diffs_.size() / 2) { 690 | break; 691 | } 692 | min_it = parent->diffs_.cbegin() + 693 | pyramid.TrimRight(parent->diffs_.cbegin(), cbegin, cend); 694 | } 695 | } 696 | 697 | size_t item_num = cend - cbegin; 698 | size_t child_size = item_num + 1; 699 | size_t nth = cbegin - parent->diffs_.cbegin(); 700 | 701 | cpy_part(child->diffs_, 0, parent->diffs_, nth, item_num); 702 | cpy_part(child->reps_, 0, parent->reps_, nth, child_size); 703 | 704 | del_gaps(parent->diffs_, nth, parent->diffs_.size(), item_num); 705 | del_gaps(parent->reps_, nth + 1, parent->reps_.size(), item_num); 706 | parent->reps_[nth] = Pack(offset); 707 | 708 | child->size_ = static_cast(child_size); 709 | parent->size_ -= item_num; 710 | NodeBuild(parent, nth); 711 | NodeBuild(child); 712 | } 713 | 714 | template 715 | void SignatureTreeTpl:: 716 | NodeMerge(Node * parent, size_t idx, bool direct, size_t parent_size, 717 | Node * child, size_t child_size) { 718 | idx += static_cast(direct); 719 | size_t offset = Unpack(parent->reps_[idx]); 720 | size_t child_diff_size = child_size - 1; 721 | 722 | add_gaps(parent->diffs_, idx, 
parent_size - 1, child_diff_size); 723 | add_gaps(parent->reps_, idx + 1, parent_size, child_diff_size); 724 | 725 | cpy_part(parent->diffs_, idx, child->diffs_, 0, child_diff_size); 726 | cpy_part(parent->reps_, idx, child->reps_, 0, child_size); 727 | 728 | allocator_->FreePage(offset); 729 | parent->size_ += child_diff_size; 730 | NodeBuild(parent, idx); 731 | } 732 | 733 | template 734 | void SignatureTreeTpl:: 735 | NodeCompact(Node * node) { 736 | for (size_t i = 0; !IsNodeFull(node) && i < NodeSize(node); ++i) { 737 | restart: 738 | const auto & rep = node->reps_[i]; 739 | if (IsPacked(rep)) { 740 | Node * child = OffsetToMemNode(Unpack(rep)); 741 | size_t child_size = NodeSize(child); 742 | size_t node_size = NodeSize(node); 743 | 744 | if (node->reps_.size() - node_size + 1 >= child_size) { 745 | NodeMerge(node, i, false, node_size, 746 | child, child_size); 747 | goto restart; 748 | } 749 | 750 | const K_DIFF * cbegin = child->diffs_.cbegin(); 751 | const K_DIFF * cend = &child->diffs_[child_size - 1]; 752 | const K_DIFF * min_it = cbegin + child->pyramid_.MinAt(cbegin, cend); 753 | assert(min_it == std::min_element(cbegin, cend)); 754 | 755 | if (min_it - cbegin < cend - min_it) { // go left 756 | cend = min_it + 1; 757 | size_t item_num = cend - cbegin; 758 | if (item_num + node_size <= node->reps_.size()) { 759 | add_gaps(node->diffs_, i, node_size - 1, item_num); 760 | add_gaps(node->reps_, i, node_size, item_num); 761 | 762 | cpy_part(node->diffs_, i, child->diffs_, 0, item_num); 763 | cpy_part(node->reps_, i, child->reps_, 0, item_num); 764 | 765 | del_gaps(child->diffs_, 0, child_size - 1, item_num); 766 | del_gaps(child->reps_, 0, child_size, item_num); 767 | 768 | node->size_ += item_num; 769 | child->size_ -= item_num; 770 | NodeBuild(node, i); 771 | NodeBuild(child); 772 | goto restart; 773 | } 774 | } else { // go right 775 | cbegin = min_it; 776 | size_t item_num = cend - cbegin; 777 | if (item_num + node_size <= node->reps_.size()) { 778 | 
size_t nth = cbegin - child->diffs_.cbegin(); 779 | size_t j = i + 1; 780 | 781 | add_gaps(node->diffs_, i, node_size - 1, item_num); 782 | add_gaps(node->reps_, j, node_size, item_num); 783 | 784 | cpy_part(node->diffs_, i, child->diffs_, nth, item_num); 785 | cpy_part(node->reps_, j, child->reps_, nth + 1, item_num); 786 | 787 | node->size_ += item_num; 788 | child->size_ -= item_num; 789 | NodeBuild(node, i); 790 | NodeBuild(child, nth); 791 | goto restart; 792 | } 793 | } 794 | } 795 | } 796 | 797 | for (size_t i = 0; i < NodeSize(node); ++i) { 798 | const auto & rep = node->reps_[i]; 799 | if (IsPacked(rep)) { 800 | NodeCompact(OffsetToMemNode(Unpack(rep))); 801 | } 802 | } 803 | } 804 | 805 | template 806 | void SignatureTreeTpl:: 807 | NodeInsert(Node * node, size_t insert_idx, bool insert_direct, 808 | bool direct, K_DIFF diff, const KV_REP & rep, size_t size) { 809 | assert(!IsNodeFull(node)); 810 | insert_idx += insert_direct; 811 | size_t rep_idx = insert_idx + direct; 812 | 813 | add_gap(node->diffs_, insert_idx, size - 1); 814 | add_gap(node->reps_, rep_idx, size); 815 | 816 | node->diffs_[insert_idx] = diff; 817 | node->reps_[rep_idx] = rep; 818 | node->size_ = size + 1; 819 | NodeBuild(node, insert_idx); 820 | } 821 | 822 | template 823 | void SignatureTreeTpl:: 824 | NodeRemove(Node * node, size_t idx, bool direct, size_t size) { 825 | assert(size >= 1); 826 | del_gap(node->reps_, idx + direct, size); 827 | node->size_ = --size; 828 | if (SGT_LIKELY(size > 0)) { 829 | del_gap(node->diffs_, idx, size); 830 | NodeBuild(node, idx); 831 | } 832 | } 833 | 834 | template 835 | void SignatureTreeTpl:: 836 | NodeBuild(Node * node, size_t rebuild_idx) { 837 | #ifndef SGT_NO_DENSE_INPUT_CACHE 838 | node->cache_ = {}; 839 | #endif 840 | node->pyramid_.Build(node->diffs_.data(), node->diffs_.data() + NodeSize(node) - 1, rebuild_idx); 841 | } 842 | 843 | #undef add_gap 844 | #undef del_gap 845 | #undef add_gaps 846 | #undef del_gaps 847 | #undef cpy_part 848 | } 
849 | 850 | #endif //SIG_TREE_SIG_TREE_IMPL_H 851 | -------------------------------------------------------------------------------- /src/sig_tree_mop_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_MOP_IMPL_H 3 | #define SIG_TREE_SIG_TREE_MOP_IMPL_H 4 | 5 | #ifndef SGT_NO_MM_PREFETCH 6 | #include 7 | #endif 8 | 9 | #include "likely.h" 10 | #include "sig_tree.h" 11 | 12 | namespace sgt { 13 | /* MultiGetWithCallback: looks up the N keys ks[0..N) together. All lookups advance one tree level per round, so the prefetches issued for one key can complete while the other keys are processed. On exit reps[i] points at the rep slot reached for ks[i]; the slot is not compared against the key here. If the root is empty every reps[i] stays nullptr. The result is either `reps` itself or `callback(reps)`, chosen at compile time by an is_same test whose template arguments are missing from this dump (presumably CALLBACK against std::nullptr_t - TODO confirm in sig_tree.h). */ template 14 | template 15 | auto SignatureTreeTpl:: 16 | MultiGetWithCallback(const Slice * ks, 17 | CALLBACK && callback) { 18 | std::array reps{}; 19 | Node * root = OffsetToMemNode(kRootOffset); 20 | if (SGT_UNLIKELY(NodeSize(root) == 0)) { 21 | if constexpr (std::is_same::value) { 22 | return reps; 23 | } else { 24 | return callback(reps); 25 | } 26 | } 27 | 28 | /* one cursor per key; nullptr marks a finished lookup */ std::array cursors; 29 | cursors.fill(root); 30 | 31 | size_t remaining; 32 | do { 33 | remaining = N; 34 | 35 | /* pass 1: pick the rep slot inside each live node and start fetching it */ for (size_t i = 0; i < N; ++i) { 36 | Node * cursor = cursors[i]; 37 | if (cursor != nullptr) { 38 | auto[idx, direct, _] = FindBestMatchImpl(cursor, ks[i]); 39 | auto & rep = reps[i]; 40 | rep = &cursor->reps_[idx + direct]; 41 | #ifndef SGT_NO_MM_PREFETCH 42 | _mm_prefetch(rep, _MM_HINT_T0); 43 | #endif 44 | } 45 | } 46 | 47 | /* pass 2: follow packed reps to the child node (and prefetch it); any other rep ends that lookup. `remaining` is decremented for every key that did not descend in this round, so it ends up as the number of lookups still in progress. */ for (size_t i = 0; i < N; ++i) { 48 | Node *& cursor = cursors[i]; 49 | if (cursor != nullptr) { 50 | const auto & r = *reps[i]; 51 | if (IsPacked(r)) { 52 | cursor = OffsetToMemNode(Unpack(r)); 53 | #ifndef SGT_NO_MM_PREFETCH 54 | _mm_prefetch(&cursor->size_, _MM_HINT_T0); 55 | auto p = reinterpret_cast(&cursor->diffs_); 56 | p -= reinterpret_cast(p) % 64; 57 | _mm_prefetch(p + 64 * 0, _MM_HINT_T2); 58 | _mm_prefetch(p + 64 * 1, _MM_HINT_T2); 59 | _mm_prefetch(p + 64 * 2, _MM_HINT_T2); 60 | _mm_prefetch(p + 64 * 3, _MM_HINT_T2); 61 | _mm_prefetch(p + 64 * 4, _MM_HINT_T2); 62 | #endif 63 | continue; 64 | } 65 | cursor = nullptr; 66 | } 67 | --remaining; 68 | } 69 | } while (remaining != 0); 
70 | 71 | if constexpr (std::is_same::value) { 72 | return reps; 73 | } else { 74 | return callback(reps); 75 | } 76 | } 77 | } 78 | 79 | #endif //SIG_TREE_SIG_TREE_MOP_IMPL_H 80 | -------------------------------------------------------------------------------- /src/sig_tree_node_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_NODE_IMPL_H 3 | #define SIG_TREE_SIG_TREE_NODE_IMPL_H 4 | 5 | #if __has_include() && defined(__SSE4_1__) 6 | #include 7 | 8 | namespace sgt { 9 | /* kHasMinpos: true when this branch of the #if is taken, i.e. the (name-stripped) intrinsics header is available and SSE4.1 is enabled; it gates the _mm_minpos_epu16 code paths below */ constexpr bool kHasMinpos = true; 10 | } 11 | #else 12 | namespace sgt { 13 | constexpr bool kHasMinpos = false; 14 | } 15 | #endif 16 | 17 | #include "likely.h" 18 | #include "sig_tree.h" 19 | 20 | namespace sgt { 21 | /* NodeSize: returns node->size_. Callers in this file treat it as the number of reps, with one diff fewer. */ template 22 | size_t SignatureTreeTpl:: 23 | NodeSize(const Node * node) { 24 | return node->size_; 25 | } 26 | 27 | /* IsNodeFull: true when the node holds kNodeRepRank reps */ template 28 | bool SignatureTreeTpl:: 29 | IsNodeFull(const Node * node) { 30 | return node->size_ == kNodeRepRank; 31 | } 32 | 33 | /* SmartMinElem8: returns a pointer to the smallest element of [from, to), a range of at most 8 elements, and stores that value in *min_val when min_val is not null. With the SIMD path enabled (the is_same arguments are missing from this dump; the 16-bit intrinsics suggest T == uint16_t - TODO confirm) it always loads 16 bytes starting at `from`, even for shorter ranges, so the caller must guarantee that those bytes are readable. Otherwise it falls back to std::min_element. */ template 34 | inline const T * SmartMinElem8(const T * from, const T * to, T * min_val) { 35 | if constexpr (std::is_same::value && kHasMinpos) { 36 | __m128i vec = _mm_loadu_si128(reinterpret_cast(from)); 37 | 38 | size_t size = to - from; 39 | if (size != 8) { 40 | assert(size < 8); 41 | 42 | /* masks[n], after the final inversion, has all bits clear in the first n 16-bit lanes and all bits set in the remaining ones; OR-ing it in raises the unused lanes to the maximum value */ static constexpr auto masks = []() { 43 | std::array, 8> arr{}; 44 | arr[1][0] = UINT16_MAX; 45 | arr[2][0] = (arr[1][0] << 16) | UINT16_MAX; 46 | arr[3][0] = (arr[2][0] << 16) | UINT16_MAX; 47 | arr[4][0] = (arr[3][0] << 16) | UINT16_MAX; 48 | for (size_t i = 5; i < 8; ++i) { arr[i][0] = UINT64_MAX; } 49 | arr[5][1] = UINT16_MAX; 50 | arr[6][1] = (arr[5][1] << 16) | UINT16_MAX; 51 | arr[7][1] = (arr[6][1] << 16) | UINT16_MAX; 52 | for (size_t i = 0; i < 8; ++i) { arr[i] = {~arr[i][0], ~arr[i][1]}; } 53 | return arr; 54 | }(); 55 | 56 | vec = _mm_or_si128(vec, _mm_loadu_si128(reinterpret_cast(&masks[size]))); 57 | } 58 | 59 | /* minpos leaves the minimum in 16-bit lane 0 and its lane index right after it; they are read back below with extract_epi16(vec, 0) and extract_epi8(vec, 2) */ vec = _mm_minpos_epu16(vec); 60 | if 
(min_val != nullptr) { *min_val = static_cast(_mm_extract_epi16(vec, 0)); } 61 | return from + _mm_extract_epi8(vec, 2); 62 | } else { 63 | const T * min_it = std::min_element(from, to); 64 | if (min_val != nullptr) { *min_val = *min_it; } 65 | return min_it; 66 | } 67 | } 68 | 69 | /* Pyramid::Build: (re)computes the min-pyramid over the diffs [from, to). Level 0 stores, for every group of up to 8 diffs, the group's minimum in vals_ and the position of that minimum inside the group in idxes_; each further level does the same over the values of the level below, until a level consists of a single entry. Each level's entries start at kAbsOffsets[level]. Nothing is stored for 8 or fewer diffs. A non-zero rebuild_idx makes each level skip the groups in front of the changed position; with exactly 9 diffs it is reset to 0 so that everything is recomputed. */ template 70 | template 71 | void SignatureTreeTpl:: 72 | NodeTpl::Pyramid::Build(const K_DIFF * from, const K_DIFF * to, size_t rebuild_idx) { 73 | size_t size = to - from; 74 | if (size <= 8) { 75 | return; 76 | } else if (size == 9) { 77 | rebuild_idx = 0; 78 | } 79 | 80 | size_t level = 0; 81 | while (true) { 82 | const size_t q = size / 8; 83 | const size_t r = size % 8; 84 | K_DIFF * val_from = vals_.begin() + kAbsOffsets[level]; 85 | uint8_t * idx_from = idxes_.begin() + kAbsOffsets[level++]; 86 | /* the values written on this level are the input of the next one */ const K_DIFF * next_from = val_from; 87 | 88 | if (rebuild_idx > 0) { 89 | rebuild_idx /= 8; 90 | val_from += rebuild_idx; 91 | idx_from += rebuild_idx; 92 | from += (8 * rebuild_idx); 93 | } 94 | 95 | /* full groups of 8 */ while (to - from >= 8) { 96 | K_DIFF val; 97 | uint8_t idx; 98 | if constexpr (std::is_same::value && kHasMinpos) { 99 | __m128i vec = _mm_loadu_si128(reinterpret_cast(from)); 100 | vec = _mm_minpos_epu16(vec); 101 | val = static_cast(_mm_extract_epi16(vec, 0)); 102 | idx = static_cast(_mm_extract_epi8(vec, 2)); 103 | } else { 104 | const K_DIFF * min_elem = std::min_element(from, from + 8); 105 | val = *min_elem; 106 | idx = static_cast(min_elem - from); 107 | } 108 | 109 | (*val_from++) = val; 110 | (*idx_from++) = idx; 111 | from += 8; 112 | } 113 | 114 | /* trailing partial group, if any; SmartMinElem8 writes its minimum straight into *val_from */ if (r != 0) { 115 | size = q + 1; 116 | const K_DIFF * min_elem = SmartMinElem8(from, to, val_from); 117 | (*idx_from) = static_cast(min_elem - from); 118 | } else { 119 | size = q; 120 | } 121 | 122 | if (size == 1) { 123 | break; 124 | } 125 | from = next_from; 126 | to = from + size; 127 | } 128 | } 129 | 130 | /* Pyramid::MinAt: returns the index (relative to `from`) of the smallest diff in [from, to) and stores its value in *min_val when min_val is not null. Up to 8 diffs are scanned directly; larger ranges are answered from the top entry of the pyramid, which therefore has to be up to date for exactly this range. */ template 131 | template 132 | size_t SignatureTreeTpl:: 133 | NodeTpl::Pyramid::MinAt(const K_DIFF * from, const K_DIFF * 
to, 134 | K_DIFF * min_val) const { 135 | size_t size = to - from; 136 | if (size <= 8) { 137 | /* NOTE(review): this appears to guarantee that the unconditional 16-byte load in SmartMinElem8 stays inside the node, presumably because idxes_ is laid out after the diffs - confirm against the Node layout in sig_tree.h */ static_assert(!(std::is_same::value && kHasMinpos) 138 | || sizeof(idxes_) >= sizeof(uint16_t) * 8); 139 | const K_DIFF * min_it = SmartMinElem8(from, to, min_val); 140 | return min_it - from; 141 | } 142 | return CalcOffset(PyramidHeight(size) - 1, 0, min_val); 143 | } 144 | 145 | /* Pyramid::TrimLeft: returns the index (relative to cbegin) of the smallest diff in [from, to) after the left boundary of the range of interest has moved right to `from`, and stores its value in *min_val when min_val is not null. Ranges of up to 8 diffs are scanned directly. For longer ranges the pyramid entry covering the left edge is checked level by level and recomputed where its recorded minimum no longer lies inside the range; the recomputed values are written into vals_ / idxes_, so the pyramid is modified. `accumulator` is true while the entry examined on the level below was left unchanged. */ template 146 | template 147 | size_t SignatureTreeTpl:: 148 | NodeTpl::Pyramid::TrimLeft(const K_DIFF * cbegin, const K_DIFF * from, const K_DIFF * to, 149 | K_DIFF * min_val) { 150 | size_t pos = from - cbegin; 151 | size_t end_pos = to - cbegin; 152 | assert(end_pos >= pos + 1); 153 | if (end_pos - pos <= 8) { 154 | const K_DIFF * min_it = SmartMinElem8(from, to, min_val); 155 | return min_it - cbegin; 156 | } 157 | 158 | bool accumulator = true; 159 | size_t level = 0; 160 | do { 161 | const size_t q = pos / 8; 162 | const size_t r = pos % 8; 163 | pos = q; 164 | /* ceil(end_pos / 8) */ end_pos = end_pos / 8 + static_cast(end_pos % 8 != 0); 165 | 166 | const size_t offset = kAbsOffsets[level++]; 167 | uint8_t & upper_idx = idxes_[offset + pos]; 168 | /* the stored minimum is still usable if it sits at or right of the new edge r (strictly right of it when the entry at r itself was just rewritten) */ if ((accumulator = (static_cast(upper_idx + accumulator) > r))) { 169 | cbegin = vals_.cbegin() + offset; 170 | from = cbegin + pos; 171 | to = cbegin + end_pos; 172 | } else { 173 | K_DIFF val; 174 | const K_DIFF * min_elem = SmartMinElem8(from, std::min(from + (8 - r), to), &val); 175 | const size_t idx = (min_elem - from) + r; 176 | 177 | cbegin = vals_.cbegin() + offset; 178 | from = cbegin + pos; 179 | to = cbegin + end_pos; 180 | 181 | *const_cast(from) = val; 182 | upper_idx = static_cast(idx); 183 | } 184 | } while (end_pos - pos > 1); 185 | return CalcOffset(level - 1, pos, min_val); 186 | } 187 | 188 | /* Pyramid::TrimRight: counterpart of TrimLeft for a right boundary that has moved left to `to`. Returns the index (relative to cbegin) of the smallest diff in [from, to), stores its value in *min_val when min_val is not null, and rewrites the pyramid entries along the right edge whose recorded minimum fell outside the range. */ template 189 | template 190 | size_t SignatureTreeTpl:: 191 | NodeTpl::Pyramid::TrimRight(const K_DIFF * cbegin, const K_DIFF * from, const K_DIFF * to, 192 | K_DIFF * min_val) { 193 | size_t pos = from - cbegin; 194 | size_t end_pos = to - cbegin; 195 | 
assert(end_pos >= pos + 1); 196 | if (end_pos - pos <= 8) { 197 | const K_DIFF * min_it = SmartMinElem8(from, to, min_val); 198 | return min_it - cbegin; 199 | } 200 | 201 | bool accumulator = true; 202 | size_t level = 0; 203 | do { 204 | --end_pos; 205 | const size_t q = end_pos / 8; 206 | /* r = number of elements of the last group that are still inside the range */ const size_t r = end_pos % 8 + 1; 207 | pos /= 8; 208 | end_pos = q + 1; 209 | 210 | const size_t offset = kAbsOffsets[level++]; 211 | uint8_t & upper_idx = idxes_[offset + q /* end_pos - 1 */]; 212 | if ((accumulator = (upper_idx < r - 1 + accumulator))) { 213 | cbegin = vals_.cbegin() + offset; 214 | from = cbegin + pos; 215 | to = cbegin + end_pos; 216 | } else { 217 | K_DIFF val; 218 | const K_DIFF * start = to - r; 219 | const K_DIFF * min_elem = SmartMinElem8(std::max(from, start), to, &val); 220 | const size_t idx = min_elem - start; 221 | 222 | cbegin = vals_.cbegin() + offset; 223 | from = cbegin + pos; 224 | to = cbegin + end_pos; 225 | 226 | *const_cast(to - 1) = val; 227 | upper_idx = static_cast(idx); 228 | } 229 | } while (end_pos - pos > 1); 230 | return CalcOffset(level - 1, pos, min_val); 231 | } 232 | 233 | /* Pyramid::CalcOffset: starting from entry `index` on `level`, follows the stored in-group positions downwards and returns the index of the corresponding diff in the node's diff array. The value stored in the starting entry is written to *min_val when min_val is not null. When the pyramid for kNodeRank has exactly three levels the descent is unrolled; otherwise a loop is used, which requires level >= 1 on entry. */ template 234 | template 235 | size_t SignatureTreeTpl:: 236 | NodeTpl::Pyramid::CalcOffset(size_t level, size_t index, K_DIFF * min_val) const { 237 | size_t i = kAbsOffsets[level] + index; 238 | if (min_val != nullptr) { *min_val = vals_[i]; } 239 | 240 | size_t r = idxes_[i]; 241 | if constexpr (PyramidHeight(kNodeRank) == 3) { 242 | assert(level < 3); 243 | if (SGT_LIKELY(level == 2)) { 244 | index = index * 8 + r; 245 | r = idxes_[kAbsOffsets[1] + index]; 246 | } 247 | index = index * 8 + r; 248 | r = idxes_[kAbsOffsets[0] + index]; 249 | } else { 250 | do { 251 | index = index * 8 + r; 252 | r = idxes_[kAbsOffsets[--level] + index]; 253 | } while (level != 0); 254 | } 255 | return index * 8 + r; 256 | } 257 | } 258 | 259 | #endif //SIG_TREE_SIG_TREE_NODE_IMPL_H 260 | -------------------------------------------------------------------------------- 
/src/sig_tree_rebuild_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_REBUILD_IMPL_H 3 | #define SIG_TREE_SIG_TREE_REBUILD_IMPL_H 4 | 5 | #include "likely.h" 6 | #include "sig_tree.h" 7 | 8 | namespace sgt { 9 | /* Rebuild: re-creates the contents of this tree inside *dst, which must be a different tree (asserted). The page produced for this tree's root is written into dst's root node. `pool` collects Page objects that are no longer needed so that later steps can reuse them. */ template 10 | void SignatureTreeTpl:: 11 | Rebuild(SignatureTreeTpl * dst) const { 12 | assert(dst != this); 13 | std::vector pool; 14 | RebuildPageToNode(RebuildHeadNode(OffsetToMemNode(kRootOffset), dst, &pool), 15 | dst->OffsetToMemNode(dst->kRootOffset)); 16 | } 17 | 18 | /* RebuildHeadNode: converts the whole of `node` (and everything below it) into a Page. A node with at most one rep yields a Page without diffs and with a copy of its reps. Otherwise the minimum diff splits the node into a left and a right part, which are rebuilt separately and joined by RebuildLRPagesToTree. The pyramid is copied first because the Trim calls made further down write to it, while `node` is const here. */ template 19 | typename SignatureTreeTpl::Page 20 | SignatureTreeTpl:: 21 | RebuildHeadNode(const Node * node, SignatureTreeTpl * dst, 22 | std::vector * pool) const { 23 | size_t size = NodeSize(node); 24 | if (SGT_UNLIKELY(size <= 1)) { 25 | return {{}, 26 | {node->reps_.data(), node->reps_.data() + size}}; 27 | } 28 | 29 | const K_DIFF * cbegin = node->diffs_.cbegin(); 30 | const K_DIFF * cend = &node->diffs_[size - 1]; 31 | 32 | K_DIFF min_val; 33 | auto pyramid = node->pyramid_; 34 | const K_DIFF * min_it = cbegin + node->pyramid_.MinAt(cbegin, cend, &min_val); 35 | 36 | Page l = RebuildInternalNode(node, cbegin, cend, min_it, pyramid, false, dst, pool); 37 | Page r = RebuildInternalNode(node, cbegin, cend, min_it, pyramid, true, dst, pool); 38 | return RebuildLRPagesToTree(std::move(l), std::move(r), min_val, dst, pool); 39 | } 40 | 41 | /* RebuildInternalNode: rebuilds the part of the diff range [cbegin, cend) that lies left (direct == false) or right (direct == true) of min_it. When that part contains no diff it stands for a single rep: a packed rep is expanded through RebuildHeadNode, any other rep becomes a one-rep Page (recycled from `pool` when one is available). Otherwise the minimum of the part is obtained with TrimRight / TrimLeft on the working pyramid and the function recurses on both sides of it. */ template 42 | typename SignatureTreeTpl::Page 43 | SignatureTreeTpl:: 44 | RebuildInternalNode(const Node * node, 45 | const K_DIFF * cbegin, const K_DIFF * cend, const K_DIFF * min_it, 46 | typename Node::Pyramid & pyramid, bool direct, SignatureTreeTpl * dst, 47 | std::vector * pool) const { 48 | assert(min_it == std::min_element(cbegin, cend)); 49 | 50 | auto make_page = [](std::vector * p, KV_REP rep) { 51 | if (p->empty()) { 52 | return Page{{}, 53 | {{rep}}}; 54 | } else { 55 | Page page = std::move(p->back()); 56 | p->pop_back(); 57 | page.diffs.clear(); 
58 | page.reps = {rep}; 59 | return page; 60 | } 61 | }; 62 | 63 | K_DIFF min_val; 64 | if (!direct) { // go left 65 | cend = min_it; 66 | if (cbegin == cend) { 67 | const auto & rep = node->reps_[cend - node->diffs_.cbegin()]; 68 | return IsPacked(rep) ? RebuildHeadNode(OffsetToMemNode(Unpack(rep)), dst, pool) 69 | : make_page(pool, rep); 70 | } 71 | min_it = node->diffs_.cbegin() + pyramid.TrimRight(node->diffs_.cbegin(), cbegin, cend, &min_val); 72 | } else { // go right 73 | cbegin = min_it + 1; 74 | if (cbegin == cend) { 75 | const auto & rep = node->reps_[cend - node->diffs_.cbegin()]; 76 | return IsPacked(rep) ? RebuildHeadNode(OffsetToMemNode(Unpack(rep)), dst, pool) 77 | : make_page(pool, rep); 78 | } 79 | min_it = node->diffs_.cbegin() + pyramid.TrimLeft(node->diffs_.cbegin(), cbegin, cend, &min_val); 80 | } 81 | 82 | Page l = RebuildInternalNode(node, cbegin, cend, min_it, pyramid, false, dst, pool); 83 | Page r = RebuildInternalNode(node, cbegin, cend, min_it, pyramid, true, dst, pool); 84 | return RebuildLRPagesToTree(std::move(l), std::move(r), min_val, dst, pool); 85 | } 86 | 87 | /* RebuildLRPagesToTree: joins the pages l and r under the separating diff. If all reps fit into one node the two pages are concatenated. Otherwise, when both pages hold at least kAcceptable reps, each is written to its own node in dst and the result is a two-rep page referring to them; failing that, the larger page is written to a node and referenced from the smaller one. The Page object that is no longer needed is handed to `pool`. */ template 88 | typename SignatureTreeTpl::Page 89 | SignatureTreeTpl:: 90 | RebuildLRPagesToTree(Page && l, Page && r, K_DIFF diff, SignatureTreeTpl * dst, 91 | std::vector * pool) { 92 | if (l.reps.size() + r.reps.size() <= kNodeRepRank) { 93 | l.diffs.emplace_back(diff); 94 | l.diffs.insert(l.diffs.end(), r.diffs.begin(), r.diffs.end()); 95 | l.reps.insert(l.reps.end(), r.reps.begin(), r.reps.end()); 96 | pool->emplace_back(std::move(r)); 97 | return std::move(l); 98 | } else { 99 | constexpr uint64_t kAcceptable = kNodeRepRank * 0.625; 100 | if (std::min(l.reps.size(), r.reps.size()) >= kAcceptable) { 101 | l.reps = {dst->Pack(RebuildPageToTree(l, dst)), 102 | dst->Pack(RebuildPageToTree(r, dst))}; 103 | l.diffs = {diff}; 104 | pool->emplace_back(std::move(r)); 105 | return std::move(l); 106 | } 107 | if (l.reps.size() <= r.reps.size()) { 108 | l.diffs.emplace_back(diff); 
109 | l.reps.emplace_back(dst->Pack(RebuildPageToTree(r, dst))); 110 | pool->emplace_back(std::move(r)); 111 | return std::move(l); 112 | } else { 113 | r.diffs.insert(r.diffs.begin(), diff); 114 | r.reps.insert(r.reps.begin(), dst->Pack(RebuildPageToTree(l, dst))); 115 | pool->emplace_back(std::move(l)); 116 | return std::move(r); 117 | } 118 | } 119 | } 120 | 121 | /* RebuildPageToTree: allocates a page in dst, constructs a Node in it, fills the node from `page` and returns the page offset. If the allocator throws AllocatorFullException it is grown once, dst->base_ is refreshed from the allocator and the allocation is retried. */ template 122 | size_t SignatureTreeTpl:: 123 | RebuildPageToTree(const Page & page, SignatureTreeTpl * dst) { 124 | size_t offset; 125 | try { 126 | offset = dst->allocator_->AllocatePage(); 127 | } catch (const AllocatorFullException &) { 128 | dst->allocator_->Grow(); 129 | dst->base_ = dst->allocator_->Base(); 130 | offset = dst->allocator_->AllocatePage(); 131 | } 132 | Node * node = new(dst->OffsetToMemNode(offset)) Node(); 133 | RebuildPageToNode(page, node); 134 | return offset; 135 | } 136 | 137 | /* RebuildPageToNode: copies the page's reps and diffs into `node`, sets size_ to the number of reps and rebuilds the node's pyramid. */ template 138 | void SignatureTreeTpl:: 139 | RebuildPageToNode(const Page & page, Node * node) { 140 | std::copy(page.reps.cbegin(), page.reps.cend(), node->reps_.begin()); 141 | std::copy(page.diffs.cbegin(), page.diffs.cend(), node->diffs_.begin()); 142 | node->size_ = static_cast(page.reps.size()); 143 | NodeBuild(node); 144 | } 145 | } 146 | 147 | #endif //SIG_TREE_SIG_TREE_REBUILD_IMPL_H 148 | -------------------------------------------------------------------------------- /src/sig_tree_visit_impl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SIG_TREE_VISIT_IMPL_H 3 | #define SIG_TREE_SIG_TREE_VISIT_IMPL_H 4 | 5 | #include "autovector.h" 6 | #include "likely.h" 7 | #include "sig_tree.h" 8 | 9 | namespace sgt { 10 | /* VisitGenericImpl: shared engine behind the visit operations. It positions a stack (`que`, pairs of node and rep index describing the path from the root to the current rep) on the starting rep and then hands reps to `visitor` one at a time, moving forward or, when BACKWARD is set, backward. An empty target starts at the first (or last) rep; otherwise the tree is searched for target. In the plain form the visitor returns whether to continue; in the deleting form it returns a (proceed, del) pair and reps flagged with del are removed on the way. Several template argument lists of this function were lost in extraction, so the exact conditions of its `if constexpr` tests cannot be read from this dump. */ template 11 | template 12 | void SignatureTreeTpl:: 13 | VisitGenericImpl(T self, const Slice & target, VISITOR && visitor, E && expected) { 14 | Node * cursor = self->OffsetToMemNode(self->kRootOffset); 15 | if (SGT_UNLIKELY(NodeSize(cursor) == 0)) { 16 | return; 17 | } 18 | rocksdb::autovector, 16> que; 19 | 
20 | /* leftmost: pushes the path from `cursor` down through rep 0 of every node until a rep that is not packed is reached */ [[maybe_unused]] auto leftmost = [self, &que](Node * cursor) { 21 | while (true) { 22 | que.emplace_back(cursor, 0); 23 | const auto & rep = cursor->reps_[0]; 24 | if (self->IsPacked(rep)) { 25 | cursor = self->OffsetToMemNode(self->Unpack(rep)); 26 | } else { 27 | break; 28 | } 29 | } 30 | }; 31 | 32 | /* next: moves to the following rep: the index on top of the stack is incremented and, if it now names a packed rep, the path is extended with leftmost; nodes that are exhausted are popped. que is empty afterwards when there is no following rep. */ [[maybe_unused]] auto next = [self, &que, &leftmost]() { 33 | while (!que.empty()) { 34 | auto & p = que.back(); 35 | if (++p.second < NodeSize(p.first)) { 36 | const auto & rep = p.first->reps_[p.second]; 37 | if (self->IsPacked(rep)) { 38 | leftmost(self->OffsetToMemNode(self->Unpack(rep))); 39 | } 40 | break; 41 | } 42 | que.pop_back(); 43 | } 44 | }; 45 | 46 | /* rightmost: like leftmost, but through the last rep of every node */ [[maybe_unused]] auto rightmost = [self, &que](Node * cursor) { 47 | while (true) { 48 | size_t rep_idx = NodeSize(cursor) - 1; 49 | que.emplace_back(cursor, rep_idx); 50 | const auto & rep = cursor->reps_[rep_idx]; 51 | if (self->IsPacked(rep)) { 52 | cursor = self->OffsetToMemNode(self->Unpack(rep)); 53 | } else { 54 | break; 55 | } 56 | } 57 | }; 58 | 59 | /* prev: moves to the preceding rep; mirror image of next */ [[maybe_unused]] auto prev = [self, &que, &rightmost]() { 60 | while (!que.empty()) { 61 | auto & p = que.back(); 62 | if (p.second != 0) { 63 | --p.second; 64 | const auto & rep = p.first->reps_[p.second]; 65 | if (self->IsPacked(rep)) { 66 | rightmost(self->OffsetToMemNode(self->Unpack(rep))); 67 | } 68 | break; 69 | } 70 | que.pop_back(); 71 | } 72 | }; 73 | 74 | if (target.size() == 0) { 75 | if constexpr (!BACKWARD) { 76 | leftmost(cursor); 77 | } else { 78 | rightmost(cursor); 79 | } 80 | } else { // Seek 81 | while (true) { 82 | auto[idx, direct, _] = FindBestMatch(cursor, target); 83 | size_t rep_idx = idx + direct; 84 | que.emplace_back(cursor, rep_idx); 85 | 86 | const auto & rep = cursor->reps_[rep_idx]; 87 | if (self->IsPacked(rep)) { 88 | cursor = self->OffsetToMemNode(self->Unpack(rep)); 89 | } else { 90 | if constexpr (!std::is_same::value) { 91 | if (expected == rep) { 92 | break; 93 | } 94 | } 95 | 96 | const auto & trans = 
self->helper_->Trans(rep); 97 | if (trans == target) { 98 | } else { // Reseek 99 | que.pop_back(); 100 | 101 | /* The best match holds a different key. Find the first byte in which the stored key (opponent) and the target (k) differ and the highest differing bit of that byte, then walk down again to the place where a key with that difference would be inserted and leave que on the neighbouring rep. The byte scan below has no end test of its own; it relies on the two keys differing inside their common length. */ [self, &que, &next, &leftmost](const Slice & opponent, const Slice & k, 102 | Node * hint, size_t hint_idx, bool hint_direct) { 103 | K_DIFF diff_at = 0; 104 | char a, b; 105 | while ((a = opponent[diff_at]) == (b = k[diff_at])) { 106 | ++diff_at; 107 | } 108 | 109 | // __builtin_clz: returns the number of leading 0-bits in x, starting at the 110 | // most significant bit position if x is 0, the result is undefined 111 | uint8_t shift = (__builtin_clz(CharToUint8(a ^ b)) ^ 31); // bsr 112 | auto direct = ((CharToUint8(b) >> shift) & 1); 113 | 114 | K_DIFF packed_diff = PackDiffAtAndShift(diff_at, shift); 115 | Node * cursor = hint; 116 | restart: 117 | while (true) { 118 | size_t insert_idx; 119 | bool insert_direct; 120 | 121 | size_t cursor_size = NodeSize(cursor); 122 | if (cursor_size == 1 || (hint != nullptr && packed_diff > hint->diffs_[hint_idx])) { 123 | insert_idx = hint_idx; 124 | insert_direct = hint_direct; 125 | assert(!self->IsPacked(cursor->reps_[insert_idx + insert_direct])); 126 | } else { 127 | const K_DIFF * cbegin = cursor->diffs_.cbegin(); 128 | const K_DIFF * cend = &cursor->diffs_[cursor_size - 1]; 129 | 130 | K_DIFF exist_diff; 131 | /* work on a copy: the Trim calls below write to the pyramid */ auto pyramid = cursor->pyramid_; 132 | const K_DIFF * min_it = cbegin + cursor->pyramid_.MinAt(cbegin, cend, &exist_diff); 133 | while (true) { 134 | assert(min_it == std::min_element(cbegin, cend) && *min_it == exist_diff); 135 | if (exist_diff > packed_diff) { 136 | /* the hint node was not the right starting point: drop it, forget the recorded path and start again from the root */ if (hint != nullptr) { 137 | hint = nullptr; 138 | cursor = self->OffsetToMemNode(self->kRootOffset); 139 | que.clear(); 140 | goto restart; 141 | } 142 | insert_idx = (!direct ? cbegin : (cend - 1)) - cursor->diffs_.cbegin(); 143 | insert_direct = direct; 144 | break; 145 | } 146 | hint = nullptr; 147 | 148 | auto[crit_diff_at, crit_shift] = UnpackDiffAtAndShift(exist_diff); 149 | /* a key shorter than the critical position is treated as having a zero byte there */ uint8_t crit_byte = k.size() > crit_diff_at 150 | ? 
CharToUint8(k[crit_diff_at]) 151 | : static_cast(0); 152 | auto crit_direct = ((crit_byte >> crit_shift) & 1); 153 | if (!crit_direct) { 154 | cend = min_it; 155 | if (cbegin == cend) { 156 | insert_idx = min_it - cursor->diffs_.cbegin(); 157 | insert_direct = crit_direct; 158 | break; 159 | } 160 | min_it = cursor->diffs_.cbegin() + 161 | pyramid.TrimRight(cursor->diffs_.cbegin(), cbegin, cend, 162 | &exist_diff); 163 | } else { 164 | cbegin = min_it + 1; 165 | if (cbegin == cend) { 166 | insert_idx = min_it - cursor->diffs_.cbegin(); 167 | insert_direct = crit_direct; 168 | break; 169 | } 170 | min_it = cursor->diffs_.cbegin() + 171 | pyramid.TrimLeft(cursor->diffs_.cbegin(), cbegin, cend, 172 | &exist_diff); 173 | } 174 | } 175 | } 176 | 177 | size_t rep_idx = insert_idx + insert_direct; 178 | que.emplace_back(cursor, rep_idx); 179 | 180 | const auto & rep = cursor->reps_[rep_idx]; 181 | /* insertion point found: when the target has a 1 at the differing bit (direct) step to the next rep, otherwise start at the first rep below this one */ if (cursor->diffs_[insert_idx] > packed_diff || !self->IsPacked(rep)) { 182 | if (direct) { 183 | next(); 184 | } else if (self->IsPacked(rep)) { 185 | leftmost(self->OffsetToMemNode(self->Unpack(rep))); 186 | } 187 | break; 188 | } 189 | cursor = self->OffsetToMemNode(self->Unpack(rep)); 190 | } 191 | }(trans.Key(), target, cursor, idx, direct); 192 | 193 | } 194 | break; 195 | } 196 | } 197 | } 198 | 199 | /* plain visit: feed reps to the visitor until it returns false or the reps run out */ if constexpr (std::is_same::value) { 200 | while (!que.empty()) { 201 | auto & p = que.back(); 202 | if (visitor(p.first->reps_[p.second])) { 203 | if constexpr (!BACKWARD) { 204 | next(); 205 | } else { 206 | prev(); 207 | } 208 | } else { 209 | break; 210 | } 211 | } 212 | } else { // del 213 | while (!que.empty()) { 214 | auto it = que.end(); 215 | auto[node, rep_idx] = *(--it); 216 | auto[proceed, del] = visitor(node->reps_[rep_idx]); 217 | 218 | if (proceed) { 219 | if (del) { 220 | Node * parent = nullptr; 221 | size_t parent_rep_idx{}; 222 | size_t parent_size{}; 223 | if (it != que.begin()) { 224 | std::tie(parent, parent_rep_idx) = *(--it); 225 | parent_size = 
NodeSize(parent); 226 | } 227 | 228 | size_t size = NodeSize(node); 229 | bool direct = !(rep_idx == 0 || (rep_idx != size - 1 230 | && node->diffs_[rep_idx - 1] < node->diffs_[rep_idx])); 231 | 232 | NodeRemove(node, rep_idx - direct, direct, size--); 233 | /* if the shrunken node now fits into the free space of its parent (plus the slot that refers to it), fold it into the parent */ if (parent != nullptr && parent->reps_.size() - parent_size + 1 >= size) { 234 | self->NodeMerge(parent, parent_rep_idx, false, parent_size, 235 | node, size); 236 | it->second += rep_idx; 237 | que.pop_back(); 238 | } else if (KV_REP r; 239 | /* a node left with a single packed rep is replaced by the contents of that child */ size == 1 && (r = node->reps_[0], self->IsPacked(r))) { 240 | Node * child = self->OffsetToMemNode(self->Unpack(r)); 241 | size_t child_size = NodeSize(child); 242 | self->NodeMerge(node, 0, false, 1, 243 | child, child_size); 244 | if (rep_idx == 0) { 245 | if constexpr (!BACKWARD) { 246 | } else { 247 | que.pop_back(); 248 | } 249 | } else { 250 | assert(rep_idx == 1); 251 | if constexpr (!BACKWARD) { 252 | que.pop_back(); 253 | } else { 254 | it->second = child_size - 1; 255 | } 256 | } 257 | continue; 258 | } 259 | 260 | /* forward direction: after a removal the following rep has moved into the current index, so visit it without advancing */ if constexpr (!BACKWARD) { 261 | std::tie(node, rep_idx) = que.back(); 262 | if (rep_idx < NodeSize(node)) { 263 | const auto & rep = node->reps_[rep_idx]; 264 | if (self->IsPacked(rep)) { 265 | leftmost(self->OffsetToMemNode(self->Unpack(rep))); 266 | } 267 | continue; 268 | } 269 | } 270 | } 271 | 272 | if constexpr (!BACKWARD) { 273 | next(); 274 | } else { 275 | prev(); 276 | } 277 | } else { 278 | break; 279 | } 280 | } 281 | } 282 | } 283 | } 284 | 285 | #endif //SIG_TREE_SIG_TREE_VISIT_IMPL_H 286 | -------------------------------------------------------------------------------- /src/slice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef SIG_TREE_SLICE_H 3 | #define SIG_TREE_SLICE_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "coding.h" 11 | 12 | namespace sgt { 13 | /* Slice: non-owning view of a byte sequence (pointer plus length). It never copies or frees the bytes it refers to, so they must outlive the Slice. A default-constructed Slice is empty. */ class Slice { 14 | private: 15 | const char * data_ = ""; 16 | size_t size_ = 0; 17 | 
18 | public: 19 | Slice() = default; 20 | 21 | Slice(const char * d, size_t n) : data_(d), size_(n) {} 22 | 23 | /* pointer to any other one-byte element type */ template 24 | Slice(const T * s, size_t n) 25 | : Slice(reinterpret_cast(s), n) { 26 | static_assert(sizeof(T) == sizeof(char)); 27 | } 28 | 29 | /* any object offering data() and size() */ template::value>> 30 | Slice(const T & s) : Slice(s.data(), s.size()) {} 31 | 32 | /* NUL-terminated string; the length is taken with strlen */ template::value>> 33 | Slice(T s) : data_(s), size_(strlen(s)) {} 34 | 35 | /* character array; the last element (normally the terminator of a literal) is not part of the view */ template 36 | Slice(const char (& s)[L]) : data_(s), size_(L - 1) {} 37 | 38 | public: 39 | // same as STL 40 | const char * data() const { return data_; } 41 | 42 | // same as STL 43 | size_t size() const { return size_; } 44 | 45 | const char & operator[](size_t n) const { 46 | assert(n < size_); 47 | return data_[n]; 48 | } 49 | 50 | bool operator==(const Slice & another) const { 51 | return size_ == another.size_ && memcmp(data_, another.data_, size_) == 0; 52 | } 53 | 54 | bool operator!=(const Slice & another) const { return !operator==(another); } 55 | 56 | std::string_view ToStringView() const { return {data_, size_}; } 57 | 58 | std::string ToString() const { return {data_, size_}; } 59 | }; 60 | 61 | /* SliceComparator: strict weak ordering of Slices by std::string_view comparison; is_transparent allows heterogeneous lookup in ordered containers */ struct SliceComparator { 62 | using is_transparent = std::true_type; 63 | 64 | bool operator()(Slice a, Slice b) const { 65 | return a.ToStringView() < b.ToStringView(); 66 | } 67 | }; 68 | 69 | /* SliceHasher: hashes the viewed bytes through std::hash of a string view */ struct SliceHasher { 70 | std::size_t operator()(const Slice & s) const { 71 | return std::hash()(s.ToStringView()); 72 | } 73 | }; 74 | 75 | /* Stream output for Slice-like types: printable bytes are written as they are, every other byte as its numeric value in square brackets. The byte is converted to unsigned char before it is given to isprint, because calling isprint with a negative char value is undefined behaviour. */ template::value>> 76 | inline O & operator<<(O & o, const S & s) { 77 | for (size_t i = 0; i < s.size(); ++i) { 78 | char c = s[i]; 79 | if (isprint(static_cast<unsigned char>(c))) { 80 | o << c; 81 | } else { 82 | o << '[' 83 | << static_cast(CharToUint8(c)) 84 | << ']'; 85 | } 86 | } 87 | return o; 88 | } 89 | } 90 | 91 | #endif //SIG_TREE_SLICE_H 92 | -------------------------------------------------------------------------------- /test/sig_tree_test.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "../src/sig_tree.h" 7 | #include "../src/sig_tree_impl.h" 8 | #include "../src/sig_tree_mop_impl.h" 9 | #include "../src/sig_tree_node_impl.h" 10 | #include "../src/sig_tree_rebuild_impl.h" 11 | #include "../src/sig_tree_visit_impl.h" 12 | 13 | namespace sgt::sig_tree_test { 14 | /* 15 | * K = uint32_t 16 | * V = uint32_t 17 | * little-endian REP : uint64_t = (V << 32) | K 18 | * 19 | * if REP % 2 = 0, then REP is packed 20 | */ 21 | 22 | /* KVTrans: read-only view of one 64-bit rep as described above: the first four bytes in memory are the key, the last four the value. */ class KVTrans { 23 | private: 24 | const uint64_t rep_; 25 | 26 | public: 27 | explicit KVTrans(uint64_t rep) : rep_(rep) {} 28 | 29 | public: 30 | /* A key matches only if it is exactly four bytes long and equal to the stored key; the length test also keeps memcmp from reading past the end of a shorter key. */ bool operator==(const Slice & k) const { 31 | return k.size() == sizeof(uint32_t) && memcmp(&rep_, k.data(), sizeof(uint32_t)) == 0; 32 | } 33 | 34 | /* view of the four key bytes; valid only while this KVTrans is alive */ Slice Key() const { 35 | return {reinterpret_cast(&rep_), sizeof(uint32_t)}; 36 | } 37 | 38 | /* Get: returns true when k is the stored key; the four value bytes are then copied to *v unless v is null */ bool Get(const Slice & k, std::string * v) const { 39 | if (*this == k) { 40 | if (v != nullptr) { 41 | v->assign(reinterpret_cast(&rep_) + sizeof(uint32_t), 42 | reinterpret_cast(&rep_) + sizeof(uint64_t)); 43 | } 44 | return true; 45 | } 46 | return false; 47 | } 48 | }; 49 | 50 | /* Helper: glue between the tree and the 64-bit rep encoding of this test. Odd rep values are key/value pairs, even values are page offsets. */ class Helper : public SignatureTreeTpl::Helper { 51 | public: 52 | ~Helper() override = default; 53 | 54 | public: 55 | /* builds the rep for a new pair; both slices must be four bytes long and the key must be odd */ uint64_t Add(const Slice & k, const Slice & v) override { 56 | assert(k.size() == sizeof(uint32_t) && v.size() == sizeof(uint32_t)); 57 | uint32_t ki; 58 | uint32_t vi; 59 | memcpy(&ki, k.data(), sizeof(ki)); 60 | memcpy(&vi, v.data(), sizeof(vi)); 61 | assert(ki % 2 == 1); 62 | return (static_cast(vi) << 32) | ki; 63 | } 64 | 65 | /* nothing to release: the pair lives inside the rep itself */ void Del(KVTrans & trans) override {} 66 | 67 | uint64_t Pack(size_t offset) const override { 68 | assert(offset % 2 == 0); 69 | return offset; 70 | } 71 | 72 | size_t Unpack(const uint64_t & rep) const override { 73 | return rep; 74 | } 75 | 76 | bool IsPacked(const uint64_t & rep) const override { 77 | return rep % 2 == 0; 78 | } 79 | 80 | KVTrans Trans(const uint64_t & rep) const 
override { 81 | return KVTrans(rep); 82 | } 83 | }; 84 | 85 | /* AllocatorImpl: test allocator on top of malloc. Base() is nullptr and an "offset" is simply the address of the page; records_ remembers the live pages so that the destructor can free whatever is left. */ class AllocatorImpl : public Allocator { 86 | public: 87 | std::unordered_set records_; 88 | 89 | public: 90 | ~AllocatorImpl() override { 91 | for (uintptr_t record:records_) { 92 | free(reinterpret_cast(record)); 93 | } 94 | } 95 | 96 | public: 97 | void * Base() override { 98 | return nullptr; 99 | } 100 | 101 | size_t AllocatePage() override { 102 | auto page = reinterpret_cast(malloc(kPageSize)); 103 | records_.emplace(page); 104 | return page; 105 | } 106 | 107 | /* NOTE(review): assumes `offset` was returned by AllocatePage and not freed yet; the iterator is dereferenced without being compared against end() */ void FreePage(size_t offset) override { 108 | auto it = records_.find(offset); 109 | free(reinterpret_cast(*it)); 110 | records_.erase(it); 111 | } 112 | 113 | /* never needed: AllocatePage does not report a full allocator */ void Grow() override {} 114 | }; 115 | 116 | /* Run: randomized end-to-end test. It inserts random odd keys (each used as its own value) while mirroring them in an ordered set, compacts the tree, then checks forward, backward and seek visits, MultiGetWithCallback, Get / Del of every key, VisitDel with random deletions, and finally Rebuild into a second tree. The seed is printed so that a failing run can be identified. */ void Run() { 117 | constexpr unsigned int kTestTimes = 10000; 118 | 119 | Helper helper; 120 | AllocatorImpl allocator; 121 | SignatureTreeTpl tree(&helper, &allocator); 122 | assert(tree.RootOffset() == *allocator.records_.cbegin()); 123 | 124 | /* orders keys by their bytes in memory (memcmp) rather than by numeric value */ struct cmp { 125 | bool operator()(uint32_t a, uint32_t b) const { 126 | return memcmp(&a, &b, sizeof(uint32_t)) < 0; 127 | } 128 | }; 129 | std::set set; 130 | 131 | auto seed = std::random_device()(); 132 | std::cout << "sig_tree_test_seed: " << seed << std::endl; 133 | 134 | std::default_random_engine engine(seed); 135 | std::uniform_int_distribution dist(0, UINT32_MAX >> 1); 136 | for (size_t i = 0; i < kTestTimes; ++i) { 137 | uint32_t v = (dist(engine) << 16) | (dist(engine) % 8); 138 | /* keys must be odd: even reps are reserved for page offsets */ v += (v % 2 == 0); 139 | 140 | set.emplace(v); 141 | Slice s(reinterpret_cast(&v), sizeof(v)); 142 | tree.Add(s, s); 143 | assert(tree.Size() == set.size()); 144 | } 145 | tree.Compact(); 146 | 147 | /* full visit from the start: values must appear in set order */ { 148 | auto it = set.cbegin(); 149 | tree.Visit("", [&it](const uint64_t & rep) { 150 | uint32_t v = *it++; 151 | return v == (rep >> 32); 152 | }); 153 | assert(it == set.cend()); 154 | } 155 | /* the same check against the set's reverse iteration */ { 156 | auto it = set.crbegin(); 157 | tree.Visit("", [&it](const uint64_t & rep) { 158 | uint32_t v = *it++; 159 | 
return v == (rep >> 32); 160 | }); 161 | assert(it == set.crend()); 162 | } 163 | /* seek to a random key: the visit must start at the set's lower_bound */ { 164 | auto val = dist(engine); 165 | auto it = set.lower_bound(val); 166 | Slice s(reinterpret_cast(&val), sizeof(val)); 167 | tree.Visit(s, [&it](const uint64_t & rep) { 168 | uint32_t v = *it++; 169 | return v == (rep >> 32); 170 | }); 171 | assert(it == set.end()); 172 | } 173 | /* seek, then walk back towards the beginning */ { 174 | auto val = dist(engine); 175 | auto it = set.lower_bound(val); 176 | if (it != set.end()) { 177 | ++it; 178 | Slice s(reinterpret_cast(&val), sizeof(val)); 179 | tree.Visit(s, [&it](const uint64_t & rep) { 180 | uint32_t v = *--it; 181 | return v == (rep >> 32); 182 | }); 183 | assert(it == set.begin()); 184 | } 185 | } 186 | 187 | /* batched lookup, two keys at a time (the last key is paired with itself when the count is odd) */ for (auto it = set.cbegin(); it != set.cend();) { 188 | uint32_t v0 = *it++; 189 | uint32_t v1 = it != set.cend() ? *it++ : v0; 190 | 191 | std::array ss; 192 | ss[0] = {reinterpret_cast(&v0), sizeof(v0)}; 193 | ss[1] = {reinterpret_cast(&v1), sizeof(v1)}; 194 | 195 | tree.MultiGetWithCallback<2>(ss.data(), [&](const auto & reps) { 196 | assert(v0 == (*reps[0] >> 32)); 197 | assert(v1 == (*reps[1] >> 32)); 198 | }); 199 | } 200 | 201 | /* every key can be read, then deleted, and is gone afterwards */ std::string out; 202 | for (uint32_t v:set) { 203 | Slice s(reinterpret_cast(&v), sizeof(v)); 204 | tree.Get(s, &out); 205 | assert(s == out); 206 | assert(v == (*tree.GetWithCallback(s) >> 32)); 207 | 208 | out.clear(); 209 | tree.Del(s); 210 | tree.Get(s, &out); 211 | assert(s != out); 212 | } 213 | /* only the root page may remain */ assert(allocator.records_.size() == 1); 214 | assert(tree.Size() == 0); 215 | 216 | /* re-insert everything, delete about half at random during a visit and remember the survivors in `expect` */ decltype(set) expect; 217 | { 218 | for (uint32_t v:set) { 219 | Slice s(reinterpret_cast(&v), sizeof(v)); 220 | tree.Add(s, s); 221 | } 222 | 223 | auto it = set.cbegin(); 224 | tree.VisitDel("", [&](const uint64_t & rep) -> std::pair { 225 | uint32_t v = *it++; 226 | bool proceed = (v == (rep >> 32)); 227 | 228 | if (std::bernoulli_distribution()(engine)) { 229 | return {proceed, true}; 230 | } else { 231 | expect.emplace(v); 232 | return {proceed, false}; 
233 | } 234 | }); 235 | assert(it == set.cend()); 236 | 237 | it = expect.cbegin(); 238 | tree.Visit("\0", [&it](const uint64_t & rep) { 239 | uint32_t v = *it++; 240 | return v == (rep >> 32); 241 | }, 0u); 242 | assert(it == expect.cend()); 243 | } 244 | /* a rebuilt copy must contain exactly the surviving keys, in order */ { 245 | Helper dst_helper; 246 | AllocatorImpl dst_allocator; 247 | SignatureTreeTpl dst(&dst_helper, &dst_allocator); 248 | tree.Rebuild(&dst); 249 | 250 | auto it = expect.cbegin(); 251 | dst.Visit("", [&it](const uint64_t & rep) { 252 | uint32_t v = *it++; 253 | return v == (rep >> 32); 254 | }); 255 | assert(it == expect.cend()); 256 | } 257 | } 258 | } --------------------------------------------------------------------------------