├── .github └── workflows │ └── ocaml.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── RELEASE_CHECKLIST.md ├── dune-project ├── patricia-tree.opam └── src ├── PatriciaTree.ml ├── PatriciaTree.mli ├── dune ├── functors.ml ├── functors.mli ├── index.mld ├── int_builtins.c ├── ints.ml ├── ints.mli ├── key_value.ml ├── key_value.mli ├── nodes.ml ├── nodes.mli ├── signatures.ml └── test ├── dune ├── mdx_prelude.ml └── patriciaTreeTest.ml /.github/workflows/ocaml.yml: -------------------------------------------------------------------------------- 1 | name: Main workflow 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | tags: 9 | [ 10 | "v[0-9]+", 11 | "v[0-9]+.[0-9]+", 12 | "v[0-9]+.[0-9]+.[0-9]+", 13 | "v[0-9]+.[0-9]+.[0-9]+-*", 14 | ] 15 | 16 | jobs: 17 | # We have two separate build jobs: 18 | # - build-test-doc should run only once (one OS, one OCaml version), to avoid 19 | # needlessly building the doc artifact repeatedly 20 | # - build-test can run on as many version as we want 21 | # - doc is also only run once, and only on push events, not on pull requests 22 | build-test-doc: 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | os: 27 | - ubuntu-latest 28 | # - macos-latest 29 | # - windows-latest 30 | ocaml-compiler: 31 | - 4.14.x 32 | 33 | runs-on: ${{ matrix.os }} 34 | outputs: 35 | ref: ${{ steps.vars.outputs.ref }} 36 | 37 | steps: 38 | - name: Checkout code 39 | uses: actions/checkout@v4 40 | 41 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 42 | uses: ocaml/setup-ocaml@v3 43 | with: 44 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 45 | 46 | - name: Install dependencies 47 | run: opam install . --deps-only --with-test 48 | 49 | - name: Build library 50 | run: opam exec -- dune build 51 | 52 | - name: Run unit tests 53 | run: opam exec -- dune test 54 | 55 | - name: Build documentation 56 | run: | 57 | opam install . --deps-only --with-doc 58 | opam exec -- dune build @doc 59 | 60 | # - id: vars 61 | # shell: bash 62 | # run: echo "ref=$(echo ${GITHUB_REF#refs/*/})" >> ${GITHUB_OUTPUT} 63 | 64 | # - uses: actions/upload-artifact@v4 65 | # if: github.event_name == 'push' && 66 | # ( github.ref == 'refs/heads/main' || startsWith(github.ref,'refs/tags') ) 67 | # with: 68 | # name: doc-${{ steps.vars.outputs.ref }} 69 | # path: _build/default/_doc/_html/patricia-tree/ 70 | 71 | build-test: 72 | strategy: 73 | fail-fast: false 74 | matrix: 75 | os: 76 | - ubuntu-latest 77 | - macos-latest 78 | # - windows-latest 79 | ocaml-compiler: 80 | - 4.14.x 81 | - 5.1.x 82 | - 5.2.x 83 | - 5.3.x 84 | exclude: 85 | - os: "ubuntu-latest" 86 | ocaml-compiler: 4.14.x 87 | 88 | runs-on: ${{ matrix.os }} 89 | 90 | steps: 91 | - name: Checkout code 92 | uses: actions/checkout@v4 93 | 94 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 95 | uses: ocaml/setup-ocaml@v3 96 | with: 97 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 98 | 99 | - name: Install dependencies 100 | run: opam install . --deps-only --with-test 101 | 102 | - name: Build library 103 | run: opam exec -- dune build 104 | 105 | - name: Run unit tests 106 | run: opam exec -- dune test 107 | 108 | # doc: 109 | # needs: build-test-doc 110 | # if: github.event_name == 'push' && 111 | # ( github.ref == 'refs/heads/main' || startsWith(github.ref,'refs/tags') ) 112 | # permissions: 113 | # contents: write 114 | # runs-on: ubuntu-latest 115 | # steps: 116 | # - name: Checkout gh-pages 117 | # uses: actions/checkout@v4 118 | # with: 119 | # ref: gh-pages 120 | # - name: Remove previous doc 121 | # run: rm -rf ${{ needs.build-test-doc.outputs.ref }} 122 | # - name: Retrieve new documentation 123 | # uses: actions/download-artifact@v4 124 | # with: 125 | # name: doc-${{ needs.build-test-doc.outputs.ref }} 126 | # path: ${{ needs.build-test-doc.outputs.ref }} 127 | # - name: Deploy documentation 128 | # run: | 129 | # git config user.email "${{ github.actor }}@users.noreply.github.com" 130 | # git config user.name "${{ github.actor }}" 131 | # git add ${{ needs.build-test-doc.outputs.ref }} 132 | # if ! git diff --cached --quiet --exit-code; 133 | # then 134 | # git commit -m "Deploy ${GITHUB_SHA}" 135 | # git push -f "https://${{ github.actor }}:${{ github.token }}@github.com/${{ github.repository }}.git" gh-pages 136 | # else 137 | # echo "No changes to push" 138 | # fi 139 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # dune build folder 2 | _build/ 3 | _html/ 4 | .vscode 5 | headers/ 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.11.0 - 2025-01-27 2 | 3 | - Add some `reflexive_equal` and `reflexive_compare` functions 4 | - Add `min_binding_inter` for maps, `min_elt_inter` for sets and their max counterparts 5 | - Add `difference` and `symmetric_difference` function to maps (and add `difference` to `WithForeign`) 6 | - Add `diff` functions to sets 7 | - Internal refactor. 8 | 9 | # v0.10.0 - 2024-06-01 10 | 11 | ## Main changes 12 | 13 | - Added hash-consed nodes and functors to build hash-consed maps and sets. 14 | - Added new functions `fold_on_nonequal_inter` and `fold_on_nonequal_union` to maps. 15 | - Now support using negative keys, removed `zarith` dependency. 16 | - Fixed some bugs 17 | 18 | ## Detailed changes 19 | 20 | **Breaking changes:** 21 | - Renamed `MakeCustom` to `MakeCustomMap`, added new functor `MakeCustomSet`. 22 | `MakeCustomMap` changed to take a new argument to specify the `'a value` type. 23 | - Renamed `MakeCustomHeterogeneous` to `MakeCustomHeterogeneousMap`, added new functor 24 | `MakeCustomHeterogeneousSet`. 25 | - Renamed `NODE_WITH_ID.get_id` to `NODE_WITH_ID.to_int`, this allows using 26 | instances `NODE_WITH_ID` directly as a `KEY`. 27 | - Renamed `VALUE` to `HETEROGENEOUS_VALUE`, added a `VALUE` module type (previously unnamed). 28 | - Renamed `min_binding`, `max_binding`, `pop_minimum`, `pop_maximum`, `min_elt` 29 | and `max_elt` to `unsigned_min_binding`, `unsigned_max_binding`, 30 | `pop_unsigned_minimum`, `pop_unsigned_maximum`, `unsigned_min_elt` 31 | and `unsigned_max_elt` respectively, to clarify that these functions consider 32 | negative numbers as larger than positive ones. 33 | 34 | **New features:** 35 | - Added new interface `MAP_WITH_VALUE` which is the same as `MAP` but with a custom 36 | type `'a value` instead of just `'a`. 37 | - Added `HashconsedNode`, `HashconsedSetNode` as well as four functors to create 38 | hash-consed heterogeneous/homogeneous maps/sets: `MakeHashconsedMap`, `MakeHashconsedSet`, 39 | `MakeHashconsedHeterogeneousMap` and `MakeHashconsedHeterogeneousSet`. 40 | - Now support using negative keys. Trees are built using the bitwise representation 41 | of integer, meaning they effectively use an unsigned order. Negative keys are 42 | considered bigger than positive keys, `0` is the minimal number and `-1` the maximal one. 43 | - Added new functions `fold_on_nonequal_inter` and `fold_on_nonequal_union` to maps. 44 | 45 | **Bug fixes:** 46 | - Fixed a bug where `NodeWithId` wasn't incrementing ids properly 47 | - `zarith` is no longer a dependency, used GCC's `__builtin_clz` as a faster 48 | method of finding an integer's highest bit. 49 | - Fixed a bug where `pop_minimum` and `pop_maximum` could throw a private exception 50 | `Dissappeared` when using `WeakNode`. 51 | - Fixed a possible assertion error when using `idempotent_subset_domain_forall2` 52 | with `WeakNode`. 53 | - Fix compilation warnings when compiling on ocaml 5.2. 54 | 55 | # v0.9.0 - 2024-04-18 56 | 57 | - Initial release of Patricia Tree 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Patricia Tree 2 | 3 | [![Latest version](https://img.shields.io/badge/version-0.10.0-yellow)](https://github.com/codex-semantics-library/patricia-tree/releases) 4 | [![OCaml Version](https://img.shields.io/badge/OCaml-4.14_--_5.2-blue?logo=ocaml&logoColor=white)](https://github.com/codex-semantics-library/patricia-tree/blob/main/dune-project) 5 | [![GitHub License](https://img.shields.io/github/license/codex-semantics-library/patricia-tree)](https://github.com/codex-semantics-library/patricia-tree/blob/main/LICENSE) 6 | [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/codex-semantics-library/patricia-tree/ocaml.yml)](https://github.com/codex-semantics-library/patricia-tree/actions/workflows/ocaml.yml) 7 | [![Documentation](https://img.shields.io/website?url=https%3A%2F%2Fcodex.top%2Fapi%2Fpatricia-tree%2F&up_message=online&down_message=offline&label=documentation)](https://codex.top/api/patricia-tree/) 8 | 9 | This is an [OCaml](https://ocaml.org/) library that implements sets and maps as 10 | Patricia Trees, as described in Okasaki and Gill's 1998 paper [*Fast mergeable integer maps*](https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d). 11 | It is a space-efficient prefix trie over the big-endian representation of the key's integer identifier. 12 | 13 | The documentation for this library can be found online at 14 | [codex.top/patricia-tree/](https://codex.top/patricia-tree/). 15 | 16 | This library was written by [Matthieu Lemerre](https://www.researchgate.net/profile/Matthieu-Lemerre) then further improved 17 | by [Dorian Lesbre](https://www.normalesup.org/~dlesbre/), 18 | as part of the [Codex semantics library](https://codex.top/), developed at [CEA List](https://list.cea.fr/en/). 19 | 20 | **Table of Contents:** 21 | 22 | - [Installation](#installation) 23 | - [Features](#features) 24 | - [Quick overview](#quick-overview) 25 | - [Functors](#functors) 26 | - [Interfaces](#interfaces) 27 | - [Examples](#examples) 28 | - [Homogeneous map](#homogeneous-map) 29 | - [Heterogeneous map](#heterogeneous-map) 30 | - [Release status](#release-status) 31 | - [Known issues](#known-issues) 32 | - [Comparison to other OCaml libraries](#comparison-to-other-ocaml-libraries) 33 | - [ptmap and ptset](#ptmap-and-ptset) 34 | - [dmap](#dmap) 35 | - [Contributions and bug reports](#contributions-and-bug-reports) 36 | 37 | 38 | ## Installation 39 | 40 | This library can be installed with [opam](https://opam.ocaml.org/): 41 | ```bash 42 | opam install patricia-tree 43 | ``` 44 | Alternatively, you can clone the source repository and install with [dune](https://dune.build/): 45 | ```bash 46 | git clone git@github.com:codex-semantics-library/patricia-tree.git 47 | cd patricia-tree 48 | opan install . --deps-only 49 | dune build -p patricia-tree 50 | dune install 51 | # To build documentation 52 | opam install . --deps-only --with-doc 53 | dune build @doc 54 | ``` 55 | 56 | See the [examples](#examples) to jump right into using this library. 57 | 58 | ## Features 59 | 60 | - Similar to OCaml's `Map` and `Set`, using the same function names when possible 61 | and the same convention for order of arguments. This should allow switching to 62 | and from Patricia Tree with minimal effort. 63 | - The functor parameters (`KEY` module) requires an injective `to_int : t -> int` 64 | function instead of a `compare` function. `to_int` should be fast and injective. 65 | This works well with [hash-consed](https://en.wikipedia.org/wiki/Hash_consing) types. 66 | - The Patricia Tree representation is stable, contrary to maps, inserting nodes 67 | in any order will return the same shape. 68 | This allows different versions of a map to share more subtrees in memory, and 69 | the operations over two maps to benefit from this sharing. The functions in 70 | this library attempt to **maximally preserve sharing and benefit from sharing**, 71 | allowing very important improvements in complexity and running time when 72 | combining maps or sets is a frequent operation. 73 | 74 | To do so, these functions often have extra requirements on their argument 75 | (e.g. `inter f m1 m2` can be optimized by not inspecting common subtrees when 76 | `f` is idempotent). To avoid accidental errors, they are renamed (e.g. to 77 | `idempotent_inter` for the efficient version and `nonidempotent_inter_no_share` 78 | for the general one) 79 | 80 | - Since our Patricia Tree use big-endian order on keys, the maps and sets are 81 | sorted in increasing **unsigned order** of keys. 82 | This means negative keys are sorted above positive keys, with `-1` being the 83 | largest possible key, and `0` the smallest. 84 | This also avoids a bug in Okasaki's paper discussed in [*QuickChecking Patricia Trees*](https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf) 85 | by Jan Mitgaard. 86 | 87 | It also affects functions like `unsigned_min_binding` and `pop_unsigned_minimum`. They will return the smallest 88 | positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect. 89 | - Supports generic maps and sets: a `'m map` that maps `'k key` to `('k, 'm) value`. 90 | This is especially useful when using [GADTs](https://v2.ocaml.org/manual/gadts-tutorial.html) for the type of keys. This is also sometimes called a dependent map. 91 | - Allows easy and fast operations across different types of maps and set 92 | which have the same type of keys (e.g. an intersection between a map and a set). 93 | - Multiple choices for internal representation (`NODE`), which allows for efficient 94 | storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also 95 | be extended to store size information in nodes if needed. 96 | - Exposes a common interface (`view`) to allow users to write their own pattern 97 | matching on the tree structure without depending on the `NODE` being used. 98 | - hash-consed versions of heterogeneous/homogeneous maps/sets are 99 | available. These provide constant time equality and comparison, and ensure 100 | maps/set with the same constants are always physically equal. It comes at the cost 101 | of a constant overhead in memory usage (at worst, as hash-consing may allow memory gains) and constant time overhead 102 | when calling constructors. 103 | 104 | ## Quick overview 105 | 106 | ### Functors 107 | 108 | This library contains a single module, `PatriciaTree`. 109 | The main functors used to build our maps and sets are the following: 110 | ```ocaml 111 | (** {2 Homogeneous maps and sets} *) 112 | 113 | module MakeMap(Key: KEY) : MAP with type key = Key.t 114 | module MakeSet(Key: KEY) : SET with type elt = Key.t 115 | 116 | (** {2 Heterogeneous maps and sets} *) 117 | 118 | module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET 119 | with type 'a elt = 'a Key.t 120 | module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : 121 | HETEROGENEOUS_MAP 122 | with type 'a key = 'a Key.t 123 | and type ('k,'m) value = ('k,'m) Value.t 124 | ``` 125 | 126 | There are also [hash-consed](https://en.wikipedia.org/wiki/Hash_consing) versions 127 | of these four functors: `MakeHashconsedMap`, `MakeHashconsedSet`, 128 | `MakeHashconsedHeterogeneousMap` and `MakeHashconsedHeterogeneousSet`. 129 | These uniquely number their nodes, which means: 130 | - `equal` and `compare` become constant time operations; 131 | - two maps with the same bindings (where keys are compared by `KEY.to_int` and 132 | values by `HASHED_VALUE.polyeq`) will always be physically equal; 133 | - functions that benefit from sharing will see improved performance; 134 | - constructors are slightly slower, as they now require a hash-table lookup; 135 | - memory usage is increased: nodes store their tags inside themselves, and 136 | a global hash-table of all built nodes must be maintained; 137 | - hash-consed maps assume their values are immutable; 138 | - **WARNING:** when using physical equality as `HASHED_VALUE.polyeq`, 139 | some maps of different types may be given the same identifier. See the end of 140 | the documentation of `HASHED_VALUE.polyeq` for details. 141 | Note that this is the case in the default implementations `HashedValue` 142 | and `HeterogeneousHashedValue`. 143 | - All hash-consing functors are **generative**, since each functor call will 144 | create a new hash-table to store the created nodes. Calling a functor 145 | twice with same arguments will lead to two numbering systems for identifiers, 146 | and thus the types should not be considered compatible. 147 | 148 | ### Interfaces 149 | 150 | Here is a brief overview of the various module types of our library: 151 | - `BASE_MAP`: the underlying module type of all our trees (maps end sets). It 152 | represents a `'b map` binding `'a key` to `('a,'b) value`, as well as all functions needed to manipulate them. 153 | 154 | It can be accessed from any of the more specific maps types, thus providing a 155 | unified representation, useful for cross map operations. However, for practical 156 | purposes, it is often best to use the more specific interfaces: 157 | - `HETEROGENEOUS_MAP` for heterogeneous maps (this is just `BASE_MAP` with a 158 | `WithForeign` functor). 159 | - `MAP` for homogeneous maps, this interface is close to [`Stdlib.Map.S`](https://ocaml.org/api/Map.S.html). 160 | - `HETEROGENEOUS_SET` for heterogeneous sets (sets of `'a elt`). These are just 161 | maps to unit, but with a custom node representation to avoid storing unit in 162 | nodes. 163 | - `SET` for homogeneous sets, this interface is close to [`Stdlib.Set.S`](https://ocaml.org/api/Set.S.html). 164 | - The parameter of our functor are either `KEY` or `HETEROGENEOUS_KEY`. 165 | These just consist of a type, a (polymorphic) equality function, and an 166 | injective `to_int` coercion. 167 | 168 | The heterogeneous map functor also has a `HETEROGENEOUS_VALUE` parameter to specify the 169 | `('a, 'b) value` type 170 | - The internal representations of our tree can be customized to use different 171 | internal `NODE`. Each node come with its own private constructors and destructors, 172 | as well as a cast to a uniform `view` type used for pattern matching. 173 | 174 | A number of implementations are provided: 175 | - `SimpleNode`: exactly the `NODE.view` type; 176 | - `WeakNode`: only store weak pointer to its elements; 177 | - `NodeWithId`: node which contains a unique identifier; 178 | - `SetNode`: optimized for sets, doesn't store the [unit] value; 179 | - `WeakSetNode`: both a `WeakNode` and as `SetNode` 180 | - `HashconsedNode`: performs hash-consing (it also stores a unique identifier, but checks when 181 | building a new node whether a node with similar content already exists); 182 | - `HashconsedSetNode`: both a `HashconsedNode` and a `SetNode`. 183 | 184 | Use the functors `MakeCustomMap` and `MakeCustomSet` (or their heterogeneous 185 | versions `MakeCustomHeterogeneousMap` and `MakeCustomHeterogeneousSet`) to build 186 | maps using these nodes, or any other custom nodes. 187 | 188 | ## Examples 189 | 190 | To use this library, [install it](#installation) and add the following to your 191 | dune files: 192 | ```dune 193 | (executable ; or library 194 | ... 195 | (libraries patricia-tree ...) 196 | ) 197 | ``` 198 | 199 | ### Homogeneous map 200 | 201 | Here is a small example of a non-generic map: 202 | 203 | ```ocaml 204 | (** Create a key struct *) 205 | module Int (*: PatriciaTree.KEY*) = struct 206 | type t = int 207 | let to_int x = x 208 | end 209 | 210 | (** Call the map and/or set functors *) 211 | module IMap = PatriciaTree.MakeMap(Int) 212 | module ISet = PatriciaTree.MakeSet(Int) 213 | 214 | (** Use all the usual map operations *) 215 | let map = 216 | IMap.empty |> 217 | IMap.add 1 "hello" |> 218 | IMap.add 2 "world" |> 219 | IMap.add 3 "how do you do?" 220 | (* Also has an [of_list] and [of_seq] operation for initialization *) 221 | 222 | let _ = IMap.find 1 map (* "hello" *) 223 | let _ = IMap.cardinal map (* 3 *) 224 | 225 | (** The strength of Patricia Tree is the speedup of operation on multiple maps 226 | with common subtrees. *) 227 | let map2 = 228 | IMap.idempotent_inter_filter (fun _key _l _r -> None) 229 | (IMap.add 4 "something" map) (IMap.add 5 "something else" map) 230 | let _ = map == map2 (* true *) 231 | (* physical equality is preserved as much as possible, although some intersections 232 | may need to build new nodes and won't be fully physically equal, they will 233 | still share subtrees if possible. *) 234 | 235 | (** Many operations preserve physical equality whenever possible *) 236 | let _ = (IMap.add 1 "hello" map) == map (* true: already present *) 237 | 238 | (** Example of cross map/set operation: only keep the bindings of [map] 239 | whose keys are in a given set *) 240 | let set = ISet.of_list [1; 3] 241 | module CrossOperations = IMap.WithForeign(ISet.BaseMap) 242 | let restricted_map = CrossOperations.nonidempotent_inter 243 | { f = fun _key value () -> value } map set 244 | ``` 245 | 246 | ### Heterogeneous map 247 | 248 | ```ocaml 249 | (** Very small typed expression language *) 250 | type 'a expr = 251 | | G_Const_Int : int -> int expr 252 | | G_Const_Bool : bool -> bool expr 253 | | G_Addition : int expr * int expr -> int expr 254 | | G_Equal : 'a expr * 'a expr -> bool expr 255 | 256 | module Expr : PatriciaTree.HETEROGENEOUS_KEY with type 'a t = 'a expr = struct 257 | type 'a t = 'a expr 258 | 259 | (** Injective, so long as expression are small enough 260 | (encodes the constructor discriminant in two lowest bits). 261 | Ideally, use a hash-consed type, to_int needs to be fast *) 262 | let rec to_int : type a. a expr -> int = function 263 | | G_Const_Int i -> 0 + 4*i 264 | | G_Const_Bool b -> 1 + 4*(if b then 1 else 0) 265 | | G_Addition(l,r) -> 2 + 4*(to_int l mod 10000 + 10000*(to_int r)) 266 | | G_Equal(l,r) -> 3 + 4*(to_int l mod 10000 + 10000*(to_int r)) 267 | 268 | (** Full polymorphic equality *) 269 | let rec polyeq : type a b. a expr -> b expr -> (a, b) PatriciaTree.cmp = 270 | fun l r -> match l, r with 271 | | G_Const_Int l, G_Const_Int r -> if l = r then Eq else Diff 272 | | G_Const_Bool l, G_Const_Bool r -> if l = r then Eq else Diff 273 | | G_Addition(ll, lr), G_Addition(rl, rr) -> ( 274 | match polyeq ll rl with 275 | | Eq -> polyeq lr rr 276 | | Diff -> Diff) 277 | | G_Equal(ll, lr), G_Equal(rl, rr) -> ( 278 | match polyeq ll rl with 279 | | Eq -> (match polyeq lr rr with Eq -> Eq | Diff -> Diff) (* Match required by typechecker *) 280 | | Diff -> Diff) 281 | | _ -> Diff 282 | end 283 | 284 | (** Map from expression to their values: here the value only depends on the type 285 | of the key, not that of the map *) 286 | module EMap = PatriciaTree.MakeHeterogeneousMap(Expr)(struct type ('a, _) t = 'a end) 287 | 288 | (** You can use all the usual map operations *) 289 | let map : unit EMap.t = 290 | EMap.empty |> 291 | EMap.add (G_Const_Bool false) false |> 292 | EMap.add (G_Const_Int 5) 5 |> 293 | EMap.add (G_Addition (G_Const_Int 3, G_Const_Int 6)) 9 |> 294 | EMap.add (G_Equal (G_Const_Bool false, G_Equal (G_Const_Int 5, G_Const_Int 7))) true 295 | 296 | let _ = EMap.find (G_Const_Bool false) map (* false *) 297 | let _ = EMap.cardinal map (* 4 *) 298 | 299 | (** Fast operations on multiple maps with common subtrees. *) 300 | let map2 = 301 | EMap.idempotent_inter_filter 302 | { f = fun _key _l _r -> None } (* polymorphic 1rst order functions are wrapped in records *) 303 | (EMap.add (G_Const_Int 0) 8 map) 304 | (EMap.add (G_Const_Int 0) 9 map) 305 | ``` 306 | 307 | ## Release status 308 | 309 | This should be close to a stable release. It is already being 310 | used as part of a [larger project](https://codex.top) successfully, and this usage as helped us mature 311 | the interface. As is, we believe the project is usable, and we don't anticipate 312 | any major change before 1.0.0. We didn't commit to a stable release straight 313 | away as we would like a bit more time using this library before doing so. 314 | 315 | ## Known issues 316 | 317 | There is a bug in the OCaml typechecker which prevents us from directly 318 | defining non-generic maps as instances of generic maps. To avoid this, non-generic maps use a separate value type (instead of just using `'b`) 319 | ```ocaml 320 | type (_, 'b) snd = Snd of 'b [@@unboxed] 321 | ``` 322 | It should not incur any extra performance cost as it is unboxed, but can appear 323 | when manipulating non-generic maps. 324 | 325 | For more details about this issue, see [the OCaml discourse discussion](https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783) or [the github issue](https://github.com/ocaml/ocaml/issues/13292). 326 | 327 | ## Comparison to other OCaml libraries 328 | 329 | ### ptmap and ptset 330 | 331 | There are other implementations of Patricia Tree in OCaml, namely 332 | [ptmap](https://github.com/backtracking/ptmap) and 333 | [ptset](https://github.com/backtracking/ptset), both by J.C. Filliatre. 334 | These are smaller and closer to OCaml's built-in Map and Set, however: 335 | - Our library allows using any type `key` that comes with an injective `to_int` 336 | function, instead of requiring `key = int`. 337 | - We support generic (heterogeneous) types for keys/elements. 338 | - We support operations between sets and maps of different types. 339 | - We use a big-endian representation, allowing easy access to min/max elements of 340 | maps and trees. 341 | - Our interface and implementation tries to maximize the sharing between different 342 | versions of the tree, and to benefit from this memory sharing. Theirs do not. 343 | - These libraries work with older version of OCaml (`>= 4.05` I believe), whereas 344 | ours requires OCaml `>= 4.14` (for the new interface of `Ephemeron` used in 345 | `WeakNode`). 346 | 347 | ### dmap 348 | 349 | Additionally, there is a dependent map library: [dmap](https://gitlab.inria.fr/bmontagu/dmap), 350 | which gave us the idea of making our PatriciaTree dependent. 351 | It allows creating type safe dependent maps similar to our heterogeneous maps. 352 | However, its maps aren't Patricia trees. They are binary trees build using a 353 | (polymorphic) comparison function, similarly to the maps of the standard library. 354 | 355 | Another difference is that the type of values in the map is independent of the type of the keys, 356 | allowing keys to be associated with different values in different maps. i.e. 357 | we map `'a key` to any `('a, 'b) value` type, whereas dmap only maps `'a key` 358 | to `'a` or `'a value`. 359 | 360 | `dmap` also works with OCaml `>= 4.12`, whereas we require OCaml `>= 4.14`. 361 | 362 | ## Contributions and bug reports 363 | 364 | Any contributions are welcome! 365 | 366 | You can report any bug, issues, or desired features using the [Github issue tracker](https://github.com/codex-semantics-library/patricia-tree/issues). 367 | Please include OCaml, dune, and library version information in you bug reports. 368 | 369 | If you want to contribute code, feel free to fork [the repository on Github](https://github.com/codex-semantics-library/patricia-tree) 370 | and open a pull request. By doing so you agree to release your code under this 371 | project's license ([LGPL-2.1](https://choosealicense.com/licenses/lgpl-2.1/)). 372 | 373 | 374 | There is no imposed coding style for this repository, here are just a few guidelines and conventions: 375 | - Module type names should use `SCREAMING_SNAKE_CASE`. 376 | - Module and functor names use `PascalCase`, functors names start with `Make`. 377 | - Even though the library implements homogeneous maps as a specialization of 378 | heterogeneous ones, the naming convention is that no prefix means homogeneous, 379 | and all heterogeneous objects are prefixed with `heterogeneous`. 380 | - Please document any new functions in the interface, using [ocamldoc style comments](https://v2.ocaml.org/manual/ocamldoc.html#s%3Aocamldoc-comments). 381 | - Please consider adding test for new features/fixed bugs if at all possible. 382 | This library uses a [QuickCheck](https://www.ocaml.org/p/quickcheck/latest/doc/QuickCheck/index.html) framework for tests. 383 | -------------------------------------------------------------------------------- /RELEASE_CHECKLIST.md: -------------------------------------------------------------------------------- 1 | # Release checklist 2 | 3 | Step-by-step guide to creating and publishing a new release. 4 | 5 | 1. Check that file headers are up-to-date 6 | 2. Update version numbers in [dune-project](./dune-project) and [src/index.mld](./src/index.mld) 7 | 3. Fill in the [changelog](./CHANGELOG.md), it should start with 8 | ```md 9 | # vX.Y.Z - YYYY-MM-DD 10 | 11 | - bullet list of changes 12 | 13 | # 14 | ``` 15 | 16 | 4. Create a new tag for the release: 17 | ```bash 18 | dune-release tag 19 | ``` 20 | This should auto-create a tag `vX.Y.Z` (name read from the CHANGELOG) 21 | 22 | 5. Push the new tag to Github: 23 | ```bash 24 | git push origin vX.Y.Z 25 | ``` 26 | Check that [github-action](https://github.com/codex-semantics-library/patricia-tree/actions) succeeds (build, tests and documentation). 27 | It should create a new folder `vX.Y.Z` on the `gh-pages` branch. 28 | 29 | 6. On the [codex-semantics-library.github.io](https://github.com/codex-semantics-library/codex-semantics-library.github.io) 30 | repository: 31 | - update the `patricia-tree.latest-version` field in `_data/packages.yaml` 32 | - update the `api/patricia-tree.md` page to add a link to the new version. 33 | - run `dune build @doc-json`, copy the json files from 34 | `/_build/default/_doc/html/patricia-tree` to 35 | `/_data/api/patricia-tree/vX__Y__Z` (note the `__` instead of `.`), 36 | also copy the `db.js` file to `/assets/js/sherlodoc-db/patricia-tree.X.Y.Z.js` 37 | 38 | 7. Run: 39 | ```bash 40 | dune-release publish distrib 41 | ``` 42 | This will create a new Github release, it requires a Github Access Token. 43 | 44 | 8. Run: 45 | ```bash 46 | dune-release opam pkg 47 | ``` 48 | To prepare the package for an opam release 49 | 50 | 9. Run: 51 | ```bash 52 | dune-release opam submit 53 | ``` 54 | This will create a PR to [opam-repository](https://github.com/ocaml/opam-repository) 55 | to publish the new version. It requires a locally cloned fork of opam-repository. 56 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.0) 2 | 3 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4 | ;; This file is part of the Codex semantics library ;; 5 | ;; (patricia-tree sub-component). ;; 6 | ;; ;; 7 | ;; Copyright (C) 2024-2025 ;; 8 | ;; CEA (Commissariat à l'énergie atomique et aux énergies ;; 9 | ;; alternatives) ;; 10 | ;; ;; 11 | ;; You can redistribute it and/or modify it under the terms of the GNU ;; 12 | ;; Lesser General Public License as published by the Free Software ;; 13 | ;; Foundation, version 2.1. ;; 14 | ;; ;; 15 | ;; It is distributed in the hope that it will be useful, ;; 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; 18 | ;; GNU Lesser General Public License for more details. ;; 19 | ;; ;; 20 | ;; See the GNU Lesser General Public License version 2.1 ;; 21 | ;; for more details (enclosed in the file LICENSE). ;; 22 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 23 | 24 | (name patricia-tree) 25 | 26 | (version 0.11.0) 27 | 28 | (using mdx 0.2) 29 | 30 | (maintainers "Dorian Lesbre ") 31 | 32 | (authors 33 | "Matthieu Lemerre " 34 | "Dorian Lesbre ") 35 | 36 | (license "LGPL-2.1-only") 37 | 38 | (bug_reports 39 | "https://github.com/codex-semantics-library/patricia-tree/issues") 40 | 41 | (homepage "https://codex.top/api/patricia-tree/") 42 | 43 | (documentation "https://codex.top/api/patricia-tree/") 44 | 45 | (source 46 | (github "codex-semantics-library/patricia-tree")) 47 | 48 | (generate_opam_files) 49 | 50 | (package 51 | (name patricia-tree) 52 | (synopsis 53 | "Patricia Tree data structure in OCaml for maps and sets. Supports generic key-value pairs") 54 | (depends 55 | (ocaml 56 | (>= 4.14)) 57 | dune 58 | (qcheck-core 59 | (and 60 | (>= "0.21.2") 61 | :with-test)) 62 | (ppx_inline_test 63 | (and 64 | (>= "v0.16.0") 65 | :with-test)) 66 | (mdx 67 | (and 68 | (>= "2.4.1") 69 | :with-test)) 70 | (odoc 71 | (and 72 | (>= "2.4.0") 73 | :with-doc)) 74 | (sherlodoc :with-doc))) 75 | -------------------------------------------------------------------------------- /patricia-tree.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.11.0" 4 | synopsis: 5 | "Patricia Tree data structure in OCaml for maps and sets. Supports generic key-value pairs" 6 | maintainer: ["Dorian Lesbre "] 7 | authors: [ 8 | "Matthieu Lemerre " 9 | "Dorian Lesbre " 10 | ] 11 | license: "LGPL-2.1-only" 12 | homepage: "https://codex.top/api/patricia-tree/" 13 | doc: "https://codex.top/api/patricia-tree/" 14 | bug-reports: 15 | "https://github.com/codex-semantics-library/patricia-tree/issues" 16 | depends: [ 17 | "ocaml" {>= "4.14"} 18 | "dune" {>= "3.0"} 19 | "qcheck-core" {>= "0.21.2" & with-test} 20 | "ppx_inline_test" {>= "v0.16.0" & with-test} 21 | "mdx" {>= "2.4.1" & with-test} 22 | "odoc" {>= "2.4.0" & with-doc} 23 | "sherlodoc" {with-doc} 24 | ] 25 | build: [ 26 | ["dune" "subst"] {dev} 27 | [ 28 | "dune" 29 | "build" 30 | "-p" 31 | name 32 | "-j" 33 | jobs 34 | "@install" 35 | "@runtest" {with-test} 36 | "@doc" {with-doc} 37 | ] 38 | ] 39 | dev-repo: "git+https://github.com/codex-semantics-library/patricia-tree.git" 40 | -------------------------------------------------------------------------------- /src/PatriciaTree.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | include Ints 23 | include Signatures 24 | include Key_value 25 | include Functors 26 | include Nodes 27 | -------------------------------------------------------------------------------- /src/PatriciaTree.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | (** Association maps from key to values, and sets, implemented with 23 | Patricia Trees, allowing fast merge operations by making use of 24 | physical equality between subtrees; and custom implementation of 25 | {{!node_impl}tree nodes} (allowing normal maps, {{!hash_consed}hash-consed maps}, weak key or 26 | value maps, sets, custom maps, etc.). 27 | 28 | The main entry points into this library are the functors that build maps 29 | and sets: 30 | {table 31 | {tr 32 | {th } 33 | {th Map} 34 | {th Set} 35 | } 36 | {tr 37 | {th Homogeneous} 38 | {td {!MakeMap}} 39 | {td {!MakeSet}} 40 | } 41 | {tr 42 | {th Heterogeneous} 43 | {td {!MakeHeterogeneousMap}} 44 | {td {!MakeHeterogeneousSet}} 45 | } 46 | {tr 47 | {th {{!hash_consed}Hash-consed} Homogeneous} 48 | {td {!MakeHashconsedMap}} 49 | {td {!MakeHashconsedSet}} 50 | } 51 | {tr 52 | {th {{!hash_consed}Hash-consed} Heterogeneous} 53 | {td {!MakeHashconsedHeterogeneousMap}} 54 | {td {!MakeHashconsedHeterogeneousSet}} 55 | } 56 | } 57 | 58 | 59 | Differences between this library and OCaml's {{: https://ocaml.org/api/Map.S.html}[Map]} include: 60 | 61 | {ul 62 | {- The required signature for keys is different, in that we require 63 | each key to be mapped to a unique integer identifier.} 64 | 65 | {- The implementation uses Patricia Tree, as described in Okasaki 66 | and Gill's 1998 paper 67 | {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, 68 | i.e. it is a space-efficient prefix trie over the big-endian representation of 69 | the key's integer identifier. 70 | 71 | Example of a 5-bit patricia tree containing five numbers: 0 [0b0000], 1 [0b0001], 72 | 5 [0b0101] and 7 [0b0111] and -8 [0b1111]: 73 | {v 74 | Branch 75 | (prefix=0b?___) 76 | / \ 77 | Branch Leaf(-8) 78 | (prefix=0b0?__) 0b1111 79 | / \ 80 | Branch Branch 81 | (prefix=0b000?) (prefix=0b01?_) 82 | | | | | 83 | Leaf(0) Leaf(1) Leaf(5) Leaf(7) 84 | 0b0000 0b0001 0b0101 0b0111 85 | v} 86 | 87 | The main benefit of Patricia Tree is that their representation 88 | is stable (contrary to maps, inserting nodes in any order will 89 | return the same shape), which allows different versions of a map 90 | to share more subtrees in memory, and the 91 | {{!BASE_MAP.functions_on_pairs}operations over two maps} 92 | to benefit from this sharing. The functions in this library 93 | attempt to maximally preserve sharing and benefit from sharing, 94 | allowing very important improvements in complexity and running 95 | time when combining maps or sets is a frequent operation.} 96 | 97 | {- Finally, the implementation is more customizable, allowing 98 | notably (key,value) pairs or different types to be in the same map, 99 | or to choose the memory representation of the nodes of the tree.} 100 | 101 | {- Some operations like {{!BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} and 102 | {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]} make our Set 103 | suitable as priority queue (but remember that each element in the 104 | queue must map to a distinct integer, and that using the {{!unsigned_lt}unsigned order} 105 | means elements with negative priority are seen as greater than elements with 106 | positive ones).} 107 | } 108 | 109 | {b Note on complexity:} in the following, n represents the size of the 110 | map when there is one (and [|map1|] is the number of elements in 111 | [map1]). The term log(n) correspond to the maximum height of the 112 | tree, which is log(n) if we assume an even distribution of numbers 113 | in the map (e.g. random distribution, or integers chosen 114 | contiguously using a counter). The worst-case height is 115 | O(min(n,64)) which is actually constant, but not really 116 | informative; log(n) corresponds to the real complexity in usual 117 | distributions. *) 118 | 119 | (** {1 Integer manipulations} *) 120 | 121 | include module type of Ints 122 | 123 | (** {1 Signatures} *) 124 | 125 | include module type of Signatures 126 | 127 | (** {1 Functors} *) 128 | 129 | include module type of Functors 130 | 131 | (** {1 Default KEY and VALUE implementations} *) 132 | (** These can be used as parameters to {!MakeMap}/{!MakeSet} functors in the 133 | most common use cases. *) 134 | 135 | include module type of Key_value 136 | 137 | (** {1:node_impl Some implementations of NODE} *) 138 | (** We provide a few different implementations of {!NODE}, the internal representation 139 | of a PatriciaTree's nodes. They can be used with 140 | the {!MakeCustomMap}, {!MakeCustomSet}, {!MakeCustomHeterogeneousMap} and 141 | {!MakeCustomHeterogeneousSet} functors to build maps and sets with custom 142 | internal representation. *) 143 | 144 | include module type of Nodes 145 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; This file is part of the Codex semantics library ;; 3 | ;; (patricia-tree sub-component). ;; 4 | ;; ;; 5 | ;; Copyright (C) 2024-2025 ;; 6 | ;; CEA (Commissariat à l'énergie atomique et aux énergies ;; 7 | ;; alternatives) ;; 8 | ;; ;; 9 | ;; You can redistribute it and/or modify it under the terms of the GNU ;; 10 | ;; Lesser General Public License as published by the Free Software ;; 11 | ;; Foundation, version 2.1. ;; 12 | ;; ;; 13 | ;; It is distributed in the hope that it will be useful, ;; 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; 16 | ;; GNU Lesser General Public License for more details. ;; 17 | ;; ;; 18 | ;; See the GNU Lesser General Public License version 2.1 ;; 19 | ;; for more details (enclosed in the file LICENSE). ;; 20 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 21 | 22 | (library 23 | (name PatriciaTree) 24 | (public_name patricia-tree) 25 | (foreign_stubs 26 | (language c) 27 | (names int_builtins))) 28 | 29 | (documentation 30 | (package patricia-tree)) 31 | 32 | (mdx 33 | (files *.mld *.mli) 34 | (libraries patricia-tree) 35 | (preludes test/mdx_prelude.ml)) 36 | -------------------------------------------------------------------------------- /src/functors.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open Signatures 23 | 24 | (** This section presents the functors which can be used to build patricia tree 25 | maps and sets. *) 26 | 27 | (** {1 Homogeneous maps and sets} *) 28 | (** These are homogeneous maps and set, their keys/elements are a single 29 | non-generic type, just like the standard library's [Map] and [Set] modules. *) 30 | 31 | (** Create a Patricia tree based map, analogous to the standard library's 32 | {{: https://ocaml.org/api/Map.Make.html}[Map.Make]} *) 33 | module MakeMap(Key: KEY) : MAP with type key = Key.t 34 | 35 | (** Create a Patricia tree based set, analogous to the standard library's 36 | {{: https://ocaml.org/api/Set.Make.html}[Set.Make]} *) 37 | module MakeSet(Key: KEY) : SET with type elt = Key.t 38 | 39 | (** {1 Heterogeneous maps and sets} *) 40 | (** Heterogeneous maps are ['map map], which store bindings of ['key key] 41 | to [('key, 'map) value]. Note that ['key] is quantified existentially, 42 | so ['key key] should be a GADT, as we must be able 43 | to compare keys of different types together. 44 | 45 | Similarly, heterogeneous sets store sets of ['key key]. *) 46 | 47 | (** Create a Patricia tree based heterogeneous set, analogous to the standard library's 48 | {{: https://ocaml.org/api/Set.Make.html}[Set.Make]}, but with an extra type 49 | parameter: a set stores elements of type ['a elt]. *) 50 | module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET 51 | with type 'a elt = 'a Key.t 52 | 53 | (** Create a Patricia tree based heterogeneous map, analogous to the standard library's 54 | {{: https://ocaml.org/api/Map.Make.html}[Map.Make]}, but with an extra type 55 | parameter: a ['map map] stores bindings of ['key key] to [('key, 'map) value]. *) 56 | module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : HETEROGENEOUS_MAP 57 | with type 'a key = 'a Key.t 58 | and type ('k,'m) value = ('k,'m) Value.t 59 | 60 | 61 | (** {1 Maps and sets with custom nodes} *) 62 | (** We can also customize the representation and creation of nodes, to 63 | gain space or time. 64 | 65 | Possibitities include having weak key and/or values, hash-consing, 66 | giving unique number to nodes or keeping them in sync with the 67 | disk, lazy evaluation and/or caching, adding size information for 68 | constant time [cardinal] functions, etc. 69 | 70 | See {!node_impl} for the provided implementations of {!NODE}, or create your own. *) 71 | 72 | (** Create a homogeneous map with a custom {!NODE}. Also allows 73 | customizing the map values *) 74 | module MakeCustomMap 75 | (Key: KEY) 76 | (Value: VALUE) 77 | (Node: NODE with type 'a key = Key.t and type ('key,'map) value = ('key, 'map Value.t) snd) 78 | : MAP_WITH_VALUE 79 | with type key = Key.t 80 | and type 'm value = 'm Value.t 81 | and type 'm t = 'm Node.t 82 | 83 | 84 | (** Create a homogeneous set with a custom {!NODE}. 85 | @since v0.10.0 *) 86 | module MakeCustomSet 87 | (Key: KEY) 88 | (Node: NODE with type 'a key = Key.t and type ('key,'map) value = unit) 89 | : SET 90 | with type elt = Key.t 91 | and type 'a BaseMap.t = 'a Node.t 92 | 93 | (** Create an heterogeneous map with a custom {!NODE}. *) 94 | module MakeCustomHeterogeneousMap 95 | (Key: HETEROGENEOUS_KEY) 96 | (Value: HETEROGENEOUS_VALUE) 97 | (Node: NODE with type 'a key = 'a Key.t and type ('key,'map) value = ('key,'map) Value.t) 98 | : HETEROGENEOUS_MAP 99 | with type 'a key = 'a Key.t 100 | and type ('k,'m) value = ('k,'m) Value.t 101 | and type 'm t = 'm Node.t 102 | 103 | (** Create an heterogeneous set with a custom {!NODE}. 104 | @since v0.10.0 *) 105 | module MakeCustomHeterogeneousSet 106 | (Key: HETEROGENEOUS_KEY) 107 | (NODE: NODE with type 'a key = 'a Key.t and type ('key,'map) value = unit) 108 | : HETEROGENEOUS_SET 109 | with type 'a elt = 'a Key.t 110 | and type 'a BaseMap.t = 'a NODE.t 111 | 112 | (** {1:hash_consed Hash-consed maps and sets} *) 113 | (** Hash-consed maps and sets uniquely number each of their nodes. 114 | Upon creation, they check whether a similar node has been created before, 115 | if so they return it, else they return a new node with a new number. 116 | With this unique numbering: 117 | - [equal] and [compare] become constant time operations; 118 | - two maps with the same bindings (where keys are compared by {!KEY.to_int} and 119 | values by {!HASHED_VALUE.polyeq}) will always be physically equal; 120 | - functions that benefit from sharing, like {!BASE_MAP.idempotent_union} and 121 | {!BASE_MAP.idempotent_inter} will see improved performance; 122 | - constructors are slightly slower, as they now require a hash-table lookup; 123 | - memory usage is increased: nodes store their tags inside themselves, and 124 | a global hash-table of all built nodes must be maintained. 125 | This is quickly amortized if multiple identical nodes are built, 126 | as only one will be kept in memory. 127 | - hash-consed maps assume their keys and values are immutable, where regular 128 | maps can mutate values freely; 129 | - {b WARNING:} when using physical equality as {!HASHED_VALUE.polyeq}, some 130 | {b maps of different types may be given the same identifier}. See the end of 131 | the documentation of {!HASHED_VALUE.polyeq} for details. 132 | Note that this is the case in the default implementations {!HashedValue} 133 | and {!HeterogeneousHashedValue}. 134 | 135 | All hash-consing functors are {b generative}, since each functor call will 136 | create a new hash-table to store the created nodes. Calling a functor 137 | twice with same arguments will lead to two numbering systems for identifiers, 138 | and thus the types should not be considered compatible. *) 139 | 140 | (** Hash-consed version of {!MAP}. See {!hash_consed} for the differences between 141 | hash-consed and non hash-consed maps. 142 | 143 | This is a generative functor, as calling it creates a new hash-table to store 144 | the created nodes, and a reference to store the next unallocated identifier. 145 | Maps/sets from different hash-consing functors (even if these functors have 146 | the same arguments) will have different (incompatible) numbering systems and 147 | be stored in different hash-tables (thus they will never be physically equal). 148 | 149 | @since v0.10.0 *) 150 | module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() : sig 151 | include MAP_WITH_VALUE with type key = Key.t and type 'a value = 'a Value.t (** @closed *) 152 | 153 | include HASH_CONSED_OPERATIONS with type 'a t := 'a t (** @inline *) 154 | end 155 | 156 | (** Hash-consed version of {!SET}. See {!hash_consed} for the differences between 157 | hash-consed and non hash-consed sets. 158 | 159 | This is a generative functor, as calling it creates a new hash-table to store 160 | the created nodes, and a reference to store the next unallocated identifier. 161 | Maps/sets from different hash-consing functors (even if these functors have 162 | the same arguments) will have different (incompatible) numbering systems and 163 | be stored in different hash-tables (thus they will never be physically equal). 164 | 165 | @since v0.10.0 *) 166 | module MakeHashconsedSet(Key: KEY)() : sig 167 | include SET with type elt = Key.t (** @closed *) 168 | 169 | include HASH_CONSED_OPERATIONS with type 'a t := t (** @inline *) 170 | end 171 | 172 | (** Hash-consed version of {!HETEROGENEOUS_SET}. See {!hash_consed} for the differences between 173 | hash-consed and non hash-consed sets. 174 | 175 | This is a generative functor, as calling it creates a new hash-table to store 176 | the created nodes, and a reference to store the next unallocated identifier. 177 | Maps/sets from different hash-consing functors (even if these functors have 178 | the same arguments) will have different (incompatible) numbering systems and 179 | be stored in different hash-tables (thus they will never be physically equal). 180 | 181 | @since v0.10.0 *) 182 | module MakeHashconsedHeterogeneousSet(Key: HETEROGENEOUS_KEY)() : sig 183 | include HETEROGENEOUS_SET with type 'a elt = 'a Key.t (** @closed *) 184 | 185 | include HASH_CONSED_OPERATIONS with type 'a t := t (** @inline *) 186 | end 187 | 188 | (** Hash-consed version of {!HETEROGENEOUS_MAP}. See {!hash_consed} for the differences between 189 | hash-consed and non hash-consed maps. 190 | 191 | This is a generative functor, as calling it creates a new hash-table to store 192 | the created nodes, and a reference to store the next unallocated identifier. 193 | Maps/sets from different hash-consing functors (even if these functors have 194 | the same arguments) will have different (incompatible) numbering systems and 195 | be stored in different hash-tables (thus they will never be physically equal). 196 | 197 | @since v0.10.0 *) 198 | module MakeHashconsedHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : sig 199 | include HETEROGENEOUS_MAP 200 | with type 'a key = 'a Key.t 201 | and type ('k,'m) value = ('k, 'm) Value.t (** @closed *) 202 | 203 | include HASH_CONSED_OPERATIONS with type 'a t := 'a t (** @inline *) 204 | end 205 | -------------------------------------------------------------------------------- /src/index.mld: -------------------------------------------------------------------------------- 1 | {0 Package patricia-tree} 2 | 3 | This library contains a single module: {!PatriciaTree}. 4 | 5 | This is version [0.11.0] of the library. It is known to work with OCaml versions 6 | ranging from [4.14] to [5.3]. 7 | 8 | This is an {{: https://ocaml.org/}OCaml} library that implements sets and maps as 9 | Patricia Trees, as described in Okasaki and Gill's 1998 paper 10 | {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}. 11 | It is a space-efficient prefix trie over the big-endian representation of the key's integer identifier. 12 | 13 | The source code of this library is available {{: https://github.com/codex-semantics-library/patricia-tree}on Github} 14 | under an {{: https://choosealicense.com/licenses/lgpl-2.1/}LGPL-2.1} license. 15 | 16 | This library was written by {{: https://www.researchgate.net/profile/Matthieu-Lemerre}Matthieu Lemerre}, 17 | then further improved by {{: https://www.normalesup.org/~dlesbre/}Dorian Lesbre}, 18 | as part of the {{: https://codex.top/}Codex semantics library}, developed at 19 | {{: https://list.cea.fr/en/} CEA List}. 20 | 21 | {1:install Installation} 22 | 23 | This library can be installed with {{: https://opam.ocaml.org/}opam}: 24 | {@bash skip[ 25 | opam install patricia-tree 26 | ]} 27 | 28 | Alternatively, you can clone the source repository and install with {{: https://dune.build/}dune}: 29 | {@bash skip[ 30 | git clone git@github.com:codex-semantics-library/patricia-tree.git 31 | cd patricia-tree 32 | opan install . --deps-only 33 | dune build -p patricia-tree 34 | dune install 35 | # To build documentation 36 | opam install . --deps-only --with-doc 37 | dune build @doc 38 | ]} 39 | 40 | See the {{!examples}examples} to jump right into using this library. 41 | 42 | {1 Features} 43 | 44 | {ul 45 | {li Similar to OCaml's {{: https://ocaml.org/api/Map.S.html}[Map]} and {{: https://ocaml.org/api/Set.S.html}[Set]}, 46 | using the same function names when possible 47 | and the same convention for order of arguments. This should allow switching to 48 | and from Patricia Tree with minimal effort.} 49 | {li The functor parameters ({{!PatriciaTree.KEY}[KEY]} module) requires an injective [to_int : t -> int] 50 | function instead of a [compare] function. {{!PatriciaTree.KEY.to_int}[KEY.to_int]} should be fast, 51 | and injective. 52 | This works well with {{: https://en.wikipedia.org/wiki/Hash_consing}hash-consed} types.} 53 | {li The Patricia Tree representation is stable, contrary to maps, inserting nodes 54 | in any order will return the same shape. 55 | This allows different versions of a map to share more subtrees in memory, and 56 | the {{!PatriciaTree.BASE_MAP.functions_on_pairs}operations over two maps} to benefit from this sharing. The functions in 57 | this library attempt to {b maximally preserve sharing and benefit from sharing}, 58 | allowing very important improvements in complexity and running time when 59 | combining maps or sets is a frequent operation. 60 | 61 | To do so, these functions often have extra requirements on their argument 62 | (e.g. [inter f m1 m2] can be optimized by not inspecting common subtrees when 63 | [f] is idempotent). To avoid accidental errors, they are renamed (e.g. to 64 | [idempotent_inter] for the efficient version and [nonidempotent_inter_no_share] 65 | for the general one)} 66 | {li Since our Patricia Tree use big-endian order on keys, the maps and sets are 67 | sorted in increasing {b {{!PatriciaTree.unsigned_lt}unsigned order}} of keys. 68 | This means negative keys are sorted above positive keys, with [-1] being the 69 | largest possible key, and [0] the smallest. 70 | This also avoids a bug in Okasaki's paper discussed in 71 | {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} 72 | by Jan Mitgaard. 73 | 74 | It also affects functions like {{!PatriciaTree.BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} 75 | and {{!PatriciaTree.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]}. They will return the smallest 76 | positive integer of both positive and negative keys are present; and not the smallest negative, 77 | as one might expect.} 78 | {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. 79 | This is especially useful when using {{: https://v2.ocaml.org/manual/gadts-tutorial.html}GADTs} 80 | for the type of keys. This is also sometimes called a dependent map.} 81 | {li Allows easy and fast operations across different types of maps and set 82 | which have the same type of keys (e.g. an intersection between a map and a set).} 83 | {li Multiple choices for internal representation ({{!PatriciaTree.NODE}[NODE]}), which allows for efficient 84 | storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also 85 | be extended to store size information in nodes if needed.} 86 | {li Exposes a common interface ({!type:PatriciaTree.NODE.view}) to allow users to write their own pattern 87 | matching on the tree structure without depending on the {{!PatriciaTree.NODE}[NODE]} being used.} 88 | {li Additionally, hashconsed versions of heterogeneous/homogeneous maps/sets are 89 | available. These provide constant time equality and comparison, and ensure 90 | maps/set with the same constants are always physically equal. It comes at the cost 91 | of a constant overhead in memory usage (at worst, as hash-consing may allow 92 | memory gains) and constant time overhead when calling constructors.}} 93 | 94 | {1 Quick overview} 95 | 96 | {2 Functors} 97 | 98 | This library contains a single module, {!PatriciaTree}. 99 | The functors used to build maps and sets are the following: 100 | {ul 101 | {li For homogeneous (non-generic) maps and sets: {{!PatriciaTree.MakeMap}[MakeMap]} and 102 | {{!PatriciaTree.MakeSet}[MakeSet]}. These are similar to the standard library's maps and sets. 103 | {@ocaml skip[ 104 | module MakeMap(Key: KEY) : MAP with type key = Key.t 105 | module MakeSet(Key: KEY) : SET with type elt = Key.t 106 | ]}} 107 | {li For Heterogeneous (generic) maps and sets: {{!PatriciaTree.MakeHeterogeneousMap}[MakeHeterogeneousMap]} 108 | and {{!PatriciaTree.MakeHeterogeneousSet}[MakeHeterogeneousSet]}. 109 | {@ocaml skip[ 110 | module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : 111 | HETEROGENEOUS_MAP 112 | with type 'a key = 'a Key.t 113 | and type ('k,'m) value = ('k,'m) Value.t 114 | module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET 115 | with type 'a elt = 'a Key.t 116 | ]}} 117 | {li 118 | There are also {{: https://en.wikipedia.org/wiki/Hash_consing}hash-consed} versions 119 | of these four functors: {{!PatriciaTree.MakeHashconsedMap}[MakeHashconsedMap]}, {{!PatriciaTree.MakeHashconsedSet}[MakeHashconsedSet]}, 120 | {{!PatriciaTree.MakeHashconsedHeterogeneousMap}[MakeHashconsedHeterogeneousMap]} and {{!PatriciaTree.MakeHashconsedHeterogeneousSet}[MakeHashconsedHeterogeneousSet]}. 121 | These uniquely number their nodes, which means: 122 | - [equal] and [compare] become constant time operations; 123 | - two maps with the same bindings (where keys are compared by {{!PatriciaTree.KEY.to_int}[KEY.to_int]} and 124 | values by {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]}) will always be physically equal; 125 | - functions that benefit from sharing will see improved performance; 126 | - constructors are slightly slower, as they now require a hash-table lookup; 127 | - memory usage is increased: nodes store their tags inside themselves, and 128 | a global hash-table of all built nodes must be maintained; 129 | - hash-consed maps assume their values are immutable; 130 | - {b WARNING:} when using physical equality as {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]}, some maps of different 131 | types may be given the same identifier. See the end of 132 | the documentation of {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]} for details. 133 | Note that this is the case in the default implementations 134 | {{!PatriciaTree.HashedValue}[HashedValue]} 135 | and {{!PatriciaTree.HeterogeneousHashedValue}[HeterogeneousHashedValue]}. 136 | - All hash-consing functors are {b generative}, since each functor call will 137 | create a new hash-table to store the created nodes. Calling a functor 138 | twice with same arguments will lead to two numbering systems for identifiers, 139 | and thus the types should not be considered compatible. 140 | }} 141 | 142 | {2 Interfaces} 143 | 144 | Here is a brief overview of the various module types of our library: 145 | {ul 146 | {li {{!PatriciaTree.BASE_MAP}[BASE_MAP]}: the underlying module type of all our trees (maps end sets). It 147 | represents a ['b map] binding ['a key] to [('a,'b) value], as well as all 148 | functions needed to manipulate them. 149 | 150 | It can be accessed from any of the more specific maps types, thus providing a 151 | unified representation, useful for cross map operations. However, for practical 152 | purposes, it is often best to use the more specific interfaces: 153 | {ul 154 | {li {{!PatriciaTree.HETEROGENEOUS_MAP}[HETEROGENEOUS_MAP]} for heterogeneous maps (this is just {{!PatriciaTree.BASE_MAP}[BASE_MAP]} with a 155 | [WithForeign] functor).} 156 | {li {{!PatriciaTree.MAP}[MAP]} for homogeneous maps, this interface is close to {{: https://ocaml.org/api/Map.S.html}[Stdlib.Map.S]}.} 157 | {li {{!PatriciaTree.HETEROGENEOUS_SET}[HETEROGENEOUS_SET]} for heterogeneous sets (sets of ['a elt]). These are just 158 | maps to [unit], but with a custom node representation to avoid storing [unit] in 159 | nodes.} 160 | {li {{!PatriciaTree.SET}[SET]} for homogeneous sets, this interface is close to {{: https://ocaml.org/api/Set.S.html}[Stdlib.Set.S]}.} 161 | }} 162 | {li The parameter of our functor are either {{!PatriciaTree.KEY}[KEY]} or {{!PatriciaTree.HETEROGENEOUS_KEY}[HETEROGENEOUS_KEY]}. 163 | These just consist of a type, a (polymorphic) equality function, and an 164 | injective [to_int] coercion. 165 | 166 | The heterogeneous map functor also has a {{!PatriciaTree.HETEROGENEOUS_VALUE}[HETEROGENEOUS_VALUE]} parameter to specify the 167 | [('a, 'b) value] type.} 168 | {li The internal representations of our tree can be customized to use different 169 | internal {{!PatriciaTree.NODE}[NODE]}. Each node come with its own private constructors and destructors, 170 | as well as a cast to a uniform {{!type:PatriciaTree.NODE.view}[NODE.view]} type used for pattern matching. 171 | 172 | A number of implementations are provided: 173 | - {{!PatriciaTree.SimpleNode}[SimpleNode]}: exactly the {{!type:PatriciaTree.NODE.view}[NODE.view]} type; 174 | - {{!PatriciaTree.WeakNode}[WeakNode]}: only store weak pointer to its elements; 175 | - {{!PatriciaTree.NodeWithId}[NodeWithId]}: node which contains a unique identifier; 176 | - {{!PatriciaTree.SetNode}[SetNode]}: optimized for sets, doesn't store the [unit] value; 177 | - {{!PatriciaTree.WeakSetNode}[WeakSetNode]}: both a {{!PatriciaTree.WeakNode}[WeakNode]} and a {{!PatriciaTree.SetNode}[SetNode]} 178 | - {{!PatriciaTree.HashconsedNode}[HashconsedNode]}: performs hash-consing (it also stores a unique identifier, but checks when 179 | building a new node whether a node with similar content already exists); 180 | - {{!PatriciaTree.HashconsedSetNode}[HashconsedSetNode]}: both a {{!PatriciaTree.HashconsedNode}[HashconsedNode]} and a {{!PatriciaTree.SetNode}[SetNode]}. 181 | 182 | Use the functors {{!PatriciaTree.MakeCustomMap}[MakeCustomMap]} and {{!PatriciaTree.MakeCustomSet}[MakeCustomSet]} 183 | (or their heterogeneous versions {{!PatriciaTree.MakeCustomHeterogeneousMap}[MakeCustomHeterogeneousMap]} and 184 | {{!PatriciaTree.MakeCustomHeterogeneousSet}[MakeCustomHeterogeneousSet]}) to build 185 | maps using these nodes, or any other custom nodes.} 186 | } 187 | 188 | {1:examples Examples} 189 | 190 | To use this library, {{!install}install it} and add the following to your 191 | dune files: 192 | {@dune[ 193 | (executable ; or library 194 | ... 195 | (libraries patricia-tree ...) 196 | ) 197 | ]} 198 | 199 | {2 Homogeneous map} 200 | 201 | Here is a small example of a non-generic map: 202 | 203 | {ol 204 | {li Start by creating a key module. We use [type int] for keys in this example, 205 | but you can use any type, so long as it supports an efficient and injective 206 | {{!PatriciaTree.KEY.to_int}[to_int]} function. 207 | {@ocaml[ 208 | module IntKey : PatriciaTree.KEY with type t = int = struct 209 | type t = int 210 | let to_int x = x (* to_int must be injective and fast *) 211 | end 212 | ]}} 213 | {li Use it to instanciate the map/set functors: 214 | {[ 215 | module IMap : PatriciaTree.MAP with type key = int 216 | = PatriciaTree.MakeMap(IntKey) 217 | 218 | module ISet : PatriciaTree.SET with type elt = int 219 | = PatriciaTree.MakeSet(IntKey) 220 | ]}} 221 | {li You can now use it as you would any other map, most of the interface is 222 | shared with the standard library's {{: https://ocaml.org/api/Map.S.html}[Map]} 223 | and {{: https://ocaml.org/api/Set.S.html}[Set]} (some functions have 224 | been renamed to highlight their differing requirements). 225 | {[ 226 | # let map = 227 | IMap.empty |> 228 | IMap.add 1 "hello" |> 229 | IMap.add 2 "world" |> 230 | IMap.add 3 "how do you do?";; 231 | val map : string IMap.t = 232 | ]} 233 | (We also have {{!PatriciaTree.MAP.of_list}[of_list]} and 234 | {{!PatriciaTree.MAP.of_seq}[of_seq]} functions for quick initialization) 235 | {[ 236 | # IMap.find 1 map;; 237 | - : string = "hello" 238 | 239 | # IMap.cardinal map;; 240 | - : int = 3 241 | ]}} 242 | {li The strength of Patricia Tree is the speedup of operations on multiple maps 243 | with common subtrees. For example, in the following, the 244 | {{!PatriciaTree.MAP.idempotent_inter_filter}[idempotent_inter_filter]} function 245 | will skip recursive calls to physically equal subtrees (kept as-is in the intersection). 246 | This allows faster than [O(n)] intersections. 247 | {[ 248 | # let map2 = 249 | IMap.idempotent_inter_filter (fun _key _l _r -> None) 250 | (IMap.add 4 "something" map) 251 | (IMap.add 5 "something else" map);; 252 | val map2 : string IMap.t = 253 | 254 | # map == map2;; 255 | - : bool = true 256 | ]} 257 | Physical equality is preserved as much as possible, although some intersections 258 | may need to build new nodes and won't be fully physically equal, they will 259 | still share some subtrees. 260 | {[ 261 | # let str = IMap.find 1 map;; 262 | val str : string = "hello" 263 | 264 | # IMap.add 1 str map == map (* already present *);; 265 | - : bool = true 266 | 267 | # IMap.add 1 "hello" map == map 268 | (* new string copy isn't physically equal to the old one *);; 269 | - : bool = false 270 | ]} 271 | Note that physical equality isn't preserved when creating new copies of values 272 | (the newly created string ["hello"] isn't physically equal to [str]). 273 | It can also fail when maps have the same bindings but were created differently: 274 | {[ 275 | # let map3 = IMap.remove 2 map;; 276 | val map3 : string IMap.t = 277 | 278 | # IMap.add 2 (IMap.find 2 map) map3 == map;; 279 | - : bool = false 280 | ]} 281 | If you want to maintain full physical equality (and thus get 282 | cheap equality test between maps), use the provided 283 | {{!PatriciaTree.section-hash_consed}hash-consed maps and sets}.} 284 | {li Our library also allows cross map/set operations through the 285 | {{!PatriciaTree.MAP.WithForeign}[WithForeign]} functors: 286 | {[ 287 | module CrossOperations = IMap.WithForeign(ISet.BaseMap) 288 | ]} 289 | For example, you can only keep the bindings of [map] 290 | whose keys are in a given set: 291 | {[ 292 | # let set = ISet.of_list [1; 3];; 293 | val set : ISet.t = 294 | 295 | # let restricted_map = CrossOperations.nonidempotent_inter 296 | { f = fun _key value () -> value } map set;; 297 | val restricted_map : string IMap.t = 298 | 299 | # IMap.to_list map;; 300 | - : (int * string) list = [(1, "hello"); (2, "world"); (3, "how do you do?")] 301 | 302 | # IMap.to_list restricted_map;; 303 | - : (int * string) list = [(1, "hello"); (3, "how do you do?")] 304 | ]} 305 | }} 306 | 307 | {2 Heterogeneous map} 308 | 309 | Heterogeneous maps work very similarly to homogeneous ones, but come with extra 310 | liberty of having a generic type as a key. 311 | 312 | {ol 313 | {li Here is a GADT example to use for our keys: a small typed expression language. 314 | {[ 315 | type 'a expr = 316 | | G_Const_Int : int -> int expr 317 | | G_Const_Bool : bool -> bool expr 318 | | G_Addition : int expr * int expr -> int expr 319 | | G_Equal : 'a expr * 'a expr -> bool expr 320 | ]} 321 | We can create our {{!PatriciaTree.HETEROGENEOUS_KEY}[HETEROGENEOUS_KEY]} functor 322 | parameter using this type as follows: 323 | {[ 324 | module Expr : PatriciaTree.HETEROGENEOUS_KEY with type 'a t = 'a expr = struct 325 | type 'a t = 'a expr 326 | 327 | (** Injective, so long as expressions are small enough 328 | (encodes the constructor discriminant in two lowest bits). 329 | Ideally, use a hash-consed type, to_int needs to be fast *) 330 | let rec to_int : type a. a expr -> int = function 331 | | G_Const_Int i -> 0 + 4*i 332 | | G_Const_Bool b -> 1 + 4*(if b then 1 else 0) 333 | | G_Addition(l,r) -> 2 + 4*(to_int l mod 10000 + 10000*(to_int r)) 334 | | G_Equal(l,r) -> 3 + 4*(to_int l mod 10000 + 10000*(to_int r)) 335 | 336 | (** Full polymorphic equality, requires annotation to type properly *) 337 | let rec polyeq : type a b. a expr -> b expr -> (a, b) PatriciaTree.cmp = 338 | fun l r -> match l, r with 339 | | G_Const_Int l, G_Const_Int r -> if l = r then Eq else Diff 340 | | G_Const_Bool l, G_Const_Bool r -> if l = r then Eq else Diff 341 | | G_Addition(ll, lr), G_Addition(rl, rr) -> ( 342 | match polyeq ll rl with 343 | | Eq -> polyeq lr rr 344 | | Diff -> Diff) 345 | | G_Equal(ll, lr), G_Equal(rl, rr) -> ( 346 | match polyeq ll rl with 347 | | Eq -> (* this match is no-op, but it is required to typecheck *) 348 | (match polyeq lr rr with Eq -> Eq | Diff -> Diff) 349 | | Diff -> Diff) 350 | | _ -> Diff 351 | end 352 | ]} 353 | Note the full polymorphic equality, that returns a GADT term {!PatriciaTree.cmp} 354 | which, when equal ({{!PatriciaTree.Eq}[Eq]}), is a proof of type equality 355 | between the type parameters.} 356 | {li We can now instanciate our map functor. Note that in the heterogeneous case, 357 | we must also specify the value type (second functor argument) and how it depends 358 | on the key type (first parameter) and the map type (second parameter). 359 | Here the value only depends on the type of the key, not that of the map 360 | {[ 361 | module EMap = PatriciaTree.MakeHeterogeneousMap 362 | (Expr) 363 | (struct type ('key, 'map) t = 'key end) 364 | ]}} 365 | {li You can now use this as you would any other dependent map: 366 | {[ 367 | # let map : unit EMap.t = 368 | EMap.empty |> 369 | EMap.add (G_Const_Bool false) false |> 370 | EMap.add (G_Const_Int 5) 5 |> 371 | EMap.add (G_Addition (G_Const_Int 3, G_Const_Int 6)) 9 |> 372 | EMap.add (G_Equal (G_Const_Bool false, G_Equal (G_Const_Int 5, G_Const_Int 7))) true 373 | val map : unit EMap.t = 374 | 375 | # EMap.find (G_Const_Bool false) map;; 376 | - : bool = false 377 | 378 | # EMap.find (G_Const_Int 5) map;; 379 | - : int = 5 380 | 381 | # EMap.cardinal map;; 382 | - : int = 4 383 | ]}} 384 | {li Physical equality preservation allows fast operations on multiple maps with common 385 | ancestors. In the heterogeneous case, these functions are a bit more complex 386 | since OCaml requires that first-order polymorphic functions be wrapped in records: 387 | {[ 388 | # let map2 = EMap.idempotent_inter_filter 389 | { f = fun _key _l _r -> None } (* polymorphic 1rst order functions are wrapped in records *) 390 | (EMap.add (G_Const_Int 0) 8 map) 391 | (EMap.add (G_Const_Int 0) 9 map) 392 | val map2 : unit EMap.t = 393 | ]} 394 | Even though [map] and [map2] have the same elements, they may not always 395 | be physically equal: 396 | {[ 397 | # map == map2;; 398 | - : bool = false 399 | ]} 400 | This is because they were created through different processes. They will still 401 | share subtrees. If you want to maintain full physical equality (and thus get 402 | cheap equality test between maps), use the provided 403 | {{!PatriciaTree.section-hash_consed}hash-consed maps and sets}. 404 | }} 405 | 406 | 407 | {1 Release status} 408 | 409 | This should be close to a stable release. It is already being 410 | used as part of a {{: https://codex.top}larger project} successfully, and this usage as helped us mature 411 | the interface. As is, we believe the project is usable, and we don't anticipate 412 | any major change before 1.0.0. We didn't commit to a stable release straight 413 | away as we would like a bit more time using this library before doing so. 414 | 415 | {1 Known issues} 416 | 417 | There is a bug in the OCaml typechecker which prevents us from directly 418 | defining non-generic maps as instances of generic maps. To avoid this, non-generic maps 419 | use a separate value type {{!PatriciaTree.snd}[('a, 'b) snd]} (instead of just using ['b]) 420 | {[ 421 | type (_, 'b) snd = Snd of 'b [@@unboxed] 422 | ]} 423 | It should not incur any extra performance cost as it is unboxed, but can appear 424 | when manipulating non-generic maps. 425 | 426 | For more details about this issue, see {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783}the OCaml discourse discussion} 427 | or {{: https://github.com/ocaml/ocaml/issues/13292}the github issue}. 428 | 429 | {1 Comparison to other OCaml libraries} 430 | 431 | {2 ptmap and ptset} 432 | 433 | There are other implementations of Patricia Tree in OCaml, namely 434 | {{: https://github.com/backtracking/ptmap}ptmap} and 435 | {{: https://github.com/backtracking/ptset}ptset}, both by J.C. Filliatre. 436 | These are smaller and closer to OCaml's built-in [Map] and [Set], however: 437 | - Our library allows using any type [key] that comes with an injective [to_int] 438 | function, instead of requiring [key = int]. 439 | - We support generic types for keys/elements. 440 | - We support operations between sets and maps of different types. 441 | - We use a big-endian representation, allowing easy access to min/max elements of 442 | maps and trees. 443 | - Our interface and implementation tries to maximize the sharing between different 444 | versions of the tree, and to benefit from this memory sharing. Theirs do not. 445 | - These libraries work with older version of OCaml ([>= 4.05] I believe), whereas 446 | ours requires OCaml [>= 4.14] (for the new interface of {{: https://v2.ocaml.org/api/Ephemeron.html}[Ephemeron]} used in 447 | {{!PatriciaTree.WeakNode}[WeakNode]}). 448 | 449 | {2 dmap} 450 | 451 | Additionally, there is a dependent map library: {{: https://gitlab.inria.fr/bmontagu/dmap}dmap}, 452 | which gave us the idea of making our PatriciaTree dependent. 453 | It allows creating type safe dependent maps similar to our heterogeneous maps. 454 | However, its maps aren't Patricia trees. They are binary trees build using a 455 | (polymorphic) comparison function, similarly to the maps of the standard library. 456 | 457 | Another difference is that the type of values in the map is independent from the 458 | type of the keys, allowing keys to be associated with different values in different maps. 459 | i.e. we map ['a key] to any [('a, 'b) value] type, whereas dmap only maps ['a key] 460 | to ['a] or ['a value]. 461 | 462 | [dmap] also works with OCaml [>= 4.12], whereas we require OCaml [>= 4.14]. 463 | 464 | {1 Contributions and bug reports} 465 | 466 | Any contributions are welcome! 467 | 468 | You can report any bug, issues, or desired features using the {{: https://github.com/codex-semantics-library/patricia-tree/issues}Github issue tracker}. 469 | Please include OCaml, dune, and library version information in you bug reports. 470 | 471 | If you want to contribute code, feel free to fork {{: https://github.com/codex-semantics-library/patricia-tree}the repository on Github} 472 | and open a pull request. By doing so you agree to release your code under this 473 | project's license ({{: https://choosealicense.com/licenses/lgpl-2.1/}LGPL-2.1}). 474 | 475 | 476 | There is no imposed coding style for this repository, here are just a few guidelines and conventions: 477 | - Module type names should use [SCREAMING_SNAKE_CASE]. 478 | - Module and functor names use [PascalCase], functors names start with [Make]. 479 | - Even though the library implements homogeneous maps as a specialization of 480 | heterogeneous ones, the naming convention is that no prefix means homogeneous, 481 | and all heterogeneous objects are prefixed with [heterogeneous]. 482 | - Please document any new functions in the interface, using {{: https://v2.ocaml.org/manual/ocamldoc.html#s%3Aocamldoc-comments}ocamldoc style comments}. 483 | - Please consider adding test for new features/fixed bugs if at all possible. 484 | This library uses a {{: https://www.ocaml.org/p/quickcheck/latest/doc/QuickCheck/index.html}QuickCheck} framework for tests. 485 | -------------------------------------------------------------------------------- /src/int_builtins.c: -------------------------------------------------------------------------------- 1 | /**************************************************************************/ 2 | /* This file is part of the Codex semantics library. */ 3 | /* */ 4 | /* Copyright (C) 2013-2024 */ 5 | /* CEA (Commissariat à l'énergie atomique et aux énergies */ 6 | /* alternatives) */ 7 | /* */ 8 | /* you can redistribute it and/or modify it under the terms of the GNU */ 9 | /* Lesser General Public License as published by the Free Software */ 10 | /* Foundation, version 2.1. */ 11 | /* */ 12 | /* It is distributed in the hope that it will be useful, */ 13 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ 14 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ 15 | /* GNU Lesser General Public License for more details. */ 16 | /* */ 17 | /* See the GNU Lesser General Public License version 2.1 */ 18 | /* for more details (enclosed in the file LICENSE). */ 19 | /* */ 20 | /**************************************************************************/ 21 | 22 | #define CAML_NAME_SPACE 23 | #include 24 | #include 25 | #include 26 | 27 | #ifdef _MSC_VER 28 | #include 29 | #endif 30 | 31 | __attribute__((__always_inline__)) 32 | static inline uintnat clz(uintnat v){ 33 | /* Note: on a 64 bit platform, GCC's _builtin_clz will perform a 32 34 | bit operation (even if the argument has type int). We have to use 35 | _builtin_clzll instead. */ 36 | #if __GNUC__ 37 | #ifdef ARCH_SIXTYFOUR 38 | return __builtin_clzll(v); 39 | #else 40 | return __builtin_clz(v); 41 | #endif 42 | #endif 43 | #ifdef _MSC_VER 44 | int res = 0; 45 | #ifdef ARCH_SIXTYFOUR 46 | _BitScanReverse64(&res,v); 47 | #else 48 | _BitScanReverse(&res,v); 49 | #endif 50 | return res; 51 | #endif 52 | } 53 | 54 | /**************** Highest bit ****************/ 55 | 56 | CAMLprim uintnat caml_int_builtin_highest_bit (value i){ 57 | /* printf("Highest bit In C: %x %x %x %x\n", */ 58 | /* i, i >> 1, 62-clz(i), 1 << (62 - clz(i))); */ 59 | /* fflush(stdout); */ 60 | return ((uintnat) 1 << (8*sizeof(value) - 2 - clz(i))); 61 | } 62 | 63 | CAMLprim value caml_int_builtin_highest_bit_byte (value i){ 64 | return Val_int(caml_int_builtin_highest_bit(i)); 65 | } 66 | -------------------------------------------------------------------------------- /src/ints.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | (** The integer associated with a key *) 23 | type intkey = int 24 | 25 | (** A mask is an integer with a single bit set (i.e. a power of 2). *) 26 | type mask = int 27 | 28 | (** Fast highest bit computation in c, using GCC's __builtin_clz 29 | which compile to efficient instruction (bsr) when possible. *) 30 | external highest_bit: int -> (int[@untagged]) = 31 | "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] 32 | 33 | let unsigned_lt x y = x - min_int < y - min_int 34 | (* if x >= 0 && y >= 0 35 | then x < y 36 | else if x >= 0 37 | then (* pos < neg *) true 38 | else if y >= 0 then false 39 | else x < y *) 40 | 41 | (** Note: in the original version, okasaki give the masks as arguments 42 | to optimize the computation of highest_bit. *) 43 | let branching_bit a b = highest_bit (a lxor b) 44 | 45 | let mask i m = i land (lnot (2*m-1)) 46 | -------------------------------------------------------------------------------- /src/ints.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | (** Small utilities used to manipulate the integer masks and branching bits *) 23 | 24 | type intkey = private int 25 | (** Private type used to represent prefix stored in nodes. 26 | These are integers with all bits after branching bit (included) set to zero *) 27 | 28 | type mask = private int 29 | (** Private type: integers with a single bit set. *) 30 | 31 | val unsigned_lt : int -> int -> bool 32 | (** All integers comparisons in this library are done according to their 33 | {b unsigned representation}. This is the same as signed comparison for same 34 | sign integers, but all negative integers are greater than the positives. 35 | This means [-1] is the greatest possible number, and [0] is the smallest. 36 | {[ 37 | # unsigned_lt 2 (-1);; 38 | - : bool = true 39 | # unsigned_lt max_int min_int;; 40 | - : bool = true 41 | # unsigned_lt 3 2;; 42 | - : bool = false 43 | # unsigned_lt 2 3;; 44 | - : bool = true 45 | # unsigned_lt (-2) (-3);; 46 | - : bool = false 47 | # unsigned_lt (-4) (-3);; 48 | - : bool = true 49 | # unsigned_lt 0 0;; 50 | - : bool = false 51 | ]} 52 | 53 | Using this unsigned order helps avoid a bug described in 54 | {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} 55 | by Jan Mitgaard. 56 | 57 | @since v0.10.0 *) 58 | 59 | (**/**) 60 | (** For internal use and testing *) 61 | 62 | val branching_bit : int -> int -> mask 63 | (** Returns the {!type-mask} corresponding to the highest bit that differs between 64 | both arguments. *) 65 | 66 | val mask : int -> mask -> intkey 67 | (** Only keeps the bits above mask set *) 68 | 69 | external highest_bit: int -> (int[@untagged]) = 70 | "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] 71 | (** [highest_bit x] is an integer with a single bit set: the highest set bit of [x]. 72 | exported for test purposes only. 73 | 74 | @since v0.10.0 *) 75 | 76 | (**/**) 77 | -------------------------------------------------------------------------------- /src/key_value.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open Signatures 23 | 24 | (** {1 Keys and values} *) 25 | 26 | module HomogeneousValue = struct 27 | type ('a,'map) t = 'map 28 | end 29 | 30 | module WrappedHomogeneousValue = struct 31 | type ('a, 'map) t = ('a, 'map) snd 32 | end 33 | 34 | module HeterogeneousKeyFromKey(Key: KEY): HETEROGENEOUS_KEY with type 'a t = Key.t = 35 | struct 36 | type _ t = Key.t 37 | 38 | (** The type-safe way to do it would be to define this type, to 39 | guarantee that 'a is always bound to the same type, and Eq is 40 | safe. But this requires a lot of conversion code, and identity 41 | functions that may not be well detected. [polyeq] is unsafe in 42 | that it allows arbitrary conversion of t1 by t2 in t1 t, but 43 | this unsafety is not exported, and I don't think we can do 44 | something wrong using it. *) 45 | (* type 'a t = K: Key.t -> unit t [@@unboxed] *) 46 | let polyeq: type a b. a t -> b t -> (a,b) cmp = 47 | fun a b -> match a,b with 48 | | a, b when (Key.to_int a) == (Key.to_int b) -> Obj.magic Eq 49 | | _ -> Diff 50 | let to_int = Key.to_int 51 | end 52 | 53 | 54 | module Value : VALUE with type 'a t = 'a = struct type 'a t = 'a end 55 | 56 | module HashedValue : HASHED_VALUE with type 'a t = 'a = struct 57 | include Value 58 | let hash x = Hashtbl.hash x 59 | let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b 60 | end 61 | module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm = 62 | struct 63 | include HomogeneousValue 64 | let hash x = Hashtbl.hash x 65 | let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b 66 | end 67 | 68 | module HeterogeneousHashedValueFromHashedValue(Value: HASHED_VALUE) 69 | : HETEROGENEOUS_HASHED_VALUE with type ('a, 'map) t = ('a, 'map Value.t) snd = 70 | struct 71 | type ('a, 'map) t = ('a, 'map Value.t) snd 72 | let hash (Snd x) = Value.hash x 73 | let polyeq (Snd a) (Snd b) = Value.polyeq a b 74 | end 75 | -------------------------------------------------------------------------------- /src/key_value.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open Signatures 23 | 24 | module Value : VALUE with type 'a t = 'a 25 | (** Default implementation of {!VALUE}, used in {!MakeMap}. 26 | @since v0.10.0 *) 27 | 28 | module HomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = 'map 29 | (** Default implementation of {!HETEROGENEOUS_VALUE}, to use when the type of the 30 | value in a heterogeneous map does not depend on the type of the key, only on 31 | the type of the map. *) 32 | 33 | module WrappedHomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = ('a,'map) snd 34 | (** Same as {!HomogeneousValue}, but uses a wrapper (unboxed) type instead of direct 35 | equality. This avoids a problem in the typechecker with overly eager simplification of aliases. 36 | More info on 37 | {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783} the OCaml discourse post} 38 | and {{: https://github.com/ocaml/ocaml/issues/13292}the github issue}. *) 39 | 40 | module HashedValue : HASHED_VALUE with type 'a t = 'a 41 | (** Generic implementation of {!HASHED_VALUE}. 42 | Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing 43 | and physical equality for equality. 44 | Note that this may lead to maps of different types having the same identifier 45 | ({!MakeHashconsedMap.to_int}), see the documentation of {!HASHED_VALUE.polyeq} 46 | for details on this. *) 47 | 48 | module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm 49 | (** Generic implementation of {!HETEROGENEOUS_HASHED_VALUE}. 50 | Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing 51 | and physical equality for equality. 52 | Note that this may lead to maps of different types having the same identifier 53 | ({!MakeHashconsedHeterogeneousMap.to_int}), see the documentation of 54 | {!HASHED_VALUE.polyeq} for details on this. *) 55 | 56 | 57 | (**/**) 58 | (** For local library use only *) 59 | 60 | module HeterogeneousKeyFromKey(Key: KEY): HETEROGENEOUS_KEY with type 'a t = Key.t 61 | (** Create a {!HETEROGENEOUS_KEY} from a non-polymorphic {!KEY} *) 62 | 63 | module HeterogeneousHashedValueFromHashedValue(Value: HASHED_VALUE) 64 | : HETEROGENEOUS_HASHED_VALUE with type ('a, 'map) t = ('a, 'map Value.t) snd 65 | (** Create a {!HETEROGENEOUS_HASHED_VALUE} from a {!HASHED_VALUE} *) 66 | 67 | (**/**) 68 | -------------------------------------------------------------------------------- /src/nodes.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open Ints 23 | open Signatures 24 | 25 | let sdbm x y = y + (x lsl 16) + (x lsl 6) - x 26 | (** Combine two numbers into a new hash *) 27 | 28 | (** Simple node, with no hash consing. *) 29 | module [@inline] SimpleNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE) = struct 30 | type 'a key = 'a Key.t 31 | type ('key,'map) value = ('key,'map) Value.t 32 | 33 | type 'map view = 34 | | Empty: 'map view 35 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 36 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view 37 | and 'map t = 'map view 38 | let view x = x 39 | 40 | let empty = Empty 41 | let is_empty x = x == Empty 42 | let leaf key value = Leaf {key;value} 43 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 44 | match tree0,tree1 with 45 | | Empty, x -> x 46 | | x, Empty -> x 47 | | _ -> Branch{prefix;branching_bit;tree0;tree1} 48 | end 49 | 50 | module WeakNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE)(* :NODE *) = struct 51 | type 'a key = 'a Key.t 52 | type ('key,'map) value = ('key,'map) Value.t 53 | 54 | type 'map view = 55 | | Empty: 'map view 56 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 57 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view 58 | and 'a t = 59 | | TEmpty: 'map t 60 | | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t 61 | (* Additional hidden case: leaf, which is an Ephemeron.K1, whose 62 | tag is 251, so it can be discriminated against the other 63 | cases. This avoids an indirection. *) 64 | 65 | let empty = TEmpty 66 | let is_empty x = x == TEmpty 67 | let leaf key value = Obj.magic (Ephemeron.K1.make key value) 68 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 69 | match tree0,tree1 with 70 | | TEmpty, x -> x 71 | | x, TEmpty -> x 72 | | _ -> TBranch{prefix;branching_bit;tree0;tree1} 73 | 74 | let view (type k) (type map) (t:map t) = 75 | let obj = Obj.repr t in 76 | if Obj.is_block obj && Obj.tag obj != 0 then 77 | (* Ephemeron.K1.get_(key|value) are no longer available in 5.0, 78 | so we do that instead. *) 79 | let ephe:Obj.Ephemeron.t = Obj.magic obj in 80 | let key:k key option = Obj.magic @@ Obj.Ephemeron.get_key ephe 0 in 81 | let data:(k,map) Value.t option = Obj.magic @@ Obj.Ephemeron.get_data ephe in 82 | match key,data with 83 | | Some key, Some value -> Leaf{key;value} 84 | | _ -> Empty 85 | else match t with 86 | | TEmpty -> Empty 87 | | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} 88 | 89 | end 90 | 91 | 92 | (** Add a unique id to nodes, e.g. so that they can be used as keys in maps or sets. *) 93 | module NodeWithId(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE):NODE_WITH_ID 94 | with type 'key key = 'key Key.t 95 | and type ('key,'map) value = ('key,'map) Value.t 96 | = struct 97 | 98 | type 'a key = 'a Key.t 99 | type ('key,'map) value = ('key,'map) Value.t 100 | 101 | type 'map view = 102 | | Empty: 'map view 103 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 104 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view 105 | and 'map t = 106 | | NEmpty: 'map t 107 | | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t;id:int} -> 'map t 108 | | NLeaf: {key:'key key;value:('key,'map) value;id:int} -> 'map t 109 | 110 | let view = function 111 | | NEmpty -> Empty 112 | | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} 113 | | NLeaf{key;value;_} -> Leaf{key;value} 114 | 115 | let to_int = function 116 | | NEmpty -> 0 117 | | NBranch{id;_} -> id 118 | | NLeaf{id;_} -> id 119 | 120 | let count = ref 0 121 | 122 | let empty = NEmpty 123 | let is_empty x = x == NEmpty 124 | let leaf key value = incr count; NLeaf {key;value;id=(!count)} 125 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 126 | match tree0,tree1 with 127 | | NEmpty, x -> x 128 | | x, NEmpty -> x 129 | | _ -> incr count; NBranch{prefix;branching_bit;tree0;tree1;id=(!count)} 130 | end 131 | 132 | 133 | (** NODE for sets, i.e. when there is no associated values. *) 134 | module SetNode(Key:sig type 'a t end):NODE 135 | with type 'key key = 'key Key.t 136 | and type ('key,'map) value = unit 137 | = struct 138 | 139 | type 'a key = 'a Key.t 140 | type ('key,'map) value = unit 141 | 142 | type 'map view = 143 | | Empty: 'map view 144 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 145 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view 146 | and 'map t = 147 | | NEmpty: 'map t 148 | | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t 149 | | NLeaf: {key:'key key} -> 'map t 150 | 151 | 152 | let view = function 153 | | NEmpty -> Empty 154 | | NBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} 155 | | NLeaf{key} -> Leaf{key;value=()} 156 | 157 | let empty = NEmpty 158 | let is_empty x = x == NEmpty 159 | let leaf key _value = NLeaf {key} 160 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 161 | match tree0,tree1 with 162 | | NEmpty, x -> x 163 | | x, NEmpty -> x 164 | | _ -> NBranch{prefix;branching_bit;tree0;tree1} 165 | 166 | end 167 | 168 | module WeakSetNode(Key:sig type 'a t end)(* :NODE *) = struct 169 | type 'a key = 'a Key.t 170 | type ('key,'map) value = unit 171 | 172 | type 'map view = 173 | | Empty: 'map view 174 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 175 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view 176 | and 'a t = 177 | | TEmpty: 'map t 178 | | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t 179 | (* Additional hidden case: leaf, which is a Weak array, whose tag 180 | is 251, so it can be discriminated against the other 181 | cases. This avoids an indirection. *) 182 | 183 | let empty = TEmpty 184 | let is_empty x = x == TEmpty 185 | let leaf key () = Obj.magic (let a = Weak.create 1 in Weak.set a 0 (Some key)) 186 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 187 | match tree0,tree1 with 188 | | TEmpty, x -> x 189 | | x, TEmpty -> x 190 | | _ -> TBranch{prefix;branching_bit;tree0;tree1} 191 | 192 | let view t = 193 | let obj = Obj.repr t in 194 | if Obj.is_block obj && Obj.tag obj != 0 then 195 | let weak = Obj.magic obj in 196 | let key = Weak.get weak 0 in 197 | match key with 198 | | Some key -> Leaf{key;value=()} 199 | | _ -> Empty 200 | else match t with (* Identity in memory. *) 201 | | TEmpty -> Empty 202 | | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} 203 | end 204 | 205 | module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() 206 | (* : HASH_CONSED_NODE 207 | with type 'key key = 'key Key.t 208 | and type ('key, 'map) value = ('key, 'map) Value.t *) 209 | = struct 210 | 211 | type 'a key = 'a Key.t 212 | type ('key, 'map) value = ('key, 'map) Value.t 213 | 214 | type 'map view = 215 | | Empty: 'map view 216 | | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view 217 | | Leaf: { key:'key key; value:('key,'map) value } -> 'map view 218 | and 'map t = 219 | | NEmpty: 'map t 220 | | NBranch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t; id:int } -> 'map t 221 | | NLeaf: { key:'key key; value:('key, 'map) Value.t; id:int } -> 'map t 222 | 223 | let view = function 224 | | NEmpty -> Empty 225 | | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} 226 | | NLeaf{key;value;_} -> Leaf{key;value} 227 | 228 | let to_int = function 229 | | NEmpty -> 0 230 | | NBranch{ id; _ } -> id 231 | | NLeaf{ id; _ } -> id 232 | 233 | let count = ref 1 (** Start at 1 as we increment in post *) 234 | 235 | type any_map = AnyMap : 'a t -> any_map [@@unboxed] 236 | 237 | module HashArg = struct 238 | type t = any_map 239 | let equal (AnyMap a) (AnyMap b) = match a, b with 240 | | NEmpty, NEmpty -> true 241 | | NLeaf{key=key1;value=value1;_}, NLeaf{key=key2;value=value2;_} -> 242 | begin match Key.polyeq key1 key2 with 243 | | Eq -> Value.polyeq value1 value2 244 | | Diff -> false 245 | end 246 | | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, 247 | NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> 248 | prefixa == prefixb && branching_bita == branching_bitb && 249 | to_int tree0a = to_int tree0b && to_int tree1a = to_int tree1b 250 | | _ -> false 251 | 252 | let hash (AnyMap x) = match x with 253 | | NEmpty -> 0 254 | | NLeaf{key; value; _} -> 255 | let hash = sdbm (Key.to_int key) (Value.hash value) in 256 | (hash lsl 1) lor 1 257 | (* All leaf hashes are odd *) 258 | | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) 259 | (sdbm ((prefix :> int) lor (branching_bit :> int)) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 260 | end 261 | 262 | module WeakHash = Weak.Make(HashArg) 263 | 264 | let weakh = WeakHash.create 120 265 | 266 | let empty = NEmpty 267 | let is_empty x = x == NEmpty 268 | 269 | let try_find (tentative : 'a t) = 270 | let AnyMap x = WeakHash.merge weakh (AnyMap tentative) in 271 | let x : 'a t = Obj.magic x in 272 | if x == tentative then incr count; 273 | x 274 | 275 | let leaf key value = try_find (NLeaf{key;value;id= !count}) 276 | 277 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 278 | match tree0,tree1 with 279 | | NEmpty, x -> x 280 | | x, NEmpty -> x 281 | | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) 282 | 283 | let equal x y = x == y 284 | let compare x y = Int.compare (to_int x) (to_int y) 285 | end 286 | 287 | module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE 288 | with type 'key key = 'key Key.t 289 | and type ('key,'map) value = unit 290 | = struct 291 | 292 | type 'a key = 'a Key.t 293 | type ('key,'map) value = unit 294 | 295 | type map = 296 | | NEmpty: map 297 | | NBranch: { prefix:intkey; branching_bit:mask; tree0:map; tree1:map; id:int } -> map 298 | | NLeaf: { key:'key key; id:int } -> map 299 | type 'map view = 300 | | Empty: 'map view 301 | | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view 302 | | Leaf: { key:'key key; value:unit } -> 'map view 303 | and _ t = map 304 | 305 | let view = function 306 | | NEmpty -> Empty 307 | | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} 308 | | NLeaf{ key; _ } -> Leaf{ key; value=() } 309 | 310 | let to_int = function 311 | | NEmpty -> 0 312 | | NBranch{ id; _ } -> id 313 | | NLeaf{ id; _ } -> id 314 | 315 | let count = ref 1 (** Start at 1 as we increment in post *) 316 | 317 | module HashArg = struct 318 | type t = map 319 | let equal a b = match a, b with 320 | | NEmpty, NEmpty -> true 321 | | NLeaf{key=key1;_}, NLeaf{key=key2;_} -> 322 | begin match Key.polyeq key1 key2 with 323 | | Eq -> true 324 | | Diff -> false 325 | end 326 | | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, 327 | NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> 328 | prefixa == prefixb && branching_bita == branching_bitb && 329 | tree0a == tree0b && tree1a == tree1b 330 | | _ -> false 331 | 332 | let hash a = match a with 333 | | NEmpty -> 0 334 | | NLeaf{key; _} -> ((Key.to_int key) lsl 1) lor 1 (* All leaf hashes are odd *) 335 | | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) 336 | (sdbm ((prefix :> int) lor (branching_bit :> int)) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 337 | end 338 | 339 | module WeakHash = Weak.Make(HashArg) 340 | 341 | let weakh = WeakHash.create 120 342 | 343 | let empty = NEmpty 344 | let is_empty x = x == NEmpty 345 | 346 | let try_find tentative = 347 | let x = WeakHash.merge weakh tentative in 348 | if x == tentative then incr count; 349 | x 350 | 351 | let leaf key () = try_find (NLeaf{ key; id = !count }) 352 | 353 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 354 | match tree0,tree1 with 355 | | NEmpty, x -> x 356 | | x, NEmpty -> x 357 | | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) 358 | 359 | let equal x y = x == y 360 | let compare x y = Int.compare (to_int x) (to_int y) 361 | end 362 | -------------------------------------------------------------------------------- /src/nodes.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open Signatures 23 | 24 | (** {1 Basic nodes} *) 25 | 26 | (** This module is such that ['map t = 'map view]. 27 | This is the node used in {!MakeHeterogeneousMap} and {!MakeMap}. *) 28 | module SimpleNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE 29 | with type 'a key = 'a Key.t 30 | and type ('key,'map) value = ('key,'map) Value.t 31 | 32 | (** Here, nodes also contain a unique id, e.g. so that they can be 33 | used as keys of maps or hash-tables. *) 34 | module NodeWithId(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE_WITH_ID 35 | with type 'a key = 'a Key.t 36 | and type ('key,'map) value = ('key,'map) Value.t 37 | 38 | 39 | (* Maybe: we can make variations around NodeWithId; e.g. a version 40 | that does HashConsing, or a version that replicates the node to a 41 | key-value store on disk, etc. *) 42 | 43 | (** An optimized representation for sets, i.e. maps to unit: we do not 44 | store a reference to unit (note that you can further optimize when 45 | you know the representation of the key). 46 | This is the node used in {!MakeHeterogeneousSet} and {!MakeSet}. *) 47 | module SetNode(Key: sig type 'k t end) : NODE 48 | with type 'a key = 'a Key.t 49 | and type ('key,'map) value = unit 50 | 51 | (** {1 Weak nodes} *) 52 | 53 | (** NODE used to implement weak key hashes (the key-binding pair is an 54 | Ephemeron, the reference to the key is weak, and if the key is 55 | garbage collected, the binding disappears from the map *) 56 | module WeakNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE 57 | with type 'a key = 'a Key.t 58 | and type ('key,'map) value = ('key,'map) Value.t 59 | 60 | (** Both a {!WeakNode} and a {!SetNode}, useful to implement Weak sets. *) 61 | module WeakSetNode(Key: sig type 'k t end) : NODE 62 | with type 'a key = 'a Key.t 63 | and type ('key,'map) value = unit 64 | 65 | 66 | (** {1 Hashconsed nodes} *) 67 | 68 | (** Gives a unique number to each node like {!NodeWithId}, 69 | but also performs hash-consing. So two maps with the same bindings will 70 | always be physically equal. See {!hash_consed} for more details on this. 71 | 72 | This is a generative functor, as calling it creates a new hash-table to store 73 | the created nodes, and a reference to store the next unallocated identifier. 74 | Maps/sets from different hash-consing functors (even if these functors have 75 | the same arguments) will have different (incompatible) numbering systems and 76 | be stored in different hash-tables (thus they will never be physically equal). 77 | 78 | Using a single {!HashconsedNode} in multiple {!MakeCustomMap} functors will result in 79 | all those maps being hash-consed together (stored in the same hash-table, 80 | same numbering system). 81 | 82 | @since v0.10.0 *) 83 | module HashconsedNode(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : HASH_CONSED_NODE 84 | with type 'a key = 'a Key.t 85 | and type ('key,'map) value = ('key, 'map) Value.t 86 | 87 | (** Both a {!HashconsedNode} and a {!SetNode}. 88 | @since v0.10.0 *) 89 | module HashconsedSetNode(Key: HETEROGENEOUS_KEY)() : HASH_CONSED_NODE 90 | with type 'a key = 'a Key.t 91 | and type ('key,'map) value = unit 92 | -------------------------------------------------------------------------------- /src/test/dune: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; This file is part of the Codex semantics library ;; 3 | ;; (patricia-tree sub-component). ;; 4 | ;; ;; 5 | ;; Copyright (C) 2024-2025 ;; 6 | ;; CEA (Commissariat à l'énergie atomique et aux énergies ;; 7 | ;; alternatives) ;; 8 | ;; ;; 9 | ;; You can redistribute it and/or modify it under the terms of the GNU ;; 10 | ;; Lesser General Public License as published by the Free Software ;; 11 | ;; Foundation, version 2.1. ;; 12 | ;; ;; 13 | ;; It is distributed in the hope that it will be useful, ;; 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; 16 | ;; GNU Lesser General Public License for more details. ;; 17 | ;; ;; 18 | ;; See the GNU Lesser General Public License version 2.1 ;; 19 | ;; for more details (enclosed in the file LICENSE). ;; 20 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 21 | 22 | (library 23 | (name PatriciaTreeTest) 24 | (inline_tests 25 | (libraries qcheck-core)) 26 | (preprocess 27 | (pps ppx_inline_test)) 28 | (libraries PatriciaTree qcheck-core) 29 | (modules PatriciaTreeTest)) 30 | -------------------------------------------------------------------------------- /src/test/mdx_prelude.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | (** File run by MDX before running all others, sets up some stuff so the 23 | comments don't have to *) 24 | 25 | open PatriciaTree 26 | 27 | type foo 28 | 29 | module IntKey = struct 30 | type 'a t = int 31 | let to_int x = x 32 | let polyeq : type a b. a t -> b t -> (a, b) cmp = fun a b -> 33 | if a == Obj.magic b then Obj.magic Eq else Diff 34 | end 35 | module MyValue = Int 36 | module MyMap = MakeHeterogeneousMap(IntKey)(struct type ('a,'b) t = int end) 37 | -------------------------------------------------------------------------------- /src/test/patriciaTreeTest.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of the Codex semantics library *) 3 | (* (patricia-tree sub-component). *) 4 | (* *) 5 | (* Copyright (C) 2024-2025 *) 6 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 7 | (* alternatives) *) 8 | (* *) 9 | (* You can redistribute it and/or modify it under the terms of the GNU *) 10 | (* Lesser General Public License as published by the Free Software *) 11 | (* Foundation, version 2.1. *) 12 | (* *) 13 | (* It is distributed in the hope that it will be useful, *) 14 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 15 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 16 | (* GNU Lesser General Public License for more details. *) 17 | (* *) 18 | (* See the GNU Lesser General Public License version 2.1 *) 19 | (* for more details (enclosed in the file LICENSE). *) 20 | (**************************************************************************) 21 | 22 | open PatriciaTree 23 | 24 | let check_highest_bit x res = 25 | (* Printf.printf "CHECK_HIGHEST_BIT: %x %x\n%!" x res; *) 26 | if (x = 0) 27 | then (res = 0) 28 | else begin 29 | x != 0 && 30 | (* The result is a single bit set. *) 31 | res land (res-1) == 0 && 32 | (* The bit x is set. *) 33 | x land res = res && 34 | (* It is the highest bit. *) 35 | x land (lnot res) land (lnot (res - 1)) = 0 36 | end 37 | 38 | let () = QCheck.Test.check_exn @@ 39 | QCheck.Test.make ~count:1000 ~name:"highest_bit" QCheck.int (fun x -> 40 | check_highest_bit x (highest_bit x)) 41 | 42 | let unsigned_lt_ref x y = 43 | if x >= 0 && y >= 0 44 | then x < y 45 | else if x >= 0 46 | then (* pos < neg *) true 47 | else if y >= 0 then false 48 | else x < y 49 | 50 | let () = QCheck.Test.check_exn @@ 51 | QCheck.Test.make ~count:1000 ~name:"unsigned_lt" QCheck.(pair int int) (fun (x,y) -> 52 | unsigned_lt x y = unsigned_lt_ref x y) 53 | 54 | let%test_module "TestHeterogeneous" = (module struct 55 | 56 | module MyKey = struct 57 | type 'a t = 58 | | Int: int -> int t 59 | | Bool: bool -> bool t 60 | 61 | let to_int (type a) (key:a t):int = match key with 62 | | Bool false -> 1 63 | | Bool true -> 3 64 | | Int x -> (x lsl 1) 65 | 66 | let polyeq: type a b. a t -> b t -> (a,b) cmp = fun a b -> match a,b with 67 | | Bool _, Int _ -> Diff 68 | | Int _, Bool _ -> Diff 69 | | Bool a, Bool b -> if a == b then Eq else Diff 70 | | Int a, Int b -> if a == b then Eq else Diff 71 | end 72 | 73 | type a = | 74 | type b = | 75 | 76 | module MyValue = struct 77 | type ('a,'b) t = 78 | | AString: string -> (int,a) t 79 | | APair: (int * int) -> (bool,a) t 80 | | BInt: int -> (int,b) t 81 | | BString: string -> (bool,b) t 82 | end 83 | 84 | 85 | module Map = MakeCustomHeterogeneousMap(MyKey)(MyValue)(SimpleNode(MyKey)(MyValue)) 86 | open Map 87 | 88 | let _m1 = singleton (MyKey.Int 7) (MyValue.AString "seven") 89 | let _m1 = add (MyKey.Bool false) (MyValue.APair (11,22)) _m1 90 | let _m1 = remove (MyKey.Int 7) _m1 91 | 92 | let _m2 = singleton (MyKey.Int 7) (MyValue.BInt 21) 93 | let _m2 = add (MyKey.Bool true) (MyValue.BString "hello") _m2 94 | 95 | 96 | (* Here we redefine a node to store key and value a flat way, which 97 | should decrease the amount of allocations. *) 98 | module OptimizedNode : NODE 99 | with type 'key key = 'key MyKey.t 100 | and type ('key,'map) value = ('key,'map) MyValue.t 101 | = struct 102 | 103 | type 'a key = 'a MyKey.t 104 | type ('key,'map) value = ('key,'map) MyValue.t 105 | 106 | type 'map view = 107 | | Empty: 'map view (* Does not appear below interior nodes. *) 108 | | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view 109 | | Leaf: {key:'key key; value:('key,'map) value} -> 'map view (* The entire key. *) 110 | 111 | and 'map t = 112 | | NEmpty: 'map t 113 | | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t 114 | | LeafAString: (int * string) -> a t 115 | | LeafAPair: (bool * int * int) -> a t 116 | | LeafBInt: (int * int) -> b t 117 | | LeafBString: (bool * string) -> b t 118 | 119 | let view: type map. map t -> map view = function 120 | | LeafAString(a,s) -> Leaf{key=MyKey.Int a;value=MyValue.AString s} 121 | | LeafAPair(b,p1,p2) -> Leaf{key=MyKey.Bool b;value=MyValue.APair(p1,p2)} 122 | | LeafBInt(a,s) -> Leaf{key=MyKey.Int a;value=MyValue.BInt s} 123 | | LeafBString(a,s) -> Leaf{key=MyKey.Bool a;value=MyValue.BString s} 124 | | NBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} 125 | | NEmpty -> Empty 126 | 127 | let empty:'map t = NEmpty 128 | let is_empty (x:'map t) = x == NEmpty 129 | let leaf: type a map. a key -> (a,map) value -> map t = 130 | fun key value -> match key,value with 131 | | MyKey.Int a, MyValue.AString s -> LeafAString(a,s) 132 | | MyKey.Bool b, MyValue.APair(p1,p2) -> LeafAPair(b,p1,p2) | MyKey.Int a, MyValue.BInt s -> LeafBInt(a,s) 133 | | MyKey.Bool a, MyValue.BString s -> LeafBString(a,s) 134 | 135 | let branch ~prefix ~branching_bit ~tree0 ~tree1 = 136 | match (tree0:'map t),(tree1:'map t) with 137 | | NEmpty, x -> x 138 | | x, NEmpty -> x 139 | | _ -> NBranch{prefix;branching_bit;tree0;tree1} 140 | end 141 | 142 | module Map2 = MakeCustomHeterogeneousMap(MyKey)(MyValue)(SimpleNode(MyKey)(MyValue)) 143 | open Map2 144 | 145 | let _m1 = singleton (MyKey.Int 7) (MyValue.AString "seven") 146 | let _m1 = add (MyKey.Bool false) (MyValue.APair (11,22)) _m1 147 | let _m1 = remove (MyKey.Int 7) _m1 148 | 149 | let _m2 = singleton (MyKey.Int 7) (MyValue.BInt 21) 150 | let _m2 = add (MyKey.Bool true) (MyValue.BString "hello") _m2 151 | end) 152 | 153 | (* module IntKey = struct type 'a t = Int: int -> 'a t [@@unboxed];; let to_int (Int x) = x end *) 154 | (* module Test = Make(IntKey)(SimpleNode(IntKey)) *) 155 | (* open Test;; *) 156 | 157 | (* (\**************** Tests ****************\) *) 158 | (* let _m1 = singleton (IntKey.Int 7) 1;; *) 159 | (* let _m1 = add (IntKey.Int 3) 2 _m1;; *) 160 | (* let _m1 = remove (IntKey.Int 7) _m1;; *) 161 | 162 | (* let _m1 = singleton (7) 1;; *) 163 | (* let _m1 = add (3) 2 _m1;; *) 164 | (* let _m1 = remove (7) _m1;; *) 165 | 166 | 167 | 168 | (* let _m2 = singleton 4 3;; *) 169 | (* let _m2 = add 10 4 _m2;; *) 170 | 171 | (* let _m3 = union (Obj.magic 0) _m1 _m2;; *) 172 | 173 | (* (\* let m8 = m1;; *\) *) 174 | (* let _m4 = inter (fun a b -> a) _m1 _m3;; *) 175 | (* let _m5 = inter (fun a b -> a) _m2 _m3;; *) 176 | 177 | (* let _m6 = inter (fun a b -> a) _m1 _m2;; *) 178 | let unsigned_compare x y = 179 | if unsigned_lt x y then -1 180 | else if x = y then 0 else 1 181 | 182 | module HIntKey : sig 183 | type t = int 184 | val to_int : t -> int 185 | end = struct 186 | type t = int 187 | let to_int x = x 188 | end 189 | 190 | (* A model. *) 191 | module IntMap = struct 192 | module M = Map.Make(struct 193 | type t = int 194 | let compare = unsigned_compare 195 | end) 196 | include M 197 | let subset_domain_for_all_2 m1 m2 f = 198 | let exception False in 199 | try 200 | let res = M.merge (fun key v1 v2 -> match v1,v2 with 201 | | None, None -> assert false 202 | | Some _, None -> raise False 203 | | None, Some _ -> None 204 | | Some v1, Some v2 -> 205 | if f key v1 v2 then None else raise False) m1 m2 in 206 | assert (M.is_empty res); 207 | true 208 | with False -> false 209 | 210 | let same_domain_for_all_2 m1 m2 f = 211 | let exception False in 212 | try 213 | let res = M.merge (fun key v1 v2 -> match v1,v2 with 214 | | None, None -> assert false 215 | | Some _, None -> raise False 216 | | None, Some _ -> raise False 217 | | Some v1, Some v2 -> 218 | if f key v1 v2 then None else raise False) m1 m2 in 219 | assert (M.is_empty res); 220 | true 221 | with False -> false 222 | 223 | let inter m1 m2 f = 224 | M.merge (fun key a b -> 225 | match a,b with 226 | | None, _ | _, None -> None 227 | | Some a, Some b -> Some (f key a b)) m1 m2 228 | 229 | let symmetric_difference f m1 m2 = 230 | M.merge (fun key a b -> 231 | match a, b with 232 | | None, x | x, None -> x 233 | | Some a, Some b when a == b -> None 234 | | Some a, Some b -> f key a b 235 | ) m1 m2 236 | 237 | let difference f m1 m2 = filter_map (fun k v -> 238 | match find_opt k m2 with 239 | | None -> Some v 240 | | Some v' -> f k v v' 241 | ) m1 242 | 243 | let update_multiple_from_foreign m1 m2 f = 244 | M.merge (fun key a b -> 245 | match a, b with 246 | | a, None -> a 247 | | a, Some b -> f key a b) m1 m2 248 | 249 | let update_multiple_from_inter_with_foreign m1 m2 f = 250 | M.fold (fun key value acc -> 251 | match M.find key acc with 252 | | exception Not_found -> acc 253 | | v -> begin match f key v value with 254 | | None -> M.remove key acc 255 | | Some v' -> M.add key v' acc 256 | end) m2 m1 257 | 258 | let inter_filter m1 m2 f = 259 | M.merge (fun key a b -> 260 | match a,b with 261 | | None, _ | _, None -> None 262 | | Some a, Some b -> (f key a b)) m1 m2 263 | 264 | let fold_on_nonequal_inter f m1 m2 acc = 265 | let racc = ref acc in 266 | ignore @@ M.merge (fun key a b -> 267 | match a,b with 268 | | None, _ | _, None -> None 269 | | Some a, Some b -> 270 | if a != b 271 | then racc := f key a b !racc; 272 | None) m1 m2; 273 | !racc 274 | 275 | let fold_on_nonequal_union f ma mb acc = 276 | let union = M.merge (fun _key a b -> 277 | match a,b with 278 | | None, None -> assert false 279 | | Some a, Some b when a == b -> None 280 | | None, Some _ | Some _, None | Some _, Some _ -> Some(a,b)) ma mb in 281 | let elts = M.bindings union in 282 | let elts = List.sort (fun (key1,_val1) (key2,_val2) -> unsigned_compare key1 key2) elts in 283 | List.fold_left (fun acc (key,(val1,val2)) -> f key val1 val2 acc) acc elts 284 | 285 | let pop_unsigned_minimum m = 286 | match M.min_binding m with 287 | | exception Not_found -> None 288 | | (key,value) -> Some(key,value,M.remove key m) 289 | 290 | let pop_unsigned_maximum m = 291 | match M.max_binding m with 292 | | exception Not_found -> None 293 | | (key,value) -> Some(key,value,M.remove key m) 294 | end 295 | 296 | (* An implementation. *) 297 | module IntValue : sig 298 | type ('a, 'b) t = int 299 | val pretty : Format.formatter -> ('a, 'b) t -> unit 300 | end = struct 301 | type ('a,'b) t = int 302 | let pretty fmt x = Format.pp_print_int fmt x 303 | end 304 | 305 | 306 | module TestImpl(MyMap : MAP with type key = int)(Param : sig 307 | val test_id : bool 308 | val number_gen : int QCheck.arbitrary 309 | (* val to_int : 'a MyMap.t -> int option *) 310 | end) = struct 311 | 312 | (* Add a list of pair of ints to a map. *) 313 | let rec extend_map mymap alist = 314 | match alist with 315 | | [] -> mymap 316 | | (a,b)::rest -> 317 | extend_map (MyMap.add a b mymap) rest 318 | 319 | let rec remove_map mymap alist = 320 | match alist with 321 | | [] -> mymap 322 | | (a,_)::rest -> 323 | remove_map (MyMap.remove a mymap) rest 324 | 325 | let intmap_of_mymap m = 326 | MyMap.fold (fun key value acc -> IntMap.add key value acc) m IntMap.empty 327 | 328 | let two_maps_from_three_lists (alist1,alist2,alist3) = 329 | let first = extend_map MyMap.empty alist1 in 330 | let second = extend_map first alist2 in 331 | let third = extend_map first alist3 in 332 | (second,third) 333 | 334 | let number_gen = Param.number_gen 335 | 336 | let gen = QCheck.(triple 337 | (small_list (pair number_gen number_gen)) 338 | (small_list (pair number_gen number_gen)) 339 | (small_list (pair number_gen number_gen))) 340 | 341 | let model_from_gen x = 342 | let (m1,m2) = two_maps_from_three_lists x in 343 | (m1,intmap_of_mymap m1,m2,intmap_of_mymap m2) 344 | 345 | (* let dump_model m = 346 | Printf.printf "["; 347 | m |> IntMap.iter (fun key value -> Printf.printf "%d %d\n" key value); 348 | Printf.printf "]\n%!" 349 | 350 | let dump_test m1 m2 res expected = 351 | Printf.printf "=========\n"; 352 | dump_model m1; 353 | dump_model m2; 354 | dump_model res; 355 | dump_model expected; 356 | Printf.printf "result is %b\n%!" (IntMap.equal (=) res expected) *) 357 | 358 | (** Fast hash function *) 359 | let sdbm x y = y + (x lsl 16) + (x lsl 6) - x 360 | let sdbm3 x y z = sdbm x @@ sdbm y z 361 | 362 | module Foreign = MyMap.WithForeign(MyMap.BaseMap) 363 | 364 | let test_pop_minimum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_minimum" 365 | QCheck.(small_list (pair number_gen number_gen)) (fun x -> 366 | let m = extend_map MyMap.empty x in 367 | let model = intmap_of_mymap m in 368 | match MyMap.pop_unsigned_minimum m, IntMap.pop_unsigned_minimum model with 369 | | None, Some _ | Some _, None -> false 370 | | None, None -> true 371 | | Some(key1,val1,m'), Some(key2,val2,model') -> 372 | key1 = key2 && val1 = val2 && IntMap.equal (=) (intmap_of_mymap m') model') 373 | let () = QCheck.Test.check_exn test_pop_minimum 374 | 375 | let test_pop_maximum = QCheck.Test.make ~count:1000 ~name:"pop_unsigned_maximum" 376 | QCheck.(small_list (pair number_gen number_gen)) (fun x -> 377 | let m = extend_map MyMap.empty x in 378 | let model = intmap_of_mymap m in 379 | match MyMap.pop_unsigned_maximum m, IntMap.pop_unsigned_maximum model with 380 | | None, Some _ | Some _, None -> false 381 | | None, None -> true 382 | | Some(key1,val1,m'), Some(key2,val2,model') -> 383 | key1 = key2 && val1 = val2 && IntMap.equal (=) (intmap_of_mymap m') model') 384 | let () = QCheck.Test.check_exn test_pop_maximum 385 | 386 | (** Create a function to check calls are made in increasing order *) 387 | let check_increases () = 388 | let seen = ref None in (* Store [to_int last_key]*) 389 | let f key = 390 | let key_int = HIntKey.to_int key in 391 | let () = match !seen with 392 | | None -> () 393 | | Some old_key_int -> 394 | if unsigned_compare old_key_int key_int < 0 395 | then () 396 | else QCheck.Test.fail_reportf 397 | "Non increasing calls to f : key %d seen after %d" 398 | key_int old_key_int 399 | in seen := Some key_int 400 | in f 401 | 402 | (** Create a function to check calls are made in increasing order and not on equal values *) 403 | let check_increases_and_neq () = 404 | let chk = check_increases () in 405 | let f key v1 v2 = 406 | chk key; 407 | if v1 == v2 then 408 | QCheck.Test.fail_reportf 409 | "Called on physically equal values %a and %a" 410 | IntValue.pretty v1 IntValue.pretty v2 411 | in f 412 | 413 | let test_map_filter = QCheck.Test.make ~count:1000 ~name:"map_filter" 414 | QCheck.(small_list (pair number_gen number_gen)) (fun x -> 415 | let m1 = extend_map MyMap.empty x in 416 | let model1 = intmap_of_mymap m1 in 417 | let chk_calls1 = check_increases () in 418 | let chk_calls2 = check_increases () in 419 | let f k x = if (x mod 3 == 0) then None else Some (x - k + 1) in 420 | let res1 = intmap_of_mymap @@ MyMap.filter_map ( 421 | fun k v -> chk_calls1 k; f k v) m1 in 422 | let res2 = intmap_of_mymap @@ MyMap.filter_map_no_share ( 423 | fun k v -> chk_calls2 k; f k v) m1 in 424 | let modelres = IntMap.filter_map f model1 in 425 | IntMap.equal (=) res1 modelres && 426 | IntMap.equal (=) res2 modelres) 427 | let () = QCheck.Test.check_exn test_map_filter 428 | 429 | 430 | (* This way of generating the test has the benefits that it is easy to understand when a test fails. *) 431 | let test_reflexive_subset_domain_for_all2 = QCheck.Test.make ~count:1000 ~name:"reflexive_subset_domain_for_all2" 432 | gen (fun x -> 433 | let (m1,model1,m2,model2) = model_from_gen x in 434 | let f _key a b = a <= b in (* This is reflexive. *) 435 | let myres = MyMap.reflexive_subset_domain_for_all2 f m1 m2 in 436 | let modelres = IntMap.subset_domain_for_all_2 model1 model2 f in 437 | myres = modelres) 438 | let () = QCheck.Test.check_exn test_reflexive_subset_domain_for_all2 439 | 440 | let test_reflexive_same_domain_for_all2 = QCheck.Test.make ~count:1000 ~name:"reflexive_same_domain_for_all2" 441 | gen (fun x -> 442 | let (m1,model1,m2,model2) = model_from_gen x in 443 | let f _key a b = a <= b in (* This is reflexive. *) 444 | let myres = MyMap.reflexive_same_domain_for_all2 f m1 m2 in 445 | let modelres = IntMap.same_domain_for_all_2 model1 model2 f in 446 | myres = modelres) 447 | let () = QCheck.Test.check_exn test_reflexive_same_domain_for_all2 448 | 449 | let test_idempotent_union = QCheck.Test.make ~count:1000 ~name:"idempotent_union" 450 | gen (fun x -> 451 | let (m1,model1,m2,model2) = model_from_gen x in 452 | let f key (a:int) b = if key mod 2 == 0 then min a b else max a b in 453 | let chk_calls = check_increases_and_neq () in 454 | let myres = intmap_of_mymap @@ MyMap.idempotent_union 455 | (fun k a b -> chk_calls k a b; f k a b) m1 m2 in 456 | let modelres = IntMap.union (fun key a b -> Some (f key a b)) model1 model2 in 457 | (* dump_test model1 model2 myres modelres; *) 458 | IntMap.equal (=) modelres myres) 459 | let () = QCheck.Test.check_exn test_idempotent_union 460 | 461 | 462 | let test_idempotent_inter = QCheck.Test.make ~count:1000 ~name:"idempotent_inter" 463 | gen (fun x -> 464 | let (m1,model1,m2,model2) = model_from_gen x in 465 | let f key (a:int) b = if key mod 2 == 0 then min a b else max a b in 466 | let chk_calls = check_increases_and_neq () in 467 | let myres = intmap_of_mymap @@ MyMap.idempotent_inter 468 | (fun k a b -> chk_calls k a b; f k a b) m1 m2 in 469 | let modelres = IntMap.inter model1 model2 f in 470 | (* dump_test model1 model2 myres modelres; *) 471 | IntMap.equal (=) modelres myres) 472 | let () = QCheck.Test.check_exn test_idempotent_inter 473 | 474 | 475 | 476 | let test_nonidempotent_inter_no_share = QCheck.Test.make ~count:1000 ~name:"nonidempotent_inter_no_share" 477 | gen (fun x -> 478 | let (m1,model1,m2,model2) = model_from_gen x in 479 | let f key (a:int) b = sdbm3 key a b in 480 | let chk_calls = check_increases () in 481 | let myres = intmap_of_mymap @@ MyMap.nonidempotent_inter_no_share 482 | (fun k a b -> chk_calls k; f k a b) m1 m2 in 483 | let modelres = IntMap.inter model1 model2 f in 484 | (* dump_test model1 model2 myres modelres; *) 485 | IntMap.equal (=) modelres myres) 486 | let () = QCheck.Test.check_exn test_nonidempotent_inter_no_share 487 | 488 | 489 | let test_nonidempotent_inter_no_share_foreign = QCheck.Test.make ~count:1000 ~name:"nonidempotent_inter_no_share_foreign" 490 | gen (fun x -> 491 | let (m1,model1,m2,model2) = model_from_gen x in 492 | let orig_f = sdbm3 in 493 | let chk_calls = check_increases () in 494 | let f : int -> int -> int -> int = fun key (a:int) b -> chk_calls key; orig_f key a b in 495 | let myres = intmap_of_mymap @@ Foreign.nonidempotent_inter {f= 496 | fun k v (Snd v2) -> f k v v2 } m1 m2 in 497 | let modelres = IntMap.inter model1 model2 orig_f in 498 | (* dump_test model1 model2 myres modelres; *) 499 | IntMap.equal (=) modelres myres) 500 | let () = QCheck.Test.check_exn test_nonidempotent_inter_no_share_foreign 501 | 502 | 503 | let test_update_multiple_foreign = QCheck.Test.make ~count:1000 ~name:"update_multiple_foreign" 504 | gen (fun x -> 505 | let (m1,model1,m2,model2) = model_from_gen x in 506 | let orig_f key va vb = 507 | let res = match va with 508 | | None -> sdbm key vb 509 | | Some va -> sdbm3 key va vb 510 | in if res mod 2 == 0 then None else Some res 511 | in 512 | let chk_calls = check_increases () in 513 | let f = fun key a b -> chk_calls key; orig_f key a b in 514 | let myres = intmap_of_mymap @@ Foreign.update_multiple_from_foreign m2 {f= 515 | fun k v (Snd v') -> f k v v' } m1 in 516 | let modelres = IntMap.update_multiple_from_foreign model1 model2 orig_f in 517 | (* dump_test model1 model2 myres modelres; *) 518 | IntMap.equal (=) modelres myres) 519 | let () = QCheck.Test.check_exn test_update_multiple_foreign 520 | 521 | let test_update_multiple_inter_foreign = QCheck.Test.make ~count:1000 ~name:"update_multiple_inter_foreign" 522 | gen (fun x -> 523 | let (m1,model1,m2,model2) = model_from_gen x in 524 | let orig_f = fun key va vb -> 525 | let res = sdbm3 key va vb in 526 | if res mod 2 == 0 then None else Some res 527 | in 528 | let chk_calls = check_increases () in 529 | let f key (a:int) b = chk_calls key; orig_f key a b in 530 | let myres = intmap_of_mymap @@ Foreign.update_multiple_from_inter_with_foreign m2 {f= 531 | fun k v (Snd v') -> f k v v' } m1 in 532 | let modelres = IntMap.update_multiple_from_inter_with_foreign model1 model2 orig_f in 533 | (* dump_test model1 model2 myres modelres; *) 534 | IntMap.equal (=) modelres myres) 535 | let () = QCheck.Test.check_exn test_update_multiple_inter_foreign 536 | 537 | let test_idempotent_inter_filter = QCheck.Test.make ~count:1000 ~name:"idempotent_inter_filter" 538 | gen (fun x -> 539 | let (m1,model1,m2,model2) = model_from_gen x in 540 | let f key (a:int) b = 541 | if a == b then Some a 542 | else let res = sdbm3 key a b in 543 | if res mod 3 == 0 then None else Some res 544 | in 545 | let chk_calls = check_increases_and_neq () in 546 | let myres = intmap_of_mymap @@ MyMap.idempotent_inter_filter 547 | (fun k a b -> chk_calls k a b; f k a b) m1 m2 in 548 | let modelres = IntMap.inter_filter model1 model2 f in 549 | (* dump_test model1 model2 myres modelres; *) 550 | IntMap.equal (=) modelres myres) 551 | let () = QCheck.Test.check_exn test_idempotent_inter_filter 552 | 553 | let test_slow_merge = QCheck.Test.make ~count:1000 ~name:"slow_merge" 554 | gen (fun x -> 555 | let (m1,model1,m2,model2) = model_from_gen x in 556 | (* A nonidempotent function that changes a lot according to inputs *) 557 | let f key a b = match a,b with 558 | | Some a, None -> if ((a * a) - key == 0) then None else Some((a * a) - key) 559 | | None, Some b -> if (key - b) == 0 then None else Some(key - b) 560 | | Some a, Some b -> if ((a - b - key) == 0) then None else Some(a-b-key) 561 | | None, None -> assert false 562 | in 563 | let myres = intmap_of_mymap @@ MyMap.slow_merge (fun key a b -> 564 | f key a b 565 | ) m1 m2 in 566 | let modelres = IntMap.merge f model1 model2 in 567 | (* dump_test model1 model2 myres modelres; *) 568 | (* Printf.printf "res is %b\n%!" @@ IntMap.equal (=) modelres myres; *) 569 | IntMap.equal (=) modelres myres) 570 | let () = QCheck.Test.check_exn test_slow_merge 571 | 572 | let test_disjoint = QCheck.Test.make ~count:1000 ~name:"disjoint" 573 | gen (fun x -> 574 | let (m1,model1,m2,model2) = model_from_gen x in 575 | let myres = MyMap.disjoint m1 m2 in 576 | let modelres = IntMap.is_empty (IntMap.inter model1 model2 (fun _ a _ -> a)) in 577 | (* dump_test model1 model2 myres modelres; *) 578 | (* Printf.printf "res is %b\n%!" @@ IntMap.equal (=) modelres myres; *) 579 | modelres == myres) 580 | let () = QCheck.Test.check_exn test_disjoint 581 | 582 | let test_fold_on_nonequal_inter = QCheck.Test.make ~count:1000 ~name:"fold_on_nonequal_inter" 583 | gen (fun x -> 584 | let (m1,model1,m2,model2) = model_from_gen x in 585 | let orig_f key v1 v2 acc = sdbm key @@ sdbm v1 @@ sdbm v2 acc in 586 | let chk_calls = check_increases () in 587 | let f key v1 v2 acc = 588 | chk_calls key; 589 | orig_f key v1 v2 acc 590 | in 591 | let myres = MyMap.fold_on_nonequal_inter f m1 m2 117 in 592 | let modelres = IntMap.fold_on_nonequal_inter orig_f model1 model2 117 in 593 | modelres == myres) 594 | let () = QCheck.Test.check_exn test_fold_on_nonequal_inter 595 | 596 | let test_fold_on_nonequal_union = QCheck.Test.make ~count:1000 ~name:"fold_on_nonequal_union" 597 | gen (fun x -> 598 | let (m1,model1,m2,model2) = model_from_gen x in 599 | let orig_f key v1 v2 acc = 600 | (* Printf.printf "Calling f key=%d v1=%s v2=%s acc=%d\n%!" *) 601 | (* key (match v1 with None -> "None" | Some v -> string_of_int v) *) 602 | (* (match v2 with None -> "None" | Some v -> string_of_int v) acc; *) 603 | (* chk_calls key; *) 604 | let v1 = match v1 with None -> 421 | Some v -> v in 605 | let v2 = match v2 with None -> 567 | Some v -> v in 606 | sdbm key @@ sdbm v1 @@ sdbm v2 acc in 607 | let chk_calls = check_increases () in 608 | let f key v1 v2 acc = chk_calls key; orig_f key v1 v2 acc in 609 | let myres = MyMap.fold_on_nonequal_union f m1 m2 117 in 610 | let modelres = IntMap.fold_on_nonequal_union orig_f model1 model2 117 in 611 | modelres == myres) 612 | let () = QCheck.Test.check_exn test_fold_on_nonequal_union 613 | 614 | let _pp_l fmt = Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 615 | (fun fmt (k,l) -> Format.fprintf fmt "(%x, %x)" k l) fmt 616 | 617 | let%test "negative_keys" = 618 | let map = MyMap.add 0 0 MyMap.empty in 619 | let map2 = MyMap.add min_int 5 map in 620 | let map3 = MyMap.add max_int 8 map2 in 621 | let map4 = MyMap.add 25 8 map2 in 622 | let map5 = MyMap.idempotent_inter_filter (fun _ _ _ -> None) map3 map4 in 623 | (* Format.printf "[%a]@." pp_l (MyMap.to_list map3); 624 | Format.printf "[%a]@." pp_l (MyMap.to_list map4); 625 | Format.printf "[%a]@." pp_l (MyMap.to_list map5); 626 | (match MyMap.BaseMap.view map3 with 627 | | Branch{prefix; branching_bit; _} -> Format.printf "%x : %x@." (Obj.magic prefix) (Obj.magic branching_bit) 628 | | _ -> () 629 | ); *) 630 | MyMap.to_list map = [(0, 0)] && 631 | MyMap.to_list map2 = [(0,0); (min_int,5)] && 632 | MyMap.to_list map3 = [(0,0); (max_int,8); (min_int,5)] && 633 | MyMap.to_list map4 = [(0,0); (25,8); (min_int,5)] && 634 | MyMap.to_list map5 = MyMap.to_list map2 635 | 636 | let test_id_unique = QCheck.Test.make ~count:1000 ~name:"unique_hashcons_id" 637 | gen (fun (one,two,three) -> 638 | (* Remove duplicates *) 639 | let two = List.filter (fun (x, _) -> not (List.mem_assoc x one)) two in 640 | let three = List.filter (fun (x, _) -> not (List.mem_assoc x one || List.mem_assoc x two)) three in 641 | let m = extend_map MyMap.empty one in 642 | let m1 = extend_map (extend_map m two) three in 643 | m1 == extend_map (extend_map (extend_map MyMap.empty three) one) two && 644 | m1 == extend_map (extend_map (extend_map MyMap.empty two) three) one && 645 | m1 == extend_map (extend_map (extend_map MyMap.empty three) two) one && 646 | m1 == extend_map (extend_map (extend_map MyMap.empty one) three) two && 647 | m1 == extend_map m1 one && 648 | m == remove_map (extend_map m two) two && 649 | MyMap.empty == remove_map m one 650 | ) 651 | let () = if Param.test_id then QCheck.Test.check_exn test_id_unique 652 | 653 | let test_difference = QCheck.Test.make ~count:1000 ~name:"symmetric_difference" 654 | gen (fun x -> 655 | let (m1,model1,m2,model2) = model_from_gen x in 656 | let orig_f _ x y = if x=y then None else Some (x+y) in 657 | let chk_calls = check_increases_and_neq () in 658 | let f k x y = chk_calls k x y; orig_f k x y in 659 | let myres = intmap_of_mymap @@ MyMap.symmetric_difference f m1 m2 in 660 | let modelres = IntMap.symmetric_difference orig_f model1 model2 in 661 | IntMap.equal (=) modelres myres 662 | (* if not b then 663 | Format.printf "[%a] diff [%a] is [%a] or [%a]@." 664 | _pp_l (MyMap.to_list m1) _pp_l (MyMap.to_list m2) _pp_l (List.of_seq (IntMap.to_seq myres)) 665 | _pp_l (List.of_seq (IntMap.to_seq modelres)); 666 | b *) 667 | ) 668 | let () = QCheck.Test.check_exn test_difference 669 | 670 | 671 | let test_domain_difference = QCheck.Test.make ~count:1000 ~name:"difference" 672 | gen (fun x -> 673 | let (m1,model1,m2,model2) = model_from_gen x in 674 | let orig_f _ x y = if x=y then None else Some (x+y) in 675 | let chk_calls = check_increases_and_neq () in 676 | let f k x y = chk_calls k x y; orig_f k x y in 677 | let myres = intmap_of_mymap @@ MyMap.difference f m1 m2 in 678 | let modelres = IntMap.difference orig_f model1 model2 in 679 | IntMap.equal (=) modelres myres) 680 | let () = QCheck.Test.check_exn test_difference 681 | 682 | 683 | let test_min_binding_inter = QCheck.Test.make ~count:1000 ~name:"min_binding_inter" 684 | gen (fun x -> 685 | let (m1,_,m2,_) = model_from_gen x in 686 | let min_t = MyMap.min_binding_inter m1 m2 in 687 | let min_l = 688 | try MyMap.idempotent_inter (fun _ l _ -> l) m1 m2 |> MyMap.unsigned_min_binding |> Option.some 689 | with Not_found -> None in 690 | let min_r = 691 | try MyMap.idempotent_inter (fun _ _ r -> r) m1 m2 |> MyMap.unsigned_min_binding |> Option.some 692 | with Not_found -> None in 693 | match min_t, min_l, min_r with 694 | | None, None, None -> true 695 | | Some(k,l,r), Some(k',l'), Some(k'',r') -> k = k' && k' = k'' && l = l' && r = r' 696 | | _ -> false) 697 | 698 | let test_max_binding_inter = QCheck.Test.make ~count:1000 ~name:"min_binding_inter" 699 | gen (fun x -> 700 | let (m1,_,m2,_) = model_from_gen x in 701 | let max_t = MyMap.max_binding_inter m1 m2 in 702 | let max_l = 703 | try MyMap.idempotent_inter (fun _ l _ -> l) m1 m2 |> MyMap.unsigned_max_binding |> Option.some 704 | with Not_found -> None in 705 | let max_r = 706 | try MyMap.idempotent_inter (fun _ _ r -> r) m1 m2 |> MyMap.unsigned_max_binding |> Option.some 707 | with Not_found -> None in 708 | match max_t, max_l, max_r with 709 | | None, None, None -> true 710 | | Some(k,l,r), Some(k',l'), Some(k'',r') -> k = k' && k' = k'' && l = l' && r = r' 711 | | _ -> false) 712 | let () = QCheck.Test.check_exn test_difference 713 | end 714 | 715 | module MyMap = MakeMap(HIntKey) 716 | module MyHashedMap = MakeHashconsedMap(HIntKey)(HashedValue)() 717 | 718 | let%test_module "TestMap_SmallNat" = (module TestImpl(MyMap)(struct 719 | let test_id = false 720 | let number_gen = QCheck.small_nat 721 | end)) 722 | 723 | let%test_module "TestMap_Int" = (module TestImpl(MyMap)(struct 724 | let test_id = false 725 | let number_gen = QCheck.int 726 | end)) 727 | 728 | let%test_module "TestHashconsedMap_SmallNat" = (module TestImpl(MyHashedMap)(struct 729 | let test_id = true 730 | let number_gen = QCheck.small_nat 731 | end)) 732 | 733 | let%test_module "TestHashconsedMap_Int" = (module TestImpl(MyHashedMap)(struct 734 | let test_id = true 735 | let number_gen = QCheck.int 736 | end)) 737 | 738 | let%test_module "TestWeak" = (module struct 739 | 740 | module MyKey(* :KEY *) = struct 741 | type t = Block of int [@@ocaml.boxed] 742 | let to_int (Block x) = x 743 | end 744 | 745 | module Node = WeakNode(struct type 'a t = MyKey.t end)(WrappedHomogeneousValue) 746 | module Map = MakeCustomMap(MyKey)(Value)(Node) 747 | open Map 748 | 749 | let _m1 = singleton (MyKey.Block 7) "seven" 750 | let _m1 = add (MyKey.Block 9) "nine" _m1 751 | 752 | (* let dump_map m = 753 | Printf.printf "----\n%!"; 754 | Map.iter (fun (Block key) value -> Printf.printf "key: %d value: %s\n%!" key value) m *) 755 | 756 | let length m = 757 | Map.fold (fun (MyKey.Block _key) _value acc -> acc + 1) m 0 758 | 759 | (* dump_map _m1;; *) 760 | 761 | let add n v m = add (MyKey.Block n) v m 762 | 763 | (* We have to make the test sufficiently complex, other as 764 | e.g. (Block 11) could be installed as a static value that would 765 | never get garbage-collected. *) 766 | let test i = 767 | let n1,m2 = 768 | let m2 = (Sys.opaque_identity add) (11 + i) "eleven" _m1 in 769 | (* dump_map m2; *) 770 | length m2, m2 771 | in 772 | (* dump_map m2; *) 773 | Gc.full_major(); 774 | let n2 = length m2 in 775 | (* Check that the key is removed. *) 776 | assert (n1 == 3); 777 | assert (n2 == 2); 778 | () 779 | 780 | let () = 781 | for i = 0 to 10 do 782 | (* Printf.printf "==========="; *) 783 | test i 784 | done 785 | end) 786 | --------------------------------------------------------------------------------