├── .gitignore ├── .ocamlformat ├── LICENSE ├── README.md ├── bench ├── README.md ├── analysis │ └── main.py ├── compact_bench.ml ├── dune └── results │ ├── hashset-memory-usage.png │ └── hashtbl-memory-usage.png ├── compact-bench.opam ├── compact.opam ├── dune-project ├── src ├── arena.ml ├── arena.mli ├── compact.ml ├── compact.mli ├── dune ├── hashed_container.ml ├── hashed_container.mli ├── hashed_container_bucket.ml ├── hashed_container_bucket.mli ├── hashed_container_intf.ml ├── hashset.ml ├── hashset.mli ├── hashset_fixed_size_string.ml ├── hashset_fixed_size_string.mli ├── hashtbl.ml ├── hashtbl.mli ├── immediate_array.ml ├── immediate_array.mli ├── import.ml ├── internal.ml ├── invariant.ml ├── invariant.mli ├── invariant_intf.ml ├── obj_array.ml ├── obj_array.mli ├── obj_either.ml ├── obj_either.mli ├── type_equality.ml ├── type_equality.mli ├── uniform_array.ml ├── uniform_array.mli └── uniform_array_intf.ml └── test ├── compact_test_helpers.ml ├── dune ├── test.ml ├── test_arena.ml ├── test_arena.mli ├── test_hashed_container.ml ├── test_hashed_container.mli ├── test_hashtbl.ml ├── test_hashtbl.mli ├── test_immediate_array.ml ├── test_immediate_array.mli ├── test_uniform_array.ml └── test_uniform_array.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | _coverage 3 | _fuzz 4 | _opam 5 | .#* 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version = 0.20.0 2 | profile = conventional 3 | 4 | parse-docstrings 5 | break-infix = fit-or-vertical 6 | break-separators = before 7 | dock-collection-brackets = false 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2020-2021 Craig Ferguson 4 | Copyright (c) 2016–2020 Jane Street Group, LLC 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

Compact

3 |

Memory-efficient data structures for OCaml

4 |

5 | 6 | OCaml-CI Build Status 7 | 8 |

9 |
10 | 11 | Currently provides the following modules: 12 | 13 | - **`Hashset`**: an unordered set container. Comes with three specialisations: 14 | - `Immediate`, for elements with an immediate representation 15 | (e.g. `int`). 16 | - `Immediate64`, for elements with an immediate 17 | representation on 64-bit platforms only (e.g. `Int63.t`). 18 | - `Fixed_size_string`, for elements that are strings of a fixed length. 19 | 20 | - **`Hashtbl`**: an unordered associative container. 21 | 22 | - **`Hashed_container`**: a generic hashtable implementation with support for 23 | externally-allocated bindings. 24 | 25 | - **`Arena`**: an arena for fixed-width strings; 26 | 27 | - **`Uniform_array`**: an array implementation that forbids the [flat float 28 | array optimisation][flat-float-array]. Comes with `Tuple2` and `Tuple3` 29 | specialisations for compact arrays of _pairs_ and _triples_. 30 | 31 | 32 | See the [`./bench` subdirectory](./bench/) for benchmarks (and benchmark 33 | results) for the above modules. 34 | 35 | [flat-float-array]: https://dev.realworldocaml.org/runtime-memory-layout.html#scrollNav-3-1 36 | 37 | ### Installation 38 | 39 | `Compact` can be installed with `opam`: 40 | 41 | ``` 42 | opam pin add -n compact.dev git+https://github.com/CraigFe/compact 43 | opam install compact 44 | ``` 45 | 46 | ### Acknowledgements 47 | 48 | This library pulls code and ideas from various open-source Jane Street 49 | libraries, including a [uniform array][base-uniform-array] implementation from 50 | `Base`. 51 | 52 | [base-uniform-array]: https://github.com/janestreet/base/blob/caae3c2bacebd558ae2b0bbea807ec9703fb7508/src/uniform_array.ml 53 | -------------------------------------------------------------------------------- /bench/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark results 2 | 3 | `Compact` datastructures are optimised for low static memory usage, at the cost 4 | of generally higher transient allocations and slower runtime performance. 5 | 6 | - [`Hashset`](#hashset) 7 | - [`Hashtbl`](#hashtbl) 8 | 9 | ## Hashset 10 | 11 | Memory and runtime performance of `int Hashset.t` for various implementations: 12 | 13 | ![Comparative benchmark results for a range of hashset implementations](./results/hashset-memory-usage.png) 14 | 15 | Implementations are configured as follows: 16 | 17 | | Implementation | Type | Description | 18 | | ------------------- | ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------- | 19 | | `stdlib` | `(int, unit) Stdlib.Hashtbl.t` | From the OCaml standard library (and the [`CCHashSet`][cchashset] module from [`containers-data`][c-cube/ocaml-containers]). Uses lists of `int * unit` pairs as buckets (as we're co-opting a hashtable implementation). | 20 | | `backtracking` | `int Hashset.t` | From [`backtracking/hashset`][backtracking/hashset]. Uses `int` lists as buckets. | 21 | | `base` | `int Base.Hash_set.t` | From [`janestreet/base`][janestreet/base]. Uses AVL trees as buckets (with `unit` dummy values). | 22 | | `compact` | `int Compact.Hashset.t` | From this library. Uses `int` arrays as buckets, resulting in more copying of buckets but smaller buckets overall than the above implementations. | 23 | | `compact-immediate` | `int Compact.Hashset.Int.t` | From this library. Uses `int` arrays as buckets with *inlined singleton buckets* (taking advantage of the fact that integers always have an immediate representation). | 24 | 25 | [cchashset]: https://c-cube.github.io/ocaml-containers/last/containers-data/CCHashSet/index.html 26 | [c-cube/ocaml-containers]: https://github.com/c-cube/ocaml-containers 27 | [backtracking/hashset]: https://github.com/backtracking/hashset 28 | [janestreet/base]: https://github.com/janestreet/base 29 | 30 | ## Hashtbl 31 | 32 | Memory and runtime performance of `(int, int) Hashtbl.t` for various implementations: 33 | 34 | ![Comparative benchmark results for a range of hashtable implementations](./results/hashtbl-memory-usage.png) 35 | -------------------------------------------------------------------------------- /bench/analysis/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import seaborn as sns 7 | 8 | df = pd.read_csv(sys.argv[1]) 9 | figure_file = sys.argv[2] 10 | 11 | # Categorise the entries by implementation 12 | # implementations=['stdlib','base','containers','backtracking','compact','compact-immediate'] 13 | # df['implementation'] = pd.Categorical(df['implementation'], implementations) 14 | 15 | fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(13, 5)) 16 | 17 | sns.lineplot(data = df, ax = ax1, x = 'entries', y = 'reachable_words', hue = 'implementation') 18 | sns.lineplot(data = df, ax = ax2, x = 'entries', y = 'allocated_words', hue = 'implementation') 19 | sns.boxplot(data = df, ax = ax3, x = 'implementation', y = 'time(ns)') 20 | 21 | ax3.set_yscale('log') 22 | ax1.grid(which ='major', color='gray', linewidth=0.2) 23 | ax2.grid(which ='major', color='gray', linewidth=0.2) 24 | ax3.grid(which ='minor', color='gray', linewidth=0.2) 25 | 26 | fig.suptitle('Relative performance of implementations') 27 | fig.tight_layout() 28 | 29 | print('Saving to \'%s\'' % figure_file) 30 | fig.savefig(figure_file) 31 | -------------------------------------------------------------------------------- /bench/compact_bench.ml: -------------------------------------------------------------------------------- 1 | let random_int () = Random.int 0x3FFFFFFF 2 | 3 | module Key = struct 4 | include Int 5 | 6 | let sexp_of_t = Base.Int.sexp_of_t 7 | let hash = Hashtbl.hash 8 | let hash_size = 30 9 | end 10 | 11 | let stabilize_garbage_collector () = 12 | let rec go fail last_heap_live_words = 13 | if fail <= 0 then 14 | failwith "Unable to stabilize the number of live words in the major heap"; 15 | Gc.compact (); 16 | let stat = Gc.stat () in 17 | if stat.Gc.live_words <> last_heap_live_words then 18 | go (fail - 1) stat.Gc.live_words 19 | in 20 | go 10 0 21 | 22 | let measure_size t = 23 | (* NOTE: this measurement relies on _not_ using [--no-naked-pointers] or the 24 | OCaml Multicore runtime to get accurate results when using arrays as 25 | hashtable buckets, since otherwise [Obj.reachable_words] includes atoms 26 | (counting the empty array many times). 27 | 28 | When OCaml 5.0 is released, a custom size measurement function will be 29 | needed for hashtables with array buckets. *) 30 | Obj.reachable_words (Obj.repr t) 31 | 32 | let allocated_words () = 33 | let s = Gc.quick_stat () in 34 | s.minor_words +. s.major_words -. s.promoted_words 35 | 36 | let run_loop ~name ~out ~iterations ~action t = 37 | let start_time = Mtime_clock.counter () in 38 | let last = ref Mtime.Span.zero in 39 | let initial_allocations = allocated_words () in 40 | stabilize_garbage_collector (); 41 | for i = 1 to iterations do 42 | action t; 43 | if i mod 1_000 = 0 then ( 44 | let time = Mtime_clock.count start_time in 45 | let diff = Mtime.Span.abs_diff time !last in 46 | Printf.eprintf "\r%s : %#d / %#d%!" name i iterations; 47 | Printf.fprintf out "%d,%s,%d,%f,%Ld\n%!" i name (measure_size t) 48 | (allocated_words () -. initial_allocations) 49 | (Int64.div (Mtime.Span.to_uint64_ns diff) 1_000L); 50 | last := Mtime_clock.count start_time) 51 | done; 52 | Printf.eprintf "\r%s : done\x1b[K\n%!" name 53 | 54 | let open_stat_file name = 55 | let stat_file = 56 | let rnd = Random.bits () land 0xFFFFFF in 57 | let ( / ) = Filename.concat in 58 | "_build" / Printf.sprintf "%s-%06x.csv" name rnd 59 | in 60 | Printf.printf "Sending stats to '%s'\n%!" stat_file; 61 | let out = open_out stat_file in 62 | Printf.fprintf out 63 | "entries,implementation,reachable_words,allocated_words,time(ns)\n"; 64 | out 65 | 66 | module Hashset = struct 67 | (** Compute metrics of various hashset implementations, as a function of the 68 | number of entries: 69 | 70 | - total size in memory 71 | - extra allocations per entry 72 | - cost of [add] per entry 73 | 74 | Stats are emitted to a trace file to be interpreted by [analysis/main.py]. *) 75 | 76 | module type S = sig 77 | type t 78 | 79 | val name : string 80 | val create : int -> t 81 | val add : t -> int -> unit 82 | end 83 | 84 | module Hs_compact_imm : S = struct 85 | module T = Compact.Hashset.Int 86 | 87 | type t = T.t 88 | 89 | let name = "compact-immediate" 90 | let create n = T.create ~initial_capacity:n () 91 | let add = T.add 92 | end 93 | 94 | module Hs_compact : S = struct 95 | module T = Compact.Hashset 96 | 97 | type t = int T.t 98 | 99 | let name = "compact" 100 | let create n = T.create ~initial_capacity:n (module Key) 101 | let add = T.add 102 | end 103 | 104 | module Hs_backtracking : S = struct 105 | module T = Hashset 106 | 107 | type nonrec t = int Hashset.t 108 | 109 | let name = "backtracking" 110 | let create = T.create 111 | let add = T.add 112 | end 113 | 114 | module Hs_base : S = struct 115 | module T = Base.Hash_set 116 | 117 | type t = int T.t 118 | 119 | let name = "base" 120 | let create n = T.create ~size:n (module Key) 121 | let add = T.add 122 | end 123 | 124 | module Hs_stdlib : S = struct 125 | module T = Stdlib.Hashtbl 126 | 127 | type nonrec t = (int, unit) T.t 128 | 129 | let name = "stdlib" 130 | let create n = T.create n 131 | let add t k = T.add t k () 132 | end 133 | 134 | let run_loop ~out (module Hashset : S) = 135 | let t = Hashset.create 0 in 136 | run_loop t ~iterations:300_000 ~name:Hashset.name ~out ~action:(fun t -> 137 | Hashset.add t (random_int ())) 138 | 139 | let run () = 140 | let out = open_stat_file "hashset-memory-usage" in 141 | List.iter (run_loop ~out) 142 | [ (module Hs_stdlib) 143 | ; (module Hs_backtracking) 144 | ; (module Hs_base) 145 | ; (module Hs_compact_imm) 146 | ; (module Hs_compact) 147 | ]; 148 | Printf.printf "\nDone\n" 149 | end 150 | 151 | module Hashtbl = struct 152 | module Hs_compact = Compact.Hashtbl 153 | module Hs_base = Base.Hashtbl 154 | module Hs_stdlib = Stdlib.Hashtbl 155 | 156 | let run_loop ~name ~out ~add t = 157 | run_loop t ~iterations:300_000 ~name ~out ~action:(fun t -> 158 | let k, v = (random_int (), random_int ()) in 159 | add t k v) 160 | 161 | let run () = 162 | let out = open_stat_file "hashtbl-memory-usage" in 163 | run_loop ~name:"compact" ~out 164 | ~add:(fun t key data -> Hs_compact.replace t ~key ~data) 165 | (Hs_compact.create ~initial_capacity:0 (module Key)); 166 | run_loop ~name:"stdlib" ~out ~add:Hs_stdlib.add (Hs_stdlib.create 0); 167 | run_loop ~name:"base" ~out 168 | ~add:(fun t key data -> Hs_base.add_exn t ~key ~data) 169 | (Hs_base.create (module Key)); 170 | Printf.printf "\nDone\n" 171 | end 172 | 173 | (* let () = 174 | * Memtrace.trace_if_requested (); 175 | * let total = 10_000_000 in 176 | * let t = Compact.Hashset.Immediate.create ~initial_capacity:0 (module Key) in 177 | * for _ = 1 to total do 178 | * let n = random_int () in 179 | * Compact.Hashset.Immediate.add t n 180 | * done; 181 | * Printf.eprintf "done\n%!" *) 182 | 183 | let () = 184 | match Sys.argv with 185 | | [| _; "hashtbl" |] -> Hashtbl.run () 186 | | [| _; "hashset" |] -> Hashset.run () 187 | | _ -> 188 | Printf.eprintf "usage: %s [hashtbl | hashset]\n%!" Sys.argv.(0); 189 | exit 1 190 | -------------------------------------------------------------------------------- /bench/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (public_name compact_bench) 3 | (package compact-bench) 4 | (libraries mtime mtime.clock.os base compact hashset memtrace sexplib0) 5 | (ocamlopt_flags -O3)) 6 | -------------------------------------------------------------------------------- /bench/results/hashset-memory-usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigfe/compact/daa1b516c917585b80e2fbace74690766a9ac907/bench/results/hashset-memory-usage.png -------------------------------------------------------------------------------- /bench/results/hashtbl-memory-usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/craigfe/compact/daa1b516c917585b80e2fbace74690766a9ac907/bench/results/hashtbl-memory-usage.png -------------------------------------------------------------------------------- /compact-bench.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Benchmarks for the `compact` library" 4 | description: "Benchmarks for the `compact` library" 5 | maintainer: ["Craig Ferguson "] 6 | authors: ["Craig Ferguson "] 7 | license: "MIT" 8 | homepage: "https://github.com/CraigFe/compact" 9 | bug-reports: "https://github.com/CraigFe/compact/issues" 10 | depends: [ 11 | "dune" {>= "2.9"} 12 | "ocaml" {>= "4.12.0"} 13 | "compact" {= version} 14 | "base" 15 | "hashset" 16 | "mtime" {>= "1.3.0"} 17 | "memtrace" 18 | "sexplib0" 19 | "odoc" {with-doc} 20 | ] 21 | build: [ 22 | ["dune" "subst"] {dev} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "--promote-install-files=false" 31 | "@install" 32 | "@runtest" {with-test} 33 | "@doc" {with-doc} 34 | ] 35 | ["dune" "install" "-p" name "--create-install-files" name] 36 | ] 37 | dev-repo: "git+https://github.com/CraigFe/compact.git" 38 | -------------------------------------------------------------------------------- /compact.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Containers with small memory footprints" 4 | description: "Containers with small memory footprints" 5 | maintainer: ["Craig Ferguson "] 6 | authors: ["Craig Ferguson "] 7 | license: "MIT" 8 | homepage: "https://github.com/CraigFe/compact" 9 | doc: "https://CraigFe.github.io/compact/" 10 | bug-reports: "https://github.com/CraigFe/compact/issues" 11 | depends: [ 12 | "dune" {>= "2.9"} 13 | "ocaml" {>= "4.12.0"} 14 | "bigstringaf" 15 | "bisect_ppx" {dev} 16 | "vector" {with-test} 17 | "fmt" {with-test} 18 | "alcotest" {with-test & >= "1.4.0"} 19 | "odoc" {with-doc} 20 | ] 21 | build: [ 22 | ["dune" "subst"] {dev} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "--promote-install-files=false" 31 | "@install" 32 | "@runtest" {with-test} 33 | "@doc" {with-doc} 34 | ] 35 | ["dune" "install" "-p" name "--create-install-files" name] 36 | ] 37 | dev-repo: "git+https://github.com/CraigFe/compact.git" 38 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.9) 2 | (name compact) 3 | (implicit_transitive_deps false) 4 | 5 | (generate_opam_files true) 6 | (source (github CraigFe/compact)) 7 | (license MIT) 8 | (maintainers "Craig Ferguson ") 9 | (authors "Craig Ferguson ") 10 | 11 | (package 12 | (name compact) 13 | (synopsis "Containers with small memory footprints") 14 | (description "Containers with small memory footprints") 15 | (documentation https://CraigFe.github.io/compact/) 16 | (depends 17 | (ocaml (>= 4.12.0)) 18 | bigstringaf 19 | (bisect_ppx :dev) 20 | (vector :with-test) 21 | (fmt :with-test) 22 | (alcotest (and :with-test (>= 1.4.0))))) 23 | 24 | (package 25 | (name compact-bench) 26 | (synopsis "Benchmarks for the `compact` library") 27 | (description "Benchmarks for the `compact` library") 28 | (depends 29 | (ocaml (>= 4.12.0)) 30 | (compact (= :version)) 31 | base 32 | hashset 33 | (mtime (>= 1.3.0)) 34 | memtrace 35 | sexplib0)) -------------------------------------------------------------------------------- /src/arena.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | open! Import 7 | 8 | type internal = 9 | { elt_length : int; mutable data : Bigstringaf.t; mutable next_offset : int } 10 | 11 | type t = internal 12 | 13 | (* An offset into a data bigstring. *) 14 | type id = int 15 | 16 | let create ~elt_length ~initial_capacity = 17 | { elt_length 18 | ; data = Bigstringaf.create (elt_length * initial_capacity) 19 | ; next_offset = 0 20 | } 21 | 22 | let elt_equal t offset elt = 23 | Bigstringaf.memcmp_string t.data offset elt 0 t.elt_length = 0 24 | 25 | let is_full t = t.next_offset = Bigstringaf.length t.data 26 | 27 | let allocate t elt = 28 | if is_full t then invalid_arg "Arena.allocate: arena is full"; 29 | (* Write the element to the next available arena offset. *) 30 | let offset = t.next_offset in 31 | Bigstringaf.blit_from_string elt ~src_off:0 t.data ~dst_off:offset 32 | ~len:t.elt_length; 33 | t.next_offset <- offset + t.elt_length; 34 | offset 35 | 36 | let dereference t off = 37 | if off + t.elt_length > t.next_offset then 38 | invalid_arg "Arena.dereference: reference doesn't belong to this arena"; 39 | Bigstringaf.substring t.data ~off ~len:t.elt_length 40 | 41 | let expand t size = 42 | let old_len = Bigstringaf.length t.data in 43 | let new_len = size * t.elt_length in 44 | if new_len < old_len then 45 | invalid_arg "Arena.expand: can't reduce the size of an existing arena"; 46 | let new_data = Bigstringaf.create new_len in 47 | Bigstringaf.blit t.data ~src_off:0 new_data ~dst_off:0 ~len:t.next_offset; 48 | t.data <- new_data 49 | 50 | module Internal = struct 51 | type nonrec t = t 52 | 53 | let repr = Type_equality.Refl 54 | end 55 | 56 | (*———————————————————————————————————————————————————————————————————————————— 57 | Copyright (c) 2020–2021 Craig Ferguson 58 | 59 | Permission to use, copy, modify, and/or distribute this software for any 60 | purpose with or without fee is hereby granted, provided that the above 61 | copyright notice and this permission notice appear in all copies. 62 | 63 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 64 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 65 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 66 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 67 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 68 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 69 | DEALINGS IN THE SOFTWARE. 70 | ————————————————————————————————————————————————————————————————————————————*) 71 | -------------------------------------------------------------------------------- /src/arena.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | type t 7 | 8 | val create : elt_length:int -> initial_capacity:int -> t 9 | (** [create ~elt_length:len ~initial_capacity:n] is an empty arena of strings of 10 | length [len], with space sufficient to store [n] values. *) 11 | 12 | val is_full : t -> bool 13 | (** [is_full t] is true iff [t] has no remaining space for elements. *) 14 | 15 | val expand : t -> int -> unit 16 | (** [expand t n] re-allocates arena [t] to support storing up to [n]-many 17 | elements. Existing elements in the arena retain their original {!id}s. 18 | 19 | @raise Invalid_argument if [n] is less than the current capacity of [t]. *) 20 | 21 | type id 22 | (** The type of references to allocated elements in an arena. *) 23 | 24 | val allocate : t -> string -> id 25 | (** [allocate t s] adds the string [s] to arena [t], returning a reference to 26 | the storage location that may later be {!dereference}d to get back [s]. 27 | 28 | @raise Invalid_argument 29 | if [t] {!is_full}. The behaviour is undefined if the length of [s] is not 30 | equal to the [elt_length] of [t]. *) 31 | 32 | val dereference : t -> id -> string 33 | (** [dereference t id] is the string that was passed to the {!allocate} call 34 | that returned [id]. The behaviour is undefined if [id] was not created by an 35 | allocation within [t]. *) 36 | 37 | val elt_equal : t -> id -> string -> bool 38 | (** [elt_equal t id s] is equivalent to [String.equal (dereference t id) s], but 39 | more efficient. *) 40 | 41 | type internal = 42 | { elt_length : int; mutable data : Bigstringaf.t; mutable next_offset : int } 43 | 44 | module Internal : Internal.S0 with type outer_t := t and type t = internal 45 | 46 | (*———————————————————————————————————————————————————————————————————————————— 47 | Copyright (c) 2020–2021 Craig Ferguson 48 | 49 | Permission to use, copy, modify, and/or distribute this software for any 50 | purpose with or without fee is hereby granted, provided that the above 51 | copyright notice and this permission notice appear in all copies. 52 | 53 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 56 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 57 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 58 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 59 | DEALINGS IN THE SOFTWARE. 60 | ————————————————————————————————————————————————————————————————————————————*) 61 | -------------------------------------------------------------------------------- /src/compact.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | module Arena = Arena 7 | module Hashed_container = Hashed_container 8 | module Hashset = Hashset 9 | module Hashtbl = Hashtbl 10 | module Internal = Internal 11 | module Type_equality = Type_equality 12 | module Uniform_array = Uniform_array 13 | module Immediate_array = Immediate_array 14 | 15 | (*———————————————————————————————————————————————————————————————————————————— 16 | Copyright (c) 2020–2021 Craig Ferguson 17 | 18 | Permission to use, copy, modify, and/or distribute this software for any 19 | purpose with or without fee is hereby granted, provided that the above 20 | copyright notice and this permission notice appear in all copies. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 | DEALINGS IN THE SOFTWARE. 29 | ————————————————————————————————————————————————————————————————————————————*) 30 | -------------------------------------------------------------------------------- /src/compact.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** Containers with compact memory footprints. *) 7 | 8 | (** {2 Generic containers} *) 9 | 10 | module Hashtbl = Hashtbl 11 | module Hashset = Hashset 12 | module Hashed_container = Hashed_container 13 | module Uniform_array = Uniform_array 14 | module Immediate_array = Immediate_array 15 | 16 | (** {2 Containers for fixed-length strings} *) 17 | 18 | module Arena = Arena 19 | (** An implementation of append-only arenas of fixed-length strings, with 20 | support for manual expansion. *) 21 | 22 | (** Utility modules: *) 23 | 24 | module Type_equality = Type_equality 25 | module Internal = Internal 26 | 27 | (*———————————————————————————————————————————————————————————————————————————— 28 | Copyright (c) 2020–2021 Craig Ferguson 29 | 30 | Permission to use, copy, modify, and/or distribute this software for any 31 | purpose with or without fee is hereby granted, provided that the above 32 | copyright notice and this permission notice appear in all copies. 33 | 34 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 35 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 36 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 37 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 38 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 39 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 40 | DEALINGS IN THE SOFTWARE. 41 | ————————————————————————————————————————————————————————————————————————————*) 42 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (public_name compact) 3 | (libraries bigstringaf) 4 | (instrumentation 5 | (backend bisect_ppx))) 6 | -------------------------------------------------------------------------------- /src/hashed_container.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (* This module defines the hashtable implementation that underlies most 7 | containers in this library. *) 8 | 9 | open! Import 10 | include Hashed_container_intf 11 | include Hashed_container_intf.Types 12 | module Bucket = Hashed_container_bucket 13 | module Entry_size = Bucket.Entry_size 14 | 15 | type ('key, 'value, 'packed, 'decoder, 'entry) vtable = 16 | { key_hash : 'key -> int 17 | ; key_hash_size : int 18 | ; key_equal : 'key -> 'key -> bool 19 | ; entry_key : 'entry -> 'key 20 | ; entry_value : 'entry -> 'value 21 | ; entry_compare : 'entry -> 'entry -> int 22 | ; packed_key : 'decoder -> 'packed -> 'key 23 | ; packed_entry : 'decoder -> 'packed -> 'entry 24 | ; packed_of_entry : 'decoder -> 'entry -> 'packed 25 | } 26 | 27 | type ('k, 'v, 'packed, 's, 'entry, 'bucket) unboxed = 28 | { entry_size : ('packed, 'bucket) Entry_size.t 29 | ; mutable hashtbl : 'bucket Array.t 30 | ; mutable bucket_count_log2 : int 31 | ; mutable cardinal : int 32 | ; mutable mutation_allowed : bool (* Set during all iteration operations *) 33 | ; vtable : ('k, 'v, 'packed, 's, 'entry) vtable 34 | } 35 | 36 | type (_, _, _, _, _) internal = 37 | | T : 38 | ('k, 'v, 'packed, 's, 'entry, _) unboxed 39 | -> ('k, 'v, 'packed, 's, 'entry) internal 40 | [@@ocaml.unboxed] 41 | 42 | type ('a, 'b, 'c, 'd, 'e) t = ('a, 'b, 'c, 'd, 'e) internal 43 | 44 | let ensure_mutation_allowed ~__FUNCTION__:ctx t = 45 | if not t.mutation_allowed then 46 | Format.kasprintf failwith "%s: mutation not allowed during iteration" ctx 47 | 48 | let with_mutation_disallowed t ~f = 49 | let m = t.mutation_allowed in 50 | t.mutation_allowed <- false; 51 | match f () with 52 | | a -> 53 | t.mutation_allowed <- m; 54 | a 55 | | exception exn -> 56 | t.mutation_allowed <- m; 57 | raise exn 58 | 59 | let create ~vtable ~initial_capacity ~entry_size () = 60 | let bucket_count_log2, bucket_count = 61 | let rec aux n_log2 n = 62 | if n >= initial_capacity then (n_log2, n) 63 | else if n * 2 > Sys.max_array_length then (n_log2, n) 64 | else aux (n_log2 + 1) (n * 2) 65 | in 66 | aux 4 16 67 | in 68 | let hashtbl = Array.make bucket_count (Bucket.empty entry_size) in 69 | T 70 | { hashtbl 71 | ; bucket_count_log2 72 | ; cardinal = 0 73 | ; mutation_allowed = true 74 | ; entry_size 75 | ; vtable 76 | } 77 | 78 | module T = struct 79 | let copy (T t) = T { t with hashtbl = Array.copy t.hashtbl } 80 | let cardinal (T t) = t.cardinal 81 | let vtable (T t) = t.vtable 82 | 83 | let clear (T t) = 84 | ensure_mutation_allowed ~__FUNCTION__ t; 85 | let empty = Bucket.empty t.entry_size in 86 | t.hashtbl <- [| empty |]; 87 | t.bucket_count_log2 <- 0; 88 | t.cardinal <- 0 89 | 90 | let elt_index : type a b c d e. (a, b, c, d, e) t -> a -> int = 91 | fun (T t) key -> 92 | let v = t.vtable in 93 | (* NOTE: we use the _uppermost_ bits of the key hash to index the bucket 94 | array, so that the hashtbl is approximately sorted by key hash (with only 95 | the entries within each bucket being relatively out of order). *) 96 | let unneeded_bits = v.key_hash_size - t.bucket_count_log2 in 97 | (v.key_hash key lsr unneeded_bits) land ((1 lsl t.bucket_count_log2) - 1) 98 | 99 | let partition_bucket : 100 | type a b c d e bucket. 101 | decoder:d 102 | -> (a, b, c, d, e, bucket) unboxed 103 | -> int 104 | -> bucket 105 | -> bucket * bucket = 106 | let f : 107 | type a b c d e. 108 | a:(a, b, c, d, e, _) unboxed -> b:d -> c:int -> c -> bool = 109 | fun ~a:t ~b:decoder ~c:index packed -> 110 | let v = t.vtable in 111 | let key = v.packed_key decoder packed in 112 | let new_index = elt_index (T t) key in 113 | assert (new_index lsr 1 = index); 114 | new_index land 1 = 0 115 | in 116 | fun ~decoder ({ entry_size; _ } as t) index bucket -> 117 | Bucket.partition3 entry_size bucket ~a:t ~b:decoder ~c:index ~f 118 | 119 | let resize : type a b c d e. (a, b, c, d, e) t -> d -> unit = 120 | fun (T t) decoder -> 121 | (* Scale the number of hashtbl buckets. *) 122 | t.bucket_count_log2 <- t.bucket_count_log2 + 1; 123 | let new_bucket_count = 1 lsl t.bucket_count_log2 in 124 | if new_bucket_count > Sys.max_array_length then 125 | Format.kasprintf failwith 126 | "Log_file.resize: can't construct a hashtbl with %d buckets \ 127 | (Sys.max_array_length = %d)" 128 | new_bucket_count Sys.max_array_length; 129 | let new_hashtbl = Array.make new_bucket_count (Bucket.empty t.entry_size) in 130 | Array.iteri t.hashtbl ~f:(fun i bucket -> 131 | (* The bindings in this bucket will be split into two new buckets, using 132 | the next bit of [Key.hash] as a discriminator. *) 133 | let bucket_2i, bucket_2i_plus_1 = 134 | partition_bucket ~decoder t i bucket 135 | in 136 | new_hashtbl.(2 * i) <- bucket_2i; 137 | new_hashtbl.((2 * i) + 1) <- bucket_2i_plus_1); 138 | t.hashtbl <- new_hashtbl 139 | 140 | let maybe_resize t ~decoder = 141 | let should_grow = t.cardinal > 2 * Array.length t.hashtbl in 142 | if should_grow then resize (T t) decoder 143 | 144 | type add_result = Add | Replace 145 | 146 | let add_worker : 147 | type a b c d e. 148 | (a, b, c, d, e) t -> replace:bool -> decoder:d -> a -> c -> add_result = 149 | fun (T t) ~replace ~decoder key packed -> 150 | let v = t.vtable in 151 | if t.cardinal > 2 * Array.length t.hashtbl then resize (T t) decoder; 152 | let elt_idx = elt_index (T t) key in 153 | let bucket = t.hashtbl.(elt_idx) in 154 | let length = Bucket.length t.entry_size bucket in 155 | let bucket' = 156 | Bucket.replace t.entry_size bucket ~replace ~decoder ~unpack:v.packed_key 157 | ~key ~key_equal:v.key_equal ~data:packed 158 | in 159 | let length' = Bucket.length t.entry_size bucket' in 160 | (* Avoid [caml_modify] when new bucket is identical: *) 161 | if not (bucket == bucket') then t.hashtbl.(elt_idx) <- bucket'; 162 | if length' > length then ( 163 | t.cardinal <- t.cardinal + 1; 164 | maybe_resize t ~decoder; 165 | Add) 166 | else Replace 167 | 168 | let replace (T t) ~decoder key packed = 169 | ensure_mutation_allowed ~__FUNCTION__ t; 170 | let (_ : add_result) = add_worker (T t) ~replace:true ~decoder key packed in 171 | () 172 | 173 | let add (T t) ~decoder key packed = 174 | ensure_mutation_allowed ~__FUNCTION__ t; 175 | match add_worker (T t) ~replace:false ~decoder key packed with 176 | | Add -> `Ok 177 | | Replace -> `Duplicate 178 | 179 | let add_exn t ~decoder key packed = 180 | match add t ~decoder key packed with 181 | | `Ok -> () 182 | | `Duplicate -> 183 | Printf.ksprintf failwith "%s: got key already present" __FUNCTION__ 184 | 185 | let remove : type a b c d e. (a, b, c, d, e) t -> decoder:d -> a -> unit = 186 | fun (T t) ~decoder key -> 187 | ensure_mutation_allowed ~__FUNCTION__ t; 188 | let v = t.vtable in 189 | let elt_idx = elt_index (T t) key in 190 | let bucket = t.hashtbl.(elt_idx) in 191 | let key_found = ref false in 192 | let bucket' = 193 | Bucket.fold_left t.entry_size bucket ~init:[] ~f:(fun acc entry -> 194 | if !key_found then 195 | (* We ensure there's at most one binding for a given key *) 196 | entry :: acc 197 | else 198 | let key' = v.packed_key decoder entry in 199 | match v.key_equal key key' with 200 | | false -> entry :: acc 201 | | true -> 202 | (* Drop this binding *) 203 | key_found := true; 204 | acc) 205 | |> Bucket.of_list_rev t.entry_size 206 | in 207 | match !key_found with 208 | | false -> (* Nothing to do *) () 209 | | true -> 210 | t.cardinal <- t.cardinal - 1; 211 | t.hashtbl.(elt_idx) <- bucket' 212 | 213 | let mem : type a b c d e. (a, b, c, d, e) t -> decoder:d -> a -> bool = 214 | fun (T t) ~decoder key -> 215 | let v = t.vtable in 216 | let elt_idx = elt_index (T t) key in 217 | let bucket = t.hashtbl.(elt_idx) in 218 | Bucket.exists t.entry_size bucket ~f:(fun packed -> 219 | v.key_equal key (v.packed_key decoder packed)) 220 | 221 | let find_and_call : 222 | type k v c d e r. 223 | (k, v, c, d, e) t 224 | -> decoder:d 225 | -> k 226 | -> if_found:(key:k -> data:v -> r) 227 | -> if_not_found:(k -> r) 228 | -> r = 229 | fun (T t) ~decoder key ~if_found ~if_not_found -> 230 | let v = t.vtable in 231 | let elt_idx = elt_index (T t) key in 232 | let bucket = t.hashtbl.(elt_idx) in 233 | match 234 | Bucket.find_map t.entry_size bucket ~f:(fun packed -> 235 | (* We expect the keys to match most of the time, so we decode the 236 | value at the same time. *) 237 | let entry = v.packed_entry decoder packed in 238 | match v.key_equal key (v.entry_key entry) with 239 | | false -> None 240 | | true -> Some entry) 241 | with 242 | (* TODO: avoid option allocation here *) 243 | | Some entry -> 244 | if_found ~key:(v.entry_key entry) ~data:(v.entry_value entry) 245 | | None -> if_not_found key 246 | 247 | let find = 248 | let if_found ~key:_ ~data = Some data in 249 | let if_not_found _ = None in 250 | fun t ~decoder key -> find_and_call t key ~decoder ~if_found ~if_not_found 251 | 252 | let find_exn = 253 | let if_found ~key:_ ~data = data in 254 | let if_not_found _ = raise Not_found in 255 | fun t ~decoder key -> find_and_call t key ~decoder ~if_found ~if_not_found 256 | 257 | let fold : 258 | type a b c d e acc. 259 | (a, b, c, d, e) t -> decoder:d -> f:(acc -> e -> acc) -> init:acc -> acc = 260 | fun (T t) ~decoder ~f ~init -> 261 | let v = t.vtable in 262 | with_mutation_disallowed t ~f:(fun () -> 263 | Array.fold_left t.hashtbl ~init ~f:(fun acc bucket -> 264 | Bucket.fold_left t.entry_size bucket ~init:acc ~f:(fun acc packed -> 265 | let entry = v.packed_entry decoder packed in 266 | f acc entry))) 267 | 268 | let map_poly : 269 | type a v1 v2 c1 c2 d1 d2 e1 e2. 270 | (a, v1, c1, d1, e1) t 271 | -> vtable:(a, v2, c2, d2, e2) vtable 272 | -> decoder_src:d1 273 | -> decoder_dst:d2 274 | -> f:(e1 -> e2) 275 | -> (a, v2, c2, d2, e2) t = 276 | fun (T t) ~vtable ~decoder_src ~decoder_dst ~f -> 277 | let v = t.vtable in 278 | let hashtbl = 279 | with_mutation_disallowed t ~f:(fun () -> 280 | Array.map t.hashtbl ~f:(fun bucket -> 281 | Bucket.map t.entry_size 282 | (Obj.magic 283 | t.entry_size (* TODO: use [Higher] typing for entry_size *)) 284 | bucket ~f:(fun packed -> 285 | let entry = v.packed_entry decoder_src packed in 286 | let entry' = f entry in 287 | vtable.packed_of_entry decoder_dst entry'))) 288 | in 289 | T 290 | { t with 291 | entry_size = Obj.magic t.entry_size 292 | ; hashtbl 293 | ; vtable 294 | ; mutation_allowed = true 295 | } 296 | 297 | let map (T t) ~decoder_src ~decoder_dst ~f = 298 | map_poly (T t) ~vtable:t.vtable ~decoder_src ~decoder_dst ~f 299 | 300 | let map_inplace : 301 | type a b c d e. 302 | (a, b, c, d, e) t -> decoder_src:d -> decoder_dst:d -> f:(e -> e) -> unit 303 | = 304 | fun (T t) ~decoder_src ~decoder_dst ~f -> 305 | let v = t.vtable in 306 | with_mutation_disallowed t ~f:(fun () -> 307 | Array.map_inplace t.hashtbl ~f:(fun bucket -> 308 | Bucket.map_inplace t.entry_size bucket ~f:(fun packed -> 309 | let entry = v.packed_entry decoder_src packed in 310 | let entry' = f entry in 311 | v.packed_of_entry decoder_dst entry'))) 312 | 313 | let iter : 314 | type a b c d e. (a, b, c, d, e) t -> decoder:d -> f:(e -> unit) -> unit = 315 | fun (T t) ~decoder ~f -> 316 | let v = t.vtable in 317 | with_mutation_disallowed t ~f:(fun () -> 318 | Array.iter t.hashtbl ~f:(fun bucket -> 319 | Bucket.iter t.entry_size bucket ~f:(fun packed -> 320 | f (v.packed_entry decoder packed)))) 321 | 322 | let iter_keys : 323 | type a b c d e. (a, b, c, d, e) t -> decoder:d -> f:(a -> unit) -> unit = 324 | fun (T t) ~decoder ~f -> 325 | let v = t.vtable in 326 | with_mutation_disallowed t ~f:(fun () -> 327 | Array.iter t.hashtbl ~f:(fun bucket -> 328 | Bucket.iter t.entry_size bucket ~f:(fun packed -> 329 | f (v.packed_key decoder packed)))) 330 | 331 | let exists t ~decoder ~f = 332 | let exception Exit in 333 | match iter t ~decoder ~f:(fun e -> if f e then raise Exit) with 334 | | exception Exit -> true 335 | | () -> false 336 | 337 | let for_all t ~decoder ~f = not (exists t ~decoder ~f:(fun e -> not (f e))) 338 | 339 | let count t ~decoder ~f = 340 | fold t ~decoder ~init:0 ~f:(fun acc e -> if f e then acc + 1 else acc) 341 | 342 | let to_list t ~decoder = 343 | let acc = fold t ~decoder ~init:[] ~f:(fun acc e -> e :: acc) in 344 | List.rev acc 345 | 346 | let to_sorted_seq : type a b c d e. (a, b, c, d, e) t -> decoder:d -> e Seq.t 347 | = 348 | fun (T t) ~decoder -> 349 | let v = t.vtable in 350 | Array.to_seq t.hashtbl 351 | |> Seq.flat_map (fun bucket -> 352 | let arr = 353 | Bucket.to_array t.entry_size bucket 354 | |> Array.map ~f:(fun off -> v.packed_entry decoder off) 355 | in 356 | Array.sort ~cmp:v.entry_compare arr; 357 | Array.to_seq arr) 358 | 359 | let reserve _ = failwith "TODO" 360 | let bucket_count (T t) = Array.length t.hashtbl 361 | 362 | let load_factor (T t) = 363 | Float.of_int t.cardinal /. Float.of_int (Array.length t.hashtbl) 364 | 365 | let invariant : 366 | type a b c d e. 367 | decoder:d -> a Invariant.t -> b Invariant.t -> (a, b, c, d, e) t -> unit = 368 | fun ~decoder invariant_key invariant_data (T t) -> 369 | let v = t.vtable in 370 | for i = 0 to Array.length t.hashtbl - 1 do 371 | Bucket.invariant t.entry_size 372 | (fun packed -> 373 | let entry = v.packed_entry decoder packed in 374 | invariant_key (v.entry_key entry); 375 | invariant_data (v.entry_value entry)) 376 | t.hashtbl.(i) 377 | done; 378 | let real_length = fold ~decoder (T t) ~init:0 ~f:(fun i _ -> i + 1) in 379 | assert (t.cardinal = real_length) 380 | end 381 | 382 | include T 383 | 384 | module No_decoder = struct 385 | include T 386 | 387 | type nonrec ('a, 'b, 'c, 'd) t = ('a, 'b, 'c, unit, 'd) t 388 | 389 | let decoder = () 390 | let find = find ~decoder 391 | let find_and_call = find_and_call ~decoder 392 | let find_exn = find_exn ~decoder 393 | let remove = remove ~decoder 394 | let mem = mem ~decoder 395 | let iter = iter ~decoder 396 | let iter_keys = iter_keys ~decoder 397 | let map = map ~decoder_src:decoder ~decoder_dst:decoder 398 | let map_poly = map_poly ~decoder_src:decoder ~decoder_dst:decoder 399 | let map_inplace = map_inplace ~decoder_src:decoder ~decoder_dst:decoder 400 | let fold = fold ~decoder 401 | let count = count ~decoder 402 | let exists = exists ~decoder 403 | let for_all = for_all ~decoder 404 | let to_list = to_list ~decoder 405 | let to_sorted_seq = to_sorted_seq ~decoder 406 | let add = add ~decoder 407 | let add_exn = add_exn ~decoder 408 | let replace = replace ~decoder 409 | let invariant k v t = invariant ~decoder k v t 410 | end 411 | 412 | (*———————————————————————————————————————————————————————————————————————————— 413 | Copyright (c) 2020–2021 Craig Ferguson 414 | 415 | Permission to use, copy, modify, and/or distribute this software for any 416 | purpose with or without fee is hereby granted, provided that the above 417 | copyright notice and this permission notice appear in all copies. 418 | 419 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 420 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 421 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 422 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 423 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 424 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 425 | DEALINGS IN THE SOFTWARE. 426 | ————————————————————————————————————————————————————————————————————————————*) 427 | -------------------------------------------------------------------------------- /src/hashed_container.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** This module provides a very generic hashtable implementation that allows the 7 | internal memory layout to be customised to particular types. It allows the 8 | user to specify: 9 | 10 | - the type of keys and values in the hashtable; 11 | - the type of {i bindings}: internal representations of key / value pairs 12 | used in the hashtable itself (and the number of words assigned to each 13 | binding). 14 | - the number of in-memory words used to store the keys and values (may be 1, 15 | 2 or 3); 16 | - the representation of internal bindings. 17 | 18 | Together, these allow this implementation to be used in various compact 19 | deployments: 20 | 21 | - as a hash-set (with no redundant internal space for dummy values, as in a 22 | [unit Stdlib.Hashtbl.t]); *) 23 | 24 | include Hashed_container_intf.Intf 25 | 26 | (*———————————————————————————————————————————————————————————————————————————— 27 | Copyright (c) 2020–2021 Craig Ferguson 28 | 29 | Permission to use, copy, modify, and/or distribute this software for any 30 | purpose with or without fee is hereby granted, provided that the above 31 | copyright notice and this permission notice appear in all copies. 32 | 33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 37 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 38 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | DEALINGS IN THE SOFTWARE. 40 | ————————————————————————————————————————————————————————————————————————————*) 41 | -------------------------------------------------------------------------------- /src/hashed_container_bucket.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | open! Import 7 | 8 | (** The representation of buckets uses small lists specialised to a particular 9 | entry size. *) 10 | 11 | module I1 = Immediate_array 12 | module L1 = Uniform_array 13 | module L2 = Uniform_array.Tuple2 14 | module L3 = Uniform_array.Tuple3 15 | 16 | module Entry_size = struct 17 | type 'a immediate = 'a I1.t 18 | type 'a value1 = 'a L1.t 19 | type ('a, 'b) value2 = ('a, 'b) L2.t 20 | type ('a, 'b, 'c) value3 = ('a, 'b, 'c) L3.t 21 | 22 | type (_, _) t = 23 | | Immediate : ('a, 'a immediate) t 24 | | Value1 : ('a, 'a value1) t 25 | | Value2 : ('a * 'b, ('a, 'b) value2) t 26 | | Value3 : ('a * 'b * 'c, ('a, 'b, 'c) value3) t 27 | end 28 | 29 | let empty : type a b. (a, b) Entry_size.t -> b = function 30 | | Immediate -> I1.empty 31 | | Value1 -> L1.empty 32 | | Value2 -> L2.empty 33 | | Value3 -> L3.empty 34 | 35 | let length : type a b. (a, b) Entry_size.t -> b -> int = 36 | fun entry_size t -> 37 | match entry_size with 38 | | Immediate -> I1.length t 39 | | Value1 -> L1.length t 40 | | Value2 -> L2.length t 41 | | Value3 -> L3.length t 42 | 43 | let curry2 f a b = f (a, b) 44 | let curry3 f a b c = f (a, b, c) 45 | 46 | let list_rev_partition3 = 47 | let rec part yes no ~f ~a ~b ~c = function 48 | | [] -> (yes, no) 49 | | x :: l -> 50 | if f ~a ~b ~c x then part ~f ~a ~b ~c (x :: yes) no l 51 | else part ~f ~a ~b ~c yes (x :: no) l 52 | in 53 | fun l ~f ~a ~b ~c -> part ~f ~a ~b ~c [] [] l 54 | 55 | let partition3 : 56 | type a b s1 s2 s3. 57 | (a, b) Entry_size.t 58 | -> b 59 | -> a:s1 60 | -> b:s2 61 | -> c:s3 62 | -> f:(a:s1 -> b:s2 -> c:s3 -> a -> bool) 63 | -> b * b = 64 | fun entry_size t ~a ~b ~c ~f -> 65 | match entry_size with 66 | | Immediate -> ( 67 | match I1.length t with 68 | | 0 -> (I1.empty, I1.empty) 69 | | 1 -> 70 | let elt = I1.unsafe_get t 0 in 71 | if f ~a ~b ~c elt then (I1.singleton elt, I1.empty) 72 | else (I1.empty, I1.singleton elt) 73 | | 2 -> ( 74 | let elt0 = I1.unsafe_get t 0 and elt1 = I1.unsafe_get t 1 in 75 | match (f ~a ~b ~c elt0, f ~a ~b ~c elt1) with 76 | | true, false -> (I1.singleton elt0, I1.singleton elt1) 77 | | false, true -> (I1.singleton elt1, I1.singleton elt0) 78 | | (true as both_left), true | (false as both_left), false -> 79 | let both = I1.create ~len:2 elt0 in 80 | let both = I1.unsafe_set both 1 elt1 in 81 | if both_left then (both, I1.empty) else (I1.empty, both)) 82 | | n when n < Sys.int_size -> 83 | (* If the bucket length is less than the number of bits in an integer 84 | (which is practically always the case for a well-distributed hash 85 | function), we would use the bits of an integer to record 86 | partitioning choices and then allocate arrays of precisely the 87 | correct size. *) 88 | let bitv = ref 0 in 89 | let popcount = ref 0 in 90 | for i = 0 to n - 1 do 91 | let elt = I1.unsafe_get t i in 92 | if f ~a ~b ~c elt then ( 93 | bitv := !bitv lor (1 lsl i); 94 | incr popcount) 95 | done; 96 | let bitv = !bitv in 97 | let left_size = !popcount and right_size = n - !popcount in 98 | let dummy = I1.unsafe_get t 0 in 99 | let left = ref (I1.create ~len:left_size dummy) 100 | and right = ref (I1.create ~len:right_size dummy) in 101 | let added_to_left = ref 0 in 102 | for i = 0 to n - 1 do 103 | match (bitv lsr i) land 1 with 104 | | 1 -> 105 | left := I1.unsafe_set !left !added_to_left (I1.unsafe_get t i); 106 | incr added_to_left 107 | | 0 -> 108 | right := 109 | I1.unsafe_set !right (i - !added_to_left) (I1.unsafe_get t i) 110 | | _ -> assert false 111 | done; 112 | (!left, !right) 113 | | _ -> 114 | let left, right = I1.to_list t |> list_rev_partition3 ~f ~a ~b ~c in 115 | (I1.of_list_rev left, I1.of_list_rev right)) 116 | | Value1 -> 117 | let left, right = L1.to_list t |> list_rev_partition3 ~f ~a ~b ~c in 118 | (L1.of_list_rev left, L1.of_list_rev right) 119 | | Value2 -> 120 | let left, right = L2.to_list t |> list_rev_partition3 ~f ~a ~b ~c in 121 | (L2.of_list_rev left, L2.of_list_rev right) 122 | | Value3 -> 123 | let left, right = L3.to_list t |> list_rev_partition3 ~f ~a ~b ~c in 124 | (L3.of_list_rev left, L3.of_list_rev right) 125 | 126 | exception Exit 127 | 128 | let replace : 129 | type a b d k. 130 | (a, b) Entry_size.t 131 | -> b 132 | -> decoder:d 133 | -> unpack:(d -> a -> k) 134 | -> key:k 135 | -> key_equal:(k -> k -> bool) 136 | -> replace:bool 137 | -> data:a 138 | -> b = 139 | fun entry_size t ~decoder ~unpack ~key ~key_equal ~replace ~data -> 140 | match entry_size with 141 | | Immediate -> ( 142 | let length = I1.length t in 143 | match length with 144 | | 0 -> I1.singleton data 145 | | 1 -> ( 146 | (* Handle the length = 1 case separately because it's an immediate. *) 147 | let x = I1.unsafe_get t 0 in 148 | let key' = unpack decoder x in 149 | match key_equal key key' with 150 | | true -> if replace then I1.singleton data else t 151 | | false -> 152 | let res = I1.create ~len:2 x in 153 | I1.unsafe_set res 0 data) 154 | | _ -> ( 155 | try 156 | for i = 0 to length - 1 do 157 | let x = I1.unsafe_get t i in 158 | let key' = unpack decoder x in 159 | match key_equal key key' with 160 | | true -> 161 | (if replace then 162 | let (_ : _ I1.t) = I1.unsafe_set t i data in 163 | (* We can safely ignore the "new" bucket since it'll always 164 | be physically-equal to [t]: setting in an immediate array 165 | only returns a new value when it has length 1 (and that 166 | case is handled above). *) 167 | ()); 168 | raise_notrace Exit 169 | | false -> () 170 | done; 171 | (* Key was not found: add it to the start *) 172 | let t' = I1.create ~len:(length + 1) data in 173 | let t' = 174 | I1.unsafe_blit ~src:t ~dst:t' ~src_pos:0 ~dst_pos:1 ~len:length 175 | in 176 | t' 177 | with Exit -> t)) 178 | (* TODO: unify these cases *) 179 | | Value1 -> ( 180 | let exception Exit in 181 | try 182 | let length = L1.length t in 183 | for i = 0 to length - 1 do 184 | let x = L1.unsafe_get t i in 185 | let key' = unpack decoder x in 186 | match key_equal key key' with 187 | | true -> 188 | if replace then L1.unsafe_set t i data; 189 | raise_notrace Exit 190 | | false -> () 191 | done; 192 | (* Key was not found: add it to the start *) 193 | let t' = L1.unsafe_create_uninitialized ~len:(length + 1) in 194 | L1.unsafe_blit ~src:t ~dst:t' ~src_pos:0 ~dst_pos:1 ~len:length; 195 | L1.unsafe_set t' 0 data; 196 | t' 197 | with Exit -> t) 198 | | Value2 -> ( 199 | let data1, data2 = data in 200 | let exception Exit in 201 | try 202 | let length = L2.length t in 203 | for i = 0 to length - 1 do 204 | let x = L2.unsafe_get t i in 205 | let key' = unpack decoder x in 206 | match key_equal key key' with 207 | | true -> 208 | if replace then L2.unsafe_set t i data1 data2; 209 | raise_notrace Exit 210 | | false -> () 211 | done; 212 | (* Key was not found: add it to the start *) 213 | let t' = L2.unsafe_create_uninitialized ~len:(length + 1) in 214 | L2.unsafe_blit ~src:t ~dst:t' ~src_pos:0 ~dst_pos:1 ~len:length; 215 | L2.unsafe_set t' 0 data1 data2; 216 | t' 217 | with Exit -> t) 218 | | Value3 -> (* TODO *) assert false 219 | 220 | let cons : type a b. (a, b) Entry_size.t -> a -> b -> b = 221 | fun entry_size x xs -> 222 | match entry_size with 223 | | Immediate -> 224 | let old_len = I1.length xs in 225 | let len = old_len + 1 in 226 | let arr = I1.create ~len x in 227 | let arr = 228 | I1.unsafe_blit ~src:xs ~dst:arr ~src_pos:0 ~dst_pos:1 ~len:old_len 229 | in 230 | arr 231 | | Value1 -> 232 | let old_len = L1.length xs in 233 | let len = old_len + 1 in 234 | let arr = L1.unsafe_create_uninitialized ~len in 235 | L1.unsafe_blit ~src:xs ~dst:arr ~src_pos:0 ~dst_pos:1 ~len:old_len; 236 | L1.unsafe_set arr 0 x; 237 | arr 238 | | Value2 -> 239 | let x, y = x in 240 | let old_len = L2.length xs in 241 | let len = old_len + 1 in 242 | let arr = L2.unsafe_create_uninitialized ~len in 243 | L2.unsafe_blit ~src:xs ~dst:arr ~src_pos:0 ~dst_pos:1 ~len:old_len; 244 | L2.unsafe_set arr 0 x y; 245 | arr 246 | | Value3 -> 247 | let x, y, z = x in 248 | let old_len = L3.length xs in 249 | let len = old_len + 1 in 250 | let arr = L3.unsafe_create_uninitialized ~len in 251 | L3.unsafe_blit ~src:xs ~dst:arr ~src_pos:0 ~dst_pos:1 ~len:old_len; 252 | L3.unsafe_set arr 0 x y z; 253 | arr 254 | 255 | let exists : type a b. (a, b) Entry_size.t -> f:(a -> bool) -> b -> bool = 256 | fun entry_size ~f t -> 257 | match entry_size with 258 | | Immediate -> I1.exists t ~f 259 | | Value1 -> L1.exists t ~f 260 | | Value2 -> L2.exists t ~f:(curry2 f) 261 | | Value3 -> L3.exists t ~f:(curry3 f) 262 | 263 | let map : 264 | type a1 a2 b1 b2. 265 | (a1, b1) Entry_size.t -> (a2, b2) Entry_size.t -> f:(a1 -> a2) -> b1 -> b2 = 266 | fun e1 e2 ~f t -> 267 | match (e1, e2) with 268 | | Immediate, Immediate -> I1.map t ~f 269 | | Value1, Value1 -> L1.map t ~f 270 | | Value2, Value2 -> L2.map t ~f:(curry2 f) 271 | | Value3, Value3 -> L3.map t ~f:(curry3 f) 272 | | (Immediate | Value1 | Value2 | Value3), _ -> 273 | invalid_arg 274 | "Hashed_container_bucket.map: changing bucket implementation during \ 275 | map is unsupported" 276 | 277 | let map_inplace : type a b. (a, b) Entry_size.t -> f:(a -> a) -> b -> b = 278 | fun entry_size ~f t -> 279 | match entry_size with 280 | | Immediate -> I1.map_inplace t ~f 281 | | Value1 -> 282 | L1.map_inplace t ~f; 283 | t 284 | | Value2 -> 285 | L2.map_inplace t ~f:(curry2 f); 286 | t 287 | | Value3 -> 288 | L3.map_inplace t ~f:(curry3 f); 289 | t 290 | 291 | let find_map : 292 | type a b r. (a, b) Entry_size.t -> f:(a -> r option) -> b -> r option = 293 | fun entry_size ~f t -> 294 | match entry_size with 295 | | Immediate -> 296 | let rec aux ~f ~len t i = 297 | if i = len then None 298 | else 299 | match f (I1.unsafe_get t i) with 300 | | Some _ as r -> r 301 | | None -> aux ~f ~len t (i + 1) 302 | in 303 | aux ~f ~len:(I1.length t) t 0 304 | | Value1 -> 305 | let rec aux ~f ~len t i = 306 | if i = len then None 307 | else 308 | match f (L1.unsafe_get t i) with 309 | | Some _ as r -> r 310 | | None -> aux ~f ~len t (i + 1) 311 | in 312 | aux ~f ~len:(L1.length t) t 0 313 | | Value2 -> 314 | let rec aux ~f ~len t i = 315 | if i = len then None 316 | else 317 | match f (L2.unsafe_get_fst t i, L2.unsafe_get_snd t i) with 318 | | Some _ as r -> r 319 | | None -> aux ~f ~len t (i + 1) 320 | in 321 | aux ~f ~len:(L2.length t) t 0 322 | | Value3 -> 323 | let rec aux ~f ~len t i = 324 | if i = len then None 325 | else 326 | match 327 | f 328 | ( L3.unsafe_get_fst t i 329 | , L3.unsafe_get_snd t i 330 | , L3.unsafe_get_thd t i ) 331 | with 332 | | Some _ as r -> r 333 | | None -> aux ~f ~len t (i + 1) 334 | in 335 | aux ~f ~len:(L3.length t) t 0 336 | 337 | let fold_left : 338 | type a b acc. 339 | (a, b) Entry_size.t -> f:(acc -> a -> acc) -> init:acc -> b -> acc = 340 | fun entry_size ~f ~init t -> 341 | match entry_size with 342 | | Immediate -> I1.fold t ~init ~f 343 | | Value1 -> L1.fold t ~init ~f 344 | | Value2 -> L2.fold t ~init ~f:(fun acc a b -> f acc (a, b)) 345 | | Value3 -> L3.fold t ~init ~f:(fun acc a b c -> f acc (a, b, c)) 346 | 347 | let iter : type a b. (a, b) Entry_size.t -> f:(a -> unit) -> b -> unit = 348 | fun entry_size ~f t -> 349 | match entry_size with 350 | | Immediate -> I1.iter ~f t 351 | | Value1 -> L1.iter ~f t 352 | | Value2 -> L2.iter ~f:(curry2 f) t 353 | | Value3 -> L3.iter ~f:(curry3 f) t 354 | 355 | let to_array : type a b. (a, b) Entry_size.t -> b -> a array = 356 | fun entry_size t -> 357 | match entry_size with 358 | | Immediate -> I1.to_array t 359 | | Value1 -> L1.to_array t 360 | | Value2 -> L2.to_array t 361 | | Value3 -> L3.to_array t 362 | 363 | let of_list_rev : type a b. (a, b) Entry_size.t -> a list -> b = 364 | fun entry_size t -> 365 | match entry_size with 366 | | Immediate -> I1.of_list_rev t 367 | | Value1 -> L1.of_list_rev t 368 | | Value2 -> L2.of_list_rev t 369 | | Value3 -> L3.of_list_rev t 370 | 371 | let invariant : type a b. (a, b) Entry_size.t -> a Invariant.t -> b Invariant.t 372 | = 373 | fun entry_size inv_elt t -> 374 | match entry_size with 375 | | Immediate -> I1.invariant inv_elt t 376 | | Value1 -> L1.invariant inv_elt t 377 | | Value2 -> L2.invariant inv_elt t 378 | | Value3 -> L3.invariant inv_elt t 379 | 380 | (*———————————————————————————————————————————————————————————————————————————— 381 | Copyright (c) 2020–2021 Craig Ferguson 382 | 383 | Permission to use, copy, modify, and/or distribute this software for any 384 | purpose with or without fee is hereby granted, provided that the above 385 | copyright notice and this permission notice appear in all copies. 386 | 387 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 388 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 389 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 390 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 391 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 392 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 393 | DEALINGS IN THE SOFTWARE. 394 | ————————————————————————————————————————————————————————————————————————————*) 395 | -------------------------------------------------------------------------------- /src/hashed_container_bucket.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** Extra functions are exposed here when needed by the [Hashed_container] 7 | implementation, and so the API is quite arbitrary by itself. *) 8 | 9 | module Entry_size : sig 10 | type 'a immediate = 'a Immediate_array.t 11 | type 'a value1 = 'a Uniform_array.t 12 | type ('a, 'b) value2 = ('a, 'b) Uniform_array.Tuple2.t 13 | type ('a, 'b, 'c) value3 = ('a, 'b, 'c) Uniform_array.Tuple3.t 14 | 15 | type (_, _) t = 16 | | Immediate : ('a, 'a immediate) t 17 | | Value1 : ('a, 'a value1) t 18 | | Value2 : ('a * 'b, ('a, 'b) value2) t 19 | | Value3 : ('a * 'b * 'c, ('a, 'b, 'c) value3) t 20 | end 21 | 22 | val empty : ('a, 'b) Entry_size.t -> 'b 23 | val length : ('a, 'b) Entry_size.t -> 'b -> int 24 | val cons : ('a, 'b) Entry_size.t -> 'a -> 'b -> 'b 25 | val exists : ('a, 'b) Entry_size.t -> f:('a -> bool) -> 'b -> bool 26 | 27 | val fold_left : 28 | ('a, 'b) Entry_size.t -> f:('acc -> 'a -> 'acc) -> init:'acc -> 'b -> 'acc 29 | 30 | val map : 31 | ('a1, 'b1) Entry_size.t 32 | -> ('a2, 'b2) Entry_size.t 33 | -> f:('a1 -> 'a2) 34 | -> 'b1 35 | -> 'b2 36 | 37 | val map_inplace : ('a, 'b) Entry_size.t -> f:('a -> 'a) -> 'b -> 'b 38 | val iter : ('a, 'b) Entry_size.t -> f:('a -> unit) -> 'b -> unit 39 | val to_array : ('a, 'b) Entry_size.t -> 'b -> 'a array 40 | val of_list_rev : ('a, 'b) Entry_size.t -> 'a list -> 'b 41 | val find_map : ('a, 'b) Entry_size.t -> f:('a -> 'r option) -> 'b -> 'r option 42 | 43 | val partition3 : 44 | ('a, 'b) Entry_size.t 45 | -> 'b 46 | -> a:'s1 47 | -> b:'s2 48 | -> c:'s3 49 | -> f:(a:'s1 -> b:'s2 -> c:'s3 -> 'a -> bool) 50 | -> 'b * 'b 51 | 52 | val replace : 53 | ('a, 'b) Entry_size.t 54 | -> 'b 55 | -> decoder:'d 56 | -> unpack:('d -> 'a -> 'k) 57 | -> key:'k 58 | -> key_equal:('k -> 'k -> bool) 59 | -> replace:bool 60 | -> data:'a 61 | -> 'b 62 | 63 | val invariant : ('a, 'b) Entry_size.t -> 'a Invariant.t -> 'b Invariant.t 64 | 65 | (*———————————————————————————————————————————————————————————————————————————— 66 | Copyright (c) 2020–2021 Craig Ferguson 67 | 68 | Permission to use, copy, modify, and/or distribute this software for any 69 | purpose with or without fee is hereby granted, provided that the above 70 | copyright notice and this permission notice appear in all copies. 71 | 72 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 73 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 74 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 75 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 76 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 77 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 78 | DEALINGS IN THE SOFTWARE. 79 | ————————————————————————————————————————————————————————————————————————————*) 80 | -------------------------------------------------------------------------------- /src/hashed_container_intf.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | module type S = sig 7 | type ('key, 'value, 'kv_packed, 'decoder, 'kv_pair) t 8 | (** The type of hashtables with externally-stored bindings. The type 9 | parameters are as follows: 10 | 11 | - ['key]: the type of {i keys} in the hashtable; 12 | 13 | - ['value]: the type of {i values} in the hashtable; 14 | 15 | - ['kv_packed]: the type of {i packed binding references}. A ['kv_packed] 16 | is conceptually a compact reference to a key / value pair that may be 17 | dereferenced with a ['decoder]. The size of the runtime representation 18 | of _this_ type will determine the 19 | 20 | - ['decoder]: some external state necessary to unpack the internal 21 | bindings before reading them. 22 | 23 | - ['kv_pair]: the type of key / value pairs post decoding. 24 | 25 | Example types: 26 | 27 | {[ 28 | type 'a hashset = ('a, unit, 'a, unit, 'a) 29 | type ('k, 'v) hashtbl = ('k, 'v, 'k * 'v, unit, 'k * 'v) 30 | type ('k, 'v) ext_tbl = ('k, 'v, file_offset, io_reader, 'k * 'v) 31 | ]} *) 32 | 33 | type 'self key 34 | type ('inner, 'k, 'v, 'kv_packed) with_internal_entry 35 | type ('inner, 'k, 'v, 'kv_entry) with_external_entry 36 | type ('k, 'v, 'kv_entry) external_entry 37 | type ('inner, 'd) with_decoder 38 | 39 | val remove : ('k, _, _, 'd, _) t -> ('k key -> unit, 'd) with_decoder 40 | val mem : ('k, _, _, 'd, _) t -> ('k key -> bool, 'd) with_decoder 41 | 42 | val cardinal : (_, _, _, _, _) t -> int 43 | (** [cardinal t] is the number of bindings in [t]. *) 44 | 45 | val clear : (_, _, _, _, _) t -> unit 46 | (** [clear t] removes all bindings from [t]. *) 47 | 48 | (** {2 Iterators} *) 49 | 50 | val iter : 51 | ('k, 'v, _, 'd, 'e) t 52 | -> (f:(unit, 'k, 'v, 'e) with_external_entry -> unit, 'd) with_decoder 53 | 54 | val fold : 55 | ('k, 'v, _, 'd, 'e) t 56 | -> ( f:('acc -> ('acc, 'k, 'v, 'e) with_external_entry) -> init:'acc -> 'acc 57 | , 'd ) 58 | with_decoder 59 | 60 | val for_all : 61 | ('k, 'v, _, 'd, 'e) t 62 | -> (f:(bool, 'k, 'v, 'e) with_external_entry -> bool, 'd) with_decoder 63 | 64 | val exists : 65 | ('k, 'v, _, 'd, 'e) t 66 | -> (f:(bool, 'k, 'v, 'e) with_external_entry -> bool, 'd) with_decoder 67 | 68 | val count : 69 | ('k, 'v, _, 'd, 'e) t 70 | -> (f:(bool, 'k, 'v, 'e) with_external_entry -> int, 'd) with_decoder 71 | 72 | val to_list : 73 | ('k, 'v, _, 'd, 'e) t -> (('k, 'v, 'e) external_entry list, 'd) with_decoder 74 | 75 | val to_sorted_seq : 76 | ('k, 'v, _, 'd, 'e) t 77 | -> (('k, 'v, 'e) external_entry Seq.t, 'd) with_decoder 78 | 79 | val copy : ('a, 'b, 'c, 'd, 'e) t -> ('a, 'b, 'c, 'd, 'e) t 80 | 81 | (** {2:bucket Buckets} 82 | 83 | The container consists of an array of {i buckets}, each containing the 84 | subset of entries that share a particular hash prefix. For a well-chosen 85 | hash function, the number of elements in each bucket should be very small 86 | (usually 0 or 1). 87 | 88 | Most users should not interact with the container buckets directly (and 89 | use the higher-level API above instead). *) 90 | 91 | val bucket_count : (_, _, _, _, _) t -> int 92 | (** Returns the number of {i buckets} in the container. A bucket is a slot in 93 | the container's internal hashtable, to which elements are assigned based 94 | on the hash value of their key. *) 95 | 96 | (** {2:hash-policy Hash policy} *) 97 | 98 | val load_factor : (_, _, _, _, _) t -> float 99 | (** Returns the average number of elements per bucket. That is: 100 | [cardinal / 101 | bucket_count]. Complexity: O(1). *) 102 | 103 | val reserve : (_, _, _, _, _) t -> int -> unit 104 | (** Reserves space for at least the specified number of elements and 105 | regenerates the hash table. *) 106 | end 107 | 108 | module type Set = sig 109 | type 'a t 110 | type 'a key 111 | 112 | val add : 'a t -> 'a key -> unit 113 | val map : 'a t -> f:('a key -> 'a key) -> 'a t 114 | val map_inplace : 'a t -> f:('a key -> 'a key) -> unit 115 | 116 | (** @inline *) 117 | include 118 | S 119 | with type ('a, _, _, _, _) t := 'a t 120 | and type 'a key := 'a key 121 | and type ('a, _, _) external_entry := 'a key 122 | and type ('inner, 'a, _, _) with_external_entry := 'a key -> 'inner 123 | and type ('inner, 'a, _, _) with_internal_entry := 'a key -> 'inner 124 | and type ('inner, _) with_decoder := 'inner 125 | end 126 | 127 | module type Assoc = sig 128 | type ('a, 'b, 'c, 'd, 'e) t 129 | type ('a, 'b) with_decoder 130 | type ('a, 'b, 'c, 'd) with_internal_entry 131 | 132 | val replace : 133 | ('k, 'v, 'kv_p, 'd, _) t 134 | -> ((unit, 'k, 'v, 'kv_p) with_internal_entry, 'd) with_decoder 135 | (** Adds or replaces an existing binding. *) 136 | 137 | type ok_or_duplicate := [ `Ok | `Duplicate ] 138 | 139 | val add : 140 | ('k, 'v, 'kv_p, 'd, _) t 141 | -> ((ok_or_duplicate, 'k, 'v, 'kv_p) with_internal_entry, 'd) with_decoder 142 | 143 | val add_exn : 144 | ('k, 'v, 'kv_p, 'd, _) t 145 | -> ((unit, 'k, 'v, 'kv_p) with_internal_entry, 'd) with_decoder 146 | 147 | val find : ('k, 'v, _, 'd, _) t -> ('k -> 'v option, 'd) with_decoder 148 | (** [find t k] is [Some v] if [k] is bound to [v] in [t], or [None] if no such 149 | binding exists. *) 150 | 151 | val find_and_call : 152 | ('k, 'v, _, 'd, _) t 153 | -> ( 'k 154 | -> if_found:(key:'k -> data:'v -> 'r) 155 | -> if_not_found:('k -> 'r) 156 | -> 'r 157 | , 'd ) 158 | with_decoder 159 | 160 | val find_exn : ('k, 'v, _, 'd, _) t -> ('k -> 'v, 'd) with_decoder 161 | (** [find_exn t d k] is the value to which [k] is bound in [t], if it exists. 162 | 163 | @raise Not_found if there is no binding for [k] in [t]. *) 164 | 165 | val iter_keys : 166 | ('k, _, _, 'd, _) t -> (f:('k -> unit) -> unit, 'd) with_decoder 167 | 168 | include 169 | S 170 | with type ('a, 'b, 'c, 'd, 'e) t := ('a, 'b, 'c, 'd, 'e) t 171 | and type ('a, 'b) with_decoder := ('a, 'b) with_decoder 172 | and type ('a, 'b, 'c, 'd) with_internal_entry := 173 | ('a, 'b, 'c, 'd) with_internal_entry 174 | 175 | val invariant : 176 | ( 'k Invariant.t -> 'v Invariant.t -> ('k, 'v, _, 'd, _) t Invariant.t 177 | , 'd ) 178 | with_decoder 179 | end 180 | 181 | module type Types = sig 182 | type ('key, 'value, 'packed, 'decoder, 'entry) vtable = 183 | { key_hash : 'key -> int 184 | ; key_hash_size : int 185 | ; key_equal : 'key -> 'key -> bool 186 | ; entry_key : 'entry -> 'key 187 | ; entry_value : 'entry -> 'value 188 | ; entry_compare : 'entry -> 'entry -> int 189 | ; packed_key : 'decoder -> 'packed -> 'key 190 | ; packed_entry : 'decoder -> 'packed -> 'entry 191 | ; packed_of_entry : 'decoder -> 'entry -> 'packed 192 | } 193 | end 194 | 195 | module rec Types : Types = Types 196 | 197 | module type Intf = sig 198 | module type S = S 199 | module type Set = Set 200 | module type Assoc = Assoc 201 | 202 | include 203 | Assoc 204 | with type 'self key := 'self 205 | and type ('inner, 'key, _, 'kv_packed) with_internal_entry := 206 | 'key -> 'kv_packed -> 'inner 207 | and type (_, _, 'entry) external_entry := 'entry 208 | and type ('inner, _, _, 'entry) with_external_entry := 'entry -> 'inner 209 | and type ('inner, 'd) with_decoder := decoder:'d -> 'inner 210 | 211 | include Types 212 | 213 | val map : 214 | ('k, 'v, 'kv_p, 'd, 'e) t 215 | -> decoder_src:'d 216 | -> decoder_dst:'d 217 | -> f:('e -> 'e) 218 | -> ('k, 'v, 'kv_p, 'd, 'e) t 219 | 220 | val map_inplace : 221 | ('k, 'v, 'kv_p, 'd, 'e) t 222 | -> decoder_src:'d 223 | -> decoder_dst:'d 224 | -> f:('e -> 'e) 225 | -> unit 226 | 227 | val vtable : ('a, 'b, 'c, 'd, 'e) t -> ('a, 'b, 'c, 'd, 'e) vtable 228 | 229 | val map_poly : 230 | ('k, 'v1, 'kv_p1, 'd1, 'e1) t 231 | -> vtable:('k, 'v2, 'kv_p2, 'd2, 'e2) vtable 232 | -> decoder_src:'d1 233 | -> decoder_dst:'d2 234 | -> f:('e1 -> 'e2) 235 | -> ('k, 'v2, 'kv_p2, 'd2, 'e2) t 236 | 237 | module No_decoder : sig 238 | type nonrec ('a, 'b, 'c, 'd) t = ('a, 'b, 'c, unit, 'd) t 239 | 240 | include 241 | Assoc 242 | with type ('a, 'b, 'c, _, 'e) t := ('a, 'b, 'c, 'e) t 243 | and type 'self key := 'self 244 | and type ('inner, 'key, _, 'kv_packed) with_internal_entry := 245 | 'key -> 'kv_packed -> 'inner 246 | and type (_, _, 'entry) external_entry := 'entry 247 | and type ('inner, _, _, 'entry) with_external_entry := 'entry -> 'inner 248 | and type ('inner, _) with_decoder := 'inner 249 | 250 | val map : ('k, 'v, 'kv_p, 'e) t -> f:('e -> 'e) -> ('k, 'v, 'kv_p, 'e) t 251 | val map_inplace : ('k, 'v, 'kv_p, 'e) t -> f:('e -> 'e) -> unit 252 | 253 | val map_poly : 254 | ('k, 'v1, 'kv_p1, 'e1) t 255 | -> vtable:('k, 'v2, 'kv_p2, unit, 'e2) vtable 256 | -> f:('e1 -> 'e2) 257 | -> ('k, 'v2, 'kv_p2, 'e2) t 258 | end 259 | 260 | (** {2 Construction} *) 261 | 262 | module Entry_size : sig 263 | type 'a immediate 264 | type 'a value1 265 | type ('a, 'b) value2 266 | type ('a, 'b, 'c) value3 267 | 268 | type (_, _) t = 269 | | Immediate : ('a, 'a immediate) t 270 | | Value1 : ('a, 'a value1) t 271 | | Value2 : ('a * 'b, ('a, 'b) value2) t 272 | | Value3 : ('a * 'b * 'c, ('a, 'b, 'c) value3) t 273 | end 274 | 275 | val create : 276 | vtable:('key, 'value, 'kv_packed, 'decoder, 'kv_pair) vtable 277 | -> initial_capacity:int 278 | -> entry_size:('kv_packed, _) Entry_size.t 279 | -> unit 280 | -> ('key, 'value, 'kv_packed, 'decoder, 'kv_pair) t 281 | end 282 | 283 | (*———————————————————————————————————————————————————————————————————————————— 284 | Copyright (c) 2020–2021 Craig Ferguson 285 | 286 | Permission to use, copy, modify, and/or distribute this software for any 287 | purpose with or without fee is hereby granted, provided that the above 288 | copyright notice and this permission notice appear in all copies. 289 | 290 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 291 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 292 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 293 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 294 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 295 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 296 | DEALINGS IN THE SOFTWARE. 297 | ————————————————————————————————————————————————————————————————————————————*) 298 | -------------------------------------------------------------------------------- /src/hashset.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | include Hashed_container.No_decoder 7 | 8 | type nonrec 'a t = ('a, unit, 'a, 'a) t 9 | 10 | let add t k = replace t k k 11 | 12 | module type Key = sig 13 | type t 14 | 15 | val equal : t -> t -> bool 16 | val compare : t -> t -> int 17 | val hash : t -> int 18 | val hash_size : int 19 | end 20 | 21 | let vtable_of_key : 22 | type k. 23 | (module Key with type t = k) 24 | -> (k, unit, k, unit, k) Hashed_container.vtable = 25 | fun (module Key) -> 26 | { key_hash = Key.hash 27 | ; key_hash_size = Key.hash_size 28 | ; key_equal = Key.equal 29 | ; entry_key = (fun k -> k) 30 | ; entry_value = (fun _ -> ()) 31 | ; entry_compare = Stdlib.compare (* XXX: polymorphic comparison *) 32 | ; packed_key = (fun () k -> k) 33 | ; packed_entry = (fun () k -> k) 34 | ; packed_of_entry = (fun () k -> k) 35 | } 36 | 37 | let create_generic (type a) ~(entry_size : (a, _) Hashed_container.Entry_size.t) 38 | ~initial_capacity (key : (module Key with type t = a)) : a t = 39 | Hashed_container.create ~vtable:(vtable_of_key key) ~entry_size 40 | ~initial_capacity () 41 | 42 | let create (type a) ~initial_capacity (module Key : Key with type t = a) : a t = 43 | create_generic ~entry_size:Value1 ~initial_capacity (module Key) 44 | 45 | module Immediate = struct 46 | include Hashed_container.No_decoder 47 | 48 | type nonrec 'a t = ('a, unit, 'a, 'a) t 49 | 50 | let add t k = replace t k k 51 | let entry_size = Hashed_container.Entry_size.Immediate 52 | 53 | let create (type a) ~initial_capacity (module Key : Key with type t = a) : a t 54 | = 55 | create_generic ~entry_size ~initial_capacity (module Key) 56 | end 57 | 58 | module Int = struct 59 | include Immediate 60 | 61 | type nonrec t = int t 62 | 63 | module Key = struct 64 | include Stdlib.Int 65 | 66 | let hash = Stdlib.Hashtbl.hash 67 | let hash_size = 30 68 | end 69 | 70 | let create ~initial_capacity () : t = 71 | create_generic ~entry_size:Immediate.entry_size ~initial_capacity 72 | (module Key) 73 | end 74 | 75 | module Immediate64 = struct 76 | include Hashed_container.No_decoder 77 | 78 | type nonrec 'a t = ('a, unit, 'a, 'a) t 79 | 80 | let add t k = replace t k k 81 | 82 | type _ boxed_entry_size = 83 | | E : ('a, _) Hashed_container.Entry_size.t -> 'a boxed_entry_size 84 | [@@unboxed] 85 | 86 | let entry_size = if Sys.word_size = 64 then E Immediate else E Value1 87 | 88 | let create (type a) ~initial_capacity (module Key : Key with type t = a) : a t 89 | = 90 | let (E entry_size) = entry_size in 91 | create_generic ~entry_size ~initial_capacity (module Key) 92 | end 93 | 94 | module Fixed_size_string = Hashset_fixed_size_string 95 | 96 | module Internal = struct 97 | type nonrec 'a t = 'a t 98 | 99 | let repr = Type_equality.Refl 100 | end 101 | 102 | (*———————————————————————————————————————————————————————————————————————————— 103 | Copyright (c) 2020–2021 Craig Ferguson 104 | 105 | Permission to use, copy, modify, and/or distribute this software for any 106 | purpose with or without fee is hereby granted, provided that the above 107 | copyright notice and this permission notice appear in all copies. 108 | 109 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 110 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 111 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 112 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 113 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 114 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 115 | DEALINGS IN THE SOFTWARE. 116 | ————————————————————————————————————————————————————————————————————————————*) 117 | -------------------------------------------------------------------------------- /src/hashset.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** A polymorphic hashset of elements of arbitrary type. For more 7 | memory-efficient implementations for specific element types, see the 8 | provided {{!type-specialisations} type specialisations}. *) 9 | 10 | type 'a t 11 | 12 | module type Key = sig 13 | type t 14 | 15 | val equal : t -> t -> bool 16 | val compare : t -> t -> int 17 | val hash : t -> int 18 | val hash_size : int 19 | end 20 | 21 | val create : initial_capacity:int -> (module Key with type t = 'a) -> 'a t 22 | 23 | (** @inline *) 24 | include Hashed_container.Set with type 'a t := 'a t and type 'a key := 'a 25 | 26 | module Internal : 27 | Internal.S1 28 | with type 'a outer_t := 'a t 29 | and type 'a t = ('a, unit, 'a, unit, 'a) Hashed_container.t 30 | 31 | (** {1:type-specialisations Type specialisations} *) 32 | 33 | module Immediate : sig 34 | (** An hashset of elements with an {i immediate} runtime representation (such 35 | that [Obj.is_int] always holds). Attempting to store non-immediate values 36 | in this hashset will raise an exception. 37 | 38 | For [int] elements, one can use {!Int} directly. 39 | 40 | See {{!implementation} below} for an explanation of the implementation. *) 41 | 42 | include Hashed_container.Set with type 'a key := 'a 43 | (** @inline *) 44 | 45 | val create : initial_capacity:int -> (module Key with type t = 'a) -> 'a t 46 | 47 | (** {1:implementation Implementation details} 48 | 49 | Restricting the elements to always be immediates allows a more efficient 50 | implementation of the hashset in which buckets of size 1 can be stored 51 | directly in the parent array (rather than allocating a separate heap block 52 | for the singleton bucket, as done by the standard implementation). Buckets 53 | with more than a single element still use separate chaining, with an 54 | overhead of two words. 55 | 56 | For example, consider the following hashset of the characters 57 | ['a' ... 'd']: 58 | 59 | {v 60 | ┌─────┐ 61 | │ hdr │ 62 | ├─────┤ 63 | │ { } │ 64 | ├─────┤ 65 | │ 'a' │ 66 | ├─────┤ ┌─────┬─────┬─────┐ 67 | │ ┄┄┄┼┄┄┄>│ hdr │ 'c' │ 'b' │ 68 | ├─────┤ └─────┴─────┴─────┘ 69 | │ 'd' │ 70 | ├─────┤ 71 | │ { } │ 72 | └─────┘ 73 | v} 74 | 75 | For typical load factors, inlining singleton buckets into the parent array 76 | is a considerable memory reduction (~20%), and avoids some unnecessary 77 | allocations. *) 78 | end 79 | 80 | (** [Immediate64] is like [Immediate] but for types that are only guaranteed to 81 | have an immediate representation when [Sys.word_size = 64], such as 82 | [Int63.t]. *) 83 | module Immediate64 : sig 84 | include Hashed_container.Set with type 'a key := 'a 85 | (** @inline *) 86 | 87 | val create : initial_capacity:int -> (module Key with type t = 'a) -> 'a t 88 | end 89 | 90 | module Int : sig 91 | type t = int Immediate.t 92 | 93 | include Hashed_container.Set with type _ t := t and type _ key := int 94 | 95 | val create : initial_capacity:int -> unit -> t 96 | end 97 | 98 | module Fixed_size_string = Hashset_fixed_size_string 99 | 100 | (*———————————————————————————————————————————————————————————————————————————— 101 | Copyright (c) 2020–2021 Craig Ferguson 102 | 103 | Permission to use, copy, modify, and/or distribute this software for any 104 | purpose with or without fee is hereby granted, provided that the above 105 | copyright notice and this permission notice appear in all copies. 106 | 107 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 108 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 109 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 110 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 111 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 112 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 113 | DEALINGS IN THE SOFTWARE. 114 | ————————————————————————————————————————————————————————————————————————————*) 115 | -------------------------------------------------------------------------------- /src/hashset_fixed_size_string.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | open! Import 7 | 8 | (* TODO: change the implementation of this module to use [Hashed_container *) 9 | 10 | module Bucket = struct 11 | include Immediate_array 12 | 13 | let cons x xs = 14 | let old_len = length xs in 15 | let len = old_len + 1 in 16 | let dst = create ~len x in 17 | let dst = unsafe_blit ~src:xs ~dst ~src_pos:0 ~dst_pos:1 ~len:old_len in 18 | dst 19 | end 20 | 21 | (* String elements are stored in an arena (to avoid header words + padding), 22 | and we keep a hash-set of pointers into the arena. *) 23 | type t = { arena : Arena.t; mutable hashset : Arena.id Bucket.t array } 24 | 25 | let hash_elt : string -> int = Hashtbl.hash 26 | 27 | let arena_capacity ~bucket_count = 28 | (* Expand the hashset when there are ~2 elements per bucket *) 29 | 2 * bucket_count 30 | 31 | let create ~elt_length ~initial_capacity = 32 | let bucket_count = max 1 (initial_capacity / 2) in 33 | let hashset = Array.make bucket_count Bucket.empty in 34 | let arena = 35 | Arena.create ~elt_length ~initial_capacity:(arena_capacity ~bucket_count) 36 | in 37 | { hashset; arena } 38 | 39 | let elt_index t elt = hash_elt elt mod Array.length t.hashset 40 | 41 | let mem t elt = 42 | let bucket = t.hashset.(elt_index t elt) in 43 | Bucket.exists ~f:(fun id -> Arena.elt_equal t.arena id elt) bucket 44 | 45 | let iter_hashset hashset f = Array.iter ~f:(Bucket.iter ~f) hashset 46 | 47 | let resize t = 48 | (* Scale the number of hashset buckets. *) 49 | let new_bucket_count = (2 * Array.length t.hashset) + 1 in 50 | let new_hashset = Array.make new_bucket_count Bucket.empty in 51 | iter_hashset t.hashset (fun index -> 52 | let elt = Arena.dereference t.arena index in 53 | let new_index = hash_elt elt mod new_bucket_count in 54 | new_hashset.(new_index) <- Bucket.cons index new_hashset.(new_index)); 55 | t.hashset <- new_hashset; 56 | (* Scale the arena size. *) 57 | Arena.expand t.arena (arena_capacity ~bucket_count:new_bucket_count) 58 | 59 | let add t elt = 60 | if Arena.is_full t.arena then resize t; 61 | let arena_idx = Arena.allocate t.arena elt in 62 | (* Add the arena offset to the hashset. *) 63 | let elt_idx = elt_index t elt in 64 | t.hashset.(elt_idx) <- Bucket.cons arena_idx t.hashset.(elt_idx) 65 | 66 | (*———————————————————————————————————————————————————————————————————————————— 67 | Copyright (c) 2020–2021 Craig Ferguson 68 | 69 | Permission to use, copy, modify, and/or distribute this software for any 70 | purpose with or without fee is hereby granted, provided that the above 71 | copyright notice and this permission notice appear in all copies. 72 | 73 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 74 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 75 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 76 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 77 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 78 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 79 | DEALINGS IN THE SOFTWARE. 80 | ————————————————————————————————————————————————————————————————————————————*) 81 | -------------------------------------------------------------------------------- /src/hashset_fixed_size_string.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | type t 7 | (** A set of strings of some fixed length. *) 8 | 9 | val create : elt_length:int -> initial_capacity:int -> t 10 | (** [create ~elt_length:len ~initial_capacity:n] is a set of strings of length 11 | [len], capable of storing [n] values without internal reallocation. *) 12 | 13 | val add : t -> string -> unit 14 | (** [add t elt] adds [elt] to [t]. *) 15 | 16 | val mem : t -> string -> bool 17 | (** [mem t elt] is true iff the string [elt] has been added to [t]. *) 18 | 19 | (*———————————————————————————————————————————————————————————————————————————— 20 | Copyright (c) 2020–2021 Craig Ferguson 21 | 22 | Permission to use, copy, modify, and/or distribute this software for any 23 | purpose with or without fee is hereby granted, provided that the above 24 | copyright notice and this permission notice appear in all copies. 25 | 26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 29 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 32 | DEALINGS IN THE SOFTWARE. 33 | ————————————————————————————————————————————————————————————————————————————*) 34 | -------------------------------------------------------------------------------- /src/hashtbl.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | include Hashed_container.No_decoder 7 | 8 | type nonrec ('k, 'v) t = ('k, 'v, 'k * 'v, 'k * 'v) t 9 | 10 | let replace t ~key ~data = replace t key (key, data) 11 | let add t ~key ~data = add t key (key, data) 12 | let add_exn t ~key ~data = add_exn t key (key, data) 13 | let iter t ~f = iter t ~f:(fun (key, data) -> f ~key ~data) 14 | let map_inplace t ~f = map_inplace t ~f:(fun (key, data) -> (key, f data)) 15 | let fold t ~f ~init = fold t ~f:(fun acc (key, data) -> f acc ~key ~data) ~init 16 | let exists t ~f = exists t ~f:(fun (key, data) -> f ~key ~data) 17 | let for_all t ~f = for_all t ~f:(fun (key, data) -> f ~key ~data) 18 | let count t ~f = count t ~f:(fun (key, data) -> f ~key ~data) 19 | 20 | module type Key = sig 21 | type t 22 | 23 | val equal : t -> t -> bool 24 | val hash : t -> int 25 | val hash_size : int 26 | end 27 | 28 | let vtable_of_key : 29 | type k v. 30 | (module Key with type t = k) 31 | -> (k, v, k * v, unit, k * v) Hashed_container.vtable = 32 | fun (module Key) -> 33 | { key_hash = Key.hash 34 | ; key_hash_size = Key.hash_size 35 | ; key_equal = Key.equal 36 | ; entry_key = (fun (k, _) -> k) 37 | ; entry_value = (fun (_, v) -> v) 38 | ; entry_compare = Stdlib.compare (* XXX: polymorphic comparison *) 39 | ; packed_key = (fun () (k, _) -> k) 40 | ; packed_entry = (fun () kv -> kv) 41 | ; packed_of_entry = (fun () kv -> kv) 42 | } 43 | 44 | let map : type k v1 v2. (k, v1) t -> f:(v1 -> v2) -> (k, v2) t = 45 | fun t ~f -> 46 | let vtable = Hashed_container.vtable t in 47 | let key : (module Key with type t = k) = 48 | (module struct 49 | type t = k 50 | 51 | let equal = vtable.key_equal 52 | let hash = vtable.key_hash 53 | let hash_size = vtable.key_hash_size 54 | end) 55 | in 56 | map_poly t ~vtable:(vtable_of_key key) ~f:(fun (key, data) -> (key, f data)) 57 | 58 | let entry_size = Hashed_container.Entry_size.Value2 59 | 60 | let create ~initial_capacity (type key value) 61 | (key : (module Key with type t = key)) : (key, value) t = 62 | Hashed_container.create ~initial_capacity ~vtable:(vtable_of_key key) 63 | ~entry_size () 64 | 65 | let create_poly ~initial_capacity () (type key value) : (key, value) t = 66 | let key : (module Key with type t = key) = 67 | (module struct 68 | type t = key 69 | 70 | let equal = ( = ) 71 | let hash = Stdlib.Hashtbl.hash 72 | let hash_size = 30 73 | end) 74 | in 75 | Hashed_container.create ~initial_capacity ~vtable:(vtable_of_key key) 76 | ~entry_size () 77 | 78 | module Internal = struct 79 | type nonrec ('a, 'b) t = ('a, 'b) t 80 | 81 | let repr = Type_equality.Refl 82 | end 83 | 84 | (*———————————————————————————————————————————————————————————————————————————— 85 | Copyright (c) 2020–2021 Craig Ferguson 86 | 87 | Permission to use, copy, modify, and/or distribute this software for any 88 | purpose with or without fee is hereby granted, provided that the above 89 | copyright notice and this permission notice appear in all copies. 90 | 91 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 92 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 93 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 94 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 95 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 96 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 97 | DEALINGS IN THE SOFTWARE. 98 | ————————————————————————————————————————————————————————————————————————————*) 99 | -------------------------------------------------------------------------------- /src/hashtbl.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | type ('k, 'v) t 7 | 8 | (** @inline *) 9 | include 10 | Hashed_container.Assoc 11 | with type ('k, 'v, _, _, _) t := ('k, 'v) t 12 | and type 'k key := 'k 13 | and type ('k, 'v, _) external_entry := 'k * 'v 14 | and type ('inner, 'k, 'v, _) with_external_entry := 15 | key:'k -> data:'v -> 'inner 16 | and type ('inner, 'k, 'v, _) with_internal_entry := 17 | key:'k -> data:'v -> 'inner 18 | and type ('r, _) with_decoder := 'r 19 | 20 | val map : ('k, 'v1) t -> f:('v1 -> 'v2) -> ('k, 'v2) t 21 | val map_inplace : ('k, 'v) t -> f:('v -> 'v) -> unit 22 | 23 | module type Key = sig 24 | type t 25 | 26 | val equal : t -> t -> bool 27 | val hash : t -> int 28 | val hash_size : int 29 | end 30 | 31 | val create : initial_capacity:int -> (module Key with type t = 'k) -> ('k, _) t 32 | val create_poly : initial_capacity:int -> unit -> (_, _) t 33 | 34 | module Internal : 35 | Internal.S2 36 | with type ('a, 'b) outer_t := ('a, 'b) t 37 | and type ('a, 'b) t = ('a, 'b, 'a * 'b, unit, 'a * 'b) Hashed_container.t 38 | 39 | (*———————————————————————————————————————————————————————————————————————————— 40 | Copyright (c) 2020–2021 Craig Ferguson 41 | 42 | Permission to use, copy, modify, and/or distribute this software for any 43 | purpose with or without fee is hereby granted, provided that the above 44 | copyright notice and this permission notice appear in all copies. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 49 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 51 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 52 | DEALINGS IN THE SOFTWARE. 53 | ————————————————————————————————————————————————————————————————————————————*) 54 | -------------------------------------------------------------------------------- /src/immediate_array.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | type 'a t = ('a, 'a array) Obj_either.t 4 | 5 | let invariant invariant_elt t = 6 | match Obj_either.inspect t with 7 | | Immediate -> invariant_elt (Obj_either.get_immediate_exn t) 8 | | Value -> 9 | let arr = Obj_either.get_value_exn t in 10 | assert (Array.length arr <> 1); 11 | Array.iter ~f:invariant_elt arr 12 | 13 | let to_array t = 14 | match Obj_either.inspect t with 15 | | Immediate -> [| Obj_either.get_immediate_exn t |] 16 | | Value -> Obj_either.get_value_exn t 17 | 18 | let of_list = function 19 | | [ x ] -> Obj_either.of_immediate x 20 | | l -> Obj_either.of_value (Array.of_list l) 21 | 22 | let of_list_rev = function 23 | | [ x ] -> Obj_either.of_immediate x 24 | | [] -> Obj_either.of_value [||] 25 | | a :: l -> 26 | let len = 1 + List.length l in 27 | let arr = Array.make len a in 28 | let r = ref l in 29 | (* We start at [len - 2] because we already put [a] at [t.(len - 1)]. *) 30 | for i = len - 2 downto 0 do 31 | match !r with 32 | | [] -> assert false 33 | | a :: l -> 34 | Array.unsafe_set arr i a; 35 | r := l 36 | done; 37 | Obj_either.of_value arr 38 | 39 | let to_list t = 40 | match Obj_either.inspect t with 41 | | Immediate -> [ Obj_either.get_immediate_exn t ] 42 | | Value -> Array.to_list (Obj_either.get_value_exn t) 43 | 44 | let iter t ~f = 45 | match Obj_either.inspect t with 46 | | Immediate -> f (Obj_either.get_immediate_exn t) 47 | | Value -> Array.iter ~f (Obj_either.get_value_exn t) 48 | 49 | let map t ~f = 50 | match Obj_either.inspect t with 51 | | Immediate -> Obj_either.of_immediate (f (Obj_either.get_immediate_exn t)) 52 | | Value -> Obj_either.of_value (Array.map ~f (Obj_either.get_value_exn t)) 53 | 54 | let map_inplace t ~f = 55 | match Obj_either.inspect t with 56 | | Immediate -> Obj_either.of_immediate (f (Obj_either.get_immediate_exn t)) 57 | | Value -> 58 | Array.map_inplace ~f (Obj_either.get_value_exn t); 59 | t 60 | 61 | let fold t ~f ~init = 62 | match Obj_either.inspect t with 63 | | Immediate -> f init (Obj_either.get_immediate_exn t) 64 | | Value -> Array.fold_left ~f ~init (Obj_either.get_value_exn t) 65 | 66 | let exists t ~f = 67 | match Obj_either.inspect t with 68 | | Immediate -> f (Obj_either.get_immediate_exn t) 69 | | Value -> Array.exists ~f (Obj_either.get_value_exn t) 70 | 71 | let length t = 72 | match Obj_either.inspect t with 73 | | Immediate -> 1 74 | | Value -> Array.length (Obj_either.get_value_exn t) 75 | 76 | let unsafe_get t i = 77 | match Obj_either.inspect t with 78 | | Immediate -> Obj_either.get_immediate_exn t 79 | | Value -> Array.unsafe_get (Obj_either.get_value_exn t) i 80 | 81 | let unsafe_set t i x = 82 | match Obj_either.inspect t with 83 | | Immediate -> Obj_either.of_immediate x 84 | | Value -> 85 | Array.unsafe_set (Obj_either.get_value_exn t) i x; 86 | t 87 | 88 | let unsafe_blit ~src ~src_pos ~dst ~dst_pos ~len = 89 | if len = 0 then dst 90 | else 91 | match Obj_either.inspect src with 92 | | Immediate -> 93 | let elt = Obj_either.get_immediate_exn src in 94 | assert (src_pos = 0); 95 | assert (len = 1); 96 | unsafe_set dst dst_pos elt 97 | | Value -> ( 98 | let src = Obj_either.get_value_exn src in 99 | match Obj_either.inspect dst with 100 | | Immediate -> 101 | Format.eprintf "{ src_pos = %d; dst_pos = %d }@." src_pos dst_pos; 102 | assert (dst_pos = 0); 103 | assert (len = 1); 104 | Obj_either.of_immediate (Array.unsafe_get src src_pos) 105 | | Value -> 106 | let dst' = Obj_either.get_value_exn dst in 107 | Array.blit ~src ~src_pos ~dst:dst' ~dst_pos ~len; 108 | dst) 109 | 110 | let singleton x = Obj_either.of_immediate x 111 | 112 | let create ~len x = 113 | if len = 1 then Obj_either.of_immediate x 114 | else Obj_either.of_value (Array.make len x) 115 | 116 | (** Here we use The Force to circumvent the value restriction. This is safe for 117 | the same reason that [Array.empty : 'a. 'a t] is safe: an empty array can 118 | never hold a value of the chosen type, so the type parameter is effectively 119 | phantom. *) 120 | let empty = Obj.magic [||] 121 | -------------------------------------------------------------------------------- /src/immediate_array.mli: -------------------------------------------------------------------------------- 1 | (** An {i immediate} array is an array of elements that {i always} have an 2 | immediate runtime representation (i.e. [Obj.is_int] always holds). 3 | 4 | This requirement enables an array implementation that can represent 5 | singleton arrays as immediates (using 1 word rather than 3). A consequence 6 | of this is that the API is not purely mutable: functions that "mutate" an 7 | array must return that array (to cover the case of the array being an 8 | immediate singleton). 9 | 10 | This module is not exposed for external use. *) 11 | 12 | type 'a t 13 | 14 | val empty : _ t 15 | val create : len:int -> 'a -> 'a t 16 | val to_list : 'a t -> 'a list 17 | val of_list : 'a list -> 'a t 18 | val of_list_rev : 'a list -> 'a t 19 | val iter : 'a t -> f:('a -> unit) -> unit 20 | val map : 'a t -> f:('a -> 'b) -> 'b t 21 | val map_inplace : 'a t -> f:('a -> 'a) -> 'a t 22 | val fold : 'a t -> f:('acc -> 'a -> 'acc) -> init:'acc -> 'acc 23 | val exists : 'a t -> f:('a -> bool) -> bool 24 | val to_array : 'a t -> 'a array 25 | val length : _ t -> int 26 | val singleton : 'a -> 'a t 27 | 28 | val unsafe_blit : 29 | src:'a t -> src_pos:int -> dst:'a t -> dst_pos:int -> len:int -> 'a t 30 | 31 | val unsafe_set : 'a t -> int -> 'a -> 'a t 32 | val unsafe_get : 'a t -> int -> 'a 33 | 34 | include Invariant.S1 with type 'a t := 'a t 35 | -------------------------------------------------------------------------------- /src/import.ml: -------------------------------------------------------------------------------- 1 | include StdLabels 2 | include MoreLabels 3 | 4 | module Array = struct 5 | include Array 6 | 7 | let map_inplace t ~f = 8 | for i = 0 to Array.length t - 1 do 9 | Array.unsafe_set t i (f (Array.unsafe_get t i)) 10 | done 11 | end 12 | -------------------------------------------------------------------------------- /src/internal.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | module type S0 = sig 7 | type t 8 | type outer_t 9 | 10 | val repr : (outer_t, t) Type_equality.t 11 | end 12 | 13 | module type S1 = sig 14 | type 'a t 15 | type 'a outer_t 16 | 17 | val repr : ('a outer_t, 'a t) Type_equality.t 18 | end 19 | 20 | module type S1_plus = sig 21 | type +'a t 22 | type +'a outer_t 23 | 24 | val repr : ('a outer_t, 'a t) Type_equality.t 25 | end 26 | 27 | module type S2 = sig 28 | type ('a, 'b) t 29 | type ('a, 'b) outer_t 30 | 31 | val repr : (('a, 'b) outer_t, ('a, 'b) t) Type_equality.t 32 | end 33 | 34 | module type S5 = sig 35 | type ('a, 'b, 'c, 'd, 'e) t 36 | type ('a, 'b, 'c, 'd, 'e) outer_t 37 | 38 | val repr : 39 | (('a, 'b, 'c, 'd, 'e) outer_t, ('a, 'b, 'c, 'd, 'e) t) Type_equality.t 40 | end 41 | 42 | (*———————————————————————————————————————————————————————————————————————————— 43 | Copyright (c) 2020–2021 Craig Ferguson 44 | 45 | Permission to use, copy, modify, and/or distribute this software for any 46 | purpose with or without fee is hereby granted, provided that the above 47 | copyright notice and this permission notice appear in all copies. 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 52 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 54 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 55 | DEALINGS IN THE SOFTWARE. 56 | ————————————————————————————————————————————————————————————————————————————*) 57 | -------------------------------------------------------------------------------- /src/invariant.ml: -------------------------------------------------------------------------------- 1 | include Invariant_intf 2 | -------------------------------------------------------------------------------- /src/invariant.mli: -------------------------------------------------------------------------------- 1 | include Invariant_intf.Intf 2 | (** @inline *) 3 | -------------------------------------------------------------------------------- /src/invariant_intf.ml: -------------------------------------------------------------------------------- 1 | type 'a t = 'a -> unit 2 | type 'a inv = 'a t 3 | 4 | module type S = sig 5 | type t 6 | 7 | val invariant : t inv 8 | end 9 | 10 | module type S1 = sig 11 | type 'a t 12 | 13 | val invariant : 'a inv -> 'a t inv 14 | end 15 | 16 | module type S2 = sig 17 | type ('a, 'b) t 18 | 19 | val invariant : 'a inv -> 'b inv -> ('a, 'b) t inv 20 | end 21 | 22 | module type S3 = sig 23 | type ('a, 'b, 'c) t 24 | 25 | val invariant : 'a inv -> 'b inv -> 'c inv -> ('a, 'b, 'c) t inv 26 | end 27 | 28 | module type Intf = sig 29 | type 'a t = 'a -> unit 30 | 31 | module type S = S 32 | module type S1 = S1 33 | module type S2 = S2 34 | module type S3 = S3 35 | end 36 | -------------------------------------------------------------------------------- /src/obj_array.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2016–2020 Jane Street Group, LLC 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** Code in this module has been extracted from Jane Street's [base] library, 7 | with minimal modifications. *) 8 | 9 | open! Import 10 | 11 | (* We maintain the property that all values of type [t] do not have the tag 12 | [double_array_tag]. Some functions below assume this in order to avoid 13 | testing the tag, and will segfault if this property doesn't hold. *) 14 | type t = Obj.t array 15 | 16 | let invariant t = assert (Obj.tag (Obj.repr t) <> Obj.double_array_tag) 17 | let length = Array.length 18 | 19 | let swap t i j = 20 | let tmp = Array.get t i in 21 | Array.set t i (Array.get t j); 22 | Array.set t j tmp 23 | 24 | let zero_obj = Obj.repr (0 : int) 25 | 26 | (* We call [Array.create] with a value that is not a float so that the array doesn't get 27 | tagged with [Double_array_tag]. *) 28 | let create_zero ~len = Array.make len zero_obj 29 | 30 | let create ~len x = 31 | (* If we can, use [Array.create] directly. *) 32 | if Obj.tag x <> Obj.double_tag then Array.make len x 33 | else 34 | (* Otherwise use [create_zero] and set the contents *) 35 | let t = create_zero ~len in 36 | let x = Sys.opaque_identity x in 37 | for i = 0 to len - 1 do 38 | Array.unsafe_set t i x 39 | done; 40 | t 41 | 42 | let empty = [||] 43 | 44 | type not_a_float = Not_a_float_0 | Not_a_float_1 of int 45 | 46 | let _not_a_float_0 = Not_a_float_0 47 | let _not_a_float_1 = Not_a_float_1 42 48 | 49 | let get t i = 50 | (* Make the compiler believe [t] is an array not containing floats so it does not check 51 | if [t] is tagged with [Double_array_tag]. It is NOT ok to use [int array] since (if 52 | this function is inlined and the array contains in-heap boxed values) wrong register 53 | typing may result, leading to a failure to register necessary GC roots. *) 54 | Obj.repr ((Obj.magic (t : t) : not_a_float array).(i) : not_a_float) 55 | 56 | let[@inline always] unsafe_get t i = 57 | (* Make the compiler believe [t] is an array not containing floats so it does not check 58 | if [t] is tagged with [Double_array_tag]. *) 59 | Obj.repr 60 | (Array.unsafe_get (Obj.magic (t : t) : not_a_float array) i : not_a_float) 61 | 62 | let[@inline always] unsafe_set_with_caml_modify t i obj = 63 | (* Same comment as [unsafe_get]. Sys.opaque_identity prevents the compiler from 64 | potentially wrongly guessing the type of the array based on the type of element, that 65 | is prevent the implication: (Obj.tag obj = Obj.double_tag) => (Obj.tag t = 66 | Obj.double_array_tag) which flambda has tried in the past (at least that's assuming 67 | the compiler respects Sys.opaque_identity, which is not always the case). *) 68 | Array.unsafe_set 69 | (Obj.magic (t : t) : not_a_float array) 70 | i 71 | (Obj.obj (Sys.opaque_identity obj) : not_a_float) 72 | 73 | let[@inline always] unsafe_set_int_assuming_currently_int t i int = 74 | (* This skips [caml_modify], which is OK if both the old and new values are integers. *) 75 | Array.unsafe_set (Obj.magic (t : t) : int array) i (Sys.opaque_identity int) 76 | 77 | (* For [set] and [unsafe_set], if a pointer is involved, we first do a physical-equality 78 | test to see if the pointer is changing. If not, we don't need to do the [set], which 79 | saves a call to [caml_modify]. We think this physical-equality test is worth it 80 | because it is very cheap (both values are already available from the [is_int] test) 81 | and because [caml_modify] is expensive. *) 82 | 83 | let set t i obj = 84 | (* We use [get] first but then we use [Array.unsafe_set] since we know that [i] is 85 | valid. *) 86 | let old_obj = get t i in 87 | if Obj.is_int old_obj && Obj.is_int obj then 88 | unsafe_set_int_assuming_currently_int t i (Obj.obj obj : int) 89 | else if not (old_obj == obj) then unsafe_set_with_caml_modify t i obj 90 | 91 | let[@inline always] unsafe_set t i obj = 92 | let old_obj = unsafe_get t i in 93 | if Obj.is_int old_obj && Obj.is_int obj then 94 | unsafe_set_int_assuming_currently_int t i (Obj.obj obj : int) 95 | else if not (old_obj == obj) then unsafe_set_with_caml_modify t i obj 96 | 97 | let[@inline always] unsafe_set_omit_phys_equal_check t i obj = 98 | let old_obj = unsafe_get t i in 99 | if Obj.is_int old_obj && Obj.is_int obj then 100 | unsafe_set_int_assuming_currently_int t i (Obj.obj obj : int) 101 | else unsafe_set_with_caml_modify t i obj 102 | 103 | let singleton obj = create ~len:1 obj 104 | 105 | (* Pre-condition: t.(i) is an integer. *) 106 | let unsafe_set_assuming_currently_int t i obj = 107 | if Obj.is_int obj then 108 | unsafe_set_int_assuming_currently_int t i (Obj.obj obj : int) 109 | else 110 | (* [t.(i)] is an integer and [obj] is not, so we do not need to check if they are 111 | equal. *) 112 | unsafe_set_with_caml_modify t i obj 113 | 114 | let unsafe_set_int t i int = 115 | let old_obj = unsafe_get t i in 116 | if Obj.is_int old_obj then unsafe_set_int_assuming_currently_int t i int 117 | else unsafe_set_with_caml_modify t i (Obj.repr int) 118 | 119 | let unsafe_clear_if_pointer t i = 120 | let old_obj = unsafe_get t i in 121 | if not (Obj.is_int old_obj) then unsafe_set_with_caml_modify t i (Obj.repr 0) 122 | 123 | (** [unsafe_blit] is like [Array.blit], except it uses our own for-loop to avoid 124 | caml_modify when possible. Its performance is still not comparable to a 125 | memcpy. *) 126 | let unsafe_blit ~src ~src_pos ~dst ~dst_pos ~len = 127 | (* When [phys_equal src dst], we need to check whether [dst_pos < src_pos] and have the 128 | for loop go in the right direction so that we don't overwrite data that we still need 129 | to read. When [not (phys_equal src dst)], doing this is harmless. From a 130 | memory-performance perspective, it doesn't matter whether one loops up or down. 131 | Constant-stride access, forward or backward, should be indistinguishable (at least on 132 | an intel i7). So, we don't do a check for [phys_equal src dst] and always loop up in 133 | that case. *) 134 | if dst_pos < src_pos then 135 | for i = 0 to len - 1 do 136 | unsafe_set dst (dst_pos + i) (unsafe_get src (src_pos + i)) 137 | done 138 | else 139 | for i = len - 1 downto 0 do 140 | unsafe_set dst (dst_pos + i) (unsafe_get src (src_pos + i)) 141 | done 142 | 143 | (*———————————————————————————————————————————————————————————————————————————— 144 | Copyright (c) 2016–2020 Jane Street Group, LLC 145 | 146 | Permission to use, copy, modify, and/or distribute this software for any 147 | purpose with or without fee is hereby granted, provided that the above 148 | copyright notice and this permission notice appear in all copies. 149 | 150 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 151 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 152 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 153 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 154 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 155 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 156 | DEALINGS IN THE SOFTWARE. 157 | ————————————————————————————————————————————————————————————————————————————*) 158 | -------------------------------------------------------------------------------- /src/obj_array.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2016–2020 Jane Street Group, LLC 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** This module is not exposed for external use, and is only here for the 7 | implementation of [Uniform_array] internally. [Obj.t Uniform_array.t] should 8 | be used in place of [Obj_array.t]. *) 9 | 10 | open! Import 11 | 12 | type t 13 | 14 | val create : len:int -> Obj.t -> t 15 | (** [create ~len x] returns an obj-array of length [len], all of whose indices 16 | have value [x]. *) 17 | 18 | val create_zero : len:int -> t 19 | (** [create_zero ~len] returns an obj-array of length [len], all of whose 20 | indices have value [Obj.repr 0]. *) 21 | 22 | val singleton : Obj.t -> t 23 | val empty : t 24 | val length : t -> int 25 | 26 | val get : t -> int -> Obj.t 27 | (** [get t i] and [unsafe_get t i] return the object at index [i]. [set t i o] 28 | and [unsafe_set t i o] set index [i] to [o]. In no case is the object 29 | copied. The [unsafe_*] variants omit the bounds check of [i]. *) 30 | 31 | val unsafe_get : t -> int -> Obj.t 32 | val set : t -> int -> Obj.t -> unit 33 | val unsafe_set : t -> int -> Obj.t -> unit 34 | val swap : t -> int -> int -> unit 35 | 36 | val unsafe_set_assuming_currently_int : t -> int -> Obj.t -> unit 37 | (** [unsafe_set_assuming_currently_int t i obj] sets index [i] of [t] to [obj], 38 | but only works correctly if [Obj.is_int (get t i)]. This precondition saves 39 | a dynamic check. 40 | 41 | [unsafe_set_int_assuming_currently_int] is similar, except the value being 42 | set is an int. 43 | 44 | [unsafe_set_int] is similar but does not assume anything about the target. *) 45 | 46 | val unsafe_set_int_assuming_currently_int : t -> int -> int -> unit 47 | val unsafe_set_int : t -> int -> int -> unit 48 | 49 | val unsafe_set_omit_phys_equal_check : t -> int -> Obj.t -> unit 50 | (** [unsafe_set_omit_phys_equal_check] is like [unsafe_set], except it doesn't 51 | do a [phys_equal] check to try to skip [caml_modify]. It is safe to call 52 | this even if the values are [phys_equal]. *) 53 | 54 | val unsafe_clear_if_pointer : t -> int -> unit 55 | (** [unsafe_clear_if_pointer t i] prevents [t.(i)] from pointing to anything to 56 | prevent space leaks. It does this by setting [t.(i)] to [Obj.repr 0]. As a 57 | performance hack, it only does this when [not (Obj.is_int t.(i))]. *) 58 | 59 | val unsafe_blit : 60 | src:t -> src_pos:int -> dst:t -> dst_pos:int -> len:int -> unit 61 | 62 | include Invariant.S with type t := t 63 | 64 | (*———————————————————————————————————————————————————————————————————————————— 65 | Copyright (c) 2016–2020 Jane Street Group, LLC 66 | 67 | Permission to use, copy, modify, and/or distribute this software for any 68 | purpose with or without fee is hereby granted, provided that the above 69 | copyright notice and this permission notice appear in all copies. 70 | 71 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 72 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 73 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 74 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 75 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 76 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 77 | DEALINGS IN THE SOFTWARE. 78 | ————————————————————————————————————————————————————————————————————————————*) 79 | -------------------------------------------------------------------------------- /src/obj_either.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | type ('a, 'b) t = Obj.t 7 | 8 | let of_immediate : type a b. a -> (a, b) t = 9 | fun a -> 10 | if Obj.is_block (Obj.repr a) then 11 | failwith "Obj_either.of_immediate: passed a heap-allocated value"; 12 | Obj.repr a 13 | 14 | let of_value : type a b. b -> (a, b) t = 15 | fun b -> 16 | if Obj.is_int (Obj.repr b) then 17 | failwith "Obj_either.of_value: passed an immediate value"; 18 | Obj.repr b 19 | 20 | type state = Immediate | Value 21 | 22 | let inspect t = if Obj.is_int t then Immediate else Value 23 | 24 | let get_immediate_exn : type a b. (a, b) t -> a = 25 | fun t -> 26 | match inspect t with 27 | | Immediate -> Obj.obj t 28 | | Value -> 29 | failwith "Obj_either.get_immediate_exn: passed a heap-allocated value" 30 | 31 | let get_value_exn : type a b. (a, b) t -> b = 32 | fun t -> 33 | match inspect t with 34 | | Value -> Obj.obj t 35 | | Immediate -> failwith "Obj_either.get_value_exn: passed an immediate value" 36 | 37 | (*———————————————————————————————————————————————————————————————————————————— 38 | Copyright (c) 2020–2021 Craig Ferguson 39 | 40 | Permission to use, copy, modify, and/or distribute this software for any 41 | purpose with or without fee is hereby granted, provided that the above 42 | copyright notice and this permission notice appear in all copies. 43 | 44 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 45 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 46 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 47 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 48 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 49 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 50 | DEALINGS IN THE SOFTWARE. 51 | ————————————————————————————————————————————————————————————————————————————*) 52 | -------------------------------------------------------------------------------- /src/obj_either.mli: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2020–2021 Craig Ferguson 3 | Distributed under the MIT license. See terms at the end of this file. 4 | ————————————————————————————————————————————————————————————————————————————*) 5 | 6 | (** A variant of the [Either.t] type that distinguishes [Left] and [Right] cases 7 | by their {i immediacy} (ie. whether [Obj.is_int] holds of the contents) 8 | rather than a heap-allocated variant. Using this type with types that are 9 | conditionally-immediate (such as [Int63.t]) will result in undefined 10 | behaviour. 11 | 12 | This module is not exposed for external use. *) 13 | 14 | type ('a, 'b) t = private Obj.t 15 | 16 | val of_immediate : 'a -> ('a, _) t 17 | val of_value : 'b -> (_, 'b) t 18 | 19 | type state = Immediate | Value 20 | 21 | val inspect : (_, _) t -> state 22 | val get_immediate_exn : ('a, _) t -> 'a 23 | val get_value_exn : (_, 'b) t -> 'b 24 | 25 | (*———————————————————————————————————————————————————————————————————————————— 26 | Copyright (c) 2020–2021 Craig Ferguson 27 | 28 | Permission to use, copy, modify, and/or distribute this software for any 29 | purpose with or without fee is hereby granted, provided that the above 30 | copyright notice and this permission notice appear in all copies. 31 | 32 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 33 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 34 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 35 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 36 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 37 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 38 | DEALINGS IN THE SOFTWARE. 39 | ————————————————————————————————————————————————————————————————————————————*) 40 | -------------------------------------------------------------------------------- /src/type_equality.ml: -------------------------------------------------------------------------------- 1 | type (_, _) t = Refl : ('a, 'a) t 2 | -------------------------------------------------------------------------------- /src/type_equality.mli: -------------------------------------------------------------------------------- 1 | type (_, _) t = Refl : ('a, 'a) t 2 | -------------------------------------------------------------------------------- /src/uniform_array.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2016–2020 Jane Street Group, LLC 3 | Copyright (c) 2020–2021 Craig Ferguson 4 | Distributed under the MIT license. See terms at the end of this file. 5 | ————————————————————————————————————————————————————————————————————————————*) 6 | 7 | (** Code in this module has been extracted from Jane Street's [base] library, 8 | and modified to add support for [Tuple2] and [Tuple3] specialisations. *) 9 | 10 | open! Import 11 | include Uniform_array_intf 12 | 13 | module type Trusted = sig 14 | type ('a, 'b, 'c) t 15 | type ('a, 'b, 'c) elt 16 | type ('a, 'b, 'c, 'inner) with_elt 17 | 18 | val empty : (_, _, _) t 19 | val unsafe_create_uninitialized : len:int -> (_, _, _) t 20 | val create_obj_array : len:int -> (_, _, _) t 21 | val create : len:int -> ('a, 'b, 'c, ('a, 'b, 'c) t) with_elt 22 | val singleton : ('a, 'b, 'c, ('a, 'b, 'c) t) with_elt 23 | val get : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c) elt 24 | val set : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c, unit) with_elt 25 | val swap : (_, _, _) t -> int -> int -> unit 26 | val unsafe_get : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c) elt 27 | val unsafe_set : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c, unit) with_elt 28 | 29 | val unsafe_set_omit_phys_equal_check : 30 | ('a, 'b, 'c) t -> int -> ('a, 'b, 'c, unit) with_elt 31 | 32 | val unsafe_set_int : (_, _, _) t -> int -> (int, int, int, unit) with_elt 33 | 34 | val unsafe_set_int_assuming_currently_int : 35 | (_, _, _) t -> int -> (int, int, int, unit) with_elt 36 | 37 | val unsafe_set_assuming_currently_int : 38 | (_, _, _) t -> int -> (Obj.t, Obj.t, Obj.t, unit) with_elt 39 | 40 | val length : (_, _, _) t -> int 41 | 42 | val unsafe_blit : 43 | src:('a, 'b, 'c) t 44 | -> src_pos:int 45 | -> dst:('a, 'b, 'c) t 46 | -> dst_pos:int 47 | -> len:int 48 | -> unit 49 | 50 | val unsafe_clear_if_pointer : (_, _, _) t -> int -> unit 51 | end 52 | 53 | (* WARNING: 54 | We use non-memory-safe things throughout the [Trusted] module. 55 | Most of it is only safe in combination with the type signature (e.g. exposing 56 | [val copy : 'a t -> 'b t] would be a big mistake). *) 57 | module Trusted : sig 58 | type 'a t 59 | 60 | include 61 | Trusted 62 | with type ('a, _, _) t := 'a t 63 | and type ('a, _, _) elt := 'a 64 | and type ('a, _, _, 'inner) with_elt := 'a -> 'inner 65 | end = struct 66 | type 'a t = Obj_array.t 67 | 68 | let empty = Obj_array.empty 69 | let unsafe_create_uninitialized ~len = Obj_array.create_zero ~len 70 | let create_obj_array ~len = Obj_array.create_zero ~len 71 | let create ~len x = Obj_array.create ~len (Obj.repr x) 72 | let singleton x = Obj_array.singleton (Obj.repr x) 73 | let swap t i j = Obj_array.swap t i j 74 | let get arr i = Obj.obj (Obj_array.get arr i) 75 | let set arr i x = Obj_array.set arr i (Obj.repr x) 76 | let unsafe_get arr i = Obj.obj (Obj_array.unsafe_get arr i) 77 | let unsafe_set arr i x = Obj_array.unsafe_set arr i (Obj.repr x) 78 | let unsafe_set_int arr i x = Obj_array.unsafe_set_int arr i x 79 | 80 | let unsafe_set_int_assuming_currently_int arr i x = 81 | Obj_array.unsafe_set_int_assuming_currently_int arr i x 82 | 83 | let unsafe_set_assuming_currently_int arr i x = 84 | Obj_array.unsafe_set_assuming_currently_int arr i (Obj.repr x) 85 | 86 | let length = Obj_array.length 87 | let unsafe_blit = Obj_array.unsafe_blit 88 | 89 | let unsafe_set_omit_phys_equal_check t i x = 90 | Obj_array.unsafe_set_omit_phys_equal_check t i (Obj.repr x) 91 | 92 | let unsafe_clear_if_pointer = Obj_array.unsafe_clear_if_pointer 93 | end 94 | 95 | include Trusted 96 | 97 | let init l ~f = 98 | if l < 0 then invalid_arg "Uniform_array.init" 99 | else 100 | let res = unsafe_create_uninitialized ~len:l in 101 | for i = 0 to l - 1 do 102 | unsafe_set res i (f i) 103 | done; 104 | res 105 | 106 | let of_array arr = init ~f:(Array.unsafe_get arr) (Array.length arr) 107 | let map a ~f = init ~f:(fun i -> f (unsafe_get a i)) (length a) 108 | 109 | let map_inplace a ~f = 110 | for i = 0 to length a - 1 do 111 | unsafe_set a i (f (unsafe_get a i)) 112 | done 113 | 114 | let iter a ~f = 115 | for i = 0 to length a - 1 do 116 | f (unsafe_get a i) 117 | done 118 | 119 | let iteri a ~f = 120 | for i = 0 to length a - 1 do 121 | f i (unsafe_get a i) 122 | done 123 | 124 | let invariant inv_a t = 125 | assert (Obj.tag (Obj.repr t) <> Obj.double_array_tag); 126 | iter t ~f:inv_a 127 | 128 | let to_list t = List.init ~f:(get t) ~len:(length t) 129 | 130 | let of_list l = 131 | let len = List.length l in 132 | let res = unsafe_create_uninitialized ~len in 133 | List.iteri l ~f:(fun i x -> set res i x); 134 | res 135 | 136 | let of_list_rev l = 137 | match l with 138 | | [] -> empty 139 | | a :: l -> 140 | let len = 1 + List.length l in 141 | let t = create ~len a in 142 | let r = ref l in 143 | (* We start at [len - 2] because we already put [a] at [t.(len - 1)]. *) 144 | for i = len - 2 downto 0 do 145 | match !r with 146 | | [] -> assert false 147 | | a :: l -> 148 | unsafe_set t i a; 149 | r := l 150 | done; 151 | t 152 | 153 | (* It is not safe for [to_array] to be the identity function because we have code that 154 | relies on [float array]s being unboxed, for example in [bin_write_array]. *) 155 | let to_array t = Array.init (length t) ~f:(fun i -> unsafe_get t i) 156 | 157 | let exists t ~f = 158 | let rec loop t ~f i = 159 | if i < 0 then false else f (unsafe_get t i) || loop t ~f (i - 1) 160 | in 161 | loop t ~f (length t - 1) 162 | 163 | let map2_exn t1 t2 ~f = 164 | let len = length t1 in 165 | if length t2 <> len then invalid_arg "Array.map2_exn"; 166 | init len ~f:(fun i -> f (unsafe_get t1 i) (unsafe_get t2 i)) 167 | 168 | let fold t ~init ~f = 169 | let r = ref init in 170 | for i = 0 to length t - 1 do 171 | r := f !r (unsafe_get t i) 172 | done; 173 | !r 174 | 175 | module Tuple2 = struct 176 | (** See {!Trusted} above. *) 177 | module Trusted : sig 178 | type ('a, 'b) t 179 | 180 | include 181 | Trusted 182 | with type ('a, 'b, _) t := ('a, 'b) t 183 | and type ('a, 'b, _) elt := 'a * 'b 184 | and type ('a, 'b, _, 'inner) with_elt := 'a -> 'b -> 'inner 185 | 186 | val get_fst : ('a, _) t -> int -> 'a 187 | val get_snd : (_, 'b) t -> int -> 'b 188 | val set_fst : ('a, _) t -> int -> 'a -> unit 189 | val set_snd : (_, 'b) t -> int -> 'b -> unit 190 | val unsafe_get_fst : ('a, _) t -> int -> 'a 191 | val unsafe_get_snd : (_, 'b) t -> int -> 'b 192 | val unsafe_set_fst : ('a, _) t -> int -> 'a -> unit 193 | val unsafe_set_snd : (_, 'b) t -> int -> 'b -> unit 194 | end = struct 195 | type ('a, 'b) t = Obj_array.t 196 | 197 | let entry_size = 2 198 | let empty = Obj_array.empty 199 | 200 | let unsafe_create_uninitialized ~len = 201 | Obj_array.create_zero ~len:(len * entry_size) 202 | 203 | let create_obj_array ~len = Obj_array.create_zero ~len:(len * entry_size) 204 | 205 | let create ~len x y = 206 | let t = unsafe_create_uninitialized ~len:(len * entry_size) in 207 | for i = 0 to len - 1 do 208 | Obj_array.unsafe_set t (entry_size * i) (Obj.repr x); 209 | Obj_array.unsafe_set t ((entry_size * i) + 1) (Obj.repr y) 210 | done; 211 | t 212 | 213 | let singleton x y = create ~len:1 x y 214 | 215 | let swap t i j = 216 | Obj_array.swap t (entry_size * i) (entry_size * j); 217 | Obj_array.swap t ((entry_size * i) + 1) ((entry_size * j) + 1) 218 | 219 | let get_fst arr i = Obj.obj (Obj_array.get arr (entry_size * i)) 220 | let get_snd arr i = Obj.obj (Obj_array.get arr ((entry_size * i) + 1)) 221 | let get arr i = (get_fst arr i, get_snd arr i) 222 | let set_fst arr i x = Obj_array.set arr (entry_size * i) (Obj.repr x) 223 | let set_snd arr i y = Obj_array.set arr ((entry_size * i) + 1) (Obj.repr y) 224 | 225 | let set arr i x y = 226 | set_fst arr i x; 227 | set_snd arr i y 228 | 229 | let unsafe_get_fst arr i = 230 | Obj.obj (Obj_array.unsafe_get arr (entry_size * i)) 231 | 232 | let unsafe_get_snd arr i = 233 | Obj.obj (Obj_array.unsafe_get arr ((entry_size * i) + 1)) 234 | 235 | let unsafe_get arr i = (unsafe_get_fst arr i, unsafe_get_snd arr i) 236 | 237 | let unsafe_set_fst arr i x = 238 | Obj_array.unsafe_set arr (entry_size * i) (Obj.repr x) 239 | 240 | let unsafe_set_snd arr i y = 241 | Obj_array.unsafe_set arr ((entry_size * i) + 1) (Obj.repr y) 242 | 243 | let unsafe_set arr i x y = 244 | unsafe_set_fst arr i x; 245 | unsafe_set_snd arr i y 246 | 247 | let length t = Obj_array.length t / entry_size 248 | 249 | let unsafe_blit ~src ~src_pos ~dst ~dst_pos ~len = 250 | Obj_array.unsafe_blit ~src ~src_pos:(entry_size * src_pos) ~dst 251 | ~dst_pos:(entry_size * dst_pos) ~len:(entry_size * len) 252 | 253 | let unsafe_set_omit_phys_equal_check t i x y = 254 | Obj_array.unsafe_set_omit_phys_equal_check t (entry_size * i) (Obj.repr x); 255 | Obj_array.unsafe_set_omit_phys_equal_check t 256 | ((entry_size * i) + 1) 257 | (Obj.repr y) 258 | 259 | let unsafe_set_int arr i x y = 260 | Obj_array.unsafe_set_int arr (entry_size * i) x; 261 | Obj_array.unsafe_set_int arr ((entry_size * i) + 1) y 262 | 263 | let unsafe_set_int_assuming_currently_int arr i x y = 264 | Obj_array.unsafe_set_int_assuming_currently_int arr (entry_size * i) x; 265 | Obj_array.unsafe_set_int_assuming_currently_int arr 266 | ((entry_size * i) + 1) 267 | y 268 | 269 | let unsafe_set_assuming_currently_int arr i x y = 270 | Obj_array.unsafe_set_assuming_currently_int arr (entry_size * i) x; 271 | Obj_array.unsafe_set_assuming_currently_int arr ((entry_size * i) + 1) y 272 | 273 | let unsafe_clear_if_pointer arr i = 274 | Obj_array.unsafe_clear_if_pointer arr (entry_size * i); 275 | Obj_array.unsafe_clear_if_pointer arr ((entry_size * i) + 1) 276 | end 277 | 278 | include Trusted 279 | 280 | let init l ~f = 281 | if l < 0 then invalid_arg "Uniform_array.init" 282 | else 283 | let res = unsafe_create_uninitialized ~len:l in 284 | for i = 0 to l - 1 do 285 | let x, y = f i in 286 | unsafe_set res i x y 287 | done; 288 | res 289 | 290 | let of_array arr = init ~f:(Array.unsafe_get arr) (Array.length arr) 291 | 292 | let map a ~f = 293 | init ~f:(fun i -> f (unsafe_get_fst a i) (unsafe_get_snd a i)) (length a) 294 | 295 | let map_inplace a ~f = 296 | for i = 0 to length a - 1 do 297 | let x, y = f (unsafe_get_fst a i) (unsafe_get_snd a i) in 298 | unsafe_set a i x y 299 | done 300 | 301 | let iter a ~f = 302 | for i = 0 to length a - 1 do 303 | f (unsafe_get_fst a i) (unsafe_get_snd a i) 304 | done 305 | 306 | let iteri a ~f = 307 | for i = 0 to length a - 1 do 308 | f i (unsafe_get_fst a i) (unsafe_get_snd a i) 309 | done 310 | 311 | let invariant inv_elt t = 312 | assert (Obj.tag (Obj.repr t) <> Obj.double_array_tag); 313 | iter t ~f:(fun a b -> inv_elt (a, b)) 314 | 315 | let to_list t = List.init ~f:(get t) ~len:(length t) 316 | 317 | let of_list l = 318 | let len = List.length l in 319 | let res = unsafe_create_uninitialized ~len in 320 | List.iteri l ~f:(fun i (x, y) -> unsafe_set res i x y); 321 | res 322 | 323 | let of_list_rev l = 324 | match l with 325 | | [] -> empty 326 | | (a, b) :: l -> 327 | let len = 1 + List.length l in 328 | let t = unsafe_create_uninitialized ~len in 329 | unsafe_set_fst t (len - 1) a; 330 | unsafe_set_snd t (len - 1) b; 331 | let r = ref l in 332 | (* We start at [len - 2] because we already put [a] at [t.(len - 1)]. *) 333 | for i = len - 2 downto 0 do 334 | match !r with 335 | | [] -> assert false 336 | | (a, b) :: l -> 337 | unsafe_set_fst t i a; 338 | unsafe_set_snd t i b; 339 | r := l 340 | done; 341 | t 342 | 343 | (* It is not safe for [to_array] to be the identity function because we have code that 344 | relies on [float array]s being unboxed, for example in [bin_write_array]. *) 345 | let to_array t = Array.init (length t) ~f:(fun i -> unsafe_get t i) 346 | 347 | let exists t ~f = 348 | let rec loop t ~f i = 349 | if i < 0 then false 350 | else f (unsafe_get_fst t i) (unsafe_get_snd t i) || loop t ~f (i - 1) 351 | in 352 | loop t ~f (length t - 1) 353 | 354 | let map2_exn t1 t2 ~f = 355 | let len = length t1 in 356 | if length t2 <> len then invalid_arg "Array.map2_exn"; 357 | init len ~f:(fun i -> 358 | f (unsafe_get_fst t1 i) (unsafe_get_snd t1 i) (unsafe_get_fst t2 i) 359 | (unsafe_get_snd t2 i)) 360 | 361 | let fold t ~init ~f = 362 | let r = ref init in 363 | for i = 0 to length t - 1 do 364 | r := f !r (unsafe_get_fst t i) (unsafe_get_snd t i) 365 | done; 366 | !r 367 | end 368 | 369 | module Tuple3 = struct 370 | (** See {!Trusted} above. *) 371 | module Trusted : sig 372 | type ('a, 'b, 'c) t 373 | 374 | include 375 | Trusted 376 | with type ('a, 'b, 'c) t := ('a, 'b, 'c) t 377 | and type ('a, 'b, 'c) elt := 'a * 'b * 'c 378 | and type ('a, 'b, 'c, 'inner) with_elt := 'a -> 'b -> 'c -> 'inner 379 | 380 | val get_fst : ('a, _, _) t -> int -> 'a 381 | val get_snd : (_, 'b, _) t -> int -> 'b 382 | val get_thd : (_, _, 'c) t -> int -> 'c 383 | val set_fst : ('a, _, _) t -> int -> 'a -> unit 384 | val set_snd : (_, 'b, _) t -> int -> 'b -> unit 385 | val set_thd : (_, _, 'c) t -> int -> 'c -> unit 386 | val unsafe_get_fst : ('a, _, _) t -> int -> 'a 387 | val unsafe_get_snd : (_, 'b, _) t -> int -> 'b 388 | val unsafe_get_thd : (_, _, 'c) t -> int -> 'c 389 | val unsafe_set_fst : ('a, _, _) t -> int -> 'a -> unit 390 | val unsafe_set_snd : (_, 'b, _) t -> int -> 'b -> unit 391 | val unsafe_set_thd : (_, _, 'c) t -> int -> 'c -> unit 392 | end = struct 393 | type ('a, 'b, 'c) t = Obj_array.t 394 | 395 | let entry_size = 3 396 | let empty = Obj_array.empty 397 | 398 | let unsafe_create_uninitialized ~len = 399 | Obj_array.create_zero ~len:(len * entry_size) 400 | 401 | let create_obj_array ~len = Obj_array.create_zero ~len:(len * entry_size) 402 | 403 | let create ~len x y z = 404 | let t = unsafe_create_uninitialized ~len:(len * entry_size) in 405 | for i = 0 to len - 1 do 406 | Obj_array.unsafe_set t (entry_size * i) (Obj.repr x); 407 | Obj_array.unsafe_set t ((entry_size * i) + 1) (Obj.repr y); 408 | Obj_array.unsafe_set t ((entry_size * i) + 2) (Obj.repr z) 409 | done; 410 | t 411 | 412 | let singleton x y = create ~len:1 x y 413 | 414 | let swap t i j = 415 | Obj_array.swap t (entry_size * i) (entry_size * j); 416 | Obj_array.swap t ((entry_size * i) + 1) ((entry_size * j) + 1); 417 | Obj_array.swap t ((entry_size * i) + 2) ((entry_size * j) + 2) 418 | 419 | let get_fst arr i = Obj.obj (Obj_array.get arr (entry_size * i)) 420 | let get_snd arr i = Obj.obj (Obj_array.get arr ((entry_size * i) + 1)) 421 | let get_thd arr i = Obj.obj (Obj_array.get arr ((entry_size * i) + 2)) 422 | let get arr i = (get_fst arr i, get_snd arr i, get_thd arr i) 423 | let set_fst arr i x = Obj_array.set arr (entry_size * i) (Obj.repr x) 424 | let set_snd arr i y = Obj_array.set arr ((entry_size * i) + 1) (Obj.repr y) 425 | let set_thd arr i z = Obj_array.set arr ((entry_size * i) + 2) (Obj.repr z) 426 | 427 | let set arr i x y z = 428 | set_fst arr i x; 429 | set_snd arr i y; 430 | set_thd arr i z 431 | 432 | let unsafe_get_fst arr i = 433 | Obj.obj (Obj_array.unsafe_get arr (entry_size * i)) 434 | 435 | let unsafe_get_snd arr i = 436 | Obj.obj (Obj_array.unsafe_get arr ((entry_size * i) + 1)) 437 | 438 | let unsafe_get_thd arr i = 439 | Obj.obj (Obj_array.unsafe_get arr ((entry_size * i) + 2)) 440 | 441 | let unsafe_get arr i = 442 | (unsafe_get_fst arr i, unsafe_get_snd arr i, unsafe_get_thd arr i) 443 | 444 | let unsafe_set_fst arr i x = 445 | Obj_array.unsafe_set arr (entry_size * i) (Obj.repr x) 446 | 447 | let unsafe_set_snd arr i y = 448 | Obj_array.unsafe_set arr ((entry_size * i) + 1) (Obj.repr y) 449 | 450 | let unsafe_set_thd arr i z = 451 | Obj_array.unsafe_set arr ((entry_size * i) + 2) (Obj.repr z) 452 | 453 | let unsafe_set arr i x y z = 454 | unsafe_set_fst arr i x; 455 | unsafe_set_snd arr i y; 456 | unsafe_set_thd arr i z 457 | 458 | let length t = Obj_array.length t / entry_size 459 | 460 | let unsafe_blit ~src ~src_pos ~dst ~dst_pos ~len = 461 | Obj_array.unsafe_blit ~src ~src_pos:(entry_size * src_pos) ~dst 462 | ~dst_pos:(entry_size * dst_pos) ~len:(entry_size * len) 463 | 464 | let unsafe_set_omit_phys_equal_check t i x y z = 465 | Obj_array.unsafe_set_omit_phys_equal_check t (entry_size * i) (Obj.repr x); 466 | Obj_array.unsafe_set_omit_phys_equal_check t 467 | ((entry_size * i) + 1) 468 | (Obj.repr y); 469 | Obj_array.unsafe_set_omit_phys_equal_check t 470 | ((entry_size * i) + 2) 471 | (Obj.repr z) 472 | 473 | let unsafe_set_int arr i x y z = 474 | Obj_array.unsafe_set_int arr (entry_size * i) x; 475 | Obj_array.unsafe_set_int arr ((entry_size * i) + 1) y; 476 | Obj_array.unsafe_set_int arr ((entry_size * i) + 2) z 477 | 478 | let unsafe_set_int_assuming_currently_int arr i x y z = 479 | Obj_array.unsafe_set_int_assuming_currently_int arr (entry_size * i) x; 480 | Obj_array.unsafe_set_int_assuming_currently_int arr 481 | ((entry_size * i) + 1) 482 | y; 483 | Obj_array.unsafe_set_int_assuming_currently_int arr 484 | ((entry_size * i) + 2) 485 | z 486 | 487 | let unsafe_set_assuming_currently_int arr i x y z = 488 | Obj_array.unsafe_set_assuming_currently_int arr (entry_size * i) x; 489 | Obj_array.unsafe_set_assuming_currently_int arr ((entry_size * i) + 1) y; 490 | Obj_array.unsafe_set_assuming_currently_int arr ((entry_size * i) + 2) z 491 | 492 | let unsafe_clear_if_pointer arr i = 493 | Obj_array.unsafe_clear_if_pointer arr (entry_size * i); 494 | Obj_array.unsafe_clear_if_pointer arr ((entry_size * i) + 1); 495 | Obj_array.unsafe_clear_if_pointer arr ((entry_size * i) + 2) 496 | end 497 | 498 | include Trusted 499 | 500 | let init l ~f = 501 | if l < 0 then invalid_arg "Uniform_array.init" 502 | else 503 | let res = unsafe_create_uninitialized ~len:l in 504 | for i = 0 to l - 1 do 505 | let x, y, z = f i in 506 | unsafe_set res i x y z 507 | done; 508 | res 509 | 510 | let of_array arr = init ~f:(Array.unsafe_get arr) (Array.length arr) 511 | 512 | let map a ~f = 513 | init 514 | ~f:(fun i -> 515 | f (unsafe_get_fst a i) (unsafe_get_snd a i) (unsafe_get_thd a i)) 516 | (length a) 517 | 518 | let map_inplace a ~f = 519 | for i = 0 to length a - 1 do 520 | let x, y, z = 521 | f (unsafe_get_fst a i) (unsafe_get_snd a i) (unsafe_get_thd a i) 522 | in 523 | unsafe_set a i x y z 524 | done 525 | 526 | let iter a ~f = 527 | for i = 0 to length a - 1 do 528 | f (unsafe_get_fst a i) (unsafe_get_snd a i) (unsafe_get_thd a i) 529 | done 530 | 531 | let iteri a ~f = 532 | for i = 0 to length a - 1 do 533 | f i (unsafe_get_fst a i) (unsafe_get_snd a i) (unsafe_get_thd a i) 534 | done 535 | 536 | let invariant inv_elt t = 537 | assert (Obj.tag (Obj.repr t) <> Obj.double_array_tag); 538 | iter t ~f:(fun a b c -> inv_elt (a, b, c)) 539 | 540 | let to_list t = List.init ~f:(get t) ~len:(length t) 541 | 542 | let of_list l = 543 | let len = List.length l in 544 | let res = unsafe_create_uninitialized ~len in 545 | List.iteri l ~f:(fun i (x, y, z) -> unsafe_set res i x y z); 546 | res 547 | 548 | let of_list_rev l = 549 | match l with 550 | | [] -> empty 551 | | (a, b, c) :: l -> 552 | let len = 1 + List.length l in 553 | let t = unsafe_create_uninitialized ~len in 554 | unsafe_set_fst t (len - 1) a; 555 | unsafe_set_snd t (len - 1) b; 556 | unsafe_set_thd t (len - 1) c; 557 | let r = ref l in 558 | (* We start at [len - 2] because we already put [a] at [t.(len - 1)]. *) 559 | for i = len - 2 downto 0 do 560 | match !r with 561 | | [] -> assert false 562 | | (a, b, c) :: l -> 563 | unsafe_set_fst t i a; 564 | unsafe_set_snd t i b; 565 | unsafe_set_thd t i c; 566 | r := l 567 | done; 568 | t 569 | 570 | (* It is not safe for [to_array] to be the identity function because we have code that 571 | relies on [float array]s being unboxed, for example in [bin_write_array]. *) 572 | let to_array t = Array.init (length t) ~f:(fun i -> unsafe_get t i) 573 | 574 | let exists t ~f = 575 | let rec loop t ~f i = 576 | if i < 0 then false 577 | else 578 | f (unsafe_get_fst t i) (unsafe_get_snd t i) (unsafe_get_thd t i) 579 | || loop t ~f (i - 1) 580 | in 581 | loop t ~f (length t - 1) 582 | 583 | let map2_exn t1 t2 ~f = 584 | let len = length t1 in 585 | if length t2 <> len then invalid_arg "Array.map2_exn"; 586 | init len ~f:(fun i -> 587 | f (unsafe_get_fst t1 i) (unsafe_get_snd t1 i) (unsafe_get_thd t1 i) 588 | (unsafe_get_fst t2 i) (unsafe_get_snd t2 i) (unsafe_get_thd t2 i)) 589 | 590 | let fold t ~init ~f = 591 | let r = ref init in 592 | for i = 0 to length t - 1 do 593 | r := f !r (unsafe_get_fst t i) (unsafe_get_snd t i) (unsafe_get_thd t i) 594 | done; 595 | !r 596 | end 597 | 598 | (*———————————————————————————————————————————————————————————————————————————— 599 | Copyright (c) 2016–2020 Jane Street Group, LLC 600 | Copyright (c) 2020–2021 Craig Ferguson 601 | 602 | Permission to use, copy, modify, and/or distribute this software for any 603 | purpose with or without fee is hereby granted, provided that the above 604 | copyright notice and this permission notice appear in all copies. 605 | 606 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 607 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 608 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 609 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 610 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 611 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 612 | DEALINGS IN THE SOFTWARE. 613 | ————————————————————————————————————————————————————————————————————————————*) 614 | -------------------------------------------------------------------------------- /src/uniform_array.mli: -------------------------------------------------------------------------------- 1 | (** Same semantics as ['a Array.t], except it's guaranteed that the 2 | representation array is not tagged with [Double_array_tag], the tag for 3 | float arrays. 4 | 5 | This means it's safer to use in the presence of [Obj.magic], but it's slower 6 | than normal [Array] if you use it with floats. It can often be faster than 7 | [Array] if you use it with non-floats. *) 8 | 9 | include Uniform_array_intf.Intf 10 | (** @inline *) 11 | -------------------------------------------------------------------------------- /src/uniform_array_intf.ml: -------------------------------------------------------------------------------- 1 | (*———————————————————————————————————————————————————————————————————————————— 2 | Copyright (c) 2016–2020 Jane Street Group, LLC 3 | Copyright (c) 2020–2021 Craig Ferguson 4 | Distributed under the MIT license. See terms at the end of this file. 5 | ————————————————————————————————————————————————————————————————————————————*) 6 | 7 | module type S = sig 8 | type ('a, 'b, 'c) t 9 | type ('a, 'b, 'c) elt 10 | type ('a, 'b, 'c, 'inner) with_elt 11 | 12 | val empty : (_, _, _) t 13 | val create : len:int -> ('a, 'b, 'c, ('a, 'b, 'c) t) with_elt 14 | val singleton : ('a, 'b, 'c, ('a, 'b, 'c) t) with_elt 15 | val init : int -> f:(int -> ('a, 'b, 'c) elt) -> ('a, 'b, 'c) t 16 | val length : (_, _, _) t -> int 17 | val get : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c) elt 18 | val unsafe_get : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c) elt 19 | val set : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c, unit) with_elt 20 | val unsafe_set : ('a, 'b, 'c) t -> int -> ('a, 'b, 'c, unit) with_elt 21 | val swap : (_, _, _) t -> int -> int -> unit 22 | 23 | val map : 24 | ('a1, 'b1, 'c1) t 25 | -> f:('a1, 'b1, 'c1, ('a2, 'b2, 'c2) elt) with_elt 26 | -> ('a2, 'b2, 'c2) t 27 | 28 | val map_inplace : 29 | ('a, 'b, 'c) t -> f:('a, 'b, 'c, ('a, 'b, 'c) elt) with_elt -> unit 30 | 31 | val map2_exn : 32 | ('a1, 'b1, 'c1) t 33 | -> ('a2, 'b2, 'c2) t 34 | -> f:('a1, 'b1, 'c1, ('a2, 'b2, 'c2, ('a3, 'b3, 'c3) elt) with_elt) with_elt 35 | -> ('a3, 'b3, 'c3) t 36 | (** Functions with the 2 suffix raise an exception if the lengths of the two 37 | given arrays aren't the same. *) 38 | 39 | val iter : ('a, 'b, 'c) t -> f:('a, 'b, 'c, unit) with_elt -> unit 40 | 41 | val iteri : ('a, 'b, 'c) t -> f:(int -> ('a, 'b, 'c, unit) with_elt) -> unit 42 | (** Like {!iter}, but the function is applied to the index of the element as 43 | first argument, and the element itself as second argument. *) 44 | 45 | val fold : 46 | ('a, 'b, 'c) t 47 | -> init:'acc 48 | -> f:('acc -> ('a, 'b, 'c, 'acc) with_elt) 49 | -> 'acc 50 | 51 | val exists : ('a, 'b, 'c) t -> f:('a, 'b, 'c, bool) with_elt -> bool 52 | 53 | val of_array : ('a, 'b, 'c) elt array -> ('a, 'b, 'c) t 54 | (** [of_array] and [to_array] return fresh arrays with the same contents 55 | rather than returning a reference to the underlying array. *) 56 | 57 | val to_array : ('a, 'b, 'c) t -> ('a, 'b, 'c) elt array 58 | val of_list : ('a, 'b, 'c) elt list -> ('a, 'b, 'c) t 59 | val of_list_rev : ('a, 'b, 'c) elt list -> ('a, 'b, 'c) t 60 | val to_list : ('a, 'b, 'c) t -> ('a, 'b, 'c) elt list 61 | 62 | (** {2 Extra lowlevel and unsafe functions} *) 63 | 64 | val unsafe_blit : 65 | src:('a, 'b, 'c) t 66 | -> src_pos:int 67 | -> dst:('a, 'b, 'c) t 68 | -> dst_pos:int 69 | -> len:int 70 | -> unit 71 | 72 | val unsafe_create_uninitialized : len:int -> (_, _, _) t 73 | (** The behavior is undefined if you access an element before setting it. *) 74 | 75 | val create_obj_array : len:int -> (Obj.t, Obj.t, Obj.t) t 76 | (** New obj array filled with [Obj.repr 0] *) 77 | 78 | val unsafe_clear_if_pointer : (Obj.t, Obj.t, Obj.t) t -> int -> unit 79 | (** [unsafe_clear_if_pointer t i] prevents [t.(i)] from pointing to anything 80 | to prevent space leaks. It does this by setting [t.(i)] to 81 | [Caml.Obj.repr 0]. As a performance hack, it only does this when 82 | [not (Caml.Obj.is_int t.(i))]. It is an error to access the cleared index 83 | before setting it again. *) 84 | end 85 | 86 | module type Intf = sig 87 | module type S = S 88 | 89 | type 'a t 90 | 91 | (** @inline *) 92 | include 93 | S 94 | with type ('a, _, _) t := 'a t 95 | and type ('a, _, _) elt := 'a 96 | and type ('a, _, _, 'inner) with_elt := 'a -> 'inner 97 | 98 | val unsafe_set_omit_phys_equal_check : 'a t -> int -> 'a -> unit 99 | (** [unsafe_set_omit_phys_equal_check] is like [unsafe_set], except it doesn't 100 | do a [phys_equal] check to try to skip [caml_modify]. It is safe to call 101 | this even if the values are [phys_equal]. *) 102 | 103 | val unsafe_set_assuming_currently_int : Obj.t t -> int -> Obj.t -> unit 104 | (** [unsafe_set_assuming_currently_int t i obj] sets index [i] of [t] to 105 | [obj], but only works correctly if the value there is an immediate, i.e. 106 | [Obj.is_int (get t i)]. This precondition saves a dynamic check. 107 | 108 | [unsafe_set_int_assuming_currently_int] is similar, except the value being 109 | set is an int. 110 | 111 | [unsafe_set_int] is similar but does not assume anything about the target. *) 112 | 113 | val unsafe_set_int_assuming_currently_int : Obj.t t -> int -> int -> unit 114 | val unsafe_set_int : Obj.t t -> int -> int -> unit 115 | 116 | include Invariant.S1 with type 'a t := 'a t 117 | 118 | (** {2 Arrays of tuples} 119 | 120 | Compact uniform arrays of tuples: e.g. an [('a, 'b) Tuple2.t] is 121 | equivalent to [('a * 'b) t] but has a more compact memory representation. 122 | 123 | Each tuple array has a memory overhead of {b 2 words}: a 1-word pointer to 124 | a heap-allocated block with a 1-word header, with every other word storing 125 | data (just as for a regular array). This is slightly more efficient than 126 | just storing a tuple of arrays (which has [(2 + 2n)]-words of overhead for 127 | a tuple of arity [n]). This is useful for datastructures that may contain 128 | many small tuple arrays (such as the buckets of a hashtable). *) 129 | 130 | module Tuple2 : sig 131 | type ('a, 'b) t 132 | 133 | (** @inline *) 134 | include 135 | S 136 | with type ('a, 'b, _) t := ('a, 'b) t 137 | and type ('a, 'b, _) elt := 'a * 'b 138 | and type ('a, 'b, _, 'inner) with_elt := 'a -> 'b -> 'inner 139 | 140 | val get_fst : ('a, _) t -> int -> 'a 141 | val get_snd : (_, 'b) t -> int -> 'b 142 | val set_fst : ('a, _) t -> int -> 'a -> unit 143 | val set_snd : (_, 'b) t -> int -> 'b -> unit 144 | val unsafe_get_fst : ('a, _) t -> int -> 'a 145 | val unsafe_get_snd : (_, 'b) t -> int -> 'b 146 | val unsafe_set_fst : ('a, _) t -> int -> 'a -> unit 147 | val unsafe_set_snd : (_, 'b) t -> int -> 'b -> unit 148 | val invariant : ('a * 'b) Invariant.t -> ('a, 'b) t Invariant.t 149 | end 150 | 151 | module Tuple3 : sig 152 | type ('a, 'b, 'c) t 153 | 154 | (** @inline *) 155 | include 156 | S 157 | with type ('a, 'b, 'c) t := ('a, 'b, 'c) t 158 | and type ('a, 'b, 'c) elt := 'a * 'b * 'c 159 | and type ('a, 'b, 'c, 'inner) with_elt := 'a -> 'b -> 'c -> 'inner 160 | 161 | val get_fst : ('a, _, _) t -> int -> 'a 162 | val get_snd : (_, 'b, _) t -> int -> 'b 163 | val get_thd : (_, _, 'c) t -> int -> 'c 164 | val set_fst : ('a, _, _) t -> int -> 'a -> unit 165 | val set_snd : (_, 'b, _) t -> int -> 'b -> unit 166 | val set_thd : (_, _, 'c) t -> int -> 'c -> unit 167 | val unsafe_get_fst : ('a, _, _) t -> int -> 'a 168 | val unsafe_get_snd : (_, 'b, _) t -> int -> 'b 169 | val unsafe_get_thd : (_, _, 'c) t -> int -> 'c 170 | val unsafe_set_fst : ('a, _, _) t -> int -> 'a -> unit 171 | val unsafe_set_snd : (_, 'b, _) t -> int -> 'b -> unit 172 | val unsafe_set_thd : (_, _, 'c) t -> int -> 'c -> unit 173 | val invariant : ('a * 'b * 'c) Invariant.t -> ('a, 'b, 'c) t Invariant.t 174 | end 175 | end 176 | 177 | (*———————————————————————————————————————————————————————————————————————————— 178 | Copyright (c) 2016–2020 Jane Street Group, LLC 179 | Copyright (c) 2020–2021 Craig Ferguson 180 | 181 | Permission to use, copy, modify, and/or distribute this software for any 182 | purpose with or without fee is hereby granted, provided that the above 183 | copyright notice and this permission notice appear in all copies. 184 | 185 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 186 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 189 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 190 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 191 | DEALINGS IN THE SOFTWARE. 192 | ————————————————————————————————————————————————————————————————————————————*) 193 | -------------------------------------------------------------------------------- /test/compact_test_helpers.ml: -------------------------------------------------------------------------------- 1 | module Import = struct 2 | include Stdlib.MoreLabels 3 | include Stdlib.StdLabels 4 | end 5 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test) 3 | (package compact) 4 | (libraries alcotest compact compact_test_helpers vector fmt) 5 | (modules :standard \ compact_test_helpers)) 6 | 7 | (library 8 | (name compact_test_helpers) 9 | (modules compact_test_helpers) 10 | (libraries compact)) 11 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | open! Compact 2 | 3 | let check_bool pos ~expected actual = 4 | Alcotest.(check ~pos bool) "" expected actual 5 | 6 | let check_invalid_arg pos f = 7 | let fail got = 8 | Alcotest.failf ~pos 9 | "Expected function to raise `Invalid_argument`, but raised: %a" 10 | Fmt.(Dump.option exn) 11 | got 12 | in 13 | match f () with 14 | | _ -> fail None 15 | | exception Invalid_argument _ -> () 16 | | exception exn -> fail (Some exn) 17 | 18 | module String_set = struct 19 | module String_set = Hashset.Fixed_size_string 20 | 21 | let test_simple () = 22 | let set = String_set.create ~elt_length:1 ~initial_capacity:0 in 23 | String_set.mem set "a" |> check_bool __POS__ ~expected:false; 24 | String_set.add set "a"; 25 | String_set.mem set "a" |> check_bool __POS__ ~expected:true; 26 | String_set.add set "b"; 27 | String_set.add set "c"; 28 | String_set.mem set "a" |> check_bool __POS__ ~expected:true; 29 | String_set.mem set "b" |> check_bool __POS__ ~expected:true; 30 | String_set.mem set "c" |> check_bool __POS__ ~expected:true 31 | 32 | let test_random () = 33 | let elt_length = 32 in 34 | let set = String_set.create ~elt_length ~initial_capacity:0 in 35 | let reference_tbl = Stdlib.Hashtbl.create 0 in 36 | let reference_vector = Vector.create ~dummy:"" in 37 | let random_string () = 38 | String.init elt_length (fun _ -> char_of_int (Random.int 256)) 39 | in 40 | for i = 0 to 1_000_000 do 41 | (* Add a new element. *) 42 | let new_elt = random_string () in 43 | String_set.add set new_elt; 44 | Stdlib.Hashtbl.add reference_tbl new_elt (); 45 | Vector.push reference_vector new_elt; 46 | 47 | (* Pick a random existing element and check [mem] is true. *) 48 | let elt = Vector.get reference_vector (Random.int (i + 1)) in 49 | assert (Stdlib.Hashtbl.mem reference_tbl elt); 50 | String_set.mem set elt |> check_bool __POS__ ~expected:true; 51 | 52 | (* Pick a random non-existing element and check [mem] is false. *) 53 | let non_elt = random_string () in 54 | assert (not (Stdlib.Hashtbl.mem reference_tbl non_elt)); 55 | String_set.mem set non_elt |> check_bool __POS__ ~expected:false 56 | done 57 | end 58 | 59 | let () = 60 | let test name fn = Alcotest.test_case name `Quick fn in 61 | Alcotest.run __FILE__ 62 | [ ( "main" 63 | , [ test "String_set.simple" String_set.test_simple 64 | ; test "String_set.random" String_set.test_random 65 | ] ) 66 | ; ("Arena", Test_arena.tests) 67 | ; ("Hashed_container", Test_hashed_container.tests) 68 | ; ("Hashtbl", Test_hashtbl.tests) 69 | ; ("Immediate_array", Test_immediate_array.tests) 70 | ; ("Uniform_array", Test_uniform_array.tests) 71 | ] 72 | -------------------------------------------------------------------------------- /test/test_arena.ml: -------------------------------------------------------------------------------- 1 | open! Compact 2 | 3 | let check_bool pos ~expected actual = 4 | Alcotest.(check ~pos bool) "" expected actual 5 | 6 | let check_invalid_arg pos f = 7 | let fail got = 8 | Alcotest.failf ~pos 9 | "Expected function to raise `Invalid_argument`, but raised: %a" 10 | Fmt.(Dump.option exn) 11 | got 12 | in 13 | match f () with 14 | | _ -> fail None 15 | | exception Invalid_argument _ -> () 16 | | exception exn -> fail (Some exn) 17 | 18 | let test_is_full () = 19 | let arena = Arena.create ~elt_length:1 ~initial_capacity:0 in 20 | Arena.is_full arena |> check_bool __POS__ ~expected:true; 21 | Arena.expand arena 1; 22 | Arena.is_full arena |> check_bool __POS__ ~expected:false; 23 | let (_ : Arena.id) = Arena.allocate arena "x" in 24 | Arena.is_full arena |> check_bool __POS__ ~expected:true 25 | 26 | (* Exercises [allocate] and [dereference]. *) 27 | let test_read_write () = 28 | let arena = Arena.create ~elt_length:1 ~initial_capacity:0 in 29 | check_invalid_arg __POS__ (fun () -> Arena.allocate arena "x"); 30 | 31 | (* Add some elements and ensure they're dereferenced correctly: *) 32 | Arena.expand arena 4; 33 | let elts = [ "a"; "b"; "c"; "d" ] in 34 | let ids = List.map (Arena.allocate arena) elts in 35 | check_invalid_arg __POS__ (fun () -> Arena.allocate arena "e"); 36 | Stdlib.ListLabels.iter2 elts ids ~f:(fun expected id -> 37 | let got = Arena.dereference arena id in 38 | Alcotest.(check string) "Element is dereferenced correctly" expected got) 39 | 40 | let test_expand () = 41 | let arena = Arena.create ~elt_length:100 ~initial_capacity:0 in 42 | Arena.expand arena 0 (* No-op expands are fine *); 43 | check_invalid_arg __POS__ (fun () -> Arena.expand arena (-1)); 44 | Arena.expand arena 1; 45 | Arena.expand arena 3; 46 | 47 | (* Not allowed to contract the arena (even when the space is unused): *) 48 | check_invalid_arg __POS__ (fun () -> Arena.expand arena 2) 49 | 50 | let test_elt_equal () = 51 | let arena = Arena.create ~elt_length:1 ~initial_capacity:1 in 52 | let a_ref = Arena.allocate arena "a" in 53 | Arena.elt_equal arena a_ref "a" |> check_bool __POS__ ~expected:true; 54 | Arena.elt_equal arena a_ref "b" |> check_bool __POS__ ~expected:false; 55 | () 56 | 57 | let test_smuggled_id () = 58 | let elt_length = 30 in 59 | let arena = Arena.create ~elt_length ~initial_capacity:3 in 60 | 61 | (* Build an invalid ID into [arena] by interacting with a different one: *) 62 | let smuggled_id = 63 | let elt_length = 50 in 64 | let arena = Arena.create ~elt_length ~initial_capacity:2 in 65 | let elt = String.make elt_length 'x' in 66 | (* Allocate a string of length 50 at offset 0, then another string 67 | immediately after it, returning a pointer to offset 50. *) 68 | let (_ : Arena.id) = Arena.allocate arena elt in 69 | Arena.allocate arena elt 70 | in 71 | let check_deref_invalid pos = 72 | check_invalid_arg pos (fun () -> Arena.dereference arena smuggled_id); 73 | check_invalid_arg pos (fun () -> Arena.elt_equal arena smuggled_id "") 74 | in 75 | 76 | check_deref_invalid __POS__ (* id = 50, Arena offset = 0 *); 77 | let (_ : Arena.id) = Arena.allocate arena (String.make elt_length 'a') in 78 | check_deref_invalid __POS__ (* id = 50, Arena offset = 30 *); 79 | let (_ : Arena.id) = Arena.allocate arena (String.make elt_length 'b') in 80 | check_deref_invalid __POS__ (* id = 50, arena offset = 60 (elt len = 30) *); 81 | let (_ : Arena.id) = Arena.allocate arena (String.make elt_length 'c') in 82 | 83 | (* This time, the smuggled ID is a 'valid' pointer into the new arena, so we 84 | can't guard against invalid usage. We read over the boundary between 85 | elements 2 and 3 instead: *) 86 | let result = Arena.dereference arena smuggled_id in 87 | let expected = String.make 10 'b' ^ String.make 20 'c' in 88 | Alcotest.(check ~pos:__POS__ string) "" expected result 89 | 90 | let test_invalid_length () = 91 | let arena = Arena.create ~elt_length:1 ~initial_capacity:100 in 92 | check_invalid_arg __POS__ (fun () -> Arena.allocate arena "") 93 | 94 | let tests = 95 | let test name fn = Alcotest.test_case ("Arena." ^ name) `Quick fn in 96 | [ test "is_full" test_is_full 97 | ; test "read_write" test_read_write 98 | ; test "expand" test_expand 99 | ; test "elt_equal" test_elt_equal 100 | ; test "smuggled_id" test_smuggled_id 101 | ; test "invalid_length" test_invalid_length 102 | ] 103 | -------------------------------------------------------------------------------- /test/test_arena.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/test_hashed_container.ml: -------------------------------------------------------------------------------- 1 | open! Compact 2 | 3 | module Int_key = struct 4 | include Stdlib.Int 5 | 6 | let hash = Stdlib.Hashtbl.hash 7 | let hash_size = 30 8 | end 9 | 10 | module String_key = struct 11 | include Stdlib.String 12 | 13 | let hash = Stdlib.Hashtbl.hash 14 | let hash_size = 30 15 | end 16 | 17 | let expect_failure ?(msg = "") pos ~f = 18 | match f () with 19 | | exception Failure _ -> () 20 | | exception _ -> assert false 21 | | _ -> Alcotest.fail ~pos msg 22 | 23 | let test_non_immediate_entry () = 24 | let module T = Hashset.Immediate in 25 | let t = T.create (module String_key) ~initial_capacity:0 in 26 | expect_failure __POS__ ~f:(fun () -> T.add t "strings are not immediates") 27 | 28 | let test_non_immediate64_entry () = 29 | let module T = Hashset.Immediate64 in 30 | let t = T.create (module String_key) ~initial_capacity:0 in 31 | let f () = T.add t "strings are not immediates" in 32 | match Sys.word_size with 33 | | 64 -> expect_failure __POS__ ~f 34 | | _ -> 35 | (* Adding the string should work, since [Immediate64] isn't attempting to 36 | inline singleton arrays: *) 37 | f () 38 | 39 | let test_mutation_while_iterating () = 40 | let t = Hashset.create (module Int_key) ~initial_capacity:1 in 41 | Hashset.add t 1; 42 | 43 | (* Attempting a mutation during an iteration is forbidden: even if the 44 | mutation has no effect (e.g. we add an element that already exists): *) 45 | let expect_failure = 46 | expect_failure ~msg:"Expected mutation while iterating to raise Failure" 47 | in 48 | let expect_fail_in_iter pos ~f = 49 | Hashset.iter t ~f:(function 50 | | 1 -> expect_failure pos ~f 51 | | _ -> assert false) 52 | in 53 | expect_fail_in_iter __POS__ ~f:(fun () -> Hashset.add t 1); 54 | expect_fail_in_iter __POS__ ~f:(fun () -> Hashset.add t 2); 55 | expect_fail_in_iter __POS__ ~f:(fun () -> Hashset.remove t 1); 56 | expect_fail_in_iter __POS__ ~f:(fun () -> Hashset.clear t); 57 | 58 | (* Iteration should be re-entrant, and mutation in the outer loop should still 59 | be forbidden: *) 60 | Hashset.iter t ~f:(fun _ -> 61 | Hashset.iter t ~f:(fun _ -> ()); 62 | expect_failure __POS__ ~f:(fun () -> Hashset.add t 2)); 63 | 64 | (* Mutation should be allowed after finishing an iteration, even if we abort with an exception: *) 65 | let () = 66 | let exception Exit in 67 | match Hashset.iter t ~f:(fun _ -> raise Exit) with 68 | | () -> assert false 69 | | exception Exit -> Hashset.add t 1 70 | in 71 | 72 | () 73 | 74 | let test_bucket_count () = 75 | let module Key = struct 76 | type t = int 77 | 78 | let equal = ( = ) 79 | let compare = Stdlib.compare 80 | let hash x = x 81 | let hash_size = 30 82 | end in 83 | let t = Hashset.create (module Key) ~initial_capacity:0 in 84 | let check_buckets pos expected = 85 | Alcotest.(check ~pos int) "" expected (Hashset.bucket_count t) 86 | in 87 | let check_load_factor pos expected = 88 | Alcotest.(check ~pos (float 0.)) "" expected (Hashset.load_factor t) 89 | in 90 | let add_elt = 91 | let count = ref (-1) in 92 | fun () -> 93 | incr count; 94 | Hashset.add t !count 95 | in 96 | let initial_buckets = 16 in 97 | let max_load_factor = 2 in 98 | 99 | (* Initially the set is empty: *) 100 | check_buckets __POS__ initial_buckets; 101 | check_load_factor __POS__ 0.; 102 | 103 | (* We fill it up to its initial capacity: *) 104 | for _ = 1 to max_load_factor * initial_buckets do 105 | add_elt () 106 | done; 107 | check_buckets __POS__ initial_buckets; 108 | check_load_factor __POS__ (Float.of_int max_load_factor); 109 | 110 | (* We exceed the max load factor, triggering a resize: *) 111 | add_elt (); 112 | check_buckets __POS__ (2 * initial_buckets); 113 | check_load_factor __POS__ (1. +. (1. /. Float.of_int (2 * initial_buckets))); 114 | 115 | (* Clearing sets the number of buckets to 1: *) 116 | Hashset.clear t; 117 | check_buckets __POS__ 1; 118 | check_load_factor __POS__ 0.; 119 | () 120 | 121 | let tests = 122 | let test name fn = Alcotest.test_case ("Hashset." ^ name) `Quick fn in 123 | [ test "Immediate.non_immediate_entry" test_non_immediate_entry 124 | ; test "Immediate.non_immediate64_entry" test_non_immediate64_entry 125 | ; test "mutation_while_iterating" test_mutation_while_iterating 126 | ; test "bucket_count" test_bucket_count 127 | ] 128 | -------------------------------------------------------------------------------- /test/test_hashed_container.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/test_hashtbl.ml: -------------------------------------------------------------------------------- 1 | open! Compact_test_helpers.Import 2 | open! Compact 3 | 4 | module String_key = struct 5 | include Stdlib.String 6 | 7 | let hash = Stdlib.Hashtbl.hash 8 | let hash_size = 30 9 | end 10 | 11 | let test_data = [ ("a", 1); ("b", 2); ("c", 3) ] 12 | 13 | let test_hash = 14 | let h = Hashtbl.create ~initial_capacity:10 (module String_key) in 15 | List.iter test_data ~f:(fun (k, v) -> Hashtbl.replace h ~key:k ~data:v); 16 | h 17 | 18 | let test_find () = 19 | let found = Hashtbl.find test_hash "a" in 20 | let not_found = Hashtbl.find test_hash "A" in 21 | Hashtbl.invariant ignore ignore test_hash; 22 | assert (match (found, not_found) with Some _, None -> true | _ -> false) 23 | 24 | let test_findi_and_call () = 25 | let our_hash = Hashtbl.copy test_hash in 26 | let test_string = "test string" in 27 | Hashtbl.add_exn our_hash ~key:test_string ~data:10; 28 | let test_string' = "test " ^ "string" in 29 | assert (not (test_string == test_string')); 30 | assert ( 31 | Hashtbl.find_and_call our_hash test_string' 32 | ~if_found:(fun ~key ~data -> test_string == key && data = 10) 33 | ~if_not_found:(fun _ -> false)) 34 | 35 | let test_add () = 36 | let our_hash = Hashtbl.copy test_hash in 37 | let duplicate = Hashtbl.add our_hash ~key:"a" ~data:4 in 38 | let no_duplicate = Hashtbl.add our_hash ~key:"d" ~data:5 in 39 | assert (Hashtbl.find our_hash "a" = Some 1); 40 | assert (Hashtbl.find our_hash "d" = Some 5); 41 | Hashtbl.invariant ignore ignore our_hash; 42 | assert ( 43 | match (duplicate, no_duplicate) with `Duplicate, `Ok -> true | _ -> false) 44 | 45 | let test_iter () = 46 | let cmp x y = -Int.compare x y in 47 | let predicted = List.sort ~cmp (List.map test_data ~f:(fun (_, v) -> v)) in 48 | let found = 49 | let found = ref [] in 50 | Hashtbl.iter test_hash ~f:(fun ~key:_ ~data:v -> found := v :: !found); 51 | !found |> List.sort ~cmp 52 | in 53 | assert (List.equal ~eq:Int.equal predicted found) 54 | 55 | let test_iter_keys () = 56 | let cmp x y = -String.compare x y in 57 | let predicted = List.sort ~cmp (List.map test_data ~f:(fun (k, _) -> k)) in 58 | let found = 59 | let found = ref [] in 60 | Hashtbl.iter_keys test_hash ~f:(fun k -> found := k :: !found); 61 | !found |> List.sort ~cmp 62 | in 63 | assert (List.equal ~eq:String.equal predicted found) 64 | 65 | (* let test_of_alist_size () = 66 | * let predicted = List.length test_data in 67 | * let found = Hashtbl.cardinal (Hashtbl.of_alist_poly_exn test_data) in 68 | * predicted = found 69 | * 70 | * let test_of_alist_right_keys () = 71 | * let predicted = List.map test_data ~f:(fun (k, _) -> k) in 72 | * let found = Hashtbl.keys (Hashtbl.of_alist_poly_exn test_data) in 73 | * let sp = List.sort ~cmp:Stdlib.compare predicted in 74 | * let sf = List.sort ~cmp:Stdlib.compre found in 75 | * sp = sf *) 76 | 77 | (* let%test_module "of_alist_or_error" = 78 | * (module struct 79 | * let test_unique = Result.is_ok (Hashtbl.of_alist_poly_or_error test_data) 80 | * 81 | * let test_duplicate = 82 | * Result.is_error (Hashtbl.of_alist_poly_or_error (test_data @ test_data)) 83 | * end) *) 84 | 85 | let test_size_and_right_keys () = 86 | let predicted = List.map test_data ~f:(fun (k, _) -> k) in 87 | let found = Hashtbl.to_list test_hash |> List.map ~f:fst in 88 | let sp = List.sort ~cmp:Stdlib.compare predicted in 89 | let sf = List.sort ~cmp:Stdlib.compare found in 90 | assert (sp = sf) 91 | 92 | let test_size_and_right_data () = 93 | let predicted = List.map test_data ~f:(fun (_, v) -> v) in 94 | let found = Hashtbl.to_list test_hash |> List.map ~f:snd in 95 | let sp = List.sort ~cmp:Stdlib.compare predicted in 96 | let sf = List.sort ~cmp:Stdlib.compare found in 97 | assert (sp = sf) 98 | 99 | let test_map () = 100 | let add1 x = x + 1 in 101 | let predicted_data = 102 | List.sort ~cmp:Stdlib.compare 103 | (List.map test_data ~f:(fun (k, v) -> (k, add1 v))) 104 | in 105 | let found_alist = 106 | Hashtbl.map test_hash ~f:add1 107 | |> Hashtbl.to_list 108 | |> List.sort ~cmp:Stdlib.compare 109 | in 110 | assert (List.equal ~eq:Stdlib.( = ) predicted_data found_alist) 111 | 112 | let test_map_inplace () = 113 | let f x = x + 3 in 114 | let predicted_data = 115 | List.sort ~cmp:Stdlib.compare 116 | (List.map test_data ~f:(fun (k, v) -> (k, f v))) 117 | in 118 | let test_hash = Hashtbl.copy test_hash in 119 | Hashtbl.map_inplace test_hash ~f; 120 | let found_alist = 121 | Hashtbl.to_list test_hash |> List.sort ~cmp:Stdlib.compare 122 | in 123 | assert (List.equal ~eq:Stdlib.( = ) predicted_data found_alist) 124 | 125 | let test_insert_find_remove () = 126 | let t = Hashtbl.create_poly () ~initial_capacity:1 in 127 | let inserted = ref [] in 128 | Random.init 123; 129 | let verify_inserted t = 130 | let missing = 131 | List.fold_left !inserted ~init:[] ~f:(fun acc (key, data) -> 132 | match Hashtbl.find t key with 133 | | None -> `Missing key :: acc 134 | | Some d -> if data = d then acc else `Wrong_data (key, data) :: acc) 135 | in 136 | match missing with [] -> () | _ -> failwith "some inserts are missing" 137 | in 138 | let rec loop i t = 139 | if i < 2000 then ( 140 | let k = Random.int 10_000 in 141 | inserted := (k, i) :: List.remove_assoc k !inserted; 142 | Hashtbl.replace t ~key:k ~data:i; 143 | Hashtbl.invariant ignore ignore t; 144 | verify_inserted t; 145 | loop (i + 1) t) 146 | in 147 | loop 0 t; 148 | List.iter !inserted ~f:(fun (x, _) -> 149 | Hashtbl.remove t x; 150 | Hashtbl.invariant ignore ignore t; 151 | (match Hashtbl.find t x with 152 | | None -> () 153 | | Some _ -> Printf.ksprintf failwith "present after removal: %d" x); 154 | inserted := List.remove_assoc x !inserted; 155 | verify_inserted t) 156 | 157 | let list_take t_orig n = 158 | if n <= 0 then [] 159 | else 160 | let rec loop n t accum = 161 | if n = 0 then List.rev accum 162 | else 163 | match t with [] -> t_orig | hd :: tl -> loop (n - 1) tl (hd :: accum) 164 | in 165 | loop n t_orig [] 166 | 167 | let test_clear () = 168 | let t = Hashtbl.create_poly () ~initial_capacity:1 in 169 | let l = List.init ~len:100 ~f:Fun.id in 170 | let verify_present l = List.for_all l ~f:(Hashtbl.mem t) in 171 | let verify_not_present l = 172 | List.for_all l ~f:(fun i -> not (Hashtbl.mem t i)) 173 | in 174 | List.iter l ~f:(fun i -> Hashtbl.replace t ~key:i ~data:(i * i)); 175 | List.iter l ~f:(fun i -> Hashtbl.replace t ~key:i ~data:(i * i)); 176 | assert (Hashtbl.cardinal t = 100); 177 | assert (verify_present l); 178 | Hashtbl.clear t; 179 | Hashtbl.invariant ignore ignore t; 180 | assert (Hashtbl.cardinal t = 0); 181 | assert (verify_not_present l); 182 | let l = list_take l 42 in 183 | List.iter l ~f:(fun i -> Hashtbl.replace t ~key:i ~data:(i * i)); 184 | assert (Hashtbl.cardinal t = 42); 185 | assert (verify_present l); 186 | Hashtbl.invariant ignore ignore t 187 | 188 | let test_mem () = 189 | let t = Hashtbl.create_poly () ~initial_capacity:1 in 190 | Hashtbl.invariant ignore ignore t; 191 | assert (not (Hashtbl.mem t "Fred")); 192 | Hashtbl.invariant ignore ignore t; 193 | Hashtbl.replace t ~key:"Fred" ~data:"Wilma"; 194 | Hashtbl.invariant ignore ignore t; 195 | assert (Hashtbl.mem t "Fred"); 196 | Hashtbl.invariant ignore ignore t; 197 | Hashtbl.remove t "Fred"; 198 | Hashtbl.invariant ignore ignore t; 199 | assert (not (Hashtbl.mem t "Fred")); 200 | Hashtbl.invariant ignore ignore t 201 | 202 | let test_exists () = 203 | let t = Hashtbl.create_poly ~initial_capacity:0 () in 204 | assert (not (Hashtbl.exists t ~f:(fun ~key:_ -> failwith "can't be called"))); 205 | Hashtbl.replace t ~key:7 ~data:3; 206 | assert (not (Hashtbl.exists t ~f:(fun ~key:_ ~data -> Int.equal 4 data))); 207 | Hashtbl.replace t ~key:8 ~data:4; 208 | assert (Hashtbl.exists t ~f:(fun ~key:_ ~data -> Int.equal 4 data)) 209 | (* Hashtbl.replace t ~key:9 ~data:5; 210 | * assert (Hashtbl.existsi t ~f:(fun ~key ~data -> key + data = 14)) *) 211 | 212 | let test_for_all () = 213 | let t = Hashtbl.create_poly ~initial_capacity:0 () in 214 | assert (Hashtbl.for_all t ~f:(fun ~key:_ -> failwith "can't be called")); 215 | Hashtbl.replace t ~key:7 ~data:3; 216 | assert (Hashtbl.for_all t ~f:(fun ~key:_ ~data:x -> Int.equal x 3)); 217 | Hashtbl.replace t ~key:8 ~data:4; 218 | assert (not (Hashtbl.for_all t ~f:(fun ~key:_ ~data:x -> Int.equal x 3))) 219 | (* Hashtbl.replace t ~key:9 ~data:5; 220 | * assert (Hashtbl.for_alli t ~f:(fun ~key ~data -> key - 4 = data)) *) 221 | 222 | let test_count () = 223 | let t = Hashtbl.create_poly ~initial_capacity:0 () in 224 | assert (Hashtbl.count t ~f:(fun ~key:_ -> failwith "can't be called") = 0); 225 | Hashtbl.replace t ~key:7 ~data:3; 226 | assert (Hashtbl.count t ~f:(fun ~key:_ ~data:x -> Int.equal x 3) = 1); 227 | Hashtbl.replace t ~key:8 ~data:4; 228 | assert (Hashtbl.count t ~f:(fun ~key:_ ~data:x -> Int.equal x 3) = 1) 229 | (* Hashtbl.replace t ~key:9 ~data:5; 230 | * assert (Hashtbl.counti t ~f:(fun ~key ~data -> key - 4 = data) = 3) *) 231 | 232 | let tests = 233 | let test name fn = Alcotest.test_case ("Hashtbl." ^ name) `Quick fn in 234 | [ test "find" test_find 235 | ; test "findi_and_call" test_findi_and_call 236 | ; test "add" test_add 237 | ; test "iter" test_iter 238 | ; test "iter_keys" test_iter_keys 239 | ; test "size_and_right_keys" test_size_and_right_keys 240 | ; test "size_and_right_data" test_size_and_right_data 241 | ; test "map" test_map 242 | ; test "map_inplace" test_map_inplace 243 | ; test "insert_find_remove" test_insert_find_remove 244 | ; test "clear" test_clear 245 | ; test "mem" test_mem 246 | ; test "exists" test_exists 247 | ; test "for_all" test_for_all 248 | ; test "count" test_count 249 | ] 250 | -------------------------------------------------------------------------------- /test/test_hashtbl.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/test_immediate_array.ml: -------------------------------------------------------------------------------- 1 | module I = Compact.Immediate_array 2 | 3 | let get_size (t : _ I.t) = 1 + Obj.reachable_words (Obj.repr t) 4 | 5 | let check_int pos ~expected actual = 6 | Alcotest.(check ~pos int) "" expected actual 7 | 8 | let test_empty () = 9 | let t = I.empty in 10 | I.invariant (fun _ -> assert false) t 11 | 12 | let test_singleton () = 13 | let t = I.singleton () in 14 | I.invariant ignore t 15 | 16 | let test_length () = 17 | let check_size pos ~expected t = 18 | List.mem (get_size t) expected 19 | |> Alcotest.(check ~pos bool) 20 | (Fmt.str "Expected one of %a" Fmt.(Dump.list int) expected) 21 | true 22 | in 23 | let expected_size_of_length = function 24 | | 0 -> 25 | (* NOTE: when using [--disable-naked-pointers] (or the multicore OCaml 26 | runtime), atoms like the empty array are included in [Obj.reachable_words], 27 | making the "size" of an empty immediate array is [2] rather than [1]. *) 28 | [ 1; 2 ] 29 | | 1 -> [ 1 ] 30 | | n -> [ 2 + n ] 31 | in 32 | for i = 0 to 10 do 33 | let l = List.init i (fun _ -> ()) in 34 | let t = I.of_list l in 35 | check_int __POS__ ~expected:i (I.length t); 36 | check_size __POS__ ~expected:(expected_size_of_length i) t; 37 | I.invariant ignore t 38 | done 39 | 40 | let tests = 41 | let test name fn = Alcotest.test_case ("Immediate_array." ^ name) `Quick fn in 42 | [ test "empty" test_empty 43 | ; test "singleton" test_singleton 44 | ; test "length" test_length 45 | ] 46 | -------------------------------------------------------------------------------- /test/test_immediate_array.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/test_uniform_array.ml: -------------------------------------------------------------------------------- 1 | open! Compact_test_helpers.Import 2 | open! Compact 3 | open Uniform_array 4 | 5 | let zero_obj = Obj.repr (0 : int) 6 | let phys_equal = ( == ) 7 | let does_raise f = match ignore (f ()) with () -> false | exception _ -> true 8 | 9 | let test_create_obj_array () = 10 | let t = create_obj_array ~len:0 in 11 | assert (length t = 0) 12 | 13 | let test_create () = 14 | let str = Obj.repr "foo" in 15 | let t = create ~len:2 str in 16 | assert (phys_equal (get t 0) str); 17 | assert (phys_equal (get t 1) str) 18 | 19 | let test_float_elements () = 20 | let float = Obj.repr 3.5 in 21 | let t = create ~len:2 float in 22 | assert (Obj.tag (Obj.repr t) = 0); 23 | (* not a double array *) 24 | assert (phys_equal (get t 0) float); 25 | assert (phys_equal (get t 1) float); 26 | set t 1 (Obj.repr 4.); 27 | assert (Float.equal (Obj.obj (get t 1)) 4.) 28 | 29 | let test_empty () = 30 | assert (length empty = 0); 31 | assert (does_raise (fun () -> get empty 0)) 32 | 33 | let test_singleton () = 34 | assert (length (singleton zero_obj) = 1); 35 | assert (phys_equal (get (singleton zero_obj) 0) zero_obj); 36 | assert (does_raise (fun () -> get (singleton zero_obj) 1)); 37 | 38 | let f = 13. in 39 | let t = singleton (Obj.repr f) in 40 | invariant ignore t; 41 | assert (Obj.repr f = get t 0) 42 | 43 | (* [get], [unsafe_get], [set], [unsafe_set], [unsafe_set_assuming_currently_int] *) 44 | let test_get_and_set () = 45 | let t = create_obj_array ~len:1 in 46 | assert (length t = 1); 47 | assert (phys_equal (get t 0) zero_obj); 48 | assert (phys_equal (unsafe_get t 0) zero_obj); 49 | let one_obj = Obj.repr (1 : int) in 50 | let check_get expect = 51 | assert (phys_equal (get t 0) expect); 52 | assert (phys_equal (unsafe_get t 0) expect) 53 | in 54 | set t 0 one_obj; 55 | check_get one_obj; 56 | unsafe_set t 0 zero_obj; 57 | check_get zero_obj; 58 | unsafe_set_assuming_currently_int t 0 one_obj; 59 | check_get one_obj 60 | 61 | let test_exists () = 62 | let test arr f = of_list arr |> exists ~f in 63 | let r pos expected actual = Alcotest.(check ~pos bool) "" expected actual in 64 | r __POS__ false (test [] Fun.id); 65 | r __POS__ true (test [ true ] Fun.id); 66 | r __POS__ true (test [ false; false; false; false; true ] Fun.id); 67 | r __POS__ true (test [ 0; 1; 2; 3; 4 ] (fun i -> i mod 2 = 1)); 68 | r __POS__ false (test [ 0; 2; 4; 6; 8 ] (fun i -> i mod 2 = 1)) 69 | 70 | let test_iteri () = 71 | let test pos ~expected arr = 72 | let acc = ref [] in 73 | of_list arr |> iteri ~f:(fun i x -> acc := (i, x) :: !acc); 74 | Alcotest.(check ~pos (list (pair int char))) "" expected (List.rev !acc) 75 | in 76 | test __POS__ ~expected:[] []; 77 | test __POS__ ~expected:[ (0, 'a') ] [ 'a' ]; 78 | test __POS__ 79 | ~expected:[ (0, 'a'); (1, 'b'); (2, 'c'); (3, 'd') ] 80 | [ 'a'; 'b'; 'c'; 'd' ] 81 | 82 | let tests = 83 | let test name fn = Alcotest.test_case ("Uniform_array." ^ name) `Quick fn in 84 | [ test "create_obj_array" test_create_obj_array 85 | ; test "create" test_create 86 | ; test "float_elements" test_float_elements 87 | ; test "empty" test_empty 88 | ; test "singleton" test_singleton 89 | ; test "get_and_set" test_get_and_set 90 | ; test "exists" test_exists 91 | ; test "iteri" test_iteri 92 | ] 93 | -------------------------------------------------------------------------------- /test/test_uniform_array.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | --------------------------------------------------------------------------------