├── .github └── workflows │ └── changelog-check.yml ├── .gitignore ├── .ocamlformat ├── CHANGES.md ├── LICENSE ├── Makefile ├── README.md ├── bench ├── bench.ml ├── bench.mli ├── common.ml ├── dune └── replay.ml ├── dune ├── dune-project ├── index-bench.opam ├── index.opam ├── src ├── cache.ml ├── checks.ml ├── checks.mli ├── checks_intf.ml ├── data.ml ├── dune ├── fan.ml ├── fan.mli ├── import.ml ├── index.ml ├── index.mli ├── index_intf.ml ├── io.ml ├── io.mli ├── io_array.ml ├── io_array.mli ├── io_intf.ml ├── layout.ml ├── layout.mli ├── log.ml ├── log.mli ├── log_file.ml ├── log_file.mli ├── platform.ml ├── search.ml ├── search.mli ├── search_intf.ml ├── small_list.ml ├── small_list.mli ├── stats.ml ├── stats.mli └── unix │ ├── buffer.ml │ ├── buffer.mli │ ├── dune │ ├── import.ml │ ├── index_unix.ml │ ├── index_unix.mli │ ├── pread.c │ ├── pwrite.c │ ├── raw.ml │ ├── raw.mli │ ├── syscalls.ml │ └── syscalls.mli └── test ├── cache.ml ├── cli ├── dune ├── generate.ml ├── index-fsck-help.txt ├── index_fsck.ml └── stat.t │ └── run.t ├── dune ├── fuzz ├── fan │ ├── dune │ ├── main.ml │ └── main.mli └── input │ └── 000 ├── main.ml ├── search.ml └── unix ├── common.ml ├── common.mli ├── dune ├── flush_callback.ml ├── flush_callback.mli ├── force_merge.ml ├── force_merge.mli ├── io_array.ml ├── io_array.mli ├── log.ml ├── log.mli ├── main.ml ├── main.mli └── test_lru.ml /.github/workflows/changelog-check.yml: -------------------------------------------------------------------------------- 1 | name: Changelog check 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | types: [ opened, synchronize, reopened, labeled, unlabeled ] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v1 14 | 15 | - name: git diff 16 | if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-changelog-needed') }} 17 | env: 18 | BASE_REF: ${{ github.event.pull_request.base.ref }} 19 | run: | 20 | ! git diff --exit-code origin/$BASE_REF -- CHANGES.md 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _bench/ 2 | _build/ 3 | _metrics/ 4 | _opam 5 | _tests/ 6 | *.install 7 | *.merlin 8 | **/.DS_Store 9 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version = 0.26.1 2 | profile = conventional 3 | 4 | ocaml-version = 4.08 5 | break-infix = fit-or-vertical 6 | parse-docstrings = true 7 | indicate-multiline-delimiters = no 8 | module-item-spacing = compact 9 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Unreleased 2 | 3 | ## Changed 4 | 5 | - Lock files are now opened with O_CLOEXEC flag (#394, @vect0r-vicall) 6 | - Update to cmdliner.1.1.0 (#382, @MisterDA) 7 | - Mirage support: optional dependency to unix (#396, @art-w) 8 | 9 | # 1.6.2 (2023-06-06) 10 | 11 | ## Changed 12 | 13 | - Update for compatibility with `mtime.2.0.0`, which is now the lower bounds 14 | (#392, @patricoferris) 15 | 16 | # 1.6.1 (2022-06-08) 17 | 18 | ## Added 19 | 20 | - Support all version of cmdliner (#386) 21 | 22 | ## Fixed 23 | 24 | - Make `index-bench`'s `bench` executable private, avoiding executable 25 | name collisions in opam-monorepo projects. 
(#389, @NathanReb) 26 | 27 | # 1.6.0 (2022-02-12) 28 | 29 | ## Added 30 | 31 | - Added a `Raw.Header_prefix` function, for use by libraries that share the 32 | file format used by `index.unix`. (#378) 33 | 34 | # 1.5.0 (2021-11-09) 35 | 36 | ## Changed 37 | 38 | - The benchmarks now use `tezos-base58` instead of `tezos-context-hash` (#367) 39 | 40 | - Add an LRU to cache the result of `Index.find` operations. The default LRU 41 | capacity is 30_000 entries. (#366) 42 | 43 | # 1.4.2 (2021-10-15) 44 | 45 | ## Fixed 46 | 47 | - Fix stats recording in `Raw.unsafe_write` (#351) 48 | 49 | ## Changed 50 | 51 | - Changed the implementation of the write-ahead log to significantly reduce its 52 | memory usage (at the cost of some additional disk IO). (#355) 53 | 54 | # 1.4.1 (2021-07-16) 55 | 56 | ## Fixed 57 | 58 | - Proper cleaning of merge file descriptors when aborting a merge (#326) 59 | 60 | - Recover from crash of the merge thread. When this happen, the main thread can 61 | continue to run and will need to recover from the crash before doing a new 62 | merge. This fixes a critical issue which might cause data loss (#339) 63 | 64 | - Make sure that no entries can disappear for read-only instances during 65 | log_async recovery (#338) 66 | 67 | # 1.4.0 (2021-06-16) 68 | 69 | ## Fixed 70 | 71 | - Fixed a crash-consistency bug due to a potential flush of an incomplete entry 72 | to disk. Entries are now flushed as complete strings. (#301) 73 | 74 | - Attempt to recover from `log_async` invariant violations during an explicit 75 | sync operation, rather than failing immediately. (#329) 76 | 77 | ## Changed 78 | 79 | - Release overly defensive warnings occuring when pre-fetching the disk. (#322) 80 | 81 | - Optimised the in-memory representation of index handles and intermediate 82 | buffers, resulting in a significant reduction in memory use. (#273, #279) 83 | 84 | - Benches are now executed 3 times and a new option `nb-exec` has been added (#292) 85 | 86 | - `Index.Make` now requires an implementation of a monotonic time source. 87 | (#321) 88 | 89 | - The `Index.Make` functor now takes a single `Platform` argument containing 90 | all system dependencies (i.e. `IO`, `Clock`, `Semaphore` and `Thread`). The 91 | `Platform` module holds the necessary types for these modules. (#321, #330) 92 | 93 | ## Added 94 | 95 | - Added benchmarks that replay a trace of index operations. (#300) 96 | 97 | - Log reporter for the benches 98 | 99 | # 1.3.1 (2021-04-29) 100 | 101 | ## Fixed 102 | 103 | - Reduce allocations during merge (#274, #277) 104 | 105 | - Protect concurrent syncs with a lock (#309) 106 | 107 | - Fixed a performance issue for `Index.sync` when there is a blocking merge in 108 | progress: the `log_async` file was not cached properly and fully reloaded 109 | from disk every time. (#310) 110 | 111 | - Release the merge lock if a merge raises an exception (#312) 112 | 113 | - Added fsync after `Index.clear` to signal more quickly to read-only instances 114 | than something has changed in the file (#308) 115 | 116 | ## Changed 117 | 118 | - Specialise `IO.v` to create read-only or read-write instances. (#291) 119 | 120 | - `clear` removes the files on disks and opens new ones containing only the 121 | header. (#288, #307, #317) 122 | 123 | # 1.3.0 (2021-01-05) 124 | 125 | ## Added 126 | 127 | - Added `flush_callback` parameter to the creation of a store, to register 128 | a callback before a flush. This callback can be temporarily disabled by 129 | `~no_callback:()` to `flush`. 
(#189, #216) 130 | 131 | - Added `Stats.merge_durations` to list the duration of the last 10 merges. 132 | (#193) 133 | 134 | - Added `is_merging` to detect if a merge is running. (#192) 135 | 136 | - New `IO.Header.{get,set}` functions to read and write the file headers 137 | atomically (#175, #204, @icristescu, @CraigFe, @samoht) 138 | 139 | - Added a `throttle` configuration option to select the strategy to use 140 | when the cache are full and an async merge is already in progress. The 141 | current behavior is the (default) `` `Block_writes`` strategy. The new 142 | `` `Overcommit_memory`` does not block but continue to fill the cache instead. 143 | (#209, @samoht) 144 | 145 | - Add `IO.exists` obligation for IO implementations, to be used for lazy 146 | creation of IO instances. (#233, @CraigFe) 147 | 148 | - `Index.close` now takes an `~immediately:()` argument. When passed, this 149 | causes `close` to terminate any ongoing asynchronous merge operation, rather 150 | than waiting for it to finish. (#185, #234) 151 | 152 | - Added `Index.Checks.cli`, which provides offline integrity checking of Index 153 | stores. (#236) 154 | 155 | - `Index.replace` now takes a `~overcommit` argument to postpone a merge. (#253) 156 | 157 | - `Index.merge` is now part of the public API. (#253) 158 | 159 | - `Index.try_merge` is now part of the public API. `try_merge' is a no-op if 160 | the number of entries in the write-ahead log is smaller than `log_size`, 161 | otherwise it's `merge'. (#253 @samoht) 162 | 163 | ## Changed 164 | 165 | - `sync` has to be called by the read-only instance to synchronise with the 166 | files on disk. (#175) 167 | - Caching of `Index` instances is now explicit: `Index.Make` requires a cache 168 | implementation, and `Index.v` may be passed a cache to be used for instance 169 | sharing. The default behaviour is _not_ to share instances. (#188) 170 | 171 | ## Fixed 172 | 173 | - Added values after a clear are found by read-only instances. (#168) 174 | - Fix a race between `merge` and `sync` (#203, @samoht, @CraigFe) 175 | - Fix a potential loss of data if a crash occurs at the end of a merge (#232) 176 | - Fix `Index.iter` to only iterate once over elements persisted on the disk 177 | (#260, @samoht, @icristescu) 178 | 179 | # 1.2.1 (2020-06-24) 180 | 181 | ## Added 182 | 183 | - Added `Index_unix.Syscalls`, a module exposing various Unix bindings for 184 | interacting with file-systems. (#176) 185 | 186 | ## Fixed 187 | 188 | - Fail when `Index_unix.IO` file version number is not as expected. (#178) 189 | 190 | - Fixed creation of an index when an empty `data` file exists. (#173) 191 | 192 | # 1.2.0 (2020-02-25) 193 | 194 | ## Added 195 | 196 | - Added `filter`, removing bindings depending on a predicate (#165) 197 | 198 | ## Changed 199 | 200 | - Parameterise `Index.Make` over arbitrary mutex and thread implementations (and 201 | remove the obligation for `IO` to provide this functionality). (#160, #161) 202 | 203 | # 1.1.0 (2019-12-21) 204 | 205 | ## Changed 206 | 207 | - Improve the cooperativeness of the `merge` operation, allowing concurrent read 208 | operations to share CPU resources with ongoing merges. (#152) 209 | 210 | - Improve speed of read operations for read-only instances. (#141) 211 | 212 | ## Removed 213 | 214 | - Remove `force_merge` from `Index.S`, due to difficulties with guaranteeing 215 | sensible semantics to this function under MRSW access patterns. 
  (#147, #150)

# 1.0.1 (2019-11-29)

## Added

- Provide a better CLI interface for the benchmarks (#130, #133)

## Fixed

- Fix a segmentation fault when using musl <= 1.1.20 by not allocating 64k-byte
  buffers on the thread stack (#132)
- Do not call `pwrite` with `len=0` (#131)
- Clear `log.mem` on `close` (#135)
- Load `log_async` on startup (#136)

# 1.0.0 (2019-11-14)

First stable release.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License

Copyright (c) 2019 Clément Pascutto, Thomas Gazagnaire

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: all clean test doc examples bench

all:
	dune build

test:
	dune runtest

examples:
	dune build @examples

clean:
	dune clean

doc:
	dune build @doc

bench:
	@dune exec -- ./bench/bench.exe --json --minimal --nb-exec 3 --verbosity quiet
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Index - a platform-agnostic multi-level index

[![OCaml-CI Build Status](https://img.shields.io/endpoint?url=https%3A%2F%2Fci.ocamllabs.io%2Fbadge%2Fmirage%2Findex%2Fmain&logo=ocaml)](https://ci.ocamllabs.io/github/mirage/index)

Index is a scalable implementation of persistent indices in OCaml.

It takes an arbitrary IO implementation and user-supplied content types
and supplies a standard key-value interface for persistent storage.

Index supports instance sharing:
each OCaml runtime can share a common singleton instance.

Index supports multiple-reader/single-writer access.
Concurrent access is safely managed using lock files.
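A minimal usage sketch of the key-value interface described above, built against
the `index` and `index.unix` libraries. The fixed key/value lengths, the store
path `/tmp/index-demo` and the example bindings are illustrative assumptions,
not taken from this repository:

```ocaml
(* Sketch only: sizes, path and bindings below are made up for illustration. *)
module Key = Index.Key.String_fixed (struct let length = 8 end)
module Value = Index.Value.String_fixed (struct let length = 16 end)
module I = Index_unix.Make (Key) (Value) (Index.Cache.Unbounded)

let () =
  (* Create a fresh read-write store with a write-ahead log of 1_000 entries. *)
  let t = I.v ~fresh:true ~readonly:false ~log_size:1_000 "/tmp/index-demo" in
  (* Keys and values must be exactly the declared fixed number of bytes. *)
  I.replace t "card0001" "hello index 0001";
  (match I.find t "card0001" with
   | v -> print_endline v
   | exception Not_found -> print_endline "not found");
  I.flush t;
  I.close t
```

Note that `find` raises `Not_found` for absent keys, and encoding a key or
value whose size differs from the declared fixed length is an error.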
15 | -------------------------------------------------------------------------------- /bench/bench.mli: -------------------------------------------------------------------------------- 1 | (* left empty on purpose *) 2 | -------------------------------------------------------------------------------- /bench/common.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2018-2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | *) 16 | 17 | module Seq = struct 18 | include Seq 19 | 20 | (* Backported from ocaml 4.11 *) 21 | let rec unfold f u () = 22 | match f u with None -> Nil | Some (x, u') -> Cons (x, unfold f u') 23 | end 24 | 25 | let with_timer f = 26 | let started = Mtime_clock.counter () in 27 | let a = f () in 28 | let duration = Mtime_clock.count started in 29 | (a, duration) 30 | 31 | let with_progress_bar ~message ~n ~unit = 32 | let open Progress in 33 | let bar = 34 | let total = Int64.of_int n in 35 | let open Line.Using_int64 in 36 | list 37 | [ 38 | const message; 39 | count_to total; 40 | const unit; 41 | bar total; 42 | percentage_of total; 43 | ] 44 | in 45 | with_reporter ~config:(Config.v ~max_width:(Some 79) ()) bar 46 | 47 | module FSHelper = struct 48 | let file f = 49 | try (Unix.stat f).st_size with Unix.Unix_error (Unix.ENOENT, _, _) -> 0 50 | 51 | let index root = 52 | let index_dir = Filename.concat root "index" in 53 | let a = file (Filename.concat index_dir "data") in 54 | let b = file (Filename.concat index_dir "log") in 55 | let c = file (Filename.concat index_dir "log_async") in 56 | (a + b + c) / 1024 / 1024 57 | 58 | let size root = index root 59 | let get_size root = size root 60 | 61 | let rm_dir root = 62 | if Sys.file_exists root then ( 63 | let cmd = Printf.sprintf "rm -rf %s" root in 64 | Logs.info (fun l -> l "exec: %s" cmd); 65 | ignore (Sys.command cmd : int)) 66 | end 67 | -------------------------------------------------------------------------------- /bench/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name common) 3 | (modules common) 4 | (libraries progress logs fmt mtime mtime.clock.os unix)) 5 | 6 | (executable 7 | (name bench) 8 | (modules bench) 9 | (preprocess 10 | (pps ppx_repr ppx_deriving_yojson)) 11 | (libraries index index.unix cmdliner metrics metrics-unix yojson fmt re 12 | stdlib-shims common mtime mtime.clock.os unix)) 13 | 14 | (alias 15 | (name bench) 16 | (deps ./bench.exe)) 17 | 18 | (rule 19 | (alias runbench) 20 | (action 21 | (run ./bench.exe))) 22 | 23 | (executable 24 | (name replay) 25 | (modules replay) 26 | (preprocess 27 | (pps ppx_repr)) 28 | (libraries index index.unix unix cmdliner logs repr ppx_repr common 29 | tezos-base58 optint fmt rusage mtime mtime.clock.os digestif)) 30 
| 31 | ;; Require the above executables to compile during tests 32 | 33 | (rule 34 | (alias runtest) 35 | (package index-bench) 36 | (deps bench.exe replay.exe) 37 | (action (progn))) 38 | -------------------------------------------------------------------------------- /bench/replay.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2018-2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | *) 16 | 17 | open Common 18 | module Int63 = Optint.Int63 19 | 20 | module Encoding = struct 21 | module Hash : sig 22 | type t 23 | 24 | val t : t Repr.t 25 | val short_hash : t -> int 26 | val hash_size : int 27 | val hash : string Digestif.iter -> t 28 | end = struct 29 | module H = Digestif.Make_BLAKE2B (struct 30 | let digest_size = 32 31 | end) 32 | 33 | type t = H.t 34 | 35 | let prefix = "\079\199" (* Co(52) *) 36 | 37 | let pp ppf t = 38 | let s = H.to_raw_string t in 39 | Tezos_base58.pp ppf (Tezos_base58.encode ~prefix s) 40 | 41 | let of_b58 : string -> (t, [ `Msg of string ]) result = 42 | fun x -> 43 | match Tezos_base58.decode ~prefix (Base58 x) with 44 | | Some x -> Ok (H.of_raw_string x) 45 | | None -> Error (`Msg "Failed to read b58check_encoding data") 46 | 47 | let short_hash_string = Repr.(unstage (short_hash string)) 48 | let short_hash ?seed t = short_hash_string ?seed (H.to_raw_string t) 49 | 50 | let t : t Repr.t = 51 | Repr.map ~pp ~of_string:of_b58 52 | Repr.(string_of (`Fixed H.digest_size)) 53 | ~short_hash H.of_raw_string H.to_raw_string 54 | 55 | let short_hash = 56 | let f = short_hash_string ?seed:None in 57 | fun t -> f (H.to_raw_string t) 58 | 59 | let hash_size = H.digest_size 60 | let hash = H.digesti_string 61 | end 62 | 63 | module Key : Index.Key.S with type t = Hash.t = struct 64 | type t = Hash.t [@@deriving repr] 65 | 66 | let hash = Repr.(unstage (short_hash Hash.t)) ?seed:None 67 | let hash_size = 30 68 | let equal = Repr.(unstage (equal Hash.t)) 69 | let encode = Repr.(unstage (to_bin_string Hash.t)) 70 | let encoded_size = Hash.hash_size 71 | let decode_bin = Repr.(unstage (decode_bin Hash.t)) 72 | let decode s off = decode_bin s (ref off) 73 | end 74 | 75 | module Val = struct 76 | type t = Int63.t * int * char [@@deriving repr] 77 | 78 | let to_bin_string = Repr.(unstage (to_bin_string (triple int63 int32 char))) 79 | let encode (off, len, kind) = to_bin_string (off, Int32.of_int len, kind) 80 | let decode_bin = Repr.(unstage (decode_bin (triple int63 int32 char))) 81 | 82 | let decode s off = 83 | let off, len, kind = decode_bin s (ref off) in 84 | (off, Int32.to_int len, kind) 85 | 86 | let encoded_size = (64 / 8) + (32 / 8) + 1 87 | end 88 | end 89 | 90 | let decoded_seq_of_encoded_chan_with_prefixes : 91 | 'a. 
'a Repr.ty -> in_channel -> 'a Seq.t = 92 | fun repr channel -> 93 | let decode_bin = Repr.decode_bin repr |> Repr.unstage in 94 | let decode_prefix = Repr.(decode_bin int32 |> unstage) in 95 | let produce_op () = 96 | try 97 | (* First read the prefix *) 98 | let prefix = really_input_string channel 4 in 99 | let pos_ref = ref 0 in 100 | let len = decode_prefix prefix pos_ref in 101 | assert (!pos_ref = 4); 102 | let len = Int32.to_int len in 103 | (* Then read the repr *) 104 | pos_ref := 0; 105 | let content = really_input_string channel len in 106 | let op = decode_bin content pos_ref in 107 | assert (!pos_ref = len); 108 | Some (op, ()) 109 | with End_of_file -> None 110 | in 111 | Seq.unfold produce_op () 112 | 113 | type config = { nb_ops : int; trace_data_file : string; root : string } 114 | 115 | module Trace = struct 116 | type key = string [@@deriving repr] 117 | 118 | type op = 119 | | Clear 120 | | Flush 121 | | Mem of key * bool 122 | | Find of key * bool 123 | | Ro_mem of key * bool 124 | | Ro_find of key * bool 125 | | Add of key * (Int63.t * int * char) 126 | [@@deriving repr] 127 | 128 | let open_ops_sequence path : op Seq.t = 129 | let chan = open_in_bin path in 130 | decoded_seq_of_encoded_chan_with_prefixes op_t chan 131 | end 132 | 133 | module Benchmark = struct 134 | type result = { time : Mtime.Span.t; size : int } 135 | 136 | let run config f = 137 | let res, time = with_timer f in 138 | let size = FSHelper.get_size config.root in 139 | ({ time; size }, res) 140 | 141 | let get_maxrss () = 142 | let usage = Rusage.(get Self) in 143 | let ( / ) = Int64.div in 144 | usage.maxrss / 1024L / 1024L 145 | 146 | let pp_results ppf result = 147 | Format.fprintf ppf "Total time: %a; Size on disk: %d M; Maxrss: %Ld" 148 | Mtime.Span.pp result.time result.size (get_maxrss ()) 149 | end 150 | 151 | module type S = sig 152 | include Index.S 153 | 154 | val v : string -> t 155 | val close : t -> unit 156 | end 157 | 158 | module Index_lib = Index 159 | 160 | module Index = struct 161 | module Index = 162 | Index_unix.Make (Encoding.Key) (Encoding.Val) (Index.Cache.Unbounded) 163 | 164 | include Index 165 | 166 | let cache = Index.empty_cache () 167 | let v root = Index.v ~cache ~readonly:false ~fresh:true ~log_size:500_000 root 168 | let close t = Index.close t 169 | end 170 | 171 | let hash_of_string = Repr.of_string Encoding.Hash.t 172 | 173 | module Bench_suite 174 | (Store : S 175 | with type key = Encoding.Hash.t 176 | and type value = Int63.t * int * char) = 177 | struct 178 | let key_to_hash k = 179 | match hash_of_string k with 180 | | Ok k -> k 181 | | Error (`Msg m) -> Fmt.failwith "error decoding hash %s" m 182 | 183 | let add_operation store op_seq nb_ops () = 184 | with_progress_bar ~message:"Replaying trace" ~n:nb_ops ~unit:"operations" 185 | @@ fun progress -> 186 | let rec aux op_seq i = 187 | if i >= nb_ops then i 188 | else 189 | match op_seq () with 190 | | Seq.Nil -> i 191 | | Cons (op, op_seq) -> 192 | let () = 193 | match op with 194 | | Trace.Flush -> Store.flush store 195 | | Clear -> Store.clear store 196 | | Mem (k, b) -> 197 | let k = key_to_hash k in 198 | let b' = Store.mem store k in 199 | if b <> b' then 200 | Fmt.failwith "Operation mem %a expected %b got %b" 201 | (Repr.pp Encoding.Key.t) k b b' 202 | | Find (k, b) -> 203 | let k = key_to_hash k in 204 | let b' = 205 | match Store.find store k with 206 | | (_ : Store.value) -> true 207 | | exception Not_found -> false 208 | in 209 | if b <> b' then 210 | Fmt.failwith "Operation find %a expected 
%b got %b" 211 | (Repr.pp Encoding.Key.t) k b b' 212 | | Add (k, v) -> 213 | let k = key_to_hash k in 214 | Store.replace store k v 215 | | Ro_mem _ | Ro_find _ -> () 216 | in 217 | progress Int64.one; 218 | aux op_seq (i + 1) 219 | in 220 | aux op_seq 0 221 | 222 | let run_read_trace config = 223 | let op_seq = Trace.open_ops_sequence config.trace_data_file in 224 | let store = Store.v config.root in 225 | 226 | let result, nb_ops = 227 | add_operation store op_seq config.nb_ops |> Benchmark.run config 228 | in 229 | 230 | let () = Store.close store in 231 | 232 | fun ppf -> 233 | Format.fprintf ppf "Tezos trace for %d nb_ops @\nResults: @\n%a@\n" nb_ops 234 | Benchmark.pp_results result 235 | end 236 | 237 | module Bench = Bench_suite (Index) 238 | 239 | let main nb_ops trace_data_file = 240 | Printexc.record_backtrace true; 241 | Random.self_init (); 242 | let root = "_bench_replay" in 243 | FSHelper.rm_dir root; 244 | let config = { trace_data_file; root; nb_ops } in 245 | let results = Bench.run_read_trace config in 246 | Logs.app (fun l -> l "%t@." results) 247 | 248 | open Cmdliner 249 | 250 | let nb_ops = 251 | let doc = 252 | Arg.info ~doc:"Number of operations to read from trace." [ "ops" ] 253 | in 254 | Arg.(value @@ opt int 2 doc) 255 | 256 | let trace_data_file = 257 | let doc = 258 | Arg.info ~docv:"PATH" ~doc:"Trace of Tezos operations to be replayed." [] 259 | in 260 | Arg.(required @@ pos 0 (some string) None doc) 261 | 262 | let main_term = 263 | Term.( 264 | const (fun () -> main) 265 | $ Index_lib.Private.Logs.setup_term (module Mtime_clock) (module Fmt_tty) 266 | $ nb_ops 267 | $ trace_data_file) 268 | 269 | let () = 270 | let man = 271 | [ 272 | `S "DESCRIPTION"; 273 | `P 274 | "Benchmarks for index operations. Requires traces of operations\n\ 275 | \ download them (`wget trace.repr`) from: "; 276 | `P 277 | "Trace with $(b,401,253,899) operations \ 278 | http://data.tarides.com/index/trace_401253899.repr"; 279 | `P 280 | "Trace with $(b,544,766,125) operations \ 281 | http://data.tarides.com/index/trace_544766125.repr"; 282 | ] 283 | in 284 | let info = 285 | Cmd.info ~man 286 | ~doc:"Replay index operations done by the bootstrapping of a tezos node" 287 | "replay-index" 288 | in 289 | exit @@ Cmd.eval (Cmd.v info main_term) 290 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (vendored_dirs vendors) 2 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.7) 2 | (name index) 3 | (cram enable) 4 | (implicit_transitive_deps false) 5 | -------------------------------------------------------------------------------- /index-bench.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | synopsis: "Index benchmarking suite" 3 | maintainer: "Clement Pascutto" 4 | authors: ["Clement Pascutto" "Thomas Gazagnaire" "Ioana Cristescu"] 5 | license: "MIT" 6 | homepage: "https://github.com/mirage/index" 7 | bug-reports: "https://github.com/mirage/index/issues" 8 | depends: [ 9 | "ocaml" {>= "4.03.0"} 10 | "cmdliner" {>= "1.1.0"} 11 | "dune" {>= "2.7.0"} 12 | "fmt" 13 | "index" {= version} 14 | "metrics" 15 | "metrics-unix" 16 | "ppx_deriving_yojson" 17 | "re" {>= "1.9.0"} 18 | "stdlib-shims" 19 | "yojson" 20 | "ppx_repr" 21 | "mtime" {>= "2.0.0"} 22 | "logs" {>= 
"0.7.0"} 23 | "progress" {>= "0.2.1"} 24 | "tezos-base58" {>= "1.0.0" & with-test} 25 | "digestif" {>= "0.7" & with-test} 26 | "optint" {>= "0.1.0" & with-test} 27 | "repr" {>= "0.2.1" & with-test} 28 | "rusage" {>= "1.0.0" & with-test} 29 | ] 30 | conflicts: [ 31 | "result" {< "1.5"} 32 | ] 33 | build: [ 34 | ["dune" "subst"] {dev} 35 | ["dune" "build" "-p" name "-j" jobs] 36 | ["dune" "runtest" "-p" name "-j" jobs] {with-test} 37 | ] 38 | dev-repo: "git+https://github.com/mirage/index.git" 39 | -------------------------------------------------------------------------------- /index.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Clement Pascutto" 3 | authors: [ 4 | "Craig Ferguson " 5 | "Thomas Gazagnaire " 6 | "Ioana Cristescu " 7 | "Clément Pascutto " 8 | ] 9 | license: "MIT" 10 | homepage: "https://github.com/mirage/index" 11 | bug-reports: "https://github.com/mirage/index/issues" 12 | dev-repo: "git+https://github.com/mirage/index.git" 13 | doc: "https://mirage.github.io/index/" 14 | 15 | build: [ 16 | ["dune" "subst"] {dev} 17 | ["dune" "build" "-p" name "-j" jobs] 18 | ["dune" "runtest" "-p" name "-j" jobs] {with-test} 19 | ] 20 | 21 | depends: [ 22 | "ocaml" {>= "4.08.0"} 23 | "dune" {>= "2.7.0"} 24 | "optint" {>= "0.1.0"} 25 | "repr" {>= "0.6.0"} 26 | "ppx_repr" 27 | "fmt" {>= "0.8.5"} 28 | "logs" {>= "0.7.0"} 29 | "mtime" {>= "2.0.0"} 30 | "cmdliner" {>= "1.1.0"} 31 | "progress" {>= "0.2.1"} 32 | "semaphore-compat" {>= "1.0.1"} 33 | "jsonm" 34 | "stdlib-shims" 35 | "alcotest" {with-test & >= "1.7.0"} 36 | "crowbar" {with-test & >= "0.2"} 37 | "re" {with-test} 38 | "lru" {>= "0.3.0"} 39 | ] 40 | synopsis: "A platform-agnostic multi-level index for OCaml" 41 | description:""" 42 | Index is a scalable implementation of persistent indices in OCaml. 43 | 44 | It takes an arbitrary IO implementation and user-supplied content 45 | types and supplies a standard key-value interface for persistent 46 | storage. Index provides instance sharing: each OCaml 47 | run-time can share a common singleton instance. 48 | 49 | Index supports multiple-reader/single-writer access. Concurrent access 50 | is safely managed using lock files.""" 51 | -------------------------------------------------------------------------------- /src/cache.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | module type S = sig 21 | type ('k, 'v) t 22 | (** A cache of values of type ['v], indexed by keys of type ['k]. *) 23 | 24 | val create : unit -> (_, _) t 25 | val add : ('k, 'v) t -> 'k -> 'v -> unit 26 | val find : ('k, 'v) t -> 'k -> 'v option 27 | val remove : ('k, _) t -> 'k -> unit 28 | end 29 | 30 | (** Cache implementation that always misses. 
*) 31 | module Noop : S = struct 32 | type (_, _) t = unit 33 | 34 | let create () = () 35 | let add () _ _ = () 36 | let find () _ = None 37 | let remove () _ = () 38 | end 39 | 40 | (** Cache implementation that always finds previously-added values, and grows 41 | indefinitely. *) 42 | module Unbounded : S = struct 43 | include Hashtbl 44 | 45 | let create () = create 0 46 | let find = find_opt 47 | end 48 | -------------------------------------------------------------------------------- /src/checks.ml: -------------------------------------------------------------------------------- 1 | include Checks_intf 2 | open! Import 3 | 4 | module Make (K : Data.Key) (V : Data.Value) (Platform : Platform_args) = struct 5 | open Platform 6 | module Entry = Data.Entry.Make (K) (V) 7 | 8 | module IO = struct 9 | include Io.Extend (IO) 10 | 11 | (** This module never makes persistent changes *) 12 | let v = v_readonly 13 | 14 | let page_size = Int63.of_int (Entry.encoded_size * 1000) 15 | 16 | let iter ?min ?max f = 17 | iter ?min ?max ~page_size (fun ~off ~buf ~buf_off -> 18 | let entry = Entry.decode buf buf_off in 19 | f off entry; 20 | Entry.encoded_size) 21 | 22 | let read_entry io off = 23 | let buf = Bytes.create Entry.encoded_size in 24 | let (_ : int) = IO.read io ~off ~len:Entry.encoded_size buf in 25 | Entry.decode (Bytes.unsafe_to_string buf) 0 26 | end 27 | 28 | type size = Bytes of int63 [@@deriving repr] 29 | 30 | let size_t = 31 | let pp = Fmt.using (fun (Bytes b) -> b) Progress.Units.Bytes.pp_int63 in 32 | Repr.like 33 | ~json: 34 | ( (fun e t -> 35 | ignore @@ Jsonm.encode e (`Lexeme (`String (Fmt.to_to_string pp t)))), 36 | fun _ -> assert false ) 37 | size_t 38 | 39 | let path = 40 | let open Cmdliner.Arg in 41 | required 42 | @@ pos 0 (some string) None 43 | @@ info ~doc:"Path to the Index store on disk" ~docv:"PATH" [] 44 | 45 | module Stat = struct 46 | type io = { 47 | size : size; 48 | offset : int64; 49 | generation : int64; 50 | fanout_size : size; 51 | } 52 | [@@deriving repr] 53 | 54 | type files = { 55 | data : io option; 56 | log : io option; 57 | log_async : io option; 58 | merge : io option; 59 | lock : string option; 60 | } 61 | [@@deriving repr] 62 | 63 | type t = { entry_size : size; files : files } [@@deriving repr] 64 | 65 | let with_io : type a. 
string -> (IO.t -> a) -> a option = 66 | fun path f -> 67 | match IO.v path with 68 | | Error `No_file_on_disk -> None 69 | | Ok io -> 70 | let a = f io in 71 | IO.close io; 72 | Some a 73 | 74 | let io path = 75 | with_io path @@ fun io -> 76 | let IO.Header.{ offset; generation } = IO.Header.get io in 77 | let fanout_size = Bytes (IO.get_fanout_size io) in 78 | let size = Bytes (IO.size io |> Int63.of_int) in 79 | let offset = Int63.to_int64 offset in 80 | let generation = Int63.to_int64 generation in 81 | { size; offset; generation; fanout_size } 82 | 83 | let run ~root = 84 | Logs.app (fun f -> f "Getting statistics for store: `%s'@," root); 85 | let data = io (Layout.data ~root) in 86 | let log = io (Layout.log ~root) in 87 | let log_async = io (Layout.log_async ~root) in 88 | let merge = io (Layout.merge ~root) in 89 | let lock = 90 | IO.Lock.pp_dump (Layout.lock ~root) 91 | |> Option.map (fun f -> 92 | f Format.str_formatter; 93 | Format.flush_str_formatter ()) 94 | in 95 | let entry_size = K.encoded_size + V.encoded_size |> Int63.of_int in 96 | { 97 | entry_size = Bytes entry_size; 98 | files = { data; log; log_async; merge; lock }; 99 | } 100 | |> Repr.pp_json ~minify:false t Fmt.stdout 101 | 102 | let term = Cmdliner.Term.(const (fun root () -> run ~root) $ path) 103 | end 104 | 105 | module Integrity_check = struct 106 | let encoded_sizeL = Int63.of_int Entry.encoded_size 107 | let encoded_sizeLd = Int64.of_int Entry.encoded_size 108 | 109 | let print_window_around central_offset io context = 110 | let window_size = (2 * context) + 1 in 111 | List.init window_size (fun i -> 112 | let index = i - context in 113 | Int63.(add central_offset (mul (of_int index) encoded_sizeL))) 114 | |> List.filter (fun off -> Int63.compare off Int63.zero >= 0) 115 | |> List.map (fun off -> 116 | let entry = IO.read_entry io off in 117 | let highlight = 118 | if off = central_offset then Fmt.(styled (`Fg `Red)) else Fun.id 119 | in 120 | highlight (fun ppf () -> (Repr.pp Entry.t) ppf entry)) 121 | |> Fmt.(concat ~sep:cut) 122 | 123 | let run ~root = 124 | let context = 2 in 125 | match IO.v (Layout.data ~root) with 126 | | Error `No_file_on_disk -> Fmt.failwith "No data file in %s" root 127 | | Ok io -> 128 | let io_offset = IO.offset io in 129 | if Int63.compare io_offset encoded_sizeL < 0 then ( 130 | if not (Int63.equal io_offset Int63.zero) then 131 | Fmt.failwith 132 | "Non-integer number of entries in file: { offset = %a; \ 133 | entry_size = %d }" 134 | Int63.pp io_offset Entry.encoded_size) 135 | else 136 | let first_entry = IO.read_entry io Int63.zero in 137 | let previous = ref first_entry in 138 | Format.eprintf "\n%!"; 139 | Progress.( 140 | with_reporter 141 | (counter ~style:`UTF8 ~message:"Scanning store for faults" 142 | ~pp:Progress.Units.Bytes.of_int64 (Int63.to_int64 io_offset))) 143 | @@ fun report -> 144 | io 145 | |> IO.iter ~min:encoded_sizeL (fun off e -> 146 | report encoded_sizeLd; 147 | if !previous.key_hash > e.key_hash then 148 | Log.err (fun f -> 149 | f "Found non-monotonic region:@,%a@," 150 | (print_window_around off io context) 151 | ()); 152 | previous := e) 153 | 154 | let term = Cmdliner.Term.(const (fun root () -> run ~root) $ path) 155 | end 156 | 157 | module Cli = struct 158 | open Cmdliner 159 | 160 | let reporter = 161 | let pp_header ppf = function 162 | | Logs.App, header -> 163 | Fmt.(styled `Bold (styled (`Fg `Cyan) string)) ppf ">> "; 164 | Fmt.(option string) ppf header 165 | | _, header -> Fmt.(option string) ppf header 166 | in 167 | 
Logs_fmt.reporter ~pp_header () 168 | 169 | let main () : empty = 170 | let default = Term.(ret (const (`Help (`Auto, None)))) in 171 | let info = 172 | let doc = "Check and repair Index data-stores." in 173 | Cmd.info ~doc "index-fsck" 174 | in 175 | let commands = 176 | [ 177 | ( Term.( 178 | Stat.term 179 | $ Log.setup_term ~reporter (module Clock) (module Fmt_tty)), 180 | Cmd.info ~doc:"Print high-level statistics about the store." "stat" 181 | ); 182 | ( Term.( 183 | Integrity_check.term 184 | $ Log.setup_term ~reporter (module Clock) (module Fmt_tty)), 185 | Cmd.info 186 | ~doc:"Search the store for integrity faults and corruption." 187 | "integrity-check" ); 188 | ] 189 | in 190 | let commands = List.map (fun (term, info) -> Cmd.v info term) commands in 191 | exit @@ Cmd.eval (Cmd.group ~default info commands) 192 | end 193 | 194 | let cli = Cli.main 195 | end 196 | -------------------------------------------------------------------------------- /src/checks.mli: -------------------------------------------------------------------------------- 1 | (** Offline integrity checking and recovery for Index stores. *) 2 | 3 | include Checks_intf.Checks 4 | (** @inline *) 5 | -------------------------------------------------------------------------------- /src/checks_intf.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | type empty = | 4 | 5 | module type S = sig 6 | module Stat : sig 7 | val run : root:string -> unit 8 | (** Read basic metrics from an existing store. *) 9 | 10 | val term : (unit -> unit) Cmdliner.Term.t 11 | (** A pre-packaged [Cmdliner] term for executing {!run}. *) 12 | end 13 | 14 | module Integrity_check : sig 15 | val run : root:string -> unit 16 | (** Check that the integrity invariants of a store are preserved, and 17 | display any broken invariants. *) 18 | 19 | val term : (unit -> unit) Cmdliner.Term.t 20 | (** A pre-packaged [Cmdliner] term for executing {!run}. *) 21 | end 22 | 23 | val cli : unit -> empty 24 | (** Run a [Cmdliner] binary containing tools for running offline integrity 25 | checks. *) 26 | end 27 | 28 | module type Platform_args = sig 29 | module IO : Io.S 30 | module Clock : Platform.CLOCK 31 | module Progress : Progress_engine.S 32 | module Fmt_tty : Platform.FMT_TTY 33 | end 34 | 35 | module type Checks = sig 36 | type nonrec empty = empty 37 | 38 | module type S = S 39 | module type Platform_args = Platform_args 40 | 41 | module Make (K : Data.Key) (V : Data.Value) (_ : Platform_args) : S 42 | end 43 | -------------------------------------------------------------------------------- /src/data.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | exception Invalid_size of string 4 | (** The exception raised when trying to encode a key or a value of size other 5 | than encoded_size *) 6 | 7 | module type Key = sig 8 | type t [@@deriving repr] 9 | (** The type for keys. *) 10 | 11 | val equal : t -> t -> bool 12 | (** The equality function for keys. *) 13 | 14 | val hash : t -> int 15 | (** Note: Unevenly distributed hash functions may result in performance drops. *) 16 | 17 | val hash_size : int 18 | (** The number of bits necessary to encode the maximum output value of 19 | {!hash}. `Hashtbl.hash` uses 30 bits. 20 | 21 | Overestimating the [hash_size] will result in performance drops; 22 | underestimation will result in undefined behavior. *) 23 | 24 | val encode : t -> string 25 | (** [encode] is an encoding function. 
The resultant encoded values must have 26 | size {!encoded_size}. *) 27 | 28 | val encoded_size : int 29 | (** [encoded_size] is the size of the result of {!encode}, expressed in number 30 | of bytes. *) 31 | 32 | val decode : string -> int -> t 33 | (** [decode s off] is the decoded form of the encoded value at the offset 34 | [off] of string [s]. Must satisfy [decode (encode t) 0 = t]. *) 35 | end 36 | 37 | module type Value = sig 38 | type t [@@deriving repr] 39 | 40 | val encode : t -> string 41 | val encoded_size : int 42 | val decode : string -> int -> t 43 | end 44 | 45 | module Entry = struct 46 | module type S = sig 47 | type key 48 | type value 49 | 50 | type t = private { key : key; key_hash : int; value : value } 51 | [@@deriving repr] 52 | 53 | val v : key -> value -> t 54 | val encoded_size : int 55 | val encoded_sizeL : int63 56 | val decode : string -> int -> t 57 | val decode_key : string -> int -> key * int 58 | val decode_value : string -> int -> value 59 | val encode : t -> (string -> unit) -> unit 60 | val encode' : key -> value -> (string -> unit) -> unit 61 | val compare : t -> t -> int 62 | (* Compare entries by their key hash. *) 63 | end 64 | 65 | module Make (K : Key) (V : Value) : 66 | S with type key := K.t and type value := V.t = struct 67 | type t = { key : K.t; key_hash : int; value : V.t } [@@deriving repr] 68 | 69 | let v key value = { key; key_hash = K.hash key; value } 70 | let encoded_size = K.encoded_size + V.encoded_size 71 | let encoded_sizeL = Int63.of_int encoded_size 72 | 73 | let decode string off = 74 | let key = K.decode string off in 75 | let value = V.decode string (off + K.encoded_size) in 76 | { key; key_hash = K.hash key; value } 77 | 78 | let decode_key string off = 79 | let key = K.decode string off in 80 | (key, K.hash key) 81 | 82 | let decode_value string off = V.decode string (off + K.encoded_size) 83 | 84 | let encode' key value f = 85 | let encoded_key = K.encode key in 86 | let encoded_value = V.encode value in 87 | if String.length encoded_key <> K.encoded_size then 88 | raise (Invalid_size encoded_key); 89 | if String.length encoded_value <> V.encoded_size then 90 | raise (Invalid_size encoded_value); 91 | f (encoded_key ^ encoded_value) 92 | 93 | let encode { key; value; _ } f = encode' key value f 94 | let compare a b = Int.compare a.key_hash b.key_hash 95 | end 96 | end 97 | 98 | module String_fixed (L : sig 99 | val length : int 100 | end) : sig 101 | type t = string 102 | 103 | include Key with type t := string 104 | include Value with type t := string 105 | end = struct 106 | type t = string [@@deriving repr] 107 | 108 | let hash = Hashtbl.hash 109 | let hash_size = 30 110 | let encode s = s 111 | let decode s off = String.sub s off L.length 112 | let encoded_size = L.length 113 | let equal = String.equal 114 | end 115 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (public_name index) 3 | (name index) 4 | (libraries logs fmt stdlib-shims mtime cmdliner logs.fmt logs.cli fmt.cli 5 | jsonm progress.engine repr ppx_repr optint lru) 6 | (preprocess 7 | (pps ppx_repr))) 8 | -------------------------------------------------------------------------------- /src/fan.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | 
Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | type 'a t = { fans : int63 array; mask : int; shift : int } 21 | 22 | let equal t t' = 23 | let rec loop i = 24 | if i >= Array.length t.fans then true 25 | else if Int63.equal t.fans.(i) t'.fans.(i) then loop (i + 1) 26 | else false 27 | in 28 | t.mask = t'.mask 29 | && t.shift = t'.shift 30 | && Array.length t.fans = Array.length t'.fans 31 | && loop 0 32 | 33 | let log2 a = log a /. log 2. 34 | 35 | let v ~hash_size ~entry_size n = 36 | let entry_sizef = float_of_int entry_size in 37 | let entries_per_page = 4096. /. entry_sizef in 38 | let raw_nb_fans = float_of_int n /. entries_per_page in 39 | let size = max 0 (int_of_float (ceil (log2 raw_nb_fans))) in 40 | let nb_fans = 1 lsl size in 41 | let shift = hash_size - size in 42 | { 43 | fans = Array.make nb_fans Int63.zero; 44 | mask = (nb_fans - 1) lsl shift; 45 | shift; 46 | } 47 | 48 | let nb_fans t = Array.length t.fans 49 | let fan t h = (h land t.mask) lsr t.shift 50 | 51 | let search t h = 52 | let fan = fan t h in 53 | let low = if fan = 0 then Int63.zero else t.fans.(fan - 1) in 54 | (low, t.fans.(fan)) 55 | 56 | let update t hash off = 57 | let fan = fan t hash in 58 | t.fans.(fan) <- off 59 | 60 | let finalize t = 61 | let rec loop curr i = 62 | if i = Array.length t.fans then () 63 | else ( 64 | if t.fans.(i) = Int63.zero then t.fans.(i) <- curr; 65 | loop t.fans.(i) (i + 1)) 66 | in 67 | loop Int63.zero 0; 68 | (t :> [ `Read ] t) 69 | 70 | let exported_size t = Array.length t.fans * Int63.encoded_size 71 | 72 | let export t = 73 | let encoded_size = exported_size t in 74 | let buf = Bytes.create encoded_size in 75 | let rec loop i = 76 | if i >= Array.length t.fans then () 77 | else ( 78 | Int63.encode buf t.fans.(i) ~off:(i * Int63.encoded_size); 79 | loop (i + 1)) 80 | in 81 | loop 0; 82 | Bytes.unsafe_to_string buf 83 | 84 | let import ~hash_size buf = 85 | let nb_fans = String.length buf / 8 in 86 | let fans = 87 | Array.init nb_fans (fun i -> Int63.decode buf ~off:(i * Int63.encoded_size)) 88 | in 89 | let size = int_of_float (log2 (float_of_int nb_fans)) in 90 | let shift = hash_size - size in 91 | let mask = (nb_fans - 1) lsl shift in 92 | { fans; mask; shift } 93 | -------------------------------------------------------------------------------- /src/fan.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to 
the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | type 'a t 21 | 22 | val equal : 'a t -> 'a t -> bool 23 | (** The equality function for fanouts. *) 24 | 25 | val v : hash_size:int -> entry_size:int -> int -> [ `Write ] t 26 | (** [v ~hash_size ~entry_size n] creates a fan_out for an index with [hash_size] 27 | and [entry_size], containing [n] elements. *) 28 | 29 | val nb_fans : 'a t -> int 30 | (** [nb_fans t] is the number of fans in [t]. *) 31 | 32 | val search : [ `Read ] t -> int -> int63 * int63 33 | (** [search t hash] is the interval of offsets containing [hash], if present. *) 34 | 35 | val update : [ `Write ] t -> int -> int63 -> unit 36 | (** [update t hash off] updates [t] so that [hash] is registered to be at offset 37 | [off]. *) 38 | 39 | val finalize : [ `Write ] t -> [ `Read ] t 40 | (** Finalizes the update of the fanout. This is mandatory before any [search] 41 | query. *) 42 | 43 | val exported_size : 'a t -> int 44 | (** [exported_size t] is the size of [export t]. This does not actually compute 45 | the encoding of [t]. *) 46 | 47 | val export : [ `Read ] t -> string 48 | (** [export t] is a string encoded form of [t]. *) 49 | 50 | val import : hash_size:int -> string -> [ `Read ] t 51 | (** [import ~hash_size buf] decodes [buf] such that 52 | [import ~hash_size (export t) = t] if [t] was initially created with 53 | ~hash_size. *) 54 | -------------------------------------------------------------------------------- /src/import.ml: -------------------------------------------------------------------------------- 1 | module Int63 = struct 2 | include Optint.Int63 3 | 4 | let ( + ) = add 5 | let ( - ) = sub 6 | let ( * ) = mul 7 | let ( / ) = div 8 | 9 | let to_int_exn = 10 | let max_int = of_int Int.max_int in 11 | fun x -> 12 | if compare x max_int > 0 then 13 | Fmt.failwith "Int63.to_int_exn: %a too large" pp x 14 | else to_int x 15 | 16 | let to_int_trunc = to_int 17 | let to_int = `shadowed 18 | end 19 | 20 | type int63 = Int63.t [@@deriving repr] 21 | 22 | module Mtime = struct 23 | include Mtime 24 | 25 | let span_to_s span = Mtime.Span.to_float_ns span *. 1e-9 26 | let span_to_us span = Mtime.Span.to_float_ns span *. 1e-3 27 | end 28 | -------------------------------------------------------------------------------- /src/index.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | (** Index 19 | 20 | [Index] is a scalable implementation of persistent indices in OCaml. 21 | 22 | [Index] provides the standard key-value interface: [find], [mem] and 23 | [replace]. 
It requires three IO instances: 24 | 25 | - A `log` IO containing all of the recently-added bindings; this is also 26 | kept in memory. 27 | 28 | - When the `log` IO is full, it is merged into the `index` IO. Search is 29 | done first in `log` then in `index`, which makes recently added bindings 30 | search faster. 31 | 32 | - A `lock` IO to ensure safe concurrent access. *) 33 | 34 | include Index_intf.Index 35 | (** @inline *) 36 | -------------------------------------------------------------------------------- /src/index_intf.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | module type Key = sig 21 | include Data.Key 22 | (** @inline *) 23 | end 24 | 25 | module type Value = sig 26 | include Data.Value 27 | (** @inline *) 28 | end 29 | 30 | module type S = sig 31 | type t 32 | (** The type for indexes. *) 33 | 34 | type key 35 | (** The type for keys. *) 36 | 37 | type value 38 | (** The type for values. *) 39 | 40 | type cache 41 | (** The type for caches of index instances. *) 42 | 43 | val empty_cache : unit -> cache 44 | (** Construct a new empty cache of index instances. *) 45 | 46 | val v : 47 | ?flush_callback:(unit -> unit) -> 48 | ?cache:cache -> 49 | ?fresh:bool -> 50 | ?readonly:bool -> 51 | ?throttle:[ `Overcommit_memory | `Block_writes ] -> 52 | ?lru_size:int -> 53 | log_size:int -> 54 | string -> 55 | t 56 | (** The constructor for indexes. 57 | 58 | @param flush_callback 59 | A function to be called before any new bindings are persisted to disk 60 | (including both automatic flushing and explicit calls to {!flush} or 61 | {!close}). 62 | 63 | This can be used to ensure certain pre-conditions are met before 64 | bindings are persisted to disk. (For instance, if the index bindings are 65 | pointers into another data-structure [d], it may be necessary to flush 66 | [d] first to avoid creating dangling pointers.) 67 | @param cache used for instance sharing. 68 | @param fresh whether an existing index should be overwritten. 69 | @param read_only whether read-only mode is enabled for this index. 70 | @param throttle 71 | the strategy to use when the cache are full and and async in already in 72 | progress. [Block_writes] (the default) blocks any new writes until the 73 | merge is completed. [Overcommit_memory] does not block but continues to 74 | fill the (already full) cache. 75 | @param log_size the maximum number of bindings in the `log` IO. 76 | @param lru_size 77 | the maximum number of recently-read index bindings kept in memory. 78 | Defaults to 30_000. *) 79 | 80 | val clear : t -> unit 81 | (** [clear t] clears [t] so that there are no more bindings in it. *) 82 | 83 | val find : t -> key -> value 84 | (** [find t k] is the binding of [k] in [t]. 
*) 85 | 86 | val mem : t -> key -> bool 87 | (** [mem t k] is [true] iff [k] is bound in [t]. *) 88 | 89 | val replace : ?overcommit:bool -> t -> key -> value -> unit 90 | (** [replace t k v] binds [k] to [v] in [t], replacing any existing binding of 91 | [k]. 92 | 93 | If [overcommit] is true, the operation does not triger a merge, even if 94 | the caches are full. By default [overcommit] is false. *) 95 | 96 | val filter : t -> (key * value -> bool) -> unit 97 | (** [filter t p] removes all the bindings (k, v) that do not satisfy [p]. This 98 | operation is costly and blocking. *) 99 | 100 | val iter : (key -> value -> unit) -> t -> unit 101 | (** Iterates over the index bindings. Limitations: 102 | 103 | - Order is not specified. 104 | - In case of recent replacements of existing values (since the last 105 | merge), this will hit both the new and old bindings. 106 | - May not observe recent concurrent updates to the index by other 107 | processes. *) 108 | 109 | val flush : ?no_callback:unit -> ?with_fsync:bool -> t -> unit 110 | (** Flushes all internal buffers of the [IO] instances. 111 | 112 | - Passing [~no_callback:()] disables calling the [flush_callback] passed 113 | to {!v}. 114 | - If [with_fsync] is [true], this also flushes the OS caches for each [IO] 115 | instance. *) 116 | 117 | val close : ?immediately:unit -> t -> unit 118 | (** Closes all resources used by [t], flushing any internal buffers in the 119 | instance. 120 | 121 | If [immediately] is passed, this operation will abort any ongoing 122 | background processes. This guarantees not to corrupt the store, but may 123 | require additional work to be done on the next startup. *) 124 | 125 | val sync : t -> unit 126 | (** [sync t] syncs a read-only index with the files on disk. Raises 127 | {!RW_not_allowed} if called by a read-write index. *) 128 | 129 | val is_merging : t -> bool 130 | (** [is_merging t] returns true if [t] is running a merge. Raises 131 | {!RO_not_allowed} if called by a read-only index. *) 132 | 133 | val merge : t -> unit 134 | (** [merge t] forces a merge for [t]. 135 | 136 | If there is no merge running, this operation is non-blocking, i.e. it 137 | returns immediately, with the merge running concurrently. 138 | 139 | If a merge is running already, this operation blocks until the previous 140 | merge is complete. It then launches a merge (which runs concurrently) and 141 | returns. *) 142 | 143 | val try_merge : t -> unit 144 | (** [try_merge] is like {!merge} but is a no-op if the number of entries in 145 | the write-ahead log is smaller than [log_size]. *) 146 | 147 | (** Offline [fsck]-like utility for checking the integrity of Index stores 148 | built using this module. *) 149 | module Checks : sig 150 | include Checks.S 151 | (** @inline *) 152 | end 153 | end 154 | 155 | module Private_types = struct 156 | type merge_stages = [ `After | `After_clear | `After_first_entry | `Before ] 157 | (** Some operations that trigger a merge can have hooks inserted at the 158 | following stages: 159 | 160 | - [`Before]: immediately before merging (while holding the merge lock); 161 | - [`After_clear]: immediately after clearing the log, at the end of a 162 | merge; 163 | - [`After_first_entry]: immediately after adding the first entry in the 164 | merge file, if the data file contains at least one entry; 165 | - [`After]: immediately after merging (while holding the merge lock). 
*) 166 | 167 | type merge_result = [ `Completed | `Aborted ] 168 | end 169 | 170 | module type Private = sig 171 | include S 172 | 173 | type 'a hook 174 | 175 | include module type of Private_types 176 | (** @inline *) 177 | 178 | type 'a async 179 | (** The type of asynchronous computation. *) 180 | 181 | val replace' : 182 | ?hook:[ `Merge of merge_stages ] hook -> 183 | ?overcommit:bool -> 184 | t -> 185 | key -> 186 | value -> 187 | merge_result async option 188 | (** [replace' t k v] is like {!replace t k v} but returns a promise of a merge 189 | result if the {!replace} call triggered one. *) 190 | 191 | val close' : hook:[ `Abort_signalled ] hook -> ?immediately:unit -> t -> unit 192 | (** [`Abort_signalled]: after the cancellation signal has been sent to any 193 | concurrent merge operations, but {i before} blocking on those 194 | cancellations having completed. *) 195 | 196 | val clear' : hook:[ `Abort_signalled | `IO_clear ] hook -> t -> unit 197 | 198 | val try_merge_aux : 199 | ?hook:merge_stages hook -> ?force:bool -> t -> merge_result async 200 | (** [try_merge_aux t] tries to merge [t]. If [force] is false (the default), a 201 | merge is performed only if there are more entries in the write-ahead log 202 | than the configured limit. If [force] is set, the merge is performed no 203 | matter what. *) 204 | 205 | val await : 'a async -> ('a, [ `Async_exn of exn ]) result 206 | (** Wait for an asynchronous computation to finish. *) 207 | 208 | val replace_with_timer : ?sampling_interval:int -> t -> key -> value -> unit 209 | (** Time replace operations. The reported time is an average over a number of 210 | consecutive operations, which can be specified by [sampling_interval]. If 211 | [sampling_interval] is not set, no operation is timed. *) 212 | 213 | val sync' : 214 | ?hook: 215 | [ `Before_offset_read 216 | | `After_offset_read 217 | | `Reload_log 218 | | `Reload_log_async ] 219 | hook -> 220 | t -> 221 | unit 222 | (** Hooks: 223 | 224 | - [`Before_offset_read]: before reading the generation number and the 225 | offset. 226 | - [`After_offset_read]: after reading the generation number and offset. *) 227 | 228 | val log : t -> (key * value) list option 229 | val log_async : t -> (key * value) list option 230 | end 231 | 232 | module type Index = sig 233 | (* N.B. We use [sig ... end] redirections to avoid linking to the [_intf] 234 | file in the generated docs. Once Odoc 2 is released, this can be 235 | removed. *) 236 | 237 | module Key : sig 238 | (** The input of {!Make} for keys. *) 239 | module type S = sig 240 | include Key 241 | (** @inline *) 242 | end 243 | 244 | (** String keys of a given fixed size in bytes. *) 245 | module String_fixed (L : sig 246 | val length : int 247 | end) : S with type t = string 248 | end 249 | 250 | module Value : sig 251 | (** The input of {!Make} for values. The same requirements as for {!Key} 252 | apply. *) 253 | module type S = sig 254 | include Value 255 | (** @inline *) 256 | end 257 | 258 | (** String values of a given fixed size in bytes. *) 259 | module String_fixed (L : sig 260 | val length : int 261 | end) : S with type t = string 262 | end 263 | 264 | (** Platform dependencies required by {!Make}. *) 265 | 266 | module Platform = Platform 267 | 268 | (** Signatures and implementations of caches. {!Make} requires a cache in 269 | order to provide instance sharing. *) 270 | module Cache : sig 271 | include module type of Cache 272 | (** @inline *) 273 | end 274 | 275 | (** Index module signature.
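      The {!Make} functor below produces implementations of this signature.
      As a hypothetical sketch (where [My_platform] stands for an
      implementation of {!Platform.S}, e.g. the one provided by the
      [index.unix] package, and [My_cache] for an implementation of
      {!Cache.S}), an instantiation might read:

      {[
        module K = Key.String_fixed (struct let length = 32 end)
        module V = Value.String_fixed (struct let length = 16 end)
        module I = Make (K) (V) (My_platform) (My_cache)
      ]}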
*) 276 | module type S = sig 277 | include S 278 | (** @inline *) 279 | end 280 | 281 | exception RO_not_allowed 282 | (** The exception raised when a write operation is attempted on a read_only 283 | index. *) 284 | 285 | exception RW_not_allowed 286 | (** The exception is raised when a sync operation is attempted on a read-write 287 | index. *) 288 | 289 | exception Closed 290 | (** The exception raised when any operation is attempted on a closed index, 291 | except for [close], which is idempotent. *) 292 | 293 | module Make (K : Key.S) (V : Value.S) (_ : Platform.S) (C : Cache.S) : 294 | S with type key = K.t and type value = V.t 295 | 296 | (** Run-time metric tracking for index instances. *) 297 | module Stats : sig 298 | include module type of Stats 299 | (** @inline *) 300 | end 301 | 302 | module Checks = Checks 303 | 304 | (** These modules should not be used. They are exposed purely for testing 305 | purposes. *) 306 | module Private : sig 307 | module Hook : sig 308 | type 'a t 309 | 310 | val v : ('a -> unit) -> 'a t 311 | end 312 | 313 | module Search = Search 314 | module Io = Io 315 | module Io_array = Io_array 316 | module Fan = Fan 317 | module Data = Data 318 | module Layout = Layout 319 | 320 | module Logs : sig 321 | val setup : 322 | ?reporter:Logs.reporter -> 323 | ?style_renderer:Fmt.style_renderer -> 324 | ?level:Logs.level -> 325 | (module Platform.CLOCK) -> 326 | (module Platform.FMT_TTY) -> 327 | unit 328 | 329 | val setup_term : 330 | ?reporter:Logs.reporter -> 331 | (module Platform.CLOCK) -> 332 | (module Platform.FMT_TTY) -> 333 | unit Cmdliner.Term.t 334 | end 335 | 336 | module type S = Private with type 'a hook := 'a Hook.t 337 | 338 | module Make (K : Key) (V : Value) (_ : Platform.S) (C : Cache.S) : 339 | S with type key = K.t and type value = V.t 340 | end 341 | end 342 | -------------------------------------------------------------------------------- /src/io.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | include Io_intf 19 | open! 
Import 20 | 21 | module Extend (S : S) = struct 22 | include S 23 | 24 | let iter ~page_size ?min:(min_off = Int63.zero) ?max:max_off f io = 25 | let max_off = match max_off with None -> offset io | Some m -> m in 26 | let rec aux offset = 27 | let remaining = Int63.sub max_off offset in 28 | if remaining <= Int63.zero then () 29 | else 30 | let len = Int63.to_int_exn (min remaining page_size) in 31 | let raw = Bytes.create len in 32 | let n = read io ~off:offset ~len raw in 33 | let rec read_page page off = 34 | if off = n then () 35 | else 36 | let read = 37 | f ~off:Int63.(add (of_int off) offset) ~buf:page ~buf_off:off 38 | in 39 | (read_page [@tailcall]) page (off + read) 40 | in 41 | read_page (Bytes.unsafe_to_string raw) 0; 42 | (aux [@tailcall]) Int63.(add offset page_size) 43 | in 44 | (aux [@tailcall]) min_off 45 | end 46 | -------------------------------------------------------------------------------- /src/io.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | include Io_intf.Io 19 | (** @inline *) 20 | -------------------------------------------------------------------------------- /src/io_array.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! 
Import 19 | 20 | module type ELT = sig 21 | type t 22 | 23 | val encoded_size : int 24 | val decode : string -> int -> t 25 | end 26 | 27 | module type S = sig 28 | include Search.ARRAY 29 | 30 | type io 31 | 32 | val v : io -> t 33 | end 34 | 35 | module Make (IO : Io.S) (Elt : ELT) : 36 | S with type io = IO.t and type elt = Elt.t = struct 37 | module Elt = struct 38 | include Elt 39 | 40 | let encoded_sizeL = Int63.of_int encoded_size 41 | end 42 | 43 | type io = IO.t 44 | type elt = Elt.t 45 | type buffer = { buf : bytes; low_off : int63; high_off : int63 } 46 | type t = { io : IO.t; mutable buffer : buffer option } 47 | 48 | let v io = { io; buffer = None } 49 | 50 | let get_entry_from_io io off = 51 | let buf = Bytes.create Elt.encoded_size in 52 | let n = IO.read io ~off ~len:Elt.encoded_size buf in 53 | assert (n = Elt.encoded_size); 54 | Elt.decode (Bytes.unsafe_to_string buf) 0 55 | 56 | let get_entry_from_buffer buf off = 57 | let buf_off = Int63.(to_int_exn (off - buf.low_off)) in 58 | assert (buf_off <= Bytes.length buf.buf); 59 | Elt.decode (Bytes.unsafe_to_string buf.buf) buf_off 60 | 61 | let is_in_buffer t off = 62 | match t.buffer with 63 | | None -> false 64 | | Some b -> 65 | Int63.compare off b.low_off >= 0 && Int63.compare off b.high_off <= 0 66 | 67 | let get t i = 68 | let off = Int63.(i * Elt.encoded_sizeL) in 69 | match t.buffer with 70 | | Some b when is_in_buffer t off -> ( 71 | try get_entry_from_buffer b off with _ -> assert false) 72 | | _ -> get_entry_from_io t.io off 73 | 74 | let length t = Int63.div (IO.offset t.io) Elt.encoded_sizeL 75 | 76 | let max_buffer_size = 77 | (* The prefetched area should not exceed 4096 in most cases, thanks to the 78 | fan out. However, if the hash function is not well distributed, some 79 | exceptions might happen where the prefetched area actually exceeds 4096. 80 | As long as this excess is reasonable (x8), we still want to prefetch. *) 81 | 8 * 4096 82 | 83 | let buf = Bytes.create max_buffer_size 84 | 85 | let set_buffer t ~low ~high = 86 | let range = Elt.encoded_size * (1 + Int63.(to_int_exn (high - low))) in 87 | let low_off = Int63.mul low Elt.encoded_sizeL in 88 | let high_off = Int63.mul high Elt.encoded_sizeL in 89 | let n = IO.read t.io ~off:low_off ~len:range buf in 90 | assert (n = range); 91 | t.buffer <- Some { buf; low_off; high_off } 92 | 93 | let pre_fetch t ~low ~high = 94 | let range = Elt.encoded_size * (1 + Int63.(to_int_exn (high - low))) in 95 | if Int63.compare low high > 0 then 96 | Log.warn (fun m -> 97 | m "Requested pre-fetch region is empty: [%a, %a]" Int63.pp low 98 | Int63.pp high) 99 | else if range > max_buffer_size then 100 | Log.warn (fun m -> 101 | m "Requested pre-fetch [%a, %a] is larger than %d" Int63.pp low 102 | Int63.pp high max_buffer_size) 103 | else 104 | match t.buffer with 105 | | Some b -> 106 | let low_buf, high_buf = 107 | Int63. 108 | (div b.low_off Elt.encoded_sizeL, div b.high_off Elt.encoded_sizeL) 109 | in 110 | if low >= low_buf && high <= high_buf then 111 | Log.debug (fun m -> 112 | m 113 | "Pre-existing buffer [%a, %a] encloses requested pre-fetch \ 114 | [%a, %a]" 115 | Int63.pp low_buf Int63.pp high_buf Int63.pp low Int63.pp high) 116 | else ( 117 | Log.warn (fun m -> 118 | m 119 | "Current buffer [%a, %a] insufficient. Prefetching in range \ 120 | [%a, %a]" 121 | Int63.pp low_buf Int63.pp high_buf Int63.pp low Int63.pp high); 122 | set_buffer t ~low ~high) 123 | | None -> 124 | Log.debug (fun m -> 125 | m "No existing buffer. 
Prefetching in range [%a, %a]" Int63.pp low 126 | Int63.pp high); 127 | set_buffer t ~low ~high 128 | end 129 | -------------------------------------------------------------------------------- /src/io_array.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | module type ELT = sig 21 | type t 22 | 23 | val encoded_size : int 24 | val decode : string -> int -> t 25 | end 26 | 27 | module type S = sig 28 | include Search.ARRAY 29 | 30 | type io 31 | 32 | val v : io -> t 33 | end 34 | 35 | (** Takes an IO instance and wraps it in an Array interface with support for 36 | prefetching sections of the array. *) 37 | module Make (IO : Io.S) (Elt : ELT) : S with type io = IO.t and type elt = Elt.t 38 | -------------------------------------------------------------------------------- /src/io_intf.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | module type S = sig 4 | type t 5 | 6 | val v : 7 | ?flush_callback:(unit -> unit) -> 8 | fresh:bool -> 9 | generation:int63 -> 10 | fan_size:int63 -> 11 | string -> 12 | t 13 | 14 | val v_readonly : string -> (t, [ `No_file_on_disk ]) result 15 | val offset : t -> int63 16 | val read : t -> off:int63 -> len:int -> bytes -> int 17 | 18 | val clear : 19 | generation:int63 -> ?hook:(unit -> unit) -> reopen:bool -> t -> unit 20 | 21 | val flush : ?no_callback:unit -> ?with_fsync:bool -> t -> unit 22 | val get_generation : t -> int63 23 | val set_fanout : t -> string -> unit 24 | val get_fanout : t -> string 25 | val get_fanout_size : t -> int63 26 | val rename : src:t -> dst:t -> unit 27 | val append : t -> string -> unit 28 | val append_substring : t -> string -> off:int -> len:int -> unit 29 | val close : t -> unit 30 | 31 | val size_header : t -> int 32 | (** [size_header t] is [t]'s header size. *) 33 | 34 | module Lock : sig 35 | type t 36 | 37 | val lock : string -> t 38 | val unlock : t -> unit 39 | 40 | val pp_dump : string -> (Format.formatter -> unit) option 41 | (** To be used for debugging purposes only. *) 42 | end 43 | 44 | module Header : sig 45 | type header = { offset : int63; generation : int63 } 46 | 47 | val set : t -> header -> unit 48 | val get : t -> header 49 | end 50 | 51 | val exists : string -> bool 52 | (** [exists name] is true iff there is a pre-existing IO instance called 53 | [name]. *) 54 | 55 | val size : t -> int 56 | (** Returns the true size of the underlying data representation in bytes. Note 57 | that this is not necessarily equal to the total size of {i observable} 58 | data, which is given by {!offset}. 59 | 60 | To be used for debugging purposes only. 
*) 61 | end 62 | 63 | module type Io = sig 64 | module type S = S 65 | 66 | module Extend (S : S) : sig 67 | include S with type t = S.t 68 | 69 | val iter : 70 | page_size:int63 -> 71 | ?min:int63 -> 72 | ?max:int63 -> 73 | (off:int63 -> buf:string -> buf_off:int -> int) -> 74 | t -> 75 | unit 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /src/layout.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | let toplevel ~root name = Filename.(concat (concat root "index") name) 4 | let log = toplevel "log" 5 | let log_async = toplevel "log_async" 6 | let data = toplevel "data" 7 | let lock = toplevel "lock" 8 | let merge = toplevel "merge" 9 | -------------------------------------------------------------------------------- /src/layout.mli: -------------------------------------------------------------------------------- 1 | (** Defines the namespacing of the various IO instances required by Index. *) 2 | 3 | open! Import 4 | 5 | val log : root:string -> string 6 | val log_async : root:string -> string 7 | val data : root:string -> string 8 | val lock : root:string -> string 9 | val merge : root:string -> string 10 | -------------------------------------------------------------------------------- /src/log.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! 
Import 19 | 20 | let src = Logs.Src.create "index" ~doc:"Index" 21 | 22 | module Log = (val Logs.src_log src : Logs.LOG) 23 | include Log 24 | 25 | let default_reporter (type c) ?(prefix = "") 26 | (module Clock : Platform.CLOCK with type counter = c) (counter : c) = 27 | let report src level ~over k msgf = 28 | let k _ = 29 | over (); 30 | k () 31 | in 32 | let ppf = match level with Logs.App -> Fmt.stdout | _ -> Fmt.stderr in 33 | let with_stamp h _tags k fmt = 34 | let dt = Mtime.span_to_us (Clock.count counter) in 35 | Fmt.kpf k ppf 36 | ("%s%+04.0fus %a %a @[" ^^ fmt ^^ "@]@.") 37 | prefix dt Logs_fmt.pp_header (level, h) 38 | Fmt.(styled `Magenta string) 39 | (Logs.Src.name src) 40 | in 41 | msgf @@ fun ?header ?tags fmt -> with_stamp header tags k fmt 42 | in 43 | { Logs.report } 44 | 45 | let setup ?reporter ?style_renderer ?level (module Clock : Platform.CLOCK) 46 | (module Fmt_tty : Platform.FMT_TTY) = 47 | let start_time = Clock.counter () in 48 | let reporter = 49 | match reporter with 50 | | Some x -> x 51 | | None -> default_reporter (module Clock) start_time 52 | in 53 | Fmt_tty.setup_std_outputs ?style_renderer (); 54 | Logs.set_level level; 55 | Logs.set_reporter reporter; 56 | () 57 | 58 | open Cmdliner 59 | 60 | let ( let+ ) t f = Term.(const f $ t) 61 | let ( and+ ) a b = Term.(const (fun x y -> (x, y)) $ a $ b) 62 | 63 | let setup_term ?reporter clock fmt_tty = 64 | let+ style_renderer = Fmt_cli.style_renderer () 65 | and+ level = Logs_cli.level () in 66 | setup ?reporter ?style_renderer ?level clock fmt_tty 67 | -------------------------------------------------------------------------------- /src/log.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | include Logs.LOG 19 | 20 | val setup : 21 | ?reporter:Logs.reporter -> 22 | ?style_renderer:Fmt.style_renderer -> 23 | ?level:Logs.level -> 24 | (module Platform.CLOCK) -> 25 | (module Platform.FMT_TTY) -> 26 | unit 27 | 28 | val setup_term : 29 | ?reporter:Logs.reporter -> 30 | (module Platform.CLOCK) -> 31 | (module Platform.FMT_TTY) -> 32 | unit Cmdliner.Term.t 33 | -------------------------------------------------------------------------------- /src/log_file.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | *) 16 | 17 | open! Import 18 | 19 | module Make (IO : Io.S) (Key : Data.Key) (Value : Data.Value) = struct 20 | module Entry = Data.Entry.Make (Key) (Value) 21 | 22 | module IO = struct 23 | include Io.Extend (IO) 24 | 25 | let iter_keys ?min f = 26 | let page_size = Int63.(mul Entry.encoded_sizeL (of_int 1_000)) in 27 | iter ~page_size ?min (fun ~off ~buf ~buf_off -> 28 | let key, _ = Entry.decode_key buf buf_off in 29 | f off key; 30 | Entry.encoded_size) 31 | end 32 | 33 | module Scratch = struct 34 | type t = { buffer : bytes (** [Bytes.length buf = Entry.encoded_size] *) } 35 | [@@unboxed] 36 | 37 | let create () = { buffer = Bytes.create Entry.encoded_size } 38 | end 39 | 40 | type t = { 41 | io : IO.t; (** The disk file handler *) 42 | append_io : string -> unit; (** Pre-allocated [IO.append io] closure *) 43 | mutable hashtbl : int63 Small_list.t Array.t; 44 | (** Hashtable of (key, value) pairs in [io], stored using just their 45 | file offsets for memory compactness. Length is always a power of 46 | two. *) 47 | mutable bucket_count_log2 : int; 48 | (** Invariant: equal to [log_2 (Array.length hashtbl)] *) 49 | mutable cardinal : int; 50 | } 51 | 52 | let io t = t.io 53 | let cardinal t = t.cardinal 54 | 55 | let clear_memory t = 56 | t.hashtbl <- [| Small_list.empty |]; 57 | t.bucket_count_log2 <- 0; 58 | t.cardinal <- 0 59 | 60 | let clear ~generation ?hook ~reopen t = 61 | IO.clear ~generation ?hook ~reopen t.io; 62 | clear_memory t 63 | 64 | let close t = 65 | IO.close t.io; 66 | clear_memory t 67 | 68 | let flush ?no_callback ~with_fsync t = IO.flush ?no_callback ~with_fsync t.io 69 | 70 | let key_of_offset t (scratch : Scratch.t) off = 71 | let len = Key.encoded_size in 72 | let r = IO.read t.io ~off ~len scratch.buffer in 73 | assert (r = len); 74 | fst (Entry.decode_key (Bytes.unsafe_to_string scratch.buffer) 0) 75 | 76 | let entry_of_offset t (scratch : Scratch.t) off = 77 | let len = Entry.encoded_size in 78 | let r = IO.read t.io ~off ~len scratch.buffer in 79 | assert (r = Entry.encoded_size); 80 | Entry.decode (Bytes.unsafe_to_string scratch.buffer) 0 81 | 82 | let elt_index t key = 83 | (* NOTE: we use the _uppermost_ bits of the key hash to index the bucket 84 | array, so that the hashtbl is approximately sorted by key hash (with only 85 | the entries within each bucket being relatively out of order). *) 86 | let unneeded_bits = Key.hash_size - t.bucket_count_log2 in 87 | (Key.hash key lsr unneeded_bits) land ((1 lsl t.bucket_count_log2) - 1) 88 | 89 | let resize t scratch = 90 | (* Scale the number of hashtbl buckets. *) 91 | t.bucket_count_log2 <- t.bucket_count_log2 + 1; 92 | let new_bucket_count = 1 lsl t.bucket_count_log2 in 93 | if new_bucket_count > Sys.max_array_length then 94 | Fmt.failwith 95 | "Log_file.resize: can't construct a hashtbl with %d buckets \ 96 | (Sys.max_array_length = %d)" 97 | new_bucket_count Sys.max_array_length; 98 | let new_hashtbl = Array.make new_bucket_count Small_list.empty in 99 | ArrayLabels.iteri t.hashtbl ~f:(fun i bucket -> 100 | (* The bindings in this bucket will be split into two new buckets, using 101 | the next bit of [Key.hash] as a discriminator. 
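         Concretely, every binding previously stored in bucket [i] ends up in
         either bucket [2 * i] or bucket [2 * i + 1] of the resized table, as
         the assignments below show.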
*) 102 | let bucket_2i, bucket_2i_plus_1 = 103 | Small_list.to_list bucket 104 | |> List.partition (fun offset -> 105 | let key = key_of_offset t scratch offset in 106 | let new_index = elt_index t key in 107 | assert (new_index lsr 1 = i); 108 | new_index land 1 = 0) 109 | in 110 | new_hashtbl.(2 * i) <- Small_list.of_list bucket_2i; 111 | new_hashtbl.((2 * i) + 1) <- Small_list.of_list bucket_2i_plus_1); 112 | t.hashtbl <- new_hashtbl 113 | 114 | (** Replace implementation that only updates in-memory state (and doesn't 115 | write the binding to disk). *) 116 | let replace_memory t scratch key offset = 117 | if t.cardinal > 2 * Array.length t.hashtbl then resize t scratch; 118 | let elt_idx = elt_index t key in 119 | let bucket = t.hashtbl.(elt_idx) in 120 | let bucket = 121 | let key_found = ref false in 122 | let bucket' = 123 | Small_list.map bucket ~f:(fun offset' -> 124 | if !key_found then 125 | (* We ensure there's at most one binding for a given key *) 126 | offset' 127 | else 128 | let key' = key_of_offset t scratch offset' in 129 | match Key.equal key key' with 130 | | false -> offset' 131 | | true -> 132 | (* New binding for this key *) 133 | key_found := true; 134 | offset) 135 | in 136 | match !key_found with 137 | | true -> 138 | (* We're replacing an existing value. No need to change [cardinal]. *) 139 | bucket' 140 | | false -> 141 | (* The existing bucket doesn't contain this key. *) 142 | t.cardinal <- t.cardinal + 1; 143 | Small_list.cons offset bucket 144 | in 145 | t.hashtbl.(elt_idx) <- bucket 146 | 147 | let replace t key value = 148 | let offset = IO.offset t.io in 149 | Entry.encode' key value t.append_io; 150 | replace_memory t (Scratch.create ()) key offset 151 | 152 | let sync_entries ~min t = 153 | let scratch = Scratch.create () in 154 | IO.iter_keys ~min 155 | (fun offset key -> replace_memory t scratch key offset) 156 | t.io 157 | 158 | let reload t = 159 | clear_memory t; 160 | sync_entries ~min:Int63.zero t 161 | 162 | let create io = 163 | let cardinal = Int63.(to_int_exn (IO.offset io / Entry.encoded_sizeL)) in 164 | let bucket_count_log2, bucket_count = 165 | let rec aux n_log2 n = 166 | if n >= cardinal then (n_log2, n) 167 | else if n * 2 > Sys.max_array_length then (n_log2, n) 168 | else aux (n_log2 + 1) (n * 2) 169 | in 170 | aux 4 16 171 | in 172 | let hashtbl = Array.make bucket_count Small_list.empty in 173 | let t = 174 | { io; append_io = IO.append io; hashtbl; bucket_count_log2; cardinal } 175 | in 176 | let scratch = Scratch.create () in 177 | IO.iter_keys (fun offset key -> replace_memory t scratch key offset) io; 178 | t 179 | 180 | let find t key = 181 | let elt_idx = elt_index t key in 182 | let bucket = t.hashtbl.(elt_idx) in 183 | let scratch = Scratch.create () in 184 | Small_list.find_map bucket ~f:(fun offset -> 185 | (* We expect the keys to match most of the time, so we decode the 186 | value at the same time. 
*) 187 | let entry = entry_of_offset t scratch offset in 188 | match Key.equal key entry.key with 189 | | false -> None 190 | | true -> Some entry.value) 191 | |> function 192 | | None -> raise Not_found 193 | | Some x -> x 194 | 195 | let fold t ~f ~init = 196 | let scratch = Scratch.create () in 197 | ArrayLabels.fold_left t.hashtbl ~init ~f:(fun acc bucket -> 198 | Small_list.fold_left bucket ~init:acc ~f:(fun acc offset -> 199 | let entry = entry_of_offset t scratch offset in 200 | f acc entry)) 201 | 202 | let iter t ~f = 203 | let scratch = Scratch.create () in 204 | ArrayLabels.iter t.hashtbl ~f:(fun bucket -> 205 | Small_list.iter bucket ~f:(fun offset -> 206 | f (entry_of_offset t scratch offset))) 207 | 208 | let to_sorted_seq t = 209 | let scratch = Scratch.create () in 210 | Array.to_seq t.hashtbl 211 | |> Seq.flat_map (fun bucket -> 212 | let arr = 213 | Small_list.to_array bucket 214 | |> Array.map (fun off -> entry_of_offset t scratch off) 215 | in 216 | Array.sort Entry.compare arr; 217 | Array.to_seq arr) 218 | end 219 | -------------------------------------------------------------------------------- /src/log_file.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | *) 16 | 17 | open! Import 18 | 19 | module Make (IO : Io.S) (Key : Data.Key) (Value : Data.Value) : sig 20 | module Entry : Data.Entry.S with type key := Key.t and type value := Value.t 21 | 22 | type t 23 | type key := Key.t 24 | type value := Value.t 25 | 26 | val create : IO.t -> t 27 | (** [create io] constructs a write-ahead log from an IO handle referencing an 28 | unordered sequence of (binary encoded) [key * value] bindings. The 29 | bindings are read into memory, and any subsequent {!replace} operations 30 | are reflected on disk. *) 31 | 32 | val close : t -> unit 33 | 34 | (** {2 Hashtable API} *) 35 | 36 | val cardinal : t -> int 37 | val find : t -> key -> value 38 | val replace : t -> key -> value -> unit 39 | val iter : t -> f:(Entry.t -> unit) -> unit 40 | val fold : t -> f:('acc -> Entry.t -> 'acc) -> init:'acc -> 'acc 41 | 42 | val to_sorted_seq : t -> Entry.t Seq.t 43 | (** [to_sorted_seq t] is the sequence of all entries in [t], sorted by 44 | [Entry.compare]. Modifying [t] while consuming the sequence results in 45 | undefined behaviour. *) 46 | 47 | (** {2 Low-level API} *) 48 | 49 | val io : t -> IO.t 50 | (** [io t] is [t]'s underlying IO handle. Any updates to the file will not be 51 | reflected in the in-memory mirror of its state. *) 52 | 53 | val sync_entries : min:int63 -> t -> unit 54 | (** [sync_entries ~min t] loads the entries from [log]'s IO into memory, 55 | starting from offset [min]. 
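    (For example, a caller that has already loaded the entries up to some
    offset [o] can pass [~min:o] so that only the entries appended after [o]
    are decoded.)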
*) 56 | 57 | val flush : ?no_callback:unit -> with_fsync:bool -> t -> unit 58 | (** [flush t] is [IO.flush (io t)]. *) 59 | 60 | val reload : t -> unit 61 | (** [reload t] clears [t]'s in-memory state and reloads all bindings from the 62 | underlying IO hande. *) 63 | 64 | val clear : 65 | generation:int63 -> ?hook:(unit -> unit) -> reopen:bool -> t -> unit 66 | (** [clear t] clears both [t]'s in-memory state and its underlying IO handle. 67 | The flags are passed to [IO.clear]. *) 68 | end 69 | -------------------------------------------------------------------------------- /src/platform.ml: -------------------------------------------------------------------------------- 1 | module type IO = Io.S 2 | 3 | module type CLOCK = sig 4 | (** A monotonic time source. See {!Mtime_clock} for an OS-dependent 5 | implementation. *) 6 | 7 | type counter 8 | 9 | val counter : unit -> counter 10 | val count : counter -> Mtime.span 11 | end 12 | 13 | module type SEMAPHORE = sig 14 | (** Binary semaphores for mutual exclusion *) 15 | 16 | type t 17 | (** The type of binary semaphore. *) 18 | 19 | val make : bool -> t 20 | (** [make b] returns a new semaphore with the given initial state. If [b] is 21 | [true], the semaphore is initially available for acquisition; otherwise, 22 | the semaphore is initially unavailable. *) 23 | 24 | val acquire : string -> t -> unit 25 | (** Acquire the given semaphore. Acquisition is not re-entrant. *) 26 | 27 | val release : t -> unit 28 | (** Release the given semaphore. If any threads are attempting to acquire the 29 | semaphore, exactly one of them will gain access to the semaphore. *) 30 | 31 | val with_acquire : string -> t -> (unit -> 'a) -> 'a 32 | (** [with_acquire t f] first obtains [t], then computes [f ()], and finally 33 | release [t]. *) 34 | 35 | val is_held : t -> bool 36 | (** [is_held t] returns [true] if the semaphore is held, without acquiring 37 | [t]. *) 38 | end 39 | 40 | module type THREAD = sig 41 | (** Cooperative threads. *) 42 | 43 | type 'a t 44 | (** The type of thread handles. *) 45 | 46 | val async : (unit -> 'a) -> 'a t 47 | (** [async f] creates a new thread of control which executes [f ()] and 48 | returns the corresponding thread handle. The thread terminates whenever 49 | [f ()] returns a value or raises an exception. *) 50 | 51 | val await : 'a t -> ('a, [ `Async_exn of exn ]) result 52 | (** [await t] blocks on the termination of [t]. *) 53 | 54 | val return : 'a -> 'a t 55 | (** [return ()] is a pre-terminated thread handle. *) 56 | 57 | val yield : unit -> unit 58 | (** Re-schedule the calling thread without suspending it. 
*) 59 | end 60 | 61 | module type FMT_TTY = sig 62 | val setup_std_outputs : 63 | ?style_renderer:Fmt.style_renderer -> ?utf_8:bool -> unit -> unit 64 | end 65 | 66 | module type S = sig 67 | module IO : IO 68 | module Semaphore : SEMAPHORE 69 | module Thread : THREAD 70 | module Clock : CLOCK 71 | module Progress : Progress_engine.S 72 | module Fmt_tty : FMT_TTY 73 | end 74 | -------------------------------------------------------------------------------- /src/search.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | (* Metrics must be 19 | - totally ordered 20 | - computable from entries and (potentially redundantly) from keys 21 | - linearly interpolate-able on the int63 type *) 22 | 23 | include Search_intf 24 | open! Import 25 | 26 | module Make 27 | (Entry : ENTRY) 28 | (Array : ARRAY with type elt = Entry.t) 29 | (Metric : METRIC with module Entry := Entry) : 30 | S with module Entry := Entry and module Array := Array = struct 31 | module Value = Entry.Value 32 | 33 | module Key = struct 34 | include Entry.Key 35 | 36 | let ( = ) a b = compare a b = 0 37 | end 38 | 39 | module Metric = struct 40 | include Metric 41 | 42 | let ( < ) a b = compare a b < 0 43 | let ( = ) a b = compare a b = 0 44 | let ( > ) a b = compare a b > 0 45 | end 46 | 47 | let look_around array key key_metric index = 48 | let rec search (op : int63 -> int63) curr = 49 | let i = op curr in 50 | if i < Int63.zero || i >= Array.length array then raise Not_found 51 | else 52 | let e = array.(i) in 53 | let e_metric = Metric.of_entry e in 54 | if not Metric.(key_metric = e_metric) then raise Not_found 55 | else if Key.equal (Entry.to_key e) key then Entry.to_value e 56 | else (search [@tailcall]) op i 57 | in 58 | try search Int63.pred index 59 | with Not_found -> (search [@tailcall]) Int63.succ index 60 | 61 | (** Improves over binary search in cases where the values in some array are 62 | uniformly distributed according to some metric (such as a hash). 
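      Rather than always probing the midpoint, the next probe index is
      computed by [Metric.linear_interpolate]: given bounds [(low, m_low)] and
      [(high, m_high)] and a target metric [m], it is approximately
      [low + (high - low) * (m - m_low) / (m_high - m_low)].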
*) 63 | let interpolation_search array key ~low ~high = 64 | let key_metric = Metric.of_key key in 65 | (* The core of the search *) 66 | let rec search low high lowest_entry highest_entry = 67 | if high < low then raise Not_found 68 | else ( 69 | Array.pre_fetch array ~low ~high; 70 | let lowest_entry = Lazy.force lowest_entry in 71 | if high = low then 72 | if Key.(key = Entry.to_key lowest_entry) then 73 | Entry.to_value lowest_entry 74 | else raise Not_found 75 | else 76 | let lowest_metric = Metric.of_entry lowest_entry in 77 | if Metric.(lowest_metric > key_metric) then raise Not_found 78 | else 79 | let highest_entry = Lazy.force highest_entry in 80 | let highest_metric = Metric.of_entry highest_entry in 81 | if Metric.(highest_metric < key_metric) then raise Not_found 82 | else 83 | let next_index = 84 | Metric.linear_interpolate ~low:(low, lowest_metric) 85 | ~high:(high, highest_metric) key_metric 86 | in 87 | let e = array.(next_index) in 88 | let e_metric = Metric.of_entry e in 89 | if Metric.(key_metric = e_metric) then 90 | if Key.(key = Entry.to_key e) then Entry.to_value e 91 | else look_around array key key_metric next_index 92 | else if Metric.(key_metric > e_metric) then 93 | (search [@tailcall]) 94 | Int63.(succ next_index) 95 | high 96 | (lazy array.(Int63.(succ next_index))) 97 | (Lazy.from_val highest_entry) 98 | else 99 | (search [@tailcall]) low (Int63.pred next_index) 100 | (Lazy.from_val lowest_entry) 101 | (lazy array.(Int63.(pred next_index)))) 102 | in 103 | if high < Int63.zero then raise Not_found 104 | else (search [@tailcall]) low high (lazy array.(low)) (lazy array.(high)) 105 | end 106 | -------------------------------------------------------------------------------- /src/search.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | include Search_intf.Search 19 | (** @inline *) 20 | -------------------------------------------------------------------------------- /src/search_intf.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
*) 17 | 18 | open! Import 19 | 20 | module type ARRAY = sig 21 | type t 22 | type elt 23 | 24 | val get : t -> int63 -> elt 25 | val length : t -> int63 26 | val pre_fetch : t -> low:int63 -> high:int63 -> unit 27 | end 28 | 29 | module type ENTRY = sig 30 | type t 31 | 32 | module Key : sig 33 | type t 34 | 35 | val equal : t -> t -> bool 36 | end 37 | 38 | module Value : sig 39 | type t 40 | end 41 | 42 | val to_key : t -> Key.t 43 | val to_value : t -> Value.t 44 | end 45 | 46 | module type METRIC = sig 47 | type t 48 | 49 | module Entry : ENTRY 50 | 51 | val compare : t -> t -> int 52 | val of_entry : Entry.t -> t 53 | val of_key : Entry.Key.t -> t 54 | val linear_interpolate : low:int63 * t -> high:int63 * t -> t -> int63 55 | end 56 | 57 | module type S = sig 58 | module Entry : ENTRY 59 | module Array : ARRAY with type elt = Entry.t 60 | 61 | val interpolation_search : 62 | Array.t -> Entry.Key.t -> low:int63 -> high:int63 -> Entry.Value.t 63 | end 64 | 65 | module type Search = sig 66 | module type ARRAY = ARRAY 67 | module type ENTRY = ENTRY 68 | module type METRIC = METRIC 69 | module type S = S 70 | 71 | module Make 72 | (Entry : ENTRY) 73 | (Array : ARRAY with type elt = Entry.t) 74 | (Metric : METRIC with module Entry := Entry) : 75 | S with module Entry := Entry and module Array := Array 76 | end 77 | -------------------------------------------------------------------------------- /src/small_list.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | *) 16 | 17 | type 'a t = 18 | | Tuple0 19 | | Tuple1 of 'a 20 | | Tuple2 of 'a * 'a 21 | | Tuple3 of 'a * 'a * 'a 22 | | Tuple4 of 'a * 'a * 'a * 'a 23 | | Tuple5 of 'a * 'a * 'a * 'a * 'a 24 | | Tuple6 of 'a * 'a * 'a * 'a * 'a * 'a 25 | | Many of 'a * 'a * 'a * 'a * 'a * 'a * 'a * 'a list 26 | 27 | let empty = Tuple0 28 | 29 | let cons x = function 30 | | Tuple0 -> Tuple1 x 31 | | Tuple1 a -> Tuple2 (x, a) 32 | | Tuple2 (a, b) -> Tuple3 (x, a, b) 33 | | Tuple3 (a, b, c) -> Tuple4 (x, a, b, c) 34 | | Tuple4 (a, b, c, d) -> Tuple5 (x, a, b, c, d) 35 | | Tuple5 (a, b, c, d, e) -> Tuple6 (x, a, b, c, d, e) 36 | | Tuple6 (a, b, c, d, e, f) -> Many (x, a, b, c, d, e, f, []) 37 | | Many (a, b, c, d, e, f, g, l) -> Many (x, a, b, c, d, e, f, g :: l) 38 | 39 | let map ~f:fn = function 40 | | Tuple0 -> Tuple0 41 | | Tuple1 a -> Tuple1 (fn a) 42 | | Tuple2 (a, b) -> Tuple2 (fn a, fn b) 43 | | Tuple3 (a, b, c) -> Tuple3 (fn a, fn b, fn c) 44 | | Tuple4 (a, b, c, d) -> Tuple4 (fn a, fn b, fn c, fn d) 45 | | Tuple5 (a, b, c, d, e) -> Tuple5 (fn a, fn b, fn c, fn d, fn e) 46 | | Tuple6 (a, b, c, d, e, f) -> Tuple6 (fn a, fn b, fn c, fn d, fn e, fn f) 47 | | Many (a, b, c, d, e, f, g, l) -> 48 | Many (fn a, fn b, fn c, fn d, fn e, fn f, fn g, List.map fn l) 49 | 50 | let iter ~f:fn = function 51 | | Tuple0 -> () 52 | | Tuple1 a -> fn a 53 | | Tuple2 (a, b) -> 54 | fn a; 55 | fn b 56 | | Tuple3 (a, b, c) -> 57 | fn a; 58 | fn b; 59 | fn c 60 | | Tuple4 (a, b, c, d) -> 61 | fn a; 62 | fn b; 63 | fn c; 64 | fn d 65 | | Tuple5 (a, b, c, d, e) -> 66 | fn a; 67 | fn b; 68 | fn c; 69 | fn d; 70 | fn e 71 | | Tuple6 (a, b, c, d, e, f) -> 72 | fn a; 73 | fn b; 74 | fn c; 75 | fn d; 76 | fn e; 77 | fn f 78 | | Many (a, b, c, d, e, f, g, l) -> 79 | fn a; 80 | fn b; 81 | fn c; 82 | fn d; 83 | fn e; 84 | fn f; 85 | fn g; 86 | List.iter fn l 87 | 88 | let exists ~f:fn = function 89 | | Tuple0 -> false 90 | | Tuple1 a -> fn a 91 | | Tuple2 (a, b) -> fn a || fn b 92 | | Tuple3 (a, b, c) -> fn a || fn b || fn c 93 | | Tuple4 (a, b, c, d) -> fn a || fn b || fn c || fn d 94 | | Tuple5 (a, b, c, d, e) -> fn a || fn b || fn c || fn d || fn e 95 | | Tuple6 (a, b, c, d, e, f) -> fn a || fn b || fn c || fn d || fn e || fn f 96 | | Many (a, b, c, d, e, f, g, l) -> 97 | fn a || fn b || fn c || fn d || fn e || fn f || fn g || List.exists fn l 98 | 99 | (* TODO(4.10): use [Stdlib.List.find_map] instead. 
*) 100 | let rec list_find_map f = function 101 | | [] -> None 102 | | x :: l -> ( 103 | match f x with Some _ as result -> result | None -> list_find_map f l) 104 | 105 | let[@ocamlformat "disable"] find_map ~f:fn = function 106 | | Tuple0 -> raise Not_found 107 | | Tuple1 a -> fn a 108 | | Tuple2 (a, b) -> ( 109 | match fn a with Some _ as r -> r | None -> 110 | fn b) 111 | | Tuple3 (a, b, c) -> ( 112 | match fn a with Some _ as r -> r | None -> 113 | match fn b with Some _ as r -> r | None -> 114 | fn c) 115 | | Tuple4 (a, b, c, d) -> ( 116 | match fn a with Some _ as r -> r | None -> 117 | match fn b with Some _ as r -> r | None -> 118 | match fn c with Some _ as r -> r | None -> 119 | fn d) 120 | | Tuple5 (a, b, c, d, e) -> ( 121 | match fn a with Some _ as r -> r | None -> 122 | match fn b with Some _ as r -> r | None -> 123 | match fn c with Some _ as r -> r | None -> 124 | match fn d with Some _ as r -> r | None -> 125 | fn e) 126 | | Tuple6 (a, b, c, d, e, f) -> ( 127 | match fn a with Some _ as r -> r | None -> 128 | match fn b with Some _ as r -> r | None -> 129 | match fn c with Some _ as r -> r | None -> 130 | match fn d with Some _ as r -> r | None -> 131 | match fn e with Some _ as r -> r | None -> 132 | fn f) 133 | | Many (a, b, c, d, e, f, g, l) -> ( 134 | match fn a with Some _ as r -> r | None -> 135 | match fn b with Some _ as r -> r | None -> 136 | match fn c with Some _ as r -> r | None -> 137 | match fn d with Some _ as r -> r | None -> 138 | match fn e with Some _ as r -> r | None -> 139 | match fn f with Some _ as r -> r | None -> 140 | match fn g with Some _ as r -> r | None -> 141 | list_find_map fn l) 142 | 143 | let to_list = function 144 | | Tuple0 -> [] 145 | | Tuple1 a -> [ a ] 146 | | Tuple2 (a, b) -> [ a; b ] 147 | | Tuple3 (a, b, c) -> [ a; b; c ] 148 | | Tuple4 (a, b, c, d) -> [ a; b; c; d ] 149 | | Tuple5 (a, b, c, d, e) -> [ a; b; c; d; e ] 150 | | Tuple6 (a, b, c, d, e, f) -> [ a; b; c; d; e; f ] 151 | | Many (a, b, c, d, e, f, g, l) -> a :: b :: c :: d :: e :: f :: g :: l 152 | 153 | let to_array = function 154 | | Tuple0 -> [||] 155 | | Tuple1 a -> [| a |] 156 | | Tuple2 (a, b) -> [| a; b |] 157 | | Tuple3 (a, b, c) -> [| a; b; c |] 158 | | Tuple4 (a, b, c, d) -> [| a; b; c; d |] 159 | | Tuple5 (a, b, c, d, e) -> [| a; b; c; d; e |] 160 | | Tuple6 (a, b, c, d, e, f) -> [| a; b; c; d; e; f |] 161 | | Many (a, b, c, d, e, f, g, l) -> 162 | let len = 7 + List.length l in 163 | let arr = Array.make len a in 164 | arr.(1) <- b; 165 | arr.(2) <- c; 166 | arr.(3) <- d; 167 | arr.(4) <- e; 168 | arr.(5) <- f; 169 | arr.(6) <- g; 170 | List.iteri (fun i elt -> arr.(i + 7) <- elt) l; 171 | arr 172 | 173 | let of_list = function 174 | | [] -> Tuple0 175 | | [ a ] -> Tuple1 a 176 | | [ a; b ] -> Tuple2 (a, b) 177 | | [ a; b; c ] -> Tuple3 (a, b, c) 178 | | [ a; b; c; d ] -> Tuple4 (a, b, c, d) 179 | | [ a; b; c; d; e ] -> Tuple5 (a, b, c, d, e) 180 | | [ a; b; c; d; e; f ] -> Tuple6 (a, b, c, d, e, f) 181 | | a :: b :: c :: d :: e :: f :: g :: l -> Many (a, b, c, d, e, f, g, l) 182 | 183 | let fold_left ~f:fn ~init = function 184 | | Tuple0 -> init 185 | | Tuple1 a -> fn init a 186 | | Tuple2 (a, b) -> fn (fn init a) b 187 | | Tuple3 (a, b, c) -> fn (fn (fn init a) b) c 188 | | Tuple4 (a, b, c, d) -> fn (fn (fn (fn init a) b) c) d 189 | | Tuple5 (a, b, c, d, e) -> fn (fn (fn (fn (fn init a) b) c) d) e 190 | | Tuple6 (a, b, c, d, e, f) -> fn (fn (fn (fn (fn (fn init a) b) c) d) e) f 191 | | Many (a, b, c, d, e, f, g, l) -> 192 | List.fold_left fn (fn (fn (fn (fn 
(fn (fn (fn init a) b) c) d) e) f) g) l 193 | -------------------------------------------------------------------------------- /src/small_list.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Copyright (c) 2021 Tarides 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | *) 16 | 17 | (** This API has the same semantics as that of [List]. *) 18 | 19 | type +'a t 20 | 21 | val empty : _ t 22 | val cons : 'a -> 'a t -> 'a t 23 | val map : f:('a -> 'b) -> 'a t -> 'b t 24 | val iter : f:('a -> unit) -> 'a t -> unit 25 | val exists : f:('a -> bool) -> 'a t -> bool 26 | val to_list : 'a t -> 'a list 27 | val of_list : 'a list -> 'a t 28 | val to_array : 'a t -> 'a array 29 | val find_map : f:('a -> 'b option) -> 'a t -> 'b option 30 | val fold_left : f:('acc -> 'a -> 'acc) -> init:'acc -> 'a t -> 'acc 31 | -------------------------------------------------------------------------------- /src/stats.ml: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | type t = { 4 | mutable bytes_read : int; 5 | mutable nb_reads : int; 6 | mutable bytes_written : int; 7 | mutable nb_writes : int; 8 | mutable nb_merge : int; 9 | mutable merge_durations : float list; 10 | mutable nb_replace : int; 11 | mutable replace_durations : float list; 12 | mutable nb_sync : int; 13 | mutable time_sync : float; 14 | mutable lru_hits : int; 15 | mutable lru_misses : int; 16 | } 17 | 18 | let fresh_stats () = 19 | { 20 | bytes_read = 0; 21 | nb_reads = 0; 22 | bytes_written = 0; 23 | nb_writes = 0; 24 | nb_merge = 0; 25 | merge_durations = []; 26 | nb_replace = 0; 27 | replace_durations = []; 28 | nb_sync = 0; 29 | time_sync = 0.0; 30 | lru_hits = 0; 31 | lru_misses = 0; 32 | } 33 | 34 | let stats = fresh_stats () 35 | let get () = stats 36 | 37 | let reset_stats () = 38 | stats.bytes_read <- 0; 39 | stats.nb_reads <- 0; 40 | stats.bytes_written <- 0; 41 | stats.nb_writes <- 0; 42 | stats.nb_merge <- 0; 43 | stats.merge_durations <- []; 44 | stats.nb_replace <- 0; 45 | stats.replace_durations <- []; 46 | stats.nb_sync <- 0; 47 | stats.time_sync <- 0.0; 48 | stats.lru_hits <- 0; 49 | stats.lru_misses <- 0 50 | 51 | let incr_bytes_read n = stats.bytes_read <- stats.bytes_read + n 52 | let incr_bytes_written n = stats.bytes_written <- stats.bytes_written + n 53 | let incr_nb_reads () = stats.nb_reads <- succ stats.nb_reads 54 | let incr_nb_writes () = stats.nb_writes <- succ stats.nb_writes 55 | let incr_nb_merge () = stats.nb_merge <- succ stats.nb_merge 56 | let incr_nb_replace () = stats.nb_replace <- succ stats.nb_replace 57 | let incr_nb_sync () = stats.nb_sync <- succ stats.nb_sync 58 | let incr_nb_lru_hits () = stats.lru_hits <- succ stats.lru_hits 59 | let incr_nb_lru_misses () = stats.lru_misses <- succ 
stats.lru_misses 60 | 61 | let add_read n = 62 | incr_bytes_read n; 63 | incr_nb_reads () 64 | 65 | let add_write n = 66 | incr_bytes_written n; 67 | incr_nb_writes () 68 | 69 | module Make (Clock : Platform.CLOCK) = struct 70 | let replace_timer = ref (Clock.counter ()) 71 | let nb_replace = ref 0 72 | 73 | let start_replace () = 74 | if !nb_replace = 0 then replace_timer := Clock.counter () 75 | 76 | let end_replace ~sampling_interval = 77 | nb_replace := !nb_replace + 1; 78 | if !nb_replace = sampling_interval then ( 79 | let span = Clock.count !replace_timer in 80 | let average = Mtime.span_to_us span /. float_of_int !nb_replace in 81 | stats.replace_durations <- average :: stats.replace_durations; 82 | replace_timer := Clock.counter (); 83 | nb_replace := 0) 84 | 85 | let sync_with_timer f = 86 | let timer = Clock.counter () in 87 | f (); 88 | let span = Clock.count timer in 89 | stats.time_sync <- Mtime.span_to_us span 90 | 91 | let drop_head l = if List.length l >= 10 then List.tl l else l 92 | 93 | let add_merge_duration span = 94 | let span = Mtime.span_to_us span in 95 | stats.merge_durations <- drop_head stats.merge_durations @ [ span ] 96 | end 97 | -------------------------------------------------------------------------------- /src/stats.mli: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | type t = { 4 | mutable bytes_read : int; 5 | mutable nb_reads : int; 6 | mutable bytes_written : int; 7 | mutable nb_writes : int; 8 | mutable nb_merge : int; 9 | mutable merge_durations : float list; 10 | mutable nb_replace : int; 11 | mutable replace_durations : float list; 12 | mutable nb_sync : int; 13 | mutable time_sync : float; 14 | mutable lru_hits : int; 15 | mutable lru_misses : int; 16 | } 17 | (** The type for stats for an index I. 18 | 19 | - [bytes_read] is the number of bytes read from disk; 20 | - [nb_reads] is the number of reads from disk; 21 | - [bytes_written] is the number of bytes written to disk; 22 | - [nb_writes] is the number of writes to disk; 23 | - [nb_merge] is the number of times a merge occurred; 24 | - [merge_durations] lists how much time the last 10 merge operations took 25 | (in microseconds); 26 | - [nb_replace] is the number of calls to [I.replace]; 27 | - [replace_durations] lists how much time replace operations took. Each 28 | element is an average of [n] consecutive replaces, where [n] is the 29 | [sampling_interval] specified when calling [end_replace]. 30 | - [time_sync] is the duration of the latest call to sync. 
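    - [nb_sync] is the number of calls to sync;
    - [lru_hits] and [lru_misses] count the lookups that were served by
      (respectively missed) the instance's in-memory LRU cache of
      recently-read bindings.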
*) 31 | 32 | val get : unit -> t 33 | val reset_stats : unit -> unit 34 | val add_read : int -> unit 35 | val add_write : int -> unit 36 | val incr_nb_merge : unit -> unit 37 | val incr_nb_replace : unit -> unit 38 | val incr_nb_sync : unit -> unit 39 | val incr_nb_lru_hits : unit -> unit 40 | val incr_nb_lru_misses : unit -> unit 41 | 42 | module Make (_ : Platform.CLOCK) : sig 43 | val start_replace : unit -> unit 44 | val end_replace : sampling_interval:int -> unit 45 | val sync_with_timer : (unit -> unit) -> unit 46 | val add_merge_duration : Mtime.Span.t -> unit 47 | end 48 | -------------------------------------------------------------------------------- /src/unix/buffer.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2021 Clément Pascutto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. *) 14 | 15 | open! Import 16 | 17 | type t = { mutable buffer : bytes; mutable position : int } 18 | 19 | external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit 20 | = "caml_blit_string" 21 | [@@noalloc] 22 | (** Bytes.unsafe_blit_string not available in OCaml 4.08. *) 23 | 24 | let create n = { buffer = Bytes.create n; position = 0 } 25 | 26 | let write_with (write : string -> int -> int -> unit) b = 27 | write (Bytes.unsafe_to_string b.buffer) 0 b.position 28 | 29 | let length b = b.position 30 | let is_empty b = b.position = 0 31 | let clear b = b.position <- 0 32 | 33 | let resize b more = 34 | let old_pos = b.position in 35 | let old_len = Bytes.length b.buffer in 36 | let new_len = ref old_len in 37 | while old_pos + more > !new_len do 38 | new_len := 2 * !new_len 39 | done; 40 | let new_buffer = Bytes.create !new_len in 41 | Bytes.blit b.buffer 0 new_buffer 0 b.position; 42 | b.buffer <- new_buffer 43 | 44 | let add_substring b s ~off ~len = 45 | let new_position = b.position + len in 46 | if new_position > Bytes.length b.buffer then resize b len; 47 | unsafe_blit_string s off b.buffer b.position len; 48 | b.position <- new_position 49 | 50 | let blit ~src ~src_off ~dst ~dst_off ~len = 51 | assert (src_off + len <= src.position); 52 | Bytes.blit src.buffer src_off dst dst_off len 53 | 54 | let add_string b s = add_substring b s ~off:0 ~len:(String.length s) 55 | -------------------------------------------------------------------------------- /src/unix/buffer.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2021 Clément Pascutto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. *) 14 | 15 | (** Extensible buffers with non-allocating access to the buffer's contents. *) 16 | 17 | type t 18 | (** The type of buffers. *) 19 | 20 | val create : int -> t 21 | (** [create n] is a fresh buffer with initial size [n]. *) 22 | 23 | val length : t -> int 24 | (** [length b] is the number of bytes contained in the buffer. *) 25 | 26 | val is_empty : t -> bool 27 | (** [is_empty t] iff [t] contains 0 characters. *) 28 | 29 | val clear : t -> unit 30 | (** [clear t] clears the data contained in [t]. It does not reset the buffer to 31 | its initial size. *) 32 | 33 | val add_substring : t -> string -> off:int -> len:int -> unit 34 | (** [add_substring t s ~off ~len] appends the substring 35 | [s.(off) .. s.(off + len - 1)] at the end of [t], resizing [t] if necessary. *) 36 | 37 | val add_string : t -> string -> unit 38 | (** [add_string t s] appends [s] at the end of [t], resizing [t] if necessary. *) 39 | 40 | val write_with : (string -> int -> int -> unit) -> t -> unit 41 | (** [write_with writer t] uses [writer] to write the contents of [t]. [writer] 42 | takes a string to write, an offset and a length. *) 43 | 44 | val blit : src:t -> src_off:int -> dst:bytes -> dst_off:int -> len:int -> unit 45 | (** [blit] copies [len] bytes from the buffer [src], starting at offset 46 | [src_off], into the supplied bytes [dst], starting at offset [dst_off]. *) 47 | -------------------------------------------------------------------------------- /src/unix/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (foreign_stubs 3 | (language c) 4 | (names pread pwrite)) 5 | (public_name index.unix) 6 | (name index_unix) 7 | (optional) 8 | (libraries fmt fmt.tty index logs logs.threaded threads.posix unix 9 | semaphore-compat mtime mtime.clock.os optint progress)) 10 | -------------------------------------------------------------------------------- /src/unix/import.ml: -------------------------------------------------------------------------------- 1 | module Int63 = Optint.Int63 2 | 3 | type int63 = Int63.t 4 | 5 | module Mtime = struct 6 | include Mtime 7 | 8 | let span_to_s span = Mtime.Span.to_float_ns span *. 1e-9 9 | let span_to_us span = Mtime.Span.to_float_ns span *. 1e-3 10 | end 11 | -------------------------------------------------------------------------------- /src/unix/index_unix.ml: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! 
Import 19 | 20 | let src = Logs.Src.create "index_unix" ~doc:"Index_unix" 21 | 22 | module Log = (val Logs.src_log src : Logs.LOG) 23 | 24 | exception RO_not_allowed 25 | 26 | let current_version = "00000001" 27 | 28 | module Stats = Index.Stats 29 | 30 | module IO : Index.Platform.IO = struct 31 | let ( ++ ) = Int63.add 32 | let ( -- ) = Int63.sub 33 | 34 | type t = { 35 | mutable file : string; 36 | mutable header : int63; 37 | mutable raw : Raw.t; 38 | mutable offset : int63; 39 | mutable flushed : int63; 40 | mutable fan_size : int63; 41 | readonly : bool; 42 | buf : Buffer.t; 43 | flush_callback : unit -> unit; 44 | } 45 | 46 | let flush ?no_callback ?(with_fsync = false) t = 47 | if t.readonly then raise RO_not_allowed; 48 | if not (Buffer.is_empty t.buf) then ( 49 | let buf_len = Buffer.length t.buf in 50 | let offset = t.offset in 51 | (match no_callback with Some () -> () | None -> t.flush_callback ()); 52 | Log.debug (fun l -> l "[%s] flushing %d bytes" t.file buf_len); 53 | Buffer.write_with (Raw.unsafe_write t.raw ~off:t.flushed) t.buf; 54 | Buffer.clear t.buf; 55 | Raw.Offset.set t.raw offset; 56 | assert (t.flushed ++ Int63.of_int buf_len = t.header ++ offset); 57 | t.flushed <- offset ++ t.header); 58 | if with_fsync then Raw.fsync t.raw 59 | 60 | let rename ~src ~dst = 61 | flush ~with_fsync:true src; 62 | Raw.close dst.raw; 63 | Unix.rename src.file dst.file; 64 | Buffer.clear dst.buf; 65 | src.file <- dst.file; 66 | dst.header <- src.header; 67 | dst.fan_size <- src.fan_size; 68 | dst.offset <- src.offset; 69 | dst.flushed <- src.flushed; 70 | dst.raw <- src.raw 71 | 72 | let close t = 73 | if not t.readonly then Buffer.clear t.buf; 74 | Raw.close t.raw 75 | 76 | let auto_flush_limit = Int63.of_int 1_000_000 77 | 78 | let append_substring t buf ~off ~len = 79 | if t.readonly then raise RO_not_allowed; 80 | Buffer.add_substring t.buf buf ~off ~len; 81 | let len = Int63.of_int len in 82 | t.offset <- t.offset ++ len; 83 | if t.offset -- t.flushed > auto_flush_limit then flush t 84 | 85 | let append t buf = append_substring t buf ~off:0 ~len:(String.length buf) 86 | 87 | let read t ~off ~len buf = 88 | let off = t.header ++ off in 89 | let end_of_value = off ++ Int63.of_int len in 90 | if not t.readonly then 91 | assert ( 92 | let total_length = t.flushed ++ Int63.of_int (Buffer.length t.buf) in 93 | (* NOTE: we don't require that [end_of_value <= total_length] in order 94 | to support short reads on read-write handles (see comment about this 95 | case below). *) 96 | off <= total_length); 97 | 98 | if t.readonly || end_of_value <= t.flushed then 99 | (* Value is entirely on disk *) 100 | Raw.unsafe_read t.raw ~off ~len buf 101 | else 102 | (* Must read some data not yet flushed to disk *) 103 | let requested_from_disk = max 0 (Int63.to_int (t.flushed -- off)) in 104 | let requested_from_buffer = len - requested_from_disk in 105 | let read_from_disk = 106 | if requested_from_disk > 0 then ( 107 | let read = Raw.unsafe_read t.raw ~off ~len:requested_from_disk buf in 108 | assert (read = requested_from_disk); 109 | read) 110 | else 0 111 | in 112 | let read_from_buffer = 113 | let src_off = max 0 (Int63.to_int (off -- t.flushed)) in 114 | let len = 115 | (* The user may request more bytes than actually exist, in which case 116 | we read to the end of the write buffer and return a size less than 117 | [len]. 
*) 118 | let available_length = Buffer.length t.buf - src_off in 119 | min available_length requested_from_buffer 120 | in 121 | Buffer.blit ~src:t.buf ~src_off ~dst:buf ~dst_off:requested_from_disk 122 | ~len; 123 | len 124 | in 125 | read_from_disk + read_from_buffer 126 | 127 | let offset t = t.offset 128 | 129 | let get_generation t = 130 | let i = Raw.Generation.get t.raw in 131 | Log.debug (fun m -> m "get_generation: %a" Int63.pp i); 132 | i 133 | 134 | let get_fanout t = Raw.Fan.get t.raw 135 | let get_fanout_size t = Raw.Fan.get_size t.raw 136 | 137 | let set_fanout t buf = 138 | assert (Int63.(equal (of_int (String.length buf)) t.fan_size)); 139 | Raw.Fan.set t.raw buf 140 | 141 | module Header = struct 142 | type header = { offset : int63; generation : int63 } 143 | 144 | let pp ppf { offset; generation } = 145 | Format.fprintf ppf "{ offset = %a; generation = %a }" Int63.pp offset 146 | Int63.pp generation 147 | 148 | let get t = 149 | let Raw.Header.{ offset; generation; _ } = Raw.Header.get t.raw in 150 | t.offset <- offset; 151 | let headers = { offset; generation } in 152 | Log.debug (fun m -> m "[%s] get_headers: %a" t.file pp headers); 153 | headers 154 | 155 | let set t { offset; generation } = 156 | let version = current_version in 157 | Log.debug (fun m -> 158 | m "[%s] set_header %a" t.file pp { offset; generation }); 159 | Raw.Header.(set t.raw { offset; version; generation }) 160 | end 161 | 162 | let protect_unix_exn = function 163 | | Unix.Unix_error _ as e -> failwith (Printexc.to_string e) 164 | | e -> raise e 165 | 166 | let ignore_enoent = function 167 | | Unix.Unix_error (Unix.ENOENT, _, _) -> () 168 | | e -> raise e 169 | 170 | let protect f x = try f x with e -> protect_unix_exn e 171 | let safe f x = try f x with e -> ignore_enoent e 172 | 173 | let mkdir dirname = 174 | let rec aux dir k = 175 | if Sys.file_exists dir && Sys.is_directory dir then k () 176 | else ( 177 | if Sys.file_exists dir then safe Unix.unlink dir; 178 | (aux [@tailcall]) (Filename.dirname dir) (fun () -> 179 | protect (Unix.mkdir dir) 0o755; 180 | k ())) 181 | in 182 | (aux [@tailcall]) dirname (fun () -> ()) 183 | 184 | let raw_file ~flags ~version ~offset ~generation file = 185 | let x = Unix.openfile file flags 0o644 in 186 | let raw = Raw.v x in 187 | let header = { Raw.Header.offset; version; generation } in 188 | Log.debug (fun m -> 189 | m "[%s] raw set_header %a" file Header.pp { offset; generation }); 190 | Raw.Header.set raw header; 191 | Raw.Fan.set raw ""; 192 | Raw.fsync raw; 193 | raw 194 | 195 | let clear ~generation ?(hook = fun () -> ()) ~reopen t = 196 | t.offset <- Int63.zero; 197 | t.flushed <- t.header; 198 | Buffer.clear t.buf; 199 | let old = t.raw in 200 | 201 | if reopen then ( 202 | (* Open a fresh file and rename it to ensure atomicity: 203 | concurrent readers should never see the file disappearing. *) 204 | let tmp_file = t.file ^ "_tmp" in 205 | t.raw <- 206 | raw_file ~version:current_version ~generation ~offset:Int63.zero 207 | ~flags:Unix.[ O_CREAT; O_RDWR; O_CLOEXEC ] 208 | tmp_file; 209 | Unix.rename tmp_file t.file) 210 | else 211 | (* Remove the current file. This allows a fresh file to be 212 | created before writing the new generation in the old file. *) 213 | Unix.unlink t.file; 214 | 215 | hook (); 216 | 217 | (* Set new generation in the old file. 
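Read-only instances poll this generation number, so bumping it in the old file (which they may still hold open) signals to them that its contents have been cleared.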
*) 218 | Raw.Header.set old 219 | { Raw.Header.offset = Int63.zero; generation; version = current_version }; 220 | Raw.close old 221 | 222 | let () = assert (String.length current_version = 8) 223 | 224 | let v_instance ?(flush_callback = fun () -> ()) ~readonly ~fan_size ~offset 225 | file raw = 226 | let eight = Int63.of_int 8 in 227 | let header = eight ++ eight ++ eight ++ eight ++ fan_size in 228 | { 229 | header; 230 | file; 231 | offset; 232 | raw; 233 | readonly; 234 | fan_size; 235 | buf = Buffer.create (if readonly then 0 else 4 * 1024); 236 | flushed = header ++ offset; 237 | flush_callback; 238 | } 239 | 240 | let v ?flush_callback ~fresh ~generation ~fan_size file = 241 | let v = v_instance ?flush_callback ~readonly:false file in 242 | mkdir (Filename.dirname file); 243 | let header = 244 | { Raw.Header.offset = Int63.zero; version = current_version; generation } 245 | in 246 | match Sys.file_exists file with 247 | | false -> 248 | let x = Unix.openfile file Unix.[ O_CREAT; O_CLOEXEC; O_RDWR ] 0o644 in 249 | let raw = Raw.v x in 250 | Raw.Header.set raw header; 251 | Raw.Fan.set_size raw fan_size; 252 | v ~fan_size ~offset:Int63.zero raw 253 | | true -> 254 | let x = Unix.openfile file Unix.[ O_EXCL; O_CLOEXEC; O_RDWR ] 0o644 in 255 | let raw = Raw.v x in 256 | if fresh then ( 257 | Raw.Header.set raw header; 258 | Raw.Fan.set_size raw fan_size; 259 | Raw.fsync raw; 260 | v ~fan_size ~offset:Int63.zero raw) 261 | else 262 | let version = Raw.Version.get raw in 263 | if version <> current_version then 264 | Fmt.failwith "Io.v: unsupported version %s (current version is %s)" 265 | version current_version; 266 | 267 | let offset = Raw.Offset.get raw in 268 | let fan_size = Raw.Fan.get_size raw in 269 | v ~fan_size ~offset raw 270 | 271 | let v_readonly file = 272 | let v = v_instance ~readonly:true file in 273 | mkdir (Filename.dirname file); 274 | try 275 | let x = Unix.openfile file Unix.[ O_EXCL; O_CLOEXEC; O_RDONLY ] 0o644 in 276 | let raw = Raw.v x in 277 | try 278 | let version = Raw.Version.get raw in 279 | if version <> current_version then 280 | Fmt.failwith "Io.v: unsupported version %s (current version is %s)" 281 | version current_version; 282 | let offset = Raw.Offset.get raw in 283 | let fan_size = Raw.Fan.get_size raw in 284 | Ok (v ~fan_size ~offset raw) 285 | with Raw.Not_written -> 286 | (* The readonly instance cannot read a file that does not have a 287 | header.*) 288 | Raw.close raw; 289 | Error `No_file_on_disk 290 | with 291 | | Unix.Unix_error (Unix.ENOENT, _, _) -> 292 | (* The readonly instance cannot open a non existing file. 
*) 293 | Error `No_file_on_disk 294 | | e -> raise e 295 | 296 | let exists = Sys.file_exists 297 | let size { raw; _ } = (Raw.fstat raw).st_size 298 | let size_header t = t.header |> Int63.to_int 299 | 300 | module Lock = struct 301 | type t = { path : string; fd : Unix.file_descr } 302 | 303 | exception Locked of string 304 | 305 | let unsafe_lock op f = 306 | mkdir (Filename.dirname f); 307 | let fd = 308 | Unix.openfile f [ Unix.O_CREAT; Unix.O_RDWR; Unix.O_CLOEXEC ] 0o600 309 | and pid = string_of_int (Unix.getpid ()) in 310 | let pid_len = String.length pid in 311 | try 312 | Unix.lockf fd op 0; 313 | if Unix.single_write_substring fd pid 0 pid_len <> pid_len then ( 314 | Unix.close fd; 315 | failwith "Unable to write PID to lock file") 316 | else Some fd 317 | with 318 | | Unix.Unix_error (Unix.EAGAIN, _, _) -> 319 | Unix.close fd; 320 | None 321 | | e -> 322 | Unix.close fd; 323 | raise e 324 | 325 | let with_ic path f = 326 | let ic = open_in path in 327 | let a = f ic in 328 | close_in ic; 329 | a 330 | 331 | let err_rw_lock path = 332 | let line = with_ic path input_line in 333 | let pid = int_of_string line in 334 | Log.err (fun l -> 335 | l 336 | "Cannot lock %s: index is already opened in write mode by PID %d. \ 337 | Current PID is %d." 338 | path pid (Unix.getpid ())); 339 | raise (Locked path) 340 | 341 | let lock path = 342 | Log.debug (fun l -> l "Locking %s" path); 343 | match unsafe_lock Unix.F_TLOCK path with 344 | | Some fd -> { path; fd } 345 | | None -> err_rw_lock path 346 | 347 | let unlock { path; fd } = 348 | Log.debug (fun l -> l "Unlocking %s" path); 349 | Unix.close fd 350 | 351 | let pp_dump path = 352 | match Sys.file_exists path with 353 | | false -> None 354 | | true -> 355 | let contents = 356 | with_ic path (fun ic -> 357 | really_input_string ic (in_channel_length ic)) 358 | in 359 | Some (fun ppf -> Fmt.string ppf contents) 360 | end 361 | end 362 | 363 | module Semaphore = struct 364 | module S = Semaphore_compat.Semaphore.Binary 365 | 366 | let is_held t = 367 | let acquired = S.try_acquire t in 368 | if acquired then S.release t; 369 | not acquired 370 | 371 | include S 372 | 373 | let acquire n t = 374 | let x = Mtime_clock.counter () in 375 | S.acquire t; 376 | let y = Mtime_clock.count x in 377 | if Mtime.span_to_s y > 1. 
then 378 | Log.warn (fun l -> l "Semaphore %s was blocked for %a" n Mtime.Span.pp y) 379 | 380 | let with_acquire n t f = 381 | acquire n t; 382 | Fun.protect ~finally:(fun () -> S.release t) f 383 | end 384 | 385 | module Thread = struct 386 | type 'a t = 387 | | Async of { thread : Thread.t; result : ('a, exn) result option ref } 388 | | Value of 'a 389 | 390 | let async f = 391 | let result = ref None in 392 | let protected_f x = 393 | try result := Some (Ok (f x)) 394 | with exn -> 395 | result := Some (Error exn); 396 | raise exn 397 | in 398 | let thread = Thread.create protected_f () in 399 | Async { thread; result } 400 | 401 | let yield = Thread.yield 402 | let return a = Value a 403 | 404 | let await t = 405 | match t with 406 | | Value v -> Ok v 407 | | Async { thread; result } -> ( 408 | let () = Thread.join thread in 409 | match !result with 410 | | Some (Ok _ as o) -> o 411 | | Some (Error exn) -> Error (`Async_exn exn) 412 | | None -> assert false) 413 | end 414 | 415 | module Platform = struct 416 | module IO = IO 417 | module Semaphore = Semaphore 418 | module Thread = Thread 419 | module Clock = Mtime_clock 420 | module Progress = Progress 421 | module Fmt_tty = Fmt_tty 422 | end 423 | 424 | module Make (K : Index.Key.S) (V : Index.Value.S) = 425 | Index.Make (K) (V) (Platform) 426 | 427 | module Syscalls = Syscalls 428 | 429 | module Private = struct 430 | module Platform = Platform 431 | module IO = IO 432 | module Raw = Raw 433 | 434 | module Make (K : Index.Key.S) (V : Index.Value.S) = 435 | Index.Private.Make (K) (V) (Platform) 436 | end 437 | -------------------------------------------------------------------------------- /src/unix/index_unix.mli: -------------------------------------------------------------------------------- 1 | (* The MIT License 2 | 3 | Copyright (c) 2019 Craig Ferguson 4 | Thomas Gazagnaire 5 | Ioana Cristescu 6 | Clément Pascutto 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. *) 17 | 18 | open! Import 19 | 20 | module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : 21 | Index.S with type key = K.t and type value = V.t 22 | 23 | module Syscalls = Syscalls 24 | (** Bindings to Unix system calls. *) 25 | 26 | (** These modules should not be used. They are exposed purely for testing 27 | purposes. 
*) 28 | module Private : sig 29 | module Platform : Index.Platform.S 30 | module IO : Index.Platform.IO 31 | module Raw = Raw 32 | 33 | module Make (K : Index.Key.S) (V : Index.Value.S) (C : Index.Cache.S) : 34 | Index.Private.S with type key = K.t and type value = V.t 35 | end 36 | -------------------------------------------------------------------------------- /src/unix/pread.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | CAMLprim value caml_index_pread_int 8 | (value v_fd, value v_fd_off, value v_buf, value v_buf_off, value v_len) 9 | { 10 | CAMLparam5(v_fd, v_fd_off, v_buf, v_buf_off, v_len); 11 | 12 | ssize_t ret; 13 | size_t fd = Int_val(v_fd); 14 | size_t fd_off = Long_val(v_fd_off); 15 | size_t buf_off = Long_val(v_buf_off); 16 | size_t len = Long_val(v_len); 17 | 18 | size_t numbytes = (len > UNIX_BUFFER_SIZE) ? UNIX_BUFFER_SIZE : len; 19 | ret = pread(fd, &Byte(v_buf, buf_off), numbytes, fd_off); 20 | 21 | if (ret == -1) uerror("read", Nothing); 22 | 23 | CAMLreturn(Val_long(ret)); 24 | } 25 | 26 | CAMLprim value caml_index_pread_int64 27 | (value v_fd, value v_fd_off, value v_buf, value v_buf_off, value v_len) 28 | { 29 | CAMLparam5(v_fd, v_fd_off, v_buf, v_buf_off, v_len); 30 | 31 | ssize_t ret; 32 | size_t fd = Int_val(v_fd); 33 | size_t fd_off = Int64_val(v_fd_off); 34 | size_t buf_off = Long_val(v_buf_off); 35 | size_t len = Long_val(v_len); 36 | 37 | size_t numbytes = (len > UNIX_BUFFER_SIZE) ? UNIX_BUFFER_SIZE : len; 38 | ret = pread(fd, &Byte(v_buf, buf_off), numbytes, fd_off); 39 | 40 | if (ret == -1) uerror("read", Nothing); 41 | 42 | CAMLreturn(Val_long(ret)); 43 | } 44 | -------------------------------------------------------------------------------- /src/unix/pwrite.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | CAMLprim value caml_index_pwrite_int 8 | (value v_fd, value v_fd_off, value v_buf, value v_buf_off, value v_len) 9 | { 10 | CAMLparam5(v_fd, v_fd_off, v_buf, v_buf_off, v_len); 11 | 12 | ssize_t ret; 13 | size_t fd = Int_val(v_fd); 14 | size_t fd_off = Long_val(v_fd_off); 15 | size_t buf_off = Long_val(v_buf_off); 16 | size_t len = Long_val(v_len); 17 | 18 | size_t numbytes = (len > UNIX_BUFFER_SIZE) ? UNIX_BUFFER_SIZE : len; 19 | ret = pwrite(fd, &Byte(v_buf, buf_off), numbytes, fd_off); 20 | 21 | if (ret == -1) uerror("write", Nothing); 22 | 23 | CAMLreturn(Val_long(ret)); 24 | } 25 | 26 | CAMLprim value caml_index_pwrite_int64 27 | (value v_fd, value v_fd_off, value v_buf, value v_buf_off, value v_len) 28 | { 29 | CAMLparam5(v_fd, v_fd_off, v_buf, v_buf_off, v_len); 30 | 31 | ssize_t ret; 32 | size_t fd = Int_val(v_fd); 33 | size_t fd_off = Int64_val(v_fd_off); 34 | size_t buf_off = Long_val(v_buf_off); 35 | size_t len = Long_val(v_len); 36 | 37 | size_t numbytes = (len > UNIX_BUFFER_SIZE) ? UNIX_BUFFER_SIZE : len; 38 | ret = pwrite(fd, &Byte(v_buf, buf_off), numbytes, fd_off); 39 | 40 | if (ret == -1) uerror("write", Nothing); 41 | 42 | CAMLreturn(Val_long(ret)); 43 | } 44 | -------------------------------------------------------------------------------- /src/unix/raw.ml: -------------------------------------------------------------------------------- 1 | open! 
Import 2 | module Stats = Index.Stats 3 | 4 | let ( ++ ) = Int63.add 5 | 6 | type t = { fd : Unix.file_descr } [@@unboxed] 7 | 8 | let v fd = { fd } 9 | 10 | let really_write fd fd_offset buffer buffer_offset length = 11 | let rec aux fd_offset buffer_offset length = 12 | let w = Syscalls.pwrite ~fd ~fd_offset ~buffer ~buffer_offset ~length in 13 | if w = 0 || w = length then () 14 | else 15 | (aux [@tailcall]) 16 | (fd_offset ++ Int63.of_int w) 17 | (buffer_offset + w) (length - w) 18 | in 19 | aux fd_offset buffer_offset length 20 | 21 | let really_read fd fd_offset length buffer = 22 | let rec aux fd_offset buffer_offset length = 23 | let r = Syscalls.pread ~fd ~fd_offset ~buffer ~buffer_offset ~length in 24 | if r = 0 then buffer_offset (* end of file *) 25 | else if r = length then buffer_offset + r 26 | else 27 | (aux [@tailcall]) 28 | (fd_offset ++ Int63.of_int r) 29 | (buffer_offset + r) (length - r) 30 | in 31 | aux fd_offset 0 length 32 | 33 | let fsync t = Unix.fsync t.fd 34 | let close t = Unix.close t.fd 35 | let fstat t = Unix.fstat t.fd 36 | 37 | let unsafe_write t ~off buffer buffer_offset length = 38 | let buffer = Bytes.unsafe_of_string buffer in 39 | really_write t.fd off buffer buffer_offset length; 40 | Stats.add_write length 41 | 42 | let unsafe_read t ~off ~len buf = 43 | let n = really_read t.fd off len buf in 44 | Stats.add_read n; 45 | n 46 | 47 | let encode_int63 n = 48 | let buf = Bytes.create Int63.encoded_size in 49 | Int63.encode buf ~off:0 n; 50 | Bytes.unsafe_to_string buf 51 | 52 | let decode_int63 buf = Int63.decode ~off:0 buf 53 | 54 | exception Not_written 55 | 56 | let assert_read ~len n = 57 | if n = 0 && n <> len then raise Not_written; 58 | assert ( 59 | if Int.equal n len then true 60 | else ( 61 | Printf.eprintf "Attempted to read %d bytes, but got %d bytes instead!\n%!" 
62 | len n; 63 | false)) 64 | [@@inline always] 65 | 66 | module Offset = struct 67 | let off = Int63.zero 68 | let set t n = unsafe_write t ~off (encode_int63 n) 0 8 69 | 70 | let get t = 71 | let len = 8 in 72 | let buf = Bytes.create len in 73 | let n = unsafe_read t ~off ~len buf in 74 | assert_read ~len n; 75 | decode_int63 (Bytes.unsafe_to_string buf) 76 | end 77 | 78 | module Version = struct 79 | let off = Int63.of_int 8 80 | 81 | let get t = 82 | let len = 8 in 83 | let buf = Bytes.create len in 84 | let n = unsafe_read t ~off ~len buf in 85 | assert_read ~len n; 86 | Bytes.unsafe_to_string buf 87 | 88 | let set t v = unsafe_write t ~off v 0 8 89 | end 90 | 91 | module Generation = struct 92 | let off = Int63.of_int 16 93 | 94 | let get t = 95 | let len = 8 in 96 | let buf = Bytes.create len in 97 | let n = unsafe_read t ~off ~len buf in 98 | assert_read ~len n; 99 | decode_int63 (Bytes.unsafe_to_string buf) 100 | 101 | let set t gen = unsafe_write t ~off (encode_int63 gen) 0 8 102 | end 103 | 104 | module Fan = struct 105 | let off = Int63.of_int 24 106 | 107 | let set t buf = 108 | let buf_len = String.length buf in 109 | let size = encode_int63 (Int63.of_int buf_len) in 110 | unsafe_write t ~off size 0 8; 111 | if buf <> "" then unsafe_write t ~off:(off ++ Int63.of_int 8) buf 0 buf_len 112 | 113 | let get_size t = 114 | let len = 8 in 115 | let size_buf = Bytes.create len in 116 | let n = unsafe_read t ~off ~len size_buf in 117 | assert_read ~len n; 118 | decode_int63 (Bytes.unsafe_to_string size_buf) 119 | 120 | let set_size t size = 121 | let buf = encode_int63 size in 122 | unsafe_write t ~off buf 0 8 123 | 124 | let get t = 125 | let size = Int63.to_int (get_size t) in 126 | let buf = Bytes.create size in 127 | let n = unsafe_read t ~off:(off ++ Int63.of_int 8) ~len:size buf in 128 | assert_read ~len:size n; 129 | Bytes.unsafe_to_string buf 130 | end 131 | 132 | module Header = struct 133 | type t = { offset : int63; version : string; generation : int63 } 134 | 135 | (** NOTE: These functions must be equivalent to calling the above [set] / 136 | [get] functions individually. *) 137 | 138 | let total_header_length = 8 + 8 + 8 139 | 140 | let read_word buf off = 141 | let result = Bytes.create 8 in 142 | Bytes.blit buf off result 0 8; 143 | Bytes.unsafe_to_string result 144 | 145 | let get t = 146 | let header = Bytes.create total_header_length in 147 | let n = unsafe_read t ~off:Int63.zero ~len:total_header_length header in 148 | assert_read ~len:total_header_length n; 149 | let offset = read_word header 0 |> decode_int63 in 150 | let version = read_word header 8 in 151 | let generation = read_word header 16 |> decode_int63 in 152 | { offset; version; generation } 153 | 154 | let set t { offset; version; generation } = 155 | assert (String.length version = 8); 156 | let b = Bytes.create total_header_length in 157 | Bytes.blit_string (encode_int63 offset) 0 b 0 8; 158 | Bytes.blit_string version 0 b 8 8; 159 | Bytes.blit_string (encode_int63 generation) 0 b 16 8; 160 | unsafe_write t ~off:Int63.zero (Bytes.unsafe_to_string b) 0 161 | total_header_length 162 | end 163 | 164 | module Header_prefix = struct 165 | type t = { offset : int63; version : string } 166 | 167 | (** NOTE: These functions must be equivalent to calling the above [set] / 168 | [get] functions individually. 
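This prefix covers only the first 16 bytes of the header (offset and version), so reading or writing it leaves the generation and fan fields untouched.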
*) 169 | 170 | let total_header_length = 8 + 8 171 | 172 | let read_word buf off = 173 | let result = Bytes.create 8 in 174 | Bytes.blit buf off result 0 8; 175 | Bytes.unsafe_to_string result 176 | 177 | let get t = 178 | let header = Bytes.create total_header_length in 179 | let n = unsafe_read t ~off:Int63.zero ~len:total_header_length header in 180 | assert_read ~len:total_header_length n; 181 | let offset = read_word header 0 |> decode_int63 in 182 | let version = read_word header 8 in 183 | { offset; version } 184 | 185 | let set t { offset; version } = 186 | assert (String.length version = 8); 187 | let b = Bytes.create total_header_length in 188 | Bytes.blit_string (encode_int63 offset) 0 b 0 8; 189 | Bytes.blit_string version 0 b 8 8; 190 | unsafe_write t ~off:Int63.zero (Bytes.unsafe_to_string b) 0 191 | total_header_length 192 | end 193 | -------------------------------------------------------------------------------- /src/unix/raw.mli: -------------------------------------------------------------------------------- 1 | (** [Raw] wraps a file descriptor with a file format used internally by Index. 2 | The format contains the following header fields: 3 | 4 | - {b offset}: a 64-bit integer, denoting the length of the file containing 5 | valid data; 6 | - {b version}: an 8-byte version string; 7 | - {b generation}: a 64-bit integer denoting the generation number; 8 | - {b fan}: a 64-bit length field, followed by a string containing that many 9 | bytes. *) 10 | 11 | open! Import 12 | 13 | type t 14 | (** The type of [raw] file handles. *) 15 | 16 | val v : Unix.file_descr -> t 17 | (** Construct a [raw] value from a file descriptor. *) 18 | 19 | val unsafe_write : t -> off:int63 -> string -> int -> int -> unit 20 | val unsafe_read : t -> off:int63 -> len:int -> bytes -> int 21 | val fsync : t -> unit 22 | val close : t -> unit 23 | val fstat : t -> Unix.stats 24 | 25 | exception Not_written 26 | 27 | module Version : sig 28 | val get : t -> string 29 | val set : t -> string -> unit 30 | end 31 | 32 | module Offset : sig 33 | val get : t -> int63 34 | val set : t -> int63 -> unit 35 | end 36 | 37 | module Generation : sig 38 | val get : t -> int63 39 | val set : t -> int63 -> unit 40 | end 41 | 42 | module Fan : sig 43 | val get : t -> string 44 | val set : t -> string -> unit 45 | val get_size : t -> int63 46 | val set_size : t -> int63 -> unit 47 | end 48 | 49 | module Header : sig 50 | type raw 51 | 52 | type t = { 53 | offset : int63; (** The length of the file containing valid data *) 54 | version : string; (** Format version *) 55 | generation : int63; (** Generation number *) 56 | } 57 | 58 | val get : raw -> t 59 | val set : raw -> t -> unit 60 | end 61 | with type raw := t 62 | 63 | (** Functions for interacting with the header format {i without} the generation 64 | number, provided for use in [irmin-pack]. *) 65 | module Header_prefix : sig 66 | type raw 67 | 68 | type t = { 69 | offset : int63; (** The length of the file containing valid data *) 70 | version : string; (** Format version *) 71 | } 72 | 73 | val get : raw -> t 74 | val set : raw -> t -> unit 75 | end 76 | with type raw := t 77 | -------------------------------------------------------------------------------- /src/unix/syscalls.ml: -------------------------------------------------------------------------------- 1 | open! 
Import 2 | 3 | external pread_int : Unix.file_descr -> int -> bytes -> int -> int -> int 4 | = "caml_index_pread_int" 5 | 6 | external pread_int64 : Unix.file_descr -> int64 -> bytes -> int -> int -> int 7 | = "caml_index_pread_int64" 8 | 9 | let pread : fd:_ -> fd_offset:int63 -> _ = 10 | match Int63.is_immediate with 11 | | True -> 12 | fun ~fd ~fd_offset ~buffer ~buffer_offset ~length -> 13 | pread_int fd fd_offset buffer buffer_offset length 14 | | False -> 15 | fun ~fd ~fd_offset ~buffer ~buffer_offset ~length -> 16 | pread_int64 fd 17 | (Int63.Boxed.to_int64 fd_offset) 18 | buffer buffer_offset length 19 | 20 | external pwrite_int : Unix.file_descr -> int -> bytes -> int -> int -> int 21 | = "caml_index_pwrite_int" 22 | 23 | external pwrite_int64 : Unix.file_descr -> int64 -> bytes -> int -> int -> int 24 | = "caml_index_pwrite_int64" 25 | 26 | let pwrite : fd:_ -> fd_offset:int63 -> _ = 27 | match Int63.is_immediate with 28 | | True -> 29 | fun ~fd ~fd_offset ~buffer ~buffer_offset ~length -> 30 | pwrite_int fd fd_offset buffer buffer_offset length 31 | | False -> 32 | fun ~fd ~fd_offset ~buffer ~buffer_offset ~length -> 33 | pwrite_int64 fd 34 | (Int63.Boxed.to_int64 fd_offset) 35 | buffer buffer_offset length 36 | -------------------------------------------------------------------------------- /src/unix/syscalls.mli: -------------------------------------------------------------------------------- 1 | open! Import 2 | 3 | val pread : 4 | fd:Unix.file_descr -> 5 | fd_offset:int63 -> 6 | buffer:bytes -> 7 | buffer_offset:int -> 8 | length:int -> 9 | int 10 | (** Reads up to [length] bytes from [fd] (starting at position [fd_offset]) into 11 | [buffer] (starting at position [buffer_offset]). Returns the number of bytes 12 | actually read. [fd]'s cursor position is unchanged. *) 13 | 14 | val pwrite : 15 | fd:Unix.file_descr -> 16 | fd_offset:int63 -> 17 | buffer:bytes -> 18 | buffer_offset:int -> 19 | length:int -> 20 | int 21 | (** Writes up to [length] bytes from [buffer] (starting at position 22 | [buffer_offset]) to the file descriptor [fd] (starting at position 23 | [fd_offset]). Returns the number of bytes actually written. [fd]'s cursor 24 | position is unchanged. *) 25 | -------------------------------------------------------------------------------- /test/cache.ml: -------------------------------------------------------------------------------- 1 | let check_none msg = Alcotest.(check (option reject)) msg None 2 | let check_some msg x = Alcotest.(check (option int)) msg (Some x) 3 | 4 | let test_noop () = 5 | let open Index.Cache.Noop in 6 | (* Test that added entries are never found. *) 7 | let c = create () in 8 | find c "not-added" |> check_none "Cannot find non-existent value"; 9 | add c "added" 1; 10 | find c "added" |> check_none "Cannot find added value"; 11 | remove c "added"; 12 | find c "added" |> check_none "Cannot find added value after remove"; 13 | () 14 | 15 | let test_unbounded () = 16 | let open Index.Cache.Unbounded in 17 | (* Test that added entries are always found. 
*) 18 | let c = create () in 19 | find c "not-added" |> check_none "Cannot find non-existent value"; 20 | add c "added" 1; 21 | find c "added" |> check_some "Can find added value" 1; 22 | remove c "added"; 23 | find c "added" |> check_none "Cannot find added value after remove"; 24 | () 25 | 26 | let tests = 27 | [ 28 | Alcotest.test_case "noop" `Quick test_noop; 29 | Alcotest.test_case "unbounded" `Quick test_unbounded; 30 | ] 31 | -------------------------------------------------------------------------------- /test/cli/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name index_fsck) 3 | (modules index_fsck) 4 | (libraries index index.unix unix)) 5 | 6 | (executable 7 | (name generate) 8 | (modules generate) 9 | (libraries index index.unix unix)) 10 | 11 | (rule 12 | (alias generate-cli-test-data) 13 | (targets data) 14 | (action 15 | (run %{exe:generate.exe}))) 16 | 17 | (cram 18 | (deps 19 | (file index_fsck.exe) 20 | (file data) 21 | (alias generate-cli-test-data))) 22 | 23 | ; FIXME: we should not depend on the version of cmdliner 24 | ; (rule 25 | ; (alias runtest) 26 | ; (action 27 | ; (progn 28 | ; (with-stdout-to 29 | ; index-fsck-help.txt.gen 30 | ; (run %{exe:index_fsck.exe} --help=plain)) 31 | ; (diff? index-fsck-help.txt index-fsck-help.txt.gen)))) 32 | -------------------------------------------------------------------------------- /test/cli/generate.ml: -------------------------------------------------------------------------------- 1 | module Size = struct 2 | let length = 20 3 | end 4 | 5 | let random () = 6 | let random_char _ = char_of_int (33 + Random.int 94) in 7 | String.init Size.length random_char 8 | 9 | module Index = 10 | Index_unix.Make 11 | (Index.Key.String_fixed (Size)) (Index.Value.String_fixed (Size)) 12 | (Index.Cache.Noop) 13 | 14 | let random () = 15 | let index = Index.v ~fresh:true ~log_size:100 "data/random" in 16 | for _ = 1 to 1001 do 17 | Index.replace index (random ()) (random ()) 18 | done; 19 | Unix.sleep 1; 20 | Index.close index 21 | 22 | let () = random () 23 | -------------------------------------------------------------------------------- /test/cli/index-fsck-help.txt: -------------------------------------------------------------------------------- 1 | NAME 2 | index-fsck - Check and repair Index data-stores. 3 | 4 | SYNOPSIS 5 | index-fsck COMMAND ... 6 | 7 | COMMANDS 8 | integrity-check 9 | Search the store for integrity faults and corruption. 10 | 11 | stat 12 | Print high-level statistics about the store. 13 | 14 | OPTIONS 15 | --help[=FMT] (default=auto) 16 | Show this help in format FMT. The value FMT must be one of `auto', 17 | `pager', `groff' or `plain'. With `auto', the format is `pager` or 18 | `plain' whenever the TERM env var is `dumb' or undefined. 19 | 20 | -------------------------------------------------------------------------------- /test/cli/index_fsck.ml: -------------------------------------------------------------------------------- 1 | module Size = struct 2 | let length = 20 3 | end 4 | 5 | module Index = 6 | Index_unix.Make 7 | (Index.Key.String_fixed (Size)) (Index.Value.String_fixed (Size)) 8 | (Index.Cache.Noop) 9 | 10 | let () = match Index.Checks.cli () with _ -> . 
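(* [Index.Checks.cli] runs the command-line interface and its result type is uninhabited (it never returns normally), which is why it can be matched with the refutation case [_ -> .]. *)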
11 | -------------------------------------------------------------------------------- /test/cli/stat.t/run.t: -------------------------------------------------------------------------------- 1 | No files are shown when running `stat` in a non-existent directory: 2 | 3 | $ ../index_fsck.exe stat ../data/non-existent-store 4 | >> Getting statistics for store: `../data/non-existent-store' 5 | { 6 | "entry_size": "40.0 B", 7 | "files": {} 8 | } 9 | 10 | Running `stat` on an index after 10 merges: 11 | 12 | $ ../index_fsck.exe stat ../data/random > log.txt 2>&1 13 | $ sed -Ee 's/"lock": "[0-9]+"/"lock": ""/g' log.txt 14 | >> Getting statistics for store: `../data/random' 15 | { 16 | "entry_size": "40.0 B", 17 | "files": { 18 | "data": { 19 | "size": "35.6 KiB", 20 | "offset": 36360, 21 | "generation": 9, 22 | "fanout_size": "128.0 B" 23 | }, 24 | "log": { 25 | "size": "3.6 KiB", 26 | "offset": 3680, 27 | "generation": 9, 28 | "fanout_size": "0.0 B" 29 | }, 30 | "lock": "" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name main) 3 | (package index) 4 | (libraries index alcotest optint)) 5 | -------------------------------------------------------------------------------- /test/fuzz/fan/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name main) 3 | (libraries crowbar index optint)) 4 | 5 | (alias 6 | (name runtest) 7 | (package index) 8 | (deps main.exe)) 9 | 10 | (rule 11 | (alias fuzz) 12 | (package index) 13 | (deps 14 | main.exe 15 | (source_tree ../input)) 16 | (action 17 | (run afl-fuzz -i ../input -o output -m 2048 ./main.exe @@))) 18 | -------------------------------------------------------------------------------- /test/fuzz/fan/main.ml: -------------------------------------------------------------------------------- 1 | open Crowbar 2 | module Fan = Index.Private.Fan 3 | module Int63 = Optint.Int63 4 | 5 | let hash_size = 30 6 | let entry_size = 56 7 | let entry_sizeL = Int63.of_int entry_size 8 | let int_bound = 100_000_000 9 | let bounded_int = map [ int ] (fun i -> abs i mod int_bound) 10 | let hash = map [ bytes ] Hashtbl.hash 11 | let hash_list = map [ list hash ] (List.sort compare) 12 | 13 | let empty_fan_with_size = 14 | map [ bounded_int ] (fun n -> (Fan.v ~hash_size ~entry_size n, n)) 15 | 16 | let empty_fan = map [ empty_fan_with_size ] fst 17 | 18 | let update_list = 19 | let rec loop off acc = function 20 | | [] -> List.rev acc 21 | | hash :: t -> loop (Int63.add off entry_sizeL) ((hash, off) :: acc) t 22 | in 23 | map [ hash_list ] (loop Int63.zero []) 24 | 25 | let fan_with_updates = 26 | map [ empty_fan; update_list ] (fun fan l -> 27 | List.iter (fun (hash, off) -> Fan.update fan hash off) l; 28 | let fan = Fan.finalize fan in 29 | (fan, l)) 30 | 31 | let fan = map [ fan_with_updates ] fst 32 | 33 | let check_export_size fan = 34 | let expected_size = Fan.exported_size fan in 35 | let exported = Fan.export fan in 36 | check_eq ~pp:pp_int (String.length exported) expected_size 37 | 38 | let check_export fan = 39 | let exported = Fan.export fan in 40 | let imported = Fan.import ~hash_size exported in 41 | check_eq ~eq:Fan.equal imported fan 42 | 43 | let check_updates (fan, updates) = 44 | List.iter 45 | (fun (hash, off) -> 46 | let low, high = Fan.search fan hash in 47 | if off < low || high < off then 48 | failf "hash %d was added at off %a, but got 
low=%a, high=%a" hash 49 | Int63.pp off Int63.pp low Int63.pp high) 50 | updates 51 | 52 | let check_fan_size (fan, size) = 53 | let nb_fans = Fan.nb_fans fan in 54 | let fan_size = size / nb_fans in 55 | if fan_size * entry_size > 4096 then failf "Fan size is too big: %d" fan_size 56 | 57 | let () = 58 | add_test ~name:"Export size" [ fan ] check_export_size; 59 | add_test ~name:"Export/Import" [ fan ] check_export; 60 | add_test ~name:"Update" [ fan_with_updates ] check_updates; 61 | add_test ~name:"Fan size" [ empty_fan_with_size ] check_fan_size 62 | -------------------------------------------------------------------------------- /test/fuzz/fan/main.mli: -------------------------------------------------------------------------------- 1 | (* left empty on purpose *) 2 | -------------------------------------------------------------------------------- /test/fuzz/input/000: -------------------------------------------------------------------------------- 1 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 2 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 3 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 4 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 5 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 6 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 7 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 8 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 9 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 10 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 11 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 12 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 13 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 14 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 15 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 16 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 17 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 18 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 19 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 20 | abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc 21 | -------------------------------------------------------------------------------- /test/main.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | Alcotest.run "index" [ ("cache", Cache.tests); ("search", Search.tests) ] 3 | -------------------------------------------------------------------------------- /test/search.ml: -------------------------------------------------------------------------------- 1 | module Int63 = Optint.Int63 2 | 3 | module Entry = struct 4 | module Key = struct 5 | type t = int 6 | 7 | let equal = ( = ) 8 | end 9 | 10 | module Value = struct 11 | type t = string 12 | end 13 | 14 | type t = Key.t * Value.t 15 | 16 | let to_key = fst 17 | let to_value = snd 18 | end 19 | 20 | module EltArray = struct 21 | type t = Entry.t array 22 | type elt = Entry.t 23 | 24 | let get t i = t.(Int63.to_int i) 25 | let length t = Array.length t |> Int63.of_int 26 | let pre_fetch _ ~low:_ ~high:_ = () 27 | 
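(* [pre_fetch] is a no-op here: the test element array lives entirely in memory, so there is nothing to prefetch from disk. *)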
end 28 | 29 | (* Metric equal to an integer key *) 30 | module Metric_key = struct 31 | module Entry = Entry 32 | 33 | type t = Entry.Key.t 34 | 35 | let compare : t -> t -> int = compare 36 | let of_entry = Entry.to_key 37 | let of_key k = k 38 | 39 | let linear_interpolate ~low:(low_out, low_in) ~high:(high_out, high_in) m = 40 | let low_in = float_of_int low_in in 41 | let high_in = float_of_int high_in in 42 | let target_in = float_of_int m in 43 | let low_out = Int63.to_float low_out in 44 | let high_out = Int63.to_float high_out in 45 | (* Fractional position of [target_in] along the line from [low_in] to [high_in] *) 46 | let proportion = (target_in -. low_in) /. (high_in -. low_in) in 47 | (* Convert fractional position to position in output space *) 48 | let position = low_out +. (proportion *. (high_out -. low_out)) in 49 | let rounded = ceil (position -. 0.5) +. 0.5 in 50 | Int63.of_float rounded 51 | end 52 | 53 | module Search = Index.Private.Search.Make (Entry) (EltArray) (Metric_key) 54 | 55 | let interpolation_unique () = 56 | let array = Array.init 10_000 (fun i -> (i, string_of_int i)) in 57 | let length = EltArray.length array in 58 | Array.iter 59 | (fun (i, v) -> 60 | Search.interpolation_search array i 61 | ~low:Int63.(zero) 62 | ~high:Int63.(pred length) 63 | |> Alcotest.(check string) "" v) 64 | array 65 | 66 | (* Degenerate metric that is the same for all entries *) 67 | module Metric_constant = struct 68 | module Entry = Entry 69 | 70 | type t = unit 71 | 72 | let compare () () = 0 73 | let of_entry _ = () 74 | let of_key _ = () 75 | 76 | let linear_interpolate ~low:(low_out, _) ~high:(high_out, _) _ = 77 | let ( + ), ( - ) = Int63.(add, sub) in 78 | (* Any value in the range [low_out, high_out] is valid *) 79 | low_out 80 | + Int63.of_int64 (Random.int64 (Int63.to_int64 (high_out - low_out))) 81 | + Int63.one 82 | end 83 | 84 | module Search_constant = 85 | Index.Private.Search.Make (Entry) (EltArray) (Metric_constant) 86 | 87 | let interpolation_constant_metric () = 88 | let array = Array.init 100 (fun i -> (i, string_of_int i)) in 89 | let length = EltArray.length array in 90 | Array.iter 91 | (fun (i, v) -> 92 | Search_constant.interpolation_search array i ~low:Int63.zero 93 | ~high:Int63.(pred length) 94 | |> Alcotest.(check string) "" v) 95 | array 96 | 97 | let tests = 98 | [ 99 | Alcotest.test_case "unique" `Quick interpolation_unique; 100 | Alcotest.test_case "constant metric" `Quick interpolation_constant_metric; 101 | ] 102 | 103 | let () = Random.self_init () 104 | -------------------------------------------------------------------------------- /test/unix/common.ml: -------------------------------------------------------------------------------- 1 | let ( >> ) f g x = g (f x) 2 | let src = Logs.Src.create "test/unix" ~doc:"Index_unix tests" 3 | 4 | module Log = (val Logs.src_log src : Logs.LOG) 5 | 6 | let report () = 7 | Logs_threaded.enable (); 8 | Index.Private.Logs.setup ~level:Logs.Debug 9 | (module Mtime_clock) 10 | (module Fmt_tty) 11 | 12 | module String_size = struct 13 | let length = 20 14 | end 15 | 16 | let () = Random.self_init () 17 | let random_char () = char_of_int (33 + Random.int 94) 18 | let random_string () = String.init String_size.length (fun _i -> random_char ()) 19 | 20 | module Default = struct 21 | let log_size = 4 22 | let lru_size = 0 23 | let size = 103 24 | end 25 | 26 | module Key = struct 27 | include Index.Key.String_fixed (String_size) 28 | 29 | let v = random_string 30 | let pp = Fmt.Dump.string 31 | end 32 | 33 | 
module Value = struct 34 | include Index.Value.String_fixed (String_size) 35 | 36 | let v = random_string 37 | let equal = String.equal 38 | let pp = Fmt.Dump.string 39 | end 40 | 41 | type binding = Key.t * Value.t 42 | 43 | let pp_binding ppf (key, value) = 44 | Fmt.pf ppf "{ %a → %a }" (Repr.pp Key.t) key (Repr.pp Value.t) value 45 | 46 | let check_entry findf typ k v = 47 | match findf k with 48 | | v' when Value.equal v v' -> () 49 | | v' (* v =/= v' *) -> 50 | Alcotest.failf "Found %s when checking for binding %a in %s" v' pp_binding 51 | (k, v) typ 52 | | exception Not_found -> 53 | Alcotest.failf "Expected key %s is missing in %s" k typ 54 | 55 | module Tbl = struct 56 | let v ~size = 57 | let h = Hashtbl.create size in 58 | for _ = 1 to size do 59 | Hashtbl.add h (Key.v ()) (Value.v ()) 60 | done; 61 | assert (Hashtbl.length h = size); 62 | h 63 | 64 | let check_binding tbl = check_entry (Hashtbl.find tbl) "table" 65 | end 66 | 67 | module Index = struct 68 | include Index_unix.Private.Make (Key) (Value) (Index.Cache.Unbounded) 69 | 70 | let replace_random ?hook t = 71 | let ((key, value) as binding) = (Key.v (), Value.v ()) in 72 | (binding, replace' ?hook t key value) 73 | 74 | let check_binding index = check_entry (find index) "index" 75 | 76 | let check_not_found index k = 77 | match find index k with 78 | | exception Not_found -> () 79 | | v -> 80 | Alcotest.failf "Found binding %a but expected key to be absent" 81 | pp_binding (k, v) 82 | end 83 | 84 | let check_completed = function 85 | | Ok `Completed -> () 86 | | Ok `Aborted -> Alcotest.fail "Unexpected asynchronous abort" 87 | | Error (`Async_exn exn) -> 88 | Alcotest.failf "Unexpected asynchronous exception: %s" 89 | (Printexc.to_string exn) 90 | 91 | module Make_context (Config : sig 92 | val root : string 93 | end) = 94 | struct 95 | let fresh_name = 96 | let c = ref 0 in 97 | fun object_type -> 98 | incr c; 99 | let name = Filename.concat Config.root ("index_" ^ string_of_int !c) in 100 | Logs.info (fun m -> 101 | m "Constructing %s context object: %s" object_type name); 102 | name 103 | 104 | type t = { 105 | rw : Index.t; 106 | tbl : (string, string) Hashtbl.t; 107 | clone : ?fresh:bool -> readonly:bool -> unit -> Index.t; 108 | close_all : unit -> unit; 109 | } 110 | 111 | let ignore (_ : t) = () 112 | 113 | let empty_index ?(log_size = Default.log_size) ?(lru_size = Default.lru_size) 114 | ?flush_callback ?throttle () = 115 | let name = fresh_name "empty_index" in 116 | let cache = Index.empty_cache () in 117 | let rw = 118 | Index.v ?flush_callback ?throttle ~cache ~fresh:true ~log_size ~lru_size 119 | name 120 | in 121 | let close_all = ref (fun () -> Index.close rw) in 122 | let tbl = Hashtbl.create 0 in 123 | let clone ?(fresh = false) ~readonly () = 124 | let t = 125 | Index.v ?flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size 126 | ~readonly name 127 | in 128 | (close_all := !close_all >> fun () -> Index.close t); 129 | t 130 | in 131 | { rw; tbl; clone; close_all = (fun () -> !close_all ()) } 132 | 133 | let full_index ?(size = Default.size) ?(log_size = Default.log_size) 134 | ?(lru_size = Default.lru_size) ?(flush_callback = fun () -> ()) ?throttle 135 | () = 136 | let f = 137 | (* Disable [flush_callback] while adding initial entries *) 138 | ref (fun () -> ()) 139 | in 140 | let name = fresh_name "full_index" in 141 | let cache = Index.empty_cache () in 142 | let rw = 143 | Index.v 144 | ~flush_callback:(fun () -> !f ()) 145 | ?throttle ~cache ~fresh:true ~log_size ~lru_size name 146 | in 147 
| let close_all = ref (fun () -> Index.close rw) in 148 | let tbl = Hashtbl.create 0 in 149 | for _ = 1 to size do 150 | let k = Key.v () in 151 | let v = Value.v () in 152 | Index.replace rw k v; 153 | Hashtbl.replace tbl k v 154 | done; 155 | Index.flush rw; 156 | Index.try_merge_aux ~force:true rw |> Index.await |> check_completed; 157 | f := flush_callback (* Enable [flush_callback] *); 158 | let clone ?(fresh = false) ~readonly () = 159 | let t = 160 | Index.v ~flush_callback ?throttle ~cache ~fresh ~log_size ~lru_size 161 | ~readonly name 162 | in 163 | (close_all := !close_all >> fun () -> Index.close t); 164 | t 165 | in 166 | { rw; tbl; clone; close_all = (fun () -> !close_all ()) } 167 | 168 | let call_then_close (type a) (t : t) (f : t -> a) : a = 169 | let a = f t in 170 | t.close_all (); 171 | a 172 | 173 | let with_empty_index ?log_size ?lru_size ?flush_callback ?throttle () f = 174 | call_then_close 175 | (empty_index ?log_size ?lru_size ?flush_callback ?throttle ()) 176 | f 177 | 178 | let with_full_index ?log_size ?lru_size ?flush_callback ?throttle ?size () f = 179 | call_then_close 180 | (full_index ?log_size ?lru_size ?flush_callback ?throttle ?size ()) 181 | f 182 | end 183 | 184 | let ( let* ) f k = f k 185 | let uncurry f (x, y) = f x y 186 | let ignore_value (_ : Value.t) = () 187 | let ignore_bool (_ : bool) = () 188 | let ignore_index (_ : Index.t) = () 189 | 190 | let check_equivalence index htbl = 191 | Hashtbl.iter (Index.check_binding index) htbl; 192 | Index.iter (Tbl.check_binding htbl) index 193 | 194 | let check_disjoint index htbl = 195 | Hashtbl.iter 196 | (fun k v -> 197 | match Index.find index k with 198 | | exception Not_found -> () 199 | | v' when Value.equal v v' -> 200 | Alcotest.failf "Binding %a should not be present" pp_binding (k, v) 201 | | v' -> 202 | Alcotest.failf "Found value %a when checking for the absence of %a" 203 | (Repr.pp Value.t) v' pp_binding (k, v)) 204 | htbl 205 | 206 | let get_open_fd root = 207 | let ( >>? ) x f = match x with `Ok x -> f x | `Skip err -> `Skip err in 208 | let pid = string_of_int (Unix.getpid ()) in 209 | let name = Filename.concat root "empty" in 210 | let fd_file = "index_fd_tmp" in 211 | let lsof_command = "lsof -a -s -p " ^ pid ^ " > " ^ fd_file in 212 | (match Sys.os_type with 213 | | "Unix" -> `Ok () 214 | | _ -> `Skip "non-UNIX operating system") 215 | >>? fun () -> 216 | (match Unix.system lsof_command with 217 | | Unix.WEXITED 0 -> `Ok () 218 | | Unix.WEXITED _ -> 219 | `Skip "failing `lsof` command. Is `lsof` installed on your system?" 220 | | Unix.WSIGNALED _ | Unix.WSTOPPED _ -> `Skip "`lsof` command was interrupted") 221 | >>? 
fun () -> 222 | let lines = ref [] in 223 | let extract_fd line = 224 | try 225 | let pos = Re.Str.search_forward (Re.Str.regexp name) line 0 in 226 | let fd = Re.Str.string_after line pos in 227 | lines := fd :: !lines 228 | with Not_found -> () 229 | in 230 | let ic = open_in fd_file in 231 | (try 232 | while true do 233 | extract_fd (input_line ic) 234 | done 235 | with End_of_file -> close_in ic); 236 | `Ok !lines 237 | 238 | let partition sub l = 239 | List.partition 240 | (fun line -> 241 | try 242 | ignore (Re.Str.search_forward (Re.Str.regexp sub) line 0); 243 | true 244 | with Not_found -> false) 245 | l 246 | -------------------------------------------------------------------------------- /test/unix/common.mli: -------------------------------------------------------------------------------- 1 | val random_char : unit -> char 2 | 3 | val report : unit -> unit 4 | (** Set logs reporter at [Logs.Debug] level *) 5 | 6 | module Log : Logs.LOG 7 | 8 | module Default : sig 9 | val log_size : int 10 | val size : int 11 | end 12 | 13 | (** Simple key/value modules with String type and a random constructor *) 14 | module Key : sig 15 | include Index.Key.S with type t = string 16 | 17 | val v : unit -> t 18 | val pp : t Fmt.t 19 | end 20 | 21 | module Value : sig 22 | include Index.Value.S with type t = string 23 | 24 | val v : unit -> t 25 | val pp : t Fmt.t 26 | end 27 | 28 | module Tbl : sig 29 | val v : size:int -> (Key.t, Value.t) Hashtbl.t 30 | (** Construct a new table of random key-value pairs. *) 31 | 32 | val check_binding : (Key.t, Value.t) Hashtbl.t -> Key.t -> Value.t -> unit 33 | (** Check that a binding exists in the table. *) 34 | end 35 | 36 | module Index : sig 37 | open Index.Private 38 | include S with type key = Key.t and type value = Value.t 39 | 40 | val replace_random : 41 | ?hook:[ `Merge of merge_stages ] Hook.t -> 42 | t -> 43 | (key * value) * merge_result async option 44 | (** Add a random fresh binding to the given index. *) 45 | 46 | val check_binding : t -> Key.t -> Value.t -> unit 47 | (** Check that a binding exists in the index.*) 48 | 49 | val check_not_found : t -> Key.t -> unit 50 | (** Check that a key does not exist in the index. *) 51 | end 52 | 53 | (** Helper constructors for fresh pre-initialised indices *) 54 | module Make_context (Config : sig 55 | val root : string 56 | end) : sig 57 | type t = private { 58 | rw : Index.t; 59 | tbl : (string, string) Hashtbl.t; 60 | clone : ?fresh:bool -> readonly:bool -> unit -> Index.t; 61 | close_all : unit -> unit; 62 | } 63 | 64 | val ignore : t -> unit 65 | 66 | val fresh_name : string -> string 67 | (** [fresh_name typ] is a clean directory for a resource of type [typ]. *) 68 | 69 | val with_empty_index : 70 | ?log_size:int -> 71 | ?lru_size:int -> 72 | ?flush_callback:(unit -> unit) -> 73 | ?throttle:[ `Overcommit_memory | `Block_writes ] -> 74 | unit -> 75 | (t -> 'a) -> 76 | 'a 77 | (** [with_empty_index f] applies [f] to a fresh empty index. Afterwards, the 78 | index and any clones are closed. *) 79 | 80 | val with_full_index : 81 | ?log_size:int -> 82 | ?lru_size:int -> 83 | ?flush_callback:(unit -> unit) -> 84 | ?throttle:[ `Overcommit_memory | `Block_writes ] -> 85 | ?size:int -> 86 | unit -> 87 | (t -> 'a) -> 88 | 'a 89 | (** [with_full_index f] applies [f] to a fresh index with a random table of 90 | key/value pairs. [f] also gets a constructor for opening clones of the 91 | index at the same location. Afterwards, the index and any clones are 92 | closed. 
*) 93 | end 94 | 95 | val ( let* ) : ('a -> 'b) -> 'a -> 'b 96 | (** CPS monad *) 97 | 98 | val ( >> ) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c 99 | val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c 100 | val ignore_value : Value.t -> unit 101 | val ignore_bool : bool -> unit 102 | val ignore_index : Index.t -> unit 103 | 104 | type binding = Key.t * Value.t 105 | 106 | val pp_binding : binding Fmt.t 107 | 108 | val check_completed : 109 | ([ `Aborted | `Completed ], [ `Async_exn of exn ]) result -> unit 110 | 111 | val check_equivalence : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit 112 | val check_disjoint : Index.t -> (Key.t, Value.t) Hashtbl.t -> unit 113 | val get_open_fd : string -> [> `Ok of string list | `Skip of string ] 114 | val partition : string -> string list -> string list * string list 115 | -------------------------------------------------------------------------------- /test/unix/dune: -------------------------------------------------------------------------------- 1 | (tests 2 | (names main force_merge io_array) 3 | (package index) 4 | (libraries index index.unix unix alcotest fmt logs logs.fmt logs.threaded re 5 | stdlib-shims threads.posix repr semaphore-compat optint mtime.clock.os)) 6 | -------------------------------------------------------------------------------- /test/unix/flush_callback.ml: -------------------------------------------------------------------------------- 1 | module I = Index 2 | open Common 3 | module Semaphore = Semaphore_compat.Semaphore.Binary 4 | 5 | module Context = Common.Make_context (struct 6 | let root = Filename.concat "_tests" "unix.flush_callback" 7 | end) 8 | 9 | module Mutable_callback = struct 10 | type t = { 11 | flush_callback : unit -> unit; 12 | require_callback : 13 | 'a. ?at_least_once:unit -> ?callback:(unit -> unit) -> (unit -> 'a) -> 'a; 14 | (** Locally override the definition of [flush_callback] inside a 15 | continuation. The default [callback] is the identity function. 16 | 17 | - The continuation must trigger the callback exactly once (unless 18 | [~at_least_once:()] is passed). 19 | - Any callbacks not scoped inside [require_callback] result in 20 | failure. *) 21 | } 22 | 23 | let v () : t = 24 | let unexpected () = Alcotest.fail "Callback call not expected" in 25 | let top = ref unexpected in 26 | let require_callback ?at_least_once ?(callback = fun () -> ()) (type a) 27 | (f : unit -> a) : a = 28 | let called = ref false in 29 | let prev_top = !top in 30 | (top := 31 | fun () -> 32 | match (at_least_once, !called) with 33 | | None, true -> Alcotest.fail "flush_callback already triggered" 34 | | _, _ -> 35 | called := true; 36 | (* Ensure the callback does not recursively invoke an auto-flush. 
*) 37 | let saved_top = !top in 38 | top := unexpected; 39 | callback (); 40 | top := saved_top); 41 | let a = f () in 42 | if not !called then Alcotest.fail "flush_callback was not called"; 43 | top := prev_top; 44 | a 45 | in 46 | let flush_callback () = !top () in 47 | { require_callback; flush_callback } 48 | end 49 | 50 | let check_no_merge binding = function 51 | | None -> binding 52 | | Some _merge_promise -> 53 | Alcotest.failf "New binding %a triggered an unexpected merge operation" 54 | pp_binding binding 55 | 56 | (** Tests that [close] triggers the [flush_callback] only when the index has unflushed bindings *) 57 | let test_close () = 58 | let fail typ () = 59 | Alcotest.failf "Closing <%s> should not trigger the flush_callback" typ 60 | in 61 | Context.with_empty_index ~flush_callback:(fail "empty index") () 62 | Context.ignore; 63 | 64 | Context.with_full_index ~flush_callback:(fail "fresh index") () Context.ignore; 65 | 66 | let calls = ref 0 in 67 | Context.with_empty_index 68 | ~flush_callback:(fun () -> incr calls) 69 | () 70 | (fun Context.{ rw; _ } -> 71 | Index.replace_random rw 72 | |> uncurry check_no_merge 73 | |> (ignore : binding -> unit)); 74 | Alcotest.(check int) 75 | "Closing a dirty index should trigger the flush_callback once" 1 !calls 76 | 77 | (** Test that [flush] triggers the [flush_callback] when necessary. *) 78 | let test_flush () = 79 | let Mutable_callback.{ require_callback; flush_callback } = 80 | Mutable_callback.v () 81 | in 82 | let* Context.{ rw; clone; _ } = Context.with_empty_index ~flush_callback () in 83 | let ro = clone ~readonly:true () in 84 | 85 | Index.flush rw (* No callback, since there are no bindings to persist *); 86 | let binding = Index.replace_random rw |> uncurry check_no_merge in 87 | require_callback 88 | ~callback:(fun () -> 89 | Log.app (fun m -> 90 | m "Checking that newly-added binding %a is not yet visible" pp_binding 91 | binding); 92 | Index.sync ro; 93 | Index.check_not_found ro (fst binding)) 94 | (fun () -> Index.flush rw); 95 | 96 | Log.app (fun m -> 97 | m "After the flush, binding %a should be visible" pp_binding binding); 98 | Index.sync ro; 99 | uncurry (Index.check_binding ro) binding; 100 | 101 | let _ = Index.replace_random rw |> uncurry check_no_merge in 102 | Index.flush ~no_callback:() rw (* No callback, by user request *); 103 | 104 | () 105 | 106 | (** Test that flushes due to [replace] operations trigger the [flush_callback]: 107 | 108 | - 1. Initial flush of [log] before an automatic merge. 109 | - 2. Flushing of [log_async] while a merge is ongoing. *) 110 | let test_replace () = 111 | let log_size = 8 in 112 | let bindings = Tbl.v ~size:log_size in 113 | let binding_list = bindings |> Hashtbl.to_seq |> List.of_seq in 114 | let Mutable_callback.{ require_callback; flush_callback } = 115 | Mutable_callback.v () 116 | in 117 | let* Context.{ rw; clone; _ } = 118 | Context.with_empty_index ~log_size ~flush_callback () 119 | in 120 | let ro = clone ~readonly:true () in 121 | 122 | (* The first [log_size]-many replaces don't trigger the callback. (Provided 123 | the [auto_flush_limit] is not reached, which it is not.)
*) 124 | let replace_no_merge binding = 125 | Index.replace' rw (fst binding) (snd binding) 126 | |> check_no_merge binding 127 | |> (ignore : Key.t * Value.t -> unit) 128 | in 129 | binding_list |> List.iter replace_no_merge; 130 | 131 | (* The next replace overflows the log, causing the bindings to be persisted *) 132 | let do_merge = Semaphore.make false in 133 | let overflow_binding, merge_promise = 134 | require_callback 135 | ~callback:(fun () -> 136 | Log.app (fun m -> 137 | m 138 | "Checking newly-added bindings are not visible from a synced RO \ 139 | instance until [flush_callback] is called"); 140 | Index.sync ro; 141 | check_disjoint ro bindings) 142 | (fun () -> 143 | Index.replace_random 144 | ~hook: 145 | (I.Private.Hook.v (function 146 | | `Merge `Before -> Semaphore.acquire do_merge 147 | | _ -> ())) 148 | rw) 149 | in 150 | 151 | Log.app (fun m -> m "Checking merged bindings are now visible"); 152 | Hashtbl.add bindings (fst overflow_binding) (snd overflow_binding); 153 | Index.sync ro; 154 | check_equivalence ro bindings; 155 | 156 | (* New values added during the merge go into [log_async] *) 157 | let async_binding = Index.replace_random rw |> uncurry check_no_merge in 158 | Log.app (fun m -> 159 | m "Added new binding %a while merge is ongoing" pp_binding async_binding); 160 | 161 | (* We could implicitly cause an automatic flush of [log_async], but it's 162 | simpler to just explicitly force one. *) 163 | Index.sync ro; 164 | require_callback ~at_least_once:() 165 | ~callback:(fun () -> 166 | Log.app (fun m -> 167 | m 168 | "Checking async_binding %a is not yet visible from a synced RO \ 169 | instance" 170 | pp_binding async_binding); 171 | check_equivalence ro bindings) 172 | (fun () -> Index.flush rw); 173 | 174 | (* The merge triggers the callback when flushing [log_async] entries into 175 | [log]. (Not necessary here, since [log_async] values were already flushed.) 
*) 176 | require_callback (fun () -> 177 | Semaphore.release do_merge; 178 | merge_promise |> Option.get |> Index.await |> check_completed); 179 | 180 | Log.app (fun m -> 181 | m 182 | "Checking that all added bindings are now visible from a synced RO \ 183 | instance"); 184 | Hashtbl.add bindings (fst async_binding) (snd async_binding); 185 | Index.sync ro; 186 | check_equivalence ro bindings 187 | 188 | let tests = 189 | [ 190 | ("close", `Quick, test_close); 191 | ("flush", `Quick, test_flush); 192 | ("replace", `Quick, test_replace); 193 | ] 194 | -------------------------------------------------------------------------------- /test/unix/flush_callback.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/unix/force_merge.ml: -------------------------------------------------------------------------------- 1 | module Hook = Index.Private.Hook 2 | module Semaphore = Semaphore_compat.Semaphore.Binary 3 | open Common 4 | 5 | let root = Filename.concat "_tests" "unix.force_merge" 6 | 7 | module Context = Common.Make_context (struct 8 | let root = root 9 | end) 10 | 11 | let after f = Hook.v (function `After -> f () | _ -> ()) 12 | let after_clear f = Hook.v (function `After_clear -> f () | _ -> ()) 13 | let before f = Hook.v (function `Before -> f () | _ -> ()) 14 | 15 | let before_offset_read f = 16 | Hook.v (function `Before_offset_read -> f () | _ -> ()) 17 | 18 | let test_find_present t tbl = 19 | Hashtbl.iter 20 | (fun k v -> 21 | match Index.find t k with 22 | | res -> 23 | if not (res = v) then Alcotest.fail "Replacing existing value failed." 24 | | exception Not_found -> 25 | Alcotest.failf "Inserted value is not present anymore: %s." k) 26 | tbl 27 | 28 | let test_one_entry r k v = 29 | match Index.find r k with 30 | | res -> 31 | if not (res = v) then Alcotest.fail "Replacing existing value failed." 32 | | exception Not_found -> 33 | Alcotest.failf "Inserted value is not present anymore: %s." 
k 34 | 35 | let test_fd () = 36 | match Common.get_open_fd root with 37 | | `Ok lines -> ( 38 | let contains sub s = 39 | try 40 | ignore (Re.Str.search_forward (Re.Str.regexp sub) s 0); 41 | true 42 | with Not_found -> false 43 | in 44 | let result = 45 | let data, rs = List.partition (contains "data") lines in 46 | if List.length data > 2 then 47 | Alcotest.fail "Too many file descriptors opened for data files"; 48 | let log, rs = List.partition (contains "log") rs in 49 | if List.length log > 2 then 50 | Alcotest.fail "Too many file descriptors opened for log files"; 51 | let lock, rs = List.partition (contains "lock") rs in 52 | if List.length lock > 2 then 53 | Alcotest.fail "Too many file descriptors opened for lock files"; 54 | if List.length rs > 0 then 55 | Alcotest.fail "Unknown file descriptors opened"; 56 | `Ok () 57 | in 58 | match result with 59 | | `Ok () -> () 60 | | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err)) 61 | | `Skip err -> Log.warn (fun m -> m "`test_fd` was skipped: %s" err) 62 | 63 | let readonly_s () = 64 | let* { Context.tbl; clone; _ } = Context.with_full_index () in 65 | let r1 = clone ~readonly:true () in 66 | let r2 = clone ~readonly:true () in 67 | let r3 = clone ~readonly:true () in 68 | test_find_present r1 tbl; 69 | test_find_present r2 tbl; 70 | test_find_present r3 tbl; 71 | test_fd () 72 | 73 | let readonly () = 74 | let* { Context.tbl; clone; _ } = Context.with_full_index () in 75 | let r1 = clone ~readonly:true () in 76 | let r2 = clone ~readonly:true () in 77 | let r3 = clone ~readonly:true () in 78 | Hashtbl.iter 79 | (fun k v -> 80 | test_one_entry r1 k v; 81 | test_one_entry r2 k v; 82 | test_one_entry r3 k v) 83 | tbl; 84 | test_fd () 85 | 86 | let readonly_and_merge () = 87 | let* { Context.rw; clone; _ } = Context.with_full_index () in 88 | let w = rw in 89 | let r1 = clone ~readonly:true () in 90 | let r2 = clone ~readonly:true () in 91 | let r3 = clone ~readonly:true () in 92 | let interleave () = 93 | let k1 = Key.v () in 94 | let v1 = Value.v () in 95 | Index.replace w k1 v1; 96 | Index.flush w; 97 | let t1 = Index.try_merge_aux ~force:true w in 98 | Index.sync r1; 99 | Index.sync r2; 100 | Index.sync r3; 101 | test_one_entry r1 k1 v1; 102 | test_one_entry r2 k1 v1; 103 | test_one_entry r3 k1 v1; 104 | 105 | let k2 = Key.v () in 106 | let v2 = Value.v () in 107 | Index.replace w k2 v2; 108 | Index.flush w; 109 | Index.sync r1; 110 | Index.sync r2; 111 | Index.sync r3; 112 | test_one_entry r1 k1 v1; 113 | let t2 = Index.try_merge_aux ~force:true w in 114 | test_one_entry r2 k2 v2; 115 | test_one_entry r3 k1 v1; 116 | 117 | let k2 = Key.v () in 118 | let v2 = Value.v () in 119 | let k3 = Key.v () in 120 | let v3 = Value.v () in 121 | test_one_entry r1 k1 v1; 122 | Index.replace w k2 v2; 123 | Index.flush w; 124 | Index.sync r1; 125 | let t3 = Index.try_merge_aux ~force:true w in 126 | test_one_entry r1 k1 v1; 127 | Index.replace w k3 v3; 128 | Index.flush w; 129 | Index.sync r3; 130 | let t4 = Index.try_merge_aux ~force:true w in 131 | test_one_entry r3 k3 v3; 132 | 133 | let k2 = Key.v () in 134 | let v2 = Value.v () in 135 | Index.replace w k2 v2; 136 | Index.flush w; 137 | Index.sync r2; 138 | Index.sync r3; 139 | test_one_entry w k2 v2; 140 | let t5 = Index.try_merge_aux ~force:true w in 141 | test_one_entry w k2 v2; 142 | test_one_entry r2 k2 v2; 143 | test_one_entry r3 k1 v1; 144 | 145 | let k2 = Key.v () in 146 | let v2 = Value.v () in 147 | Index.replace w k2 v2; 148 | Index.flush w; 149 | Index.sync r2; 150 | 
Index.sync r3; 151 | test_one_entry r2 k1 v1; 152 | let t6 = Index.try_merge_aux ~force:true w in 153 | test_one_entry w k2 v2; 154 | test_one_entry r2 k2 v2; 155 | test_one_entry r3 k2 v2; 156 | Index.await t1 |> check_completed; 157 | Index.await t2 |> check_completed; 158 | Index.await t3 |> check_completed; 159 | Index.await t4 |> check_completed; 160 | Index.await t5 |> check_completed; 161 | Index.await t6 |> check_completed 162 | in 163 | 164 | for _ = 1 to 10 do 165 | interleave () 166 | done; 167 | test_fd () 168 | 169 | (* A force merge has an implicit flush; however, if the replace occurs at the end of the merge, the value is not flushed *) 170 | let write_after_merge () = 171 | let* { Context.rw; clone; _ } = Context.with_full_index () in 172 | let w = rw in 173 | let r1 = clone ~readonly:true () in 174 | let k1 = Key.v () in 175 | let v1 = Value.v () in 176 | let k2 = Key.v () in 177 | let v2 = Value.v () in 178 | Index.replace w k1 v1; 179 | let hook = after (fun () -> Index.replace w k2 v2) in 180 | let t = Index.try_merge_aux ~force:true ~hook w in 181 | Index.await t |> check_completed; 182 | Index.sync r1; 183 | test_one_entry r1 k1 v1; 184 | Alcotest.check_raises (Printf.sprintf "Absent value was found: %s." k2) 185 | Not_found (fun () -> ignore_value (Index.find r1 k2)) 186 | 187 | let replace_while_merge () = 188 | let* { Context.rw; clone; _ } = Context.with_full_index () in 189 | let w = rw in 190 | let r1 = clone ~readonly:true () in 191 | let k1 = Key.v () in 192 | let v1 = Value.v () in 193 | let k2 = Key.v () in 194 | let v2 = Value.v () in 195 | Index.replace w k1 v1; 196 | let hook = 197 | before (fun () -> 198 | Index.replace w k2 v2; 199 | test_one_entry w k2 v2) 200 | in 201 | let t = Index.try_merge_aux ~force:true ~hook w in 202 | Index.sync r1; 203 | test_one_entry r1 k1 v1; 204 | Index.await t |> check_completed 205 | 206 | (* note that here we cannot do 207 | `test_one_entry r1 k2 v2` 208 | as there is no way to guarantee that the latest value 209 | added by a RW instance is found by a RO instance 210 | *) 211 | 212 | let find_while_merge () = 213 | let* { Context.rw; clone; _ } = Context.with_full_index () in 214 | let w = rw in 215 | let k1 = Key.v () in 216 | let v1 = Value.v () in 217 | Index.replace w k1 v1; 218 | let f () = test_one_entry w k1 v1 in 219 | let t1 = Index.try_merge_aux ~force:true ~hook:(after f) w in 220 | let t2 = Index.try_merge_aux ~force:true ~hook:(after f) w in 221 | let r1 = clone ~readonly:true () in 222 | let f () = test_one_entry r1 k1 v1 in 223 | let t3 = Index.try_merge_aux ~force:true ~hook:(before f) w in 224 | let t4 = Index.try_merge_aux ~force:true ~hook:(before f) w in 225 | Index.await t1 |> check_completed; 226 | Index.await t2 |> check_completed; 227 | Index.await t3 |> check_completed; 228 | Index.await t4 |> check_completed 229 | 230 | let find_in_async_generation_change () = 231 | let* { Context.rw; clone; _ } = Context.with_full_index () in 232 | let w = rw in 233 | let r1 = clone ~readonly:true () in 234 | let k1 = Key.v () in 235 | let v1 = Value.v () in 236 | let f () = 237 | Index.replace w k1 v1; 238 | Index.flush w; 239 | Index.sync r1; 240 | test_one_entry r1 k1 v1 241 | in 242 | let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in 243 | Index.await t1 |> check_completed 244 | 245 | let find_in_async_same_generation () = 246 | let* { Context.rw; clone; _ } = Context.with_full_index () in 247 | let w = rw in 248 | let r1 = clone ~readonly:true () in 249 | let k1 = Key.v () in 250 | let v1 =
Value.v () in 251 | let k2 = Key.v () in 252 | let v2 = Value.v () in 253 | let f () = 254 | Index.replace w k1 v1; 255 | Index.flush w; 256 | Index.sync r1; 257 | test_one_entry r1 k1 v1; 258 | Index.replace w k2 v2; 259 | Index.flush w; 260 | Index.sync r1; 261 | test_one_entry r1 k2 v2 262 | in 263 | let t1 = Index.try_merge_aux ~force:true ~hook:(before f) w in 264 | Index.await t1 |> check_completed 265 | 266 | let sync_before_and_after_clearing_async () = 267 | let* { Context.rw; clone; _ } = Context.with_full_index () in 268 | let w = rw in 269 | let ro = clone ~readonly:true () in 270 | let k1 = Key.v () in 271 | let v1 = Value.v () in 272 | let k2 = Key.v () in 273 | let v2 = Value.v () in 274 | let add_in_async () = 275 | Index.replace w k1 v1; 276 | Index.replace w k2 v2; 277 | Index.flush w; 278 | Log.debug (fun l -> l "RO updates async's offset"); 279 | Index.sync ro 280 | in 281 | let sync_before_clear_async () = 282 | Log.debug (fun l -> l "RO updates instance's generation"); 283 | Index.sync ro 284 | in 285 | let hook = 286 | Hook.v (function 287 | | `Before -> add_in_async () 288 | | `After_clear -> sync_before_clear_async () 289 | | _ -> ()) 290 | in 291 | let t1 = Index.try_merge_aux ~force:true ~hook w in 292 | Index.await t1 |> check_completed; 293 | Index.sync ro; 294 | test_one_entry ro k1 v1; 295 | test_one_entry ro k2 v2 296 | 297 | (** RW adds a value in log and flushes it, so every subsequent RO sync should 298 | find that value. But if the RO sync occurs during a merge, after a clear but 299 | before a generation change, then the value is missed. Also test ro find at 300 | this point. *) 301 | let sync_after_clear_log () = 302 | let* Context.{ rw; clone; _ } = Context.with_empty_index () in 303 | let ro = clone ~readonly:true () in 304 | let k1, v1 = (Key.v (), Value.v ()) in 305 | Index.replace rw k1 v1; 306 | Index.flush rw; 307 | let hook = after_clear (fun () -> Index.sync ro) in 308 | let t = Index.try_merge_aux ~force:true ~hook rw in 309 | Index.await t |> check_completed; 310 | test_one_entry ro k1 v1; 311 | let k2, v2 = (Key.v (), Value.v ()) in 312 | Index.replace rw k2 v2; 313 | Index.flush rw; 314 | Index.sync ro; 315 | let hook = after_clear (fun () -> test_one_entry ro k1 v1) in 316 | let t = Index.try_merge_aux ~force:true ~hook rw in 317 | Index.await t |> check_completed 318 | 319 | (** during a merge RO sync can miss a value if it reads the generation before 320 | the generation is updated. 
*) 321 | let merge_during_sync () = 322 | let* Context.{ rw; clone; _ } = Context.with_empty_index () in 323 | let ro = clone ~readonly:true () in 324 | let k1, v1 = (Key.v (), Value.v ()) in 325 | Index.replace rw k1 v1; 326 | Index.flush rw; 327 | let hook = 328 | before_offset_read (fun () -> 329 | let t = Index.try_merge_aux ~force:true rw in 330 | Index.await t |> check_completed) 331 | in 332 | Index.sync' ~hook ro; 333 | test_one_entry ro k1 v1 334 | 335 | let test_is_merging () = 336 | let* Context.{ rw; _ } = Context.with_empty_index () in 337 | let add_binding_and_merge ~hook = 338 | let k1, v1 = (Key.v (), Value.v ()) in 339 | Index.replace rw k1 v1; 340 | let t = Index.try_merge_aux ~force:true ~hook rw in 341 | Index.await t |> check_completed 342 | in 343 | let f msg b () = Alcotest.(check bool) msg (Index.is_merging rw) b in 344 | f "before merge" false (); 345 | add_binding_and_merge ~hook:(before (f "before" true)); 346 | f "between merge" false (); 347 | add_binding_and_merge ~hook:(after (f "after" true)); 348 | add_binding_and_merge ~hook:(after_clear (f "after clear" true)) 349 | 350 | let add_bindings index = 351 | let k1, v1 = (Key.v (), Value.v ()) in 352 | Index.replace index k1 v1 353 | 354 | (** Test that a clear aborts the merge. *) 355 | let test_non_blocking_clear () = 356 | let* Context.{ rw; _ } = Context.with_empty_index () in 357 | let merge_started = Semaphore.make false and merge = Semaphore.make false in 358 | let merge_hook = 359 | Hook.v @@ function 360 | | `Before -> 361 | Semaphore.release merge_started; 362 | Semaphore.acquire merge 363 | | `After -> Alcotest.fail "Merge should have been aborted by clear" 364 | | _ -> () 365 | in 366 | let clear_hook = 367 | Hook.v @@ function 368 | | `Abort_signalled -> Semaphore.release merge 369 | | `IO_clear -> () 370 | in 371 | add_bindings rw; 372 | let thread = Index.try_merge_aux ~force:true ~hook:merge_hook rw in 373 | Semaphore.acquire merge_started; 374 | add_bindings rw; 375 | Index.clear' ~hook:clear_hook rw; 376 | match Index.await thread with 377 | | Ok `Aborted -> () 378 | | _ -> Alcotest.fail "merge should have aborted" 379 | 380 | (** The test consists of aborting a first merge after one entry is added in the 381 | ./merge file and checking that a second merge succeeds. Regression test for 382 | PR 211 in which the second merge was triggering an assert failure. 
*) 383 | let test_abort_merge ~abort_merge () = 384 | let* { Context.rw; clone; _ } = Context.with_full_index () in 385 | let merge_started = Semaphore.make false and merge = Semaphore.make false in 386 | let merge_hook = 387 | Hook.v @@ function 388 | | `After_first_entry -> 389 | Semaphore.release merge_started; 390 | Semaphore.acquire merge 391 | | `After | `After_clear -> 392 | Alcotest.fail "Merge should have been aborted by clear" 393 | | `Before -> () 394 | in 395 | let abort_hook = 396 | Hook.v @@ function 397 | | `Abort_signalled -> Semaphore.release merge 398 | | `IO_clear -> () 399 | in 400 | let t = Index.try_merge_aux ~force:true ~hook:merge_hook rw in 401 | Semaphore.acquire merge_started; 402 | abort_merge ~hook:abort_hook rw; 403 | (match Index.await t with 404 | | Ok `Aborted -> () 405 | | _ -> Alcotest.fail "Merge should have aborted"); 406 | let rw = clone ~readonly:false ~fresh:false () in 407 | add_bindings rw; 408 | let t = Index.try_merge_aux ~force:true rw in 409 | Index.await t |> check_completed 410 | 411 | let test_clear_aborts_merge = test_abort_merge ~abort_merge:Index.clear' 412 | 413 | let test_close_immediately_aborts_merge = 414 | test_abort_merge ~abort_merge:(Index.close' ~immediately:()) 415 | 416 | let tests = 417 | [ 418 | ("readonly in sequence", `Quick, readonly_s); 419 | ("readonly interleaved", `Quick, readonly); 420 | ("interleaved merge", `Quick, readonly_and_merge); 421 | ("write at the end of merge", `Quick, write_after_merge); 422 | ("write in log_async", `Quick, replace_while_merge); 423 | ("find while merging", `Quick, find_while_merge); 424 | ("find in async without log", `Quick, find_in_async_generation_change); 425 | ("find in async with log", `Quick, find_in_async_same_generation); 426 | ( "sync before and after clearing the async", 427 | `Quick, 428 | sync_before_and_after_clearing_async ); 429 | ("sync and find after log cleared", `Quick, sync_after_clear_log); 430 | ("merge during ro sync", `Quick, merge_during_sync); 431 | ("is_merging", `Quick, test_is_merging); 432 | ("clear is not blocking", `Quick, test_non_blocking_clear); 433 | ("`clear` aborts merge", `Quick, test_clear_aborts_merge); 434 | ( "`close ~immediately` aborts merge", 435 | `Quick, 436 | test_close_immediately_aborts_merge ); 437 | ] 438 | -------------------------------------------------------------------------------- /test/unix/force_merge.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/unix/io_array.ml: -------------------------------------------------------------------------------- 1 | module Int63 = Optint.Int63 2 | module IO = Index_unix.Private.IO 3 | 4 | let ( // ) = Filename.concat 5 | let root = "_tests" // "unix.io_array" 6 | 7 | module Entry = struct 8 | module Key = Common.Key 9 | module Value = Common.Value 10 | 11 | type t = Key.t * Value.t 12 | 13 | let encoded_size = Key.encoded_size + Value.encoded_size 14 | 15 | let decode string off = 16 | let key = Key.decode string off in 17 | let value = Value.decode string (off + Key.encoded_size) in 18 | (key, value) 19 | 20 | let append_io io (key, value) = 21 | let encoded_key = Key.encode key in 22 | let encoded_value = Value.encode value in 23 | IO.append io encoded_key; 24 | IO.append io encoded_value 25 | end 26 | 27 | module IOArray = Index.Private.Io_array.Make (IO) (Entry) 28 | 29 | let entry = Alcotest.(pair string string) 30 | 
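(* A minimal sketch of the fixed-width layout assumed by [Entry] above:
   consecutive encoded entries are exactly [Entry.encoded_size] bytes apart, so
   in a buffer that holds nothing but concatenated encodings, the [i]-th entry
   starts at offset [i * Entry.encoded_size]. [_nth_entry] is a hypothetical
   helper, used only to illustrate this offset arithmetic; it is not part of
   the library API and assumes [buffer] contains no IO header. *)
let _nth_entry buffer i = Entry.decode buffer (i * Entry.encoded_size)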
31 | let fresh_io name = 32 | IO.v ~fresh:true ~generation:Int63.zero ~fan_size:Int63.zero (root // name) 33 | 34 | (* Append a random sequence of [size] keys to an IO instance and return 35 | a pair of an IOArray and an equivalent in-memory array. *) 36 | let populate_random ~size io = 37 | let rec loop acc = function 38 | | 0 -> acc 39 | | n -> 40 | let e = (Common.Key.v (), Common.Value.v ()) in 41 | Entry.append_io io e; 42 | loop (e :: acc) (n - 1) 43 | in 44 | let mem_arr = Array.of_list (List.rev (loop [] size)) in 45 | let io_arr = IOArray.v io in 46 | IO.flush io; 47 | (mem_arr, io_arr) 48 | 49 | (* Tests *) 50 | let read_sequential () = 51 | let size = 1000 in 52 | let io = fresh_io "read_sequential" in 53 | let mem_arr, io_arr = populate_random ~size io in 54 | for i = 0 to size - 1 do 55 | let expected = mem_arr.(i) in 56 | let actual = IOArray.get io_arr (Int63.of_int i) in 57 | Alcotest.(check entry) 58 | (Fmt.str "Inserted key at index %i is accessible" i) 59 | expected actual 60 | done 61 | 62 | let read_sequential_prefetch () = 63 | let size = 1000 in 64 | let io = fresh_io "read_sequential_prefetch" in 65 | let mem_arr, io_arr = populate_random ~size io in 66 | IOArray.pre_fetch io_arr ~low:Int63.zero ~high:(Int63.of_int 999); 67 | 68 | (* Read the arrays backwards *) 69 | for i = size - 1 downto 0 do 70 | let expected = mem_arr.(i) in 71 | let actual = IOArray.get io_arr (Int63.of_int i) in 72 | Alcotest.(check entry) 73 | (Fmt.str "Inserted key at index %i is accessible" i) 74 | expected actual 75 | done 76 | 77 | let tests = 78 | [ 79 | ("fresh", `Quick, read_sequential); 80 | ("prefetch", `Quick, read_sequential_prefetch); 81 | ] 82 | -------------------------------------------------------------------------------- /test/unix/io_array.mli: -------------------------------------------------------------------------------- 1 | val tests : unit Alcotest.test_case list 2 | -------------------------------------------------------------------------------- /test/unix/log.ml: -------------------------------------------------------------------------------- 1 | let src = Logs.Src.create "index-unix-test" ~doc:"Index Unix Testing" 2 | 3 | module Log = (val Logs.src_log src : Logs.LOG) 4 | include Log 5 | -------------------------------------------------------------------------------- /test/unix/log.mli: -------------------------------------------------------------------------------- 1 | include Logs.LOG 2 | -------------------------------------------------------------------------------- /test/unix/main.mli: -------------------------------------------------------------------------------- 1 | (* left empty on purpose *) 2 | -------------------------------------------------------------------------------- /test/unix/test_lru.ml: -------------------------------------------------------------------------------- 1 | (** Tests of the in-memory LRU used by the Index implementation. 2 | 3 | NOTE: most other tests in the suite set an LRU size of 0 for simplicity.
*) 4 | 5 | module Stats = Index.Stats 6 | open Common 7 | 8 | module Context = Common.Make_context (struct 9 | let root = Filename.concat "_tests" "test_log_with_lru" 10 | end) 11 | 12 | let check_bool pos ~expected act = Alcotest.(check ~pos bool) "" expected act 13 | let check_int pos ~expected act = Alcotest.(check ~pos int) "" expected act 14 | 15 | let check_value pos ~expected act = 16 | let key = Alcotest.testable Key.pp Key.equal in 17 | Alcotest.(check ~pos key) "" expected act 18 | 19 | let check_lru_stats pos ~hits ~misses = 20 | Alcotest.(check ~pos int) "LRU hits" hits Stats.((get ()).lru_hits); 21 | Alcotest.(check ~pos int) "LRU misses" misses Stats.((get ()).lru_misses) 22 | 23 | let get_new_cached_binding idx = 24 | let k, v = (Key.v (), Value.v ()) in 25 | Index.replace idx k v; 26 | check_value __POS__ ~expected:v (Index.find idx k); 27 | (k, v) 28 | 29 | let test_replace_and_find () = 30 | let lru_size = 1 in 31 | let* { rw = idx; _ } = Context.with_empty_index ~lru_size () in 32 | 33 | (* Check that [replace] populates the LRU: *) 34 | let k1, v1 = (Key.v (), Value.v ()) in 35 | let () = 36 | Stats.reset_stats (); 37 | Index.replace idx k1 v1; 38 | (* [k1] is now in the LRU: *) 39 | check_value __POS__ ~expected:v1 (Index.find idx k1); 40 | check_lru_stats __POS__ ~hits:1 ~misses:0 41 | in 42 | 43 | (* Check that a second [replace] updates the LRU contents: *) 44 | let k2, v2 = (Key.v (), Value.v ()) in 45 | let () = 46 | assert (k1 <> k2); 47 | Stats.reset_stats (); 48 | Index.replace idx k2 v2; 49 | (* [k2] has replaced [k1] in the LRU, so we miss on [find k1]: *) 50 | check_value __POS__ ~expected:v1 (Index.find idx k1); 51 | check_lru_stats __POS__ ~hits:0 ~misses:1; 52 | (* [k1] is now in the LRU: *) 53 | check_value __POS__ ~expected:v1 (Index.find idx k1); 54 | check_lru_stats __POS__ ~hits:1 ~misses:1 55 | in 56 | () 57 | 58 | let test_mem () = 59 | let lru_size = 1 in 60 | let* { rw = idx; _ } = Context.with_empty_index ~lru_size () in 61 | 62 | (* Initially, [k2] is in the LRU and [k1] is not: *) 63 | let k1, k2, v1, v2 = (Key.v (), Key.v (), Value.v (), Value.v ()) in 64 | Index.replace idx k1 v1; 65 | Index.replace idx k2 v2; 66 | 67 | (* [mem k2] hits in the LRU: *) 68 | let () = 69 | Stats.reset_stats (); 70 | check_bool __POS__ ~expected:true (Index.mem idx k2); 71 | check_lru_stats __POS__ ~hits:1 ~misses:0 72 | in 73 | 74 | (* [mem k1] initially misses in the LRU, but subsequent calls hit in the LRU 75 | (because the [k2] binding is replaced by [k1] on the miss). *) 76 | let () = 77 | Stats.reset_stats (); 78 | check_bool __POS__ ~expected:true (Index.mem idx k1); 79 | check_lru_stats __POS__ ~hits:0 ~misses:1; 80 | check_bool __POS__ ~expected:true (Index.mem idx k1); 81 | check_lru_stats __POS__ ~hits:1 ~misses:1 82 | in 83 | () 84 | 85 | (* Check that the LRU is cleared on [clear]. 
*) 86 | let test_clear () = 87 | let lru_size = 1 in 88 | let* { rw = idx; _ } = Context.with_full_index ~lru_size () in 89 | 90 | (* Add a binding and ensure that it's in the LRU: *) 91 | let k, v = (Key.v (), Value.v ()) in 92 | Index.replace idx k v; 93 | check_value __POS__ ~expected:v (Index.find idx k); 94 | 95 | (* We should miss in the LRU when attempting to find [k] after [clear]: *) 96 | Index.clear idx; 97 | Stats.reset_stats (); 98 | Alcotest.check_raises "find after clear" Not_found (fun () -> 99 | ignore (Index.find idx k)); 100 | check_lru_stats __POS__ ~hits:0 ~misses:1 101 | 102 | (* Check that bindings in the LRU are properly removed by [filter]: *) 103 | let test_filter () = 104 | let lru_size = 1 in 105 | let* { rw = idx; _ } = Context.with_full_index ~lru_size () in 106 | 107 | (* Add a binding and ensure that it's in the LRU: *) 108 | let k, v = (Key.v (), Value.v ()) in 109 | Index.replace idx k v; 110 | check_value __POS__ ~expected:v (Index.find idx k); 111 | 112 | (* Remove [k] from the index via [filter], then try to [find] it: *) 113 | Index.filter idx (fun (k', _) -> not (Key.equal k k')); 114 | Stats.reset_stats (); 115 | Alcotest.check_raises ~pos:__POS__ "find after filter-false" Not_found 116 | (fun () -> ignore (Index.find idx k)); 117 | check_lru_stats __POS__ ~hits:0 ~misses:1 118 | 119 | let tests = 120 | [ 121 | ("replace_and_find", `Quick, test_replace_and_find); 122 | ("mem", `Quick, test_mem); 123 | ("clear", `Quick, test_clear); 124 | ("filter", `Quick, test_filter); 125 | ] 126 | --------------------------------------------------------------------------------
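(* A minimal sketch of the counter-diffing idiom that test_lru.ml relies on,
   reusing the [Stats] and [Index] aliases bound at the top of that file.
   [find_with_lru_outcome] is a hypothetical helper, not part of the test
   suite or of the library API: it performs a single [find] and reports
   whether the LRU answered it, by checking whether [lru_hits] advanced. *)
let find_with_lru_outcome idx k =
  let hits_before = Stats.((get ()).lru_hits) in
  (* [Index.find] raises [Not_found] when the key is absent, exactly as in
     the tests above. *)
  let v = Index.find idx k in
  (v, Stats.((get ()).lru_hits) > hits_before)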