├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── .ocamlformat
├── CHANGES.md
├── LICENSE
├── Makefile
├── README.md
├── bench
    ├── dune
    └── main.ml
├── bloomf-bench.opam
├── bloomf.opam
├── dune-project
├── src
    ├── bloomf.ml
    ├── bloomf.mli
    └── dune
└── test
    ├── dune
    ├── main.ml
    └── main.mli


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Changelog check
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [ master ]
 6 |     types: [ opened, synchronize, reopened, labeled, unlabeled ]
 7 | 
 8 | jobs:
 9 |   build:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v1
14 | 
15 |     - name: git diff
16 |       if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-changelog-needed') }}
17 |       env:
18 |         BASE_REF: ${{ github.event.pull_request.base.ref }}
19 |       run: |
20 |         ! git diff --exit-code origin/$BASE_REF -- CHANGES.md
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _bench/
2 | _build/
3 | _metrics/
4 | _opam/
5 | _tests/
6 | *.install
7 | *.merlin
8 | **/.DS_Store
9 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | version = 0.19.0
2 | parse-docstrings = true
3 | break-infix = fit-or-vertical
4 | module-item-spacing = compact
5 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | # unreleased
 2 | 
 3 | - Added `copy`
 4 | 
 5 | # v0.2.0 2021-02-10
 6 | 
 7 | - Added `to_bytes` and `of_bytes`.
 8 | - Added `union` and `inter`.
 9 | 
10 | # v0.1.0 2019-03-07
11 | 
12 | - Initial release
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2019 Clément Pascutto
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all test bench doc clean
 2 | 
 3 | all:
 4 | 	dune build
 5 | 
 6 | test:
 7 | 	dune runtest
 8 | 
 9 | bench:
10 | 	dune build @bench
11 | 
12 | clean:
13 | 	dune clean
14 | 
15 | doc:
16 | 	dune build @doc
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bloomf - Efficient Bloom filters for OCaml [![OCaml-CI Build Status](https://img.shields.io/endpoint?url=https%3A%2F%2Fci.ocamllabs.io%2Fbadge%2Fmirage%2Fbloomf%2Fmaster&logo=ocaml)](https://ci.ocamllabs.io/github/mirage/bloomf)
 2 | Bloom filters are memory and time efficient data structures allowing
 3 | probabilistic membership queries in a set.
 4 | 
 5 | A query negative result ensures that the element is not present in the set,
 6 | while a positive result might be a false positive, i.e. the element might not be
 7 | present and the BF membership query can return true anyway.
 8 | 
 9 | Internal parameters of the BF allow to control its false positive rate depending
10 | on the expected number of elements in it.
11 | 
12 | Online documentation is available [here](https://mirage.github.io/bloomf/).
13 | 
14 | ## Install
15 | 
16 | The latest version of `bloomf` is available on opam with `opam install bloomf`.
17 | 
18 | Alternatively, you can build from sources with `make` or `dune build`.
19 | 
20 | ## Tests
21 | 
22 | Some of the tests, measuring false positive rate or size estimation, might fail
23 | once in a while since they are randomized. They are thus removed from `dune
24 | runtest` alias.
25 | 
26 | To run the whole test suite, run `dune build @runtest-rand` instead.
27 | 
28 | ## Benchmarks
29 | 
30 | Micro benchmarks are provided for `create`, `add`, `mem` and `size_estimate`
31 | operations. Expected error rate is 0.01.
32 | 
33 | They preform OLS regression analysis using the development version of
34 | [bechamel](https://github.com/dinosaure/bechamel). To reproduce them, pin
35 | `bechamel` then run `dune build @bench`.
36 | 


--------------------------------------------------------------------------------
/bench/dune:
--------------------------------------------------------------------------------
 1 | (executable
 2 |  (name main)
 3 |  (libraries bechamel bechamel-notty notty.unix bloomf))
 4 | 
 5 | (rule
 6 |  (alias bench)
 7 |  (package bloomf-bench)
 8 |  (action
 9 |   (run %{exe:main.exe})))
10 | 


--------------------------------------------------------------------------------
/bench/main.ml:
--------------------------------------------------------------------------------
 1 | open Bechamel
 2 | open Toolkit
 3 | 
 4 | let () = Random.self_init ()
 5 | let random_char () = char_of_int (Random.int 256)
 6 | let random_string n = String.init n (fun _i -> random_char ())
 7 | let create size = Staged.stage (fun () -> Bloomf.create size)
 8 | 
 9 | let add size =
10 |   let bf = Bloomf.create size in
11 |   let r = random_string 1024 in
12 |   Staged.stage (fun () -> Bloomf.add bf r)
13 | 
14 | let fill_bf bf n =
15 |   let rec loop i =
16 |     if i = 0 then ()
17 |     else
18 |       let r = random_string 1024 in
19 |       let () = Bloomf.add bf r in
20 |       loop (i - 1)
21 |   in
22 |   loop n
23 | 
24 | let mem_absent size =
25 |   let bf = Bloomf.create size in
26 |   let () = fill_bf bf size in
27 |   let r = random_string 1024 in
28 |   Staged.stage (fun () -> ignore (Bloomf.mem bf r))
29 | 
30 | let mem_present size =
31 |   let bf = Bloomf.create size in
32 |   let () = fill_bf bf size in
33 |   let r = random_string 1024 in
34 |   let () = Bloomf.add bf r in
35 |   Staged.stage (fun () -> ignore (Bloomf.mem bf r))
36 | 
37 | let size_estimate size =
38 |   let bf = Bloomf.create size in
39 |   let () = fill_bf bf size in
40 |   Staged.stage (fun () -> ignore (Bloomf.size_estimate bf))
41 | 
42 | let test =
43 |   Test.make_grouped ~name:"bloomf"
44 |     [
45 |       Test.make_indexed ~name:"create" ~fmt:"%s %d"
46 |         ~args:[ 10_000; 100_000; 1_000_000 ]
47 |         create;
48 |       Test.make_indexed ~name:"add" ~fmt:"%s %d"
49 |         ~args:[ 10_000; 100_000; 1_000_000 ]
50 |         add;
51 |       Test.make_indexed ~name:"mem (absent)" ~fmt:"%s %d"
52 |         ~args:[ 10_000; 100_000; 1_000_000 ]
53 |         mem_absent;
54 |       Test.make_indexed ~name:"mem (present)" ~fmt:"%s %d"
55 |         ~args:[ 10_000; 100_000; 1_000_000 ]
56 |         mem_present;
57 |       Test.make_indexed ~name:"size_estimate" ~fmt:"%s %d"
58 |         ~args:[ 10_000; 100_000; 1_000_000 ]
59 |         size_estimate;
60 |     ]
61 | 
62 | let benchmark () =
63 |   let config = Benchmark.(cfg ~limit:100 ~quota:(Time.millisecond 100.)) () in
64 |   let ols =
65 |     Analyze.ols ~bootstrap:0 ~r_square:true ~predictors:Measure.[| run |]
66 |   in
67 |   let instances =
68 |     Instance.[ minor_allocated; major_allocated; monotonic_clock ]
69 |   in
70 |   let raw_results = Benchmark.all config instances test in
71 |   List.map (fun instance -> Analyze.all ols instance raw_results) instances
72 |   |> Analyze.merge ols instances
73 | 
74 | let () = Bechamel_notty.Unit.add Instance.monotonic_clock "ns"
75 | let () = Bechamel_notty.Unit.add Instance.minor_allocated "w"
76 | let () = Bechamel_notty.Unit.add Instance.major_allocated "mw"
77 | 
78 | let img (window, results) =
79 |   Bechamel_notty.Multiple.image_of_ols_results ~rect:window
80 |     ~predictor:Measure.run results
81 | 
82 | open Notty_unix
83 | 
84 | let rect w h = Bechamel_notty.{ w; h }
85 | 
86 | let () =
87 |   let window =
88 |     match winsize Unix.stdout with
89 |     | Some (_, _) -> Bechamel_notty.{ w = 80; h = 1 }
90 |     | None -> { w = 80; h = 1 }
91 |   in
92 |   img (window, benchmark ()) |> eol |> output_image
93 | 


--------------------------------------------------------------------------------
/bloomf-bench.opam:
--------------------------------------------------------------------------------
 1 | opam-version: "2.0"
 2 | maintainer:   "Clément Pascutto <clement@pascutto.fr>"
 3 | authors:      "Clément Pascutto <clement@pascutto.fr>"
 4 | license:      "MIT"
 5 | homepage:     "https://github.com/mirage/bloomf"
 6 | bug-reports:  "https://github.com/mirage/bloomf/issues"
 7 | dev-repo:     "git+https://github.com/mirage/bloomf.git"
 8 | doc:          "https://mirage.github.io/bloomf/"
 9 | 
10 | build: [
11 |   ["dune" "subst"] {dev}
12 |   [
13 |     "dune"
14 |     "build"
15 |     "-p"
16 |     name
17 |     "-j"
18 |     jobs
19 |     "@install"
20 |     "@runtest" {with-test}
21 |     "@doc" {with-doc}
22 |   ]
23 | ]
24 | 
25 | depends: [
26 |   "ocaml" {>= "4.03.0"}
27 |   "dune" {>= "2.0.0"}
28 |   "bloomf" {=version}
29 |   "bechamel-notty"
30 |   "alcotest" {>= "1.0.0" & with-test}
31 | ]
32 | synopsis: "Benchmarking package for `bloomf`"
33 | description: "Benchmarking package for `bloomf`"
34 | 


--------------------------------------------------------------------------------
/bloomf.opam:
--------------------------------------------------------------------------------
 1 | opam-version: "2.0"
 2 | maintainer:   "Clément Pascutto <clement@pascutto.fr>"
 3 | authors:      "Clément Pascutto <clement@pascutto.fr>"
 4 | license:      "MIT"
 5 | homepage:     "https://github.com/mirage/bloomf"
 6 | bug-reports:  "https://github.com/mirage/bloomf/issues"
 7 | dev-repo:     "git+https://github.com/mirage/bloomf.git"
 8 | doc:          "https://mirage.github.io/bloomf/"
 9 | 
10 | build: [
11 |   ["dune" "subst"] {dev}
12 |   [
13 |     "dune"
14 |     "build"
15 |     "-p"
16 |     name
17 |     "-j"
18 |     jobs
19 |     "@install"
20 |     "@runtest" {with-test}
21 |     "@doc" {with-doc}
22 |   ]
23 | ]
24 | 
25 | depends: [
26 |   "ocaml"   {>= "4.03.0"}
27 |   "dune"    {>= "1.7.0"}
28 |   "bitv"    {>= "1.4"}
29 |   "alcotest" {>= "1.0.0" & with-test}
30 | ]
31 | synopsis: "Efficient Bloom filters for OCaml"
32 | description: "Efficient Bloom filters for OCaml"
33 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
1 | (lang dune 2.0)
2 | (name bloomf)
3 | (implicit_transitive_deps false)


--------------------------------------------------------------------------------
/src/bloomf.ml:
--------------------------------------------------------------------------------
  1 | type priv = { m : int; k : int; p_len : (int * int) list; b : Bitv.t }
  2 | type 'a t = priv
  3 | 
  4 | let copy t = { m = t.m; k = t.k; p_len = t.p_len; b = Bitv.copy t.b }
  5 | let rec gcd a b = if b = 0 then a else gcd b (a mod b)
  6 | 
  7 | let partition_lengths m k =
  8 |   let rec aux sum acc i =
  9 |     if List.length acc = k then (sum, acc)
 10 |     else
 11 |       let rec loop step =
 12 |         let k = i + step in
 13 |         let gcd_k = gcd k in
 14 |         if List.for_all (fun p -> gcd_k p = 1) acc then
 15 |           aux (sum + k) (k :: acc) (k + 1)
 16 |         else loop (step + 1)
 17 |       in
 18 |       loop 1
 19 |   in
 20 |   aux 0 [] (m / k)
 21 | 
 22 | let v m k =
 23 |   let m, lengths = partition_lengths m k in
 24 |   let p_len =
 25 |     let rec aux acc off = function
 26 |       | [] -> acc
 27 |       | h :: t -> aux ((off, h) :: acc) (off + h) t
 28 |     in
 29 |     aux [] 0 lengths
 30 |   in
 31 |   try
 32 |     let b = Bitv.create m false in
 33 |     { m; k; p_len; b }
 34 |   with Invalid_argument _ -> invalid_arg "Bloomf.create"
 35 | 
 36 | let estimate_parameters n p =
 37 |   let log2 = log 2. in
 38 |   let nf = float_of_int n in
 39 |   let m = ceil (-.nf *. log p /. log (2. ** log2)) in
 40 |   let k = ceil (log2 *. m /. nf) in
 41 |   (m, k)
 42 | 
 43 | let create ?(error_rate = 0.01) n_items =
 44 |   let m, k = estimate_parameters n_items error_rate in
 45 |   if error_rate <= 0. || error_rate >= 1. then invalid_arg "Bloomf.create";
 46 |   v (int_of_float m) (int_of_float k)
 47 | 
 48 | let add_priv t hashed_data =
 49 |   let rec loop = function
 50 |     | [] -> ()
 51 |     | (off, len) :: tl ->
 52 |         let loc = off + (hashed_data mod len) in
 53 |         let () = Bitv.unsafe_set t.b loc true in
 54 |         loop tl
 55 |   in
 56 |   loop t.p_len
 57 | 
 58 | let add bf data = add_priv bf (Hashtbl.hash data)
 59 | 
 60 | let op f bf1 bf2 =
 61 |   if bf1.k <> bf2.k || bf1.m <> bf2.m then
 62 |     invalid_arg "incompatible bloom filters";
 63 |   { m = bf1.m; k = bf2.k; p_len = bf1.p_len; b = f bf1.b bf2.b }
 64 | 
 65 | let union bf1 bf2 = op Bitv.bw_or bf1 bf2
 66 | let inter bf1 bf2 = op Bitv.bw_and bf1 bf2
 67 | 
 68 | let mem_priv t hashed_data =
 69 |   let rec loop = function
 70 |     | [] -> true
 71 |     | (off, len) :: tl ->
 72 |         let loc = off + (hashed_data mod len) in
 73 |         let res = Bitv.unsafe_get t.b loc in
 74 |         if res then loop tl else false
 75 |   in
 76 |   loop t.p_len
 77 | 
 78 | let mem bf data = mem_priv bf (Hashtbl.hash data)
 79 | let clear t = Bitv.fill t.b 0 t.m false
 80 | 
 81 | (* Bitv.pop is really slow *)
 82 | let size_estimate t =
 83 |   let mf = float_of_int t.m in
 84 |   let kf = float_of_int t.k in
 85 |   let xf = float_of_int (Bitv.pop t.b) in
 86 |   int_of_float (-.mf /. kf *. log (1. -. (xf /. mf)))
 87 | 
 88 | (* Serialisers *)
 89 | 
 90 | external set_64 : bytes -> int -> int64 -> unit = "%caml_string_set64u"
 91 | external swap64 : int64 -> int64 = "%bswap_int64"
 92 | 
 93 | let set_uint64 buf off v =
 94 |   if not Sys.big_endian then set_64 buf off (swap64 v) else set_64 buf off v
 95 | 
 96 | (* type priv = { m : int; k : int; p_len : (int * int) list; b : Bitv.t } *)
 97 | 
 98 | let to_bytes t =
 99 |   let enc_b = Bitv.to_bytes t.b in
100 |   let enc_b_len = Bytes.length enc_b in
101 |   let enc_p_len_len = 16 * List.length t.p_len in
102 |   let len = 8 + 8 + 8 + enc_p_len_len + enc_b_len in
103 |   let buf = Bytes.create len in
104 |   set_uint64 buf 0 (Int64.of_int t.m);
105 |   set_uint64 buf 8 (Int64.of_int t.k);
106 |   set_uint64 buf 16 (Int64.of_int (List.length t.p_len));
107 |   List.iteri
108 |     (fun i (i1, i2) ->
109 |       set_uint64 buf (24 + (8 * (2 * i))) (Int64.of_int i1);
110 |       set_uint64 buf (24 + (8 * ((2 * i) + 1))) (Int64.of_int i2))
111 |     t.p_len;
112 |   Bytes.blit enc_b 0 buf (24 + enc_p_len_len) enc_b_len;
113 |   buf
114 | 
115 | external get_64 : bytes -> int -> int64 = "%caml_string_get64"
116 | 
117 | let get_uint64 buf off =
118 |   if not Sys.big_endian then swap64 (get_64 buf off) else get_64 buf off
119 | 
120 | let of_bytes buf =
121 |   try
122 |     let m = get_uint64 buf 0 |> Int64.to_int in
123 |     let k = get_uint64 buf 8 |> Int64.to_int in
124 |     let p_len_len = get_uint64 buf 16 |> Int64.to_int in
125 |     let p_len =
126 |       List.init p_len_len (fun i ->
127 |           let i1 = get_uint64 buf (24 + (8 * (2 * i))) |> Int64.to_int in
128 |           let i2 = get_uint64 buf (24 + (8 * ((2 * i) + 1))) |> Int64.to_int in
129 |           (i1, i2))
130 |     in
131 |     let read = 24 + (16 * p_len_len) in
132 |     let b = Bytes.sub buf read (Bytes.length buf - read) |> Bitv.of_bytes in
133 |     Ok { m; k; p_len; b }
134 |   with _ -> Error (`Msg "invalid serialisation format")
135 | 
136 | module type Hashable = sig
137 |   type t
138 | 
139 |   val hash : t -> int
140 | end
141 | 
142 | module Make (H : Hashable) = struct
143 |   type t = priv
144 | 
145 |   let create = create
146 |   let copy = copy
147 |   let add bf data = add_priv bf (H.hash data)
148 |   let mem bf data = mem_priv bf (H.hash data)
149 |   let clear = clear
150 |   let size_estimate = size_estimate
151 |   let to_bytes = to_bytes
152 |   let of_bytes = of_bytes
153 | end
154 | 


--------------------------------------------------------------------------------
/src/bloomf.mli:
--------------------------------------------------------------------------------
 1 | (** Bloom filters
 2 | 
 3 |     bloomf is an implementation of Bloom filters in OCaml.
 4 | 
 5 |     Bloom filters are memory and time efficient data structures allowing
 6 |     probabilistic membership queries in a set. A query negative result ensures
 7 |     that the element is not present in the set, while a positive result might be
 8 |     a false positive, i.e. the element might not be present and the BF
 9 |     membership query can return true anyway. Internal parameters of the BF allow
10 |     to control its false positive rate depending on the expected number of
11 |     elements in it. *)
12 | 
13 | (** {1 Generic interface} *)
14 | 
15 | type 'a t
16 | (** The type of the Bloom filter. *)
17 | 
18 | val create : ?error_rate:float -> int -> 'a t
19 | (** [create ~error_rate size] creates a fresh BF for which expected false
20 |     positive rate when filled with [size] elements is [error_rate].
21 | 
22 |     @raise Invalid_argument
23 |       if [error_rate] is not in \]0, 1\[, or [size] is negative. *)
24 | 
25 | val copy : 'a t -> 'a t
26 | (** [copy t] copies the BF. The fresh returned BF is completely separated from
27 |     the given one. *)
28 | 
29 | val add : 'a t -> 'a -> unit
30 | (** [add t e] adds [e] to [t]. *)
31 | 
32 | val mem : 'a t -> 'a -> bool
33 | (** [mem t e] is [true] if [e] is in [t]. *)
34 | 
35 | val clear : 'a t -> unit
36 | (** [clear t] clears the contents of [t]. *)
37 | 
38 | val union : 'a t -> 'a t -> 'a t
39 | (** [union t1 t2] computes the union of the two inputs. This operation is
40 |     lossless in the sense that the resulting Bloom filter is the same as the
41 |     Bloom filter created from scratch using the union of the two sets.
42 | 
43 |     Raises [Invalid_argument] if the two bloom filters were created with
44 |     different parameters *)
45 | 
46 | val inter : 'a t -> 'a t -> 'a t
47 | (** [inter t1 t2] computes the intersection of the two inputs. The false
48 |     positive probability in the resulting Bloom filter is at most the
49 |     false-positive probability in one of the constituent Bloom filters, but may
50 |     be larger than the false positive probability in the Bloom filter created
51 |     from scratch using the intersection of the two sets.
52 | 
53 |     Raises [Invalid_argument] if the two bloom filters were created with
54 |     different parameters *)
55 | 
56 | val size_estimate : 'a t -> int
57 | (** [size_estimate t] is an approximation of the number of elements stored in
58 |     the bloom filter. Please note that this operation is costly (see
59 |     benchmarks). *)
60 | 
61 | (** {2 Serializers/Deserializers} *)
62 | 
63 | val to_bytes : 'a t -> bytes
64 | val of_bytes : bytes -> ('a t, [ `Msg of string ]) result
65 | 
66 | (** {1 Functorial interface} *)
67 | 
68 | (** The functorial interface allows you to specify your own hash function. *)
69 | 
70 | (** The input interface for [Bloomf.Make]. *)
71 | module type Hashable = sig
72 |   type t
73 |   (** The type of the values to be stored. *)
74 | 
75 |   val hash : t -> int
76 |   (** The hash function. {e This function must return positive integers.}
77 |       Behavior is undefined otherwise. Please note that false positive rate
78 |       might be affected by unevenly distributed hash functions. *)
79 | end
80 | 
81 | (** The output interface for [Bloomf.Make]. *)
82 | module Make (H : Hashable) : sig
83 |   type t
84 | 
85 |   val create : ?error_rate:float -> int -> t
86 |   val copy : t -> t
87 |   val add : t -> H.t -> unit
88 |   val mem : t -> H.t -> bool
89 |   val clear : t -> unit
90 |   val size_estimate : t -> int
91 |   val to_bytes : t -> bytes
92 |   val of_bytes : bytes -> (t, [ `Msg of string ]) result
93 | end
94 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
1 | (library
2 |  (public_name bloomf)
3 |  (name bloomf)
4 |  (libraries bitv))
5 | 


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
 1 | (executable
 2 |  (name main)
 3 |  (libraries bloomf alcotest))
 4 | 
 5 | (rule
 6 |  (alias runtest)
 7 |  (action
 8 |   (run %{exe:main.exe} -q)))
 9 | 
10 | (rule
11 |  (alias runtest-rand)
12 |  (action
13 |   (run %{exe:main.exe})))
14 | 


--------------------------------------------------------------------------------
/test/main.ml:
--------------------------------------------------------------------------------
  1 | let () = Random.self_init ()
  2 | let random_char () = char_of_int (Random.int 256)
  3 | let random_string n = String.init n (fun _i -> random_char ())
  4 | 
  5 | module StringSet = Set.Make (String)
  6 | 
  7 | let expected_error_rate = 0.001
  8 | 
  9 | let create_and_fill size =
 10 |   let bf = Bloomf.create ~error_rate:expected_error_rate size in
 11 |   let rec loop i acc =
 12 |     if i = 0 then acc
 13 |     else
 14 |       let r = random_string 1024 in
 15 |       let () = Bloomf.add bf r in
 16 |       loop (i - 1) (StringSet.add r acc)
 17 |   in
 18 |   let elts = loop size StringSet.empty in
 19 |   (bf, elts)
 20 | 
 21 | let test_mem () =
 22 |   let sizes = [ 1_000; 10_000; 100_000 ] in
 23 |   List.iter
 24 |     (fun i ->
 25 |       let bf, elts = create_and_fill i in
 26 |       StringSet.iter
 27 |         (fun r -> Alcotest.(check bool) "mem (empty)" true (Bloomf.mem bf r))
 28 |         elts)
 29 |     sizes
 30 | 
 31 | let test_errors () =
 32 |   let sizes = [ 1_000; 10_000; 100_000 ] in
 33 |   List.iter
 34 |     (fun i ->
 35 |       let bf, elts = create_and_fill i in
 36 |       let attempts = 100_000 in
 37 |       let rec loop i count =
 38 |         if i = 0 then count
 39 |         else
 40 |           let r = random_string 1024 in
 41 |           if StringSet.mem r elts then loop i count
 42 |           else loop (i - 1) (if Bloomf.mem bf r then count + 1 else count)
 43 |       in
 44 |       let count = loop attempts 0 in
 45 |       let error_rate = float_of_int count /. float_of_int attempts in
 46 |       if error_rate > 1.15 *. expected_error_rate then
 47 |         Alcotest.failf "error_rate: expecting@\n%f, got@\n%f"
 48 |           expected_error_rate error_rate
 49 |       else ())
 50 |     sizes
 51 | 
 52 | let test_size () =
 53 |   let sizes = [ 1_000; 10_000; 100_000 ] in
 54 |   List.iter
 55 |     (fun i ->
 56 |       let bf, _ = create_and_fill i in
 57 |       let len = Bloomf.size_estimate bf in
 58 |       if abs (len - i) > int_of_float (0.15 *. float_of_int i) then
 59 |         Alcotest.failf "size_estimate: expecting@\n%d, got@\n%d" i len)
 60 |     sizes
 61 | 
 62 | let test_op msg bop sop =
 63 |   let sizes = [ 1_000; 10_000; 100_000 ] in
 64 |   List.iter
 65 |     (fun i ->
 66 |       let bf1, elts1 = create_and_fill i in
 67 |       let bf2, elts2 = create_and_fill i in
 68 |       let bf3 = bop bf1 bf2 in
 69 |       let elts3 = sop elts1 elts2 in
 70 |       StringSet.iter
 71 |         (fun r -> Alcotest.(check bool) msg true (Bloomf.mem bf3 r))
 72 |         elts3)
 73 |     sizes
 74 | 
 75 | let test_union () = test_op "union" Bloomf.union StringSet.union
 76 | let test_inter () = test_op "intersection" Bloomf.inter StringSet.inter
 77 | 
 78 | let test_bytes () =
 79 |   let sizes = [ 1_000; 10_000; 100_000 ] in
 80 |   List.iter
 81 |     (fun i ->
 82 |       let bf1, _ = create_and_fill i in
 83 |       match Bloomf.to_bytes bf1 |> Bloomf.of_bytes with
 84 |       | Ok bf2 ->
 85 |           Alcotest.(check bool)
 86 |             "serialisation / deserialisation" true (bf1 = bf2)
 87 |       | Error _ -> Alcotest.failf "deserialisation failed")
 88 |     sizes
 89 | 
 90 | let suite =
 91 |   [
 92 |     ("Mem returns true when element was added", `Quick, test_mem);
 93 |     ( "False positive rate is as specified (15% error allowed)",
 94 |       `Slow,
 95 |       test_errors );
 96 |     ("Size estimate is correct", `Slow, test_size);
 97 |     ("Union", `Quick, test_union);
 98 |     ("Intersection", `Quick, test_inter);
 99 |     ("Serialisation", `Quick, test_bytes);
100 |   ]
101 | 
102 | let () = Alcotest.run "Bloomf" [ ("bloomf", suite) ]
103 | 


--------------------------------------------------------------------------------
/test/main.mli:
--------------------------------------------------------------------------------
1 | (* Left empty on purpose *)
2 | 


--------------------------------------------------------------------------------