├── .dockerignore ├── .gitignore ├── .ocamlformat ├── LICENSE ├── README.md ├── bin ├── bench.ml └── dune ├── dune-project ├── src ├── cmap.ml ├── dune ├── tdigest.ml └── tdigest.mli ├── tdigest.opam └── test ├── dune ├── shared.ml ├── test_discrete.ml └── test_tdigest.ml /.dockerignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | .vscode 3 | _opam/ 4 | _build/ 5 | **/.merlin 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | .vscode 3 | _opam/ 4 | _build/ 5 | **/.merlin 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | # "Asemio Style" v1.3 2 | # Last updated for ocamlformat 0.25.1 3 | ocaml-version=5.0.0 4 | 5 | # All available options are listed. 6 | # An option is commented out when the value matches the default 7 | # When updating ocamlformat, update this file: 8 | # - add any new options 9 | # - try out new values for existing options 10 | # - if a default changed, see if we should follow it or not 11 | 12 | # assignment-operator = end-line 13 | # break-before-in = fit-or-vertical 14 | break-cases = all 15 | # break-collection-expressions = fit-or-vertical 16 | # break-colon = after 17 | # break-fun-decl = wrap 18 | # break-fun-sig = wrap 19 | break-infix = fit-or-vertical 20 | # break-infix-before-func = false 21 | # break-separators = after 22 | # break-sequences = true 23 | # break-string-literals = auto 24 | # break-struct = force 25 | cases-exp-indent = 2 26 | # cases-matching-exp-indent = normal 27 | disambiguate-non-breaking-match = true 28 | doc-comments = before 29 | # doc-comments-padding = 2 30 | # doc-comments-tag-only = default 31 | # dock-collection-brackets = true 32 | exp-grouping = preserve 33 | # 
extension-indent = 2 34 | field-space = tight-decl 35 | function-indent = 0 36 | # function-indent-nested = never 37 | if-then-else = keyword-first 38 | # indent-after-in = 0 39 | indicate-multiline-delimiters = space 40 | indicate-nested-or-patterns = space 41 | # infix-precedence = indent 42 | # leading-nested-match-parens = false 43 | let-and = sparse 44 | # let-binding-indent = 2 45 | # let-binding-spacing = compact 46 | # let-module = compact 47 | # line-endings = lf 48 | margin = 106 49 | # match-indent = 0 50 | # match-indent-nested = never 51 | max-indent = 2 52 | module-item-spacing = sparse 53 | # nested-match = wrap 54 | parens-ite = true 55 | parens-tuple = multi-line-only 56 | # parens-tuple-patterns = multi-line-only 57 | # parse-docstrings = false 58 | # parse-toplevel-phrases = false 59 | # sequence-blank-line = preserve-one 60 | # sequence-style = terminator 61 | single-case = sparse 62 | # space-around-arrays = true 63 | # space-around-lists = true 64 | # space-around-records = true 65 | # space-around-variants = true 66 | # stritem-extension-indent = 0 67 | type-decl = sparse 68 | # type-decl-indent = 2 69 | # wrap-comments = false 70 | # wrap-fun-args = true 71 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Simon Grondin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Tdigest 2 | ======= 3 | 4 | OCaml implementation of the T-Digest algorithm. 5 | 6 | ```ocaml 7 | let td = 8 | Tdigest.create () 9 | |> Tdigest.add_list [ 10.0; 11.0; 12.0; 13.0 ] 10 | in 11 | 12 | Tdigest.percentiles td [ 0.; 0.25; 0.5; 0.75; 1. ] 13 | (* [ Some 10; Some 10.5; Some 11.5; Some 12.5; Some 13 ] *) 14 | 15 | Tdigest.p_ranks td [ 9.; 10.; 11.; 12.; 13.; 14. ] 16 | (* [ Some 0; Some 0.125; Some 0.375; Some 0.625; Some 0.875; Some 1 ] *) 17 | ``` 18 | 19 | The T-Digest is a data structure and algorithm for constructing an approximate distribution for a collection of real numbers presented as a stream. 20 | 21 | The T-Digest can estimate percentiles or quantiles extremely accurately even at the tails, while using a fraction of the space of the original data. 22 | 23 | A median of medians is not equal to the median of the whole dataset. Percentiles are critical measures that are expensive to compute due to their requirement of having the entire **sorted** dataset present in one place. These downsides are addressed by using the T-Digest. 24 | 25 | A T-Digest is concatenable, making it a good fit for distributed systems. 
The internal state of a T-Digest can be exported as a binary string, and the concatenation of any number of those strings can then be imported to form a new T-Digest. 26 | 27 | ```ocaml 28 | let combined = Tdigest.merge [ td1; td2; td3 ] in 29 | ``` 30 | 31 | A T-Digest's state can be stored in a database `VARCHAR`/`TEXT` column and multiple such states can be merged by concatenating strings: 32 | ```sql 33 | -- Combine multiple states in the database 34 | SELECT 35 | STRING_AGG(M.tdigest_state) AS concat_state 36 | FROM my_table AS M 37 | ``` 38 | ```ocaml 39 | (* Then load this combined state into a single T-Digest *) 40 | let combined = Tdigest.of_string concat_state in 41 | ``` 42 | 43 | Links: 44 | - [A simple overview of the T-Digest](https://dataorigami.net/blogs/napkin-folding/19055451-percentile-and-quantile-estimation-of-big-data-the-t-digest) 45 | - [A walkthrough of the algorithm by its creator](https://mapr.com/blog/better-anomaly-detection-t-digest-whiteboard-walkthrough/) 46 | - [The white paper](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 47 | 48 | This library started off as a port of [Will Welch's JavaScript implementation](https://github.com/welch/tdigest), down to the unit tests. However, some modifications have been made to adapt it to OCaml, the most important one being immutability. As such, almost every function in the `Tdigest` module returns a new `Tdigest.t`, including "reading" ones since they may trigger intermediate computations worth caching. 49 | 50 | ## Usage 51 | 52 | The API is well documented [here](https://github.com/SGrondin/tdigest/blob/master/src/tdigest.mli). 53 | 54 | ```sh 55 | opam install tdigest 56 | ``` 57 | 58 | ### Marshal 59 | 60 | The `Tdigest.t` type cannot be marshalled. 61 | 62 | Use the functions in `Tdigest.Marshallable` if your application requires marshalling a T-Digest data structure. Note that `Tdigest.Marshallable.t` is approximately 5 times slower than `Tdigest.t`. 
63 | 64 | ## Performance 65 | 66 | On an ancient 2015 MacBook Pro, this implementation can incorporate 1,000,000 random floating points in just 770ms. 67 | 68 | Exporting and importing state (`to_string`/`of_string`) is cheap. 69 | -------------------------------------------------------------------------------- /bin/bench.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | 3 | let print_endline = Stdlib.print_endline 4 | 5 | let sprintf = Printf.sprintf 6 | 7 | let info_to_string x = 8 | let open Tdigest in 9 | sprintf {s|----- 10 | Count: %d 11 | Size: %d 12 | # Cumulates: %d 13 | # Compressions: %d 14 | # Auto Compressions: %d 15 | -----|s} 16 | x.count x.size x.cumulates_count x.compress_count x.auto_compress_count 17 | 18 | let snapshot sf t0 t1 = 19 | print_endline 20 | (sprintf sf Float.((t1 - t0) * 1_000_000. |> round_nearest |> to_string_hum ~delimiter:',')) 21 | 22 | module type S = module type of Tdigest.M 23 | 24 | let test (module M : S) = 25 | let arr = Array.init 2_000_000 ~f:(fun _i -> Random.float 1.) 
in 26 | let td = M.create () in 27 | 28 | let t0 = Unix.gettimeofday () in 29 | let td = Array.fold arr ~init:td ~f:(fun acc data -> M.add acc ~data) in 30 | 31 | let t1 = Unix.gettimeofday () in 32 | let info1 = M.info td in 33 | let td = M.compress td in 34 | 35 | let t2 = Unix.gettimeofday () in 36 | let info2 = M.info td in 37 | 38 | snapshot "Add 2_000_000: %s us" t0 t1; 39 | print_endline (info_to_string info1); 40 | snapshot "Compress: %s us" t1 t2; 41 | print_endline (info_to_string info2); 42 | 43 | let t3 = Unix.gettimeofday () in 44 | let _td, str = M.to_string td in 45 | let t4 = Unix.gettimeofday () in 46 | print_endline (sprintf "Serialized into %d bytes" (String.length str)); 47 | snapshot "Serialization: %s us" t3 t4; 48 | 49 | let t5 = Unix.gettimeofday () in 50 | let td = M.of_string (sprintf "%s%s" str str) in 51 | let t6 = Unix.gettimeofday () in 52 | snapshot "Parsing: %s us" t5 t6; 53 | let info3 = M.info td in 54 | print_endline (info_to_string info3) 55 | 56 | let () = test (module Tdigest) 57 | 58 | let () = test (module Tdigest.Marshallable) 59 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name bench) 3 | (libraries 4 | tdigest 5 | 6 | base 7 | unix 8 | ) 9 | 10 | (modes exe) 11 | (ocamlopt_flags -O3 -unbox-closures) 12 | ) 13 | 14 | (env 15 | (dev 16 | (flags (:standard -warn-error -A)))) 17 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.9) 2 | (name tdigest) 3 | (version 2.2.1) 4 | -------------------------------------------------------------------------------- /src/cmap.ml: -------------------------------------------------------------------------------- 1 | (* Code adapted from 
https://github.com/janestreet/core/blob/e70fdcdaa308dffb1b5bb8bb38acf46c73b40161/core/src/map.ml and map_intf.ml 2 | 3 | The MIT License 4 | 5 | Copyright (c) 2008--2023 Jane Street Group, LLC opensource-contacts@janestreet.com 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 12 | *) 13 | open! Base 14 | module Tree0 = Map.Using_comparator.Tree 15 | 16 | module type Creators_generic = sig 17 | include Map.Creators_generic 18 | 19 | val of_hashtbl_exn : ('k, 'cmp, ('k key, 'v) Hashtbl.t -> ('k, 'v, 'cmp) t) create_options 20 | 21 | (** Never requires a comparator because it can get one from the input [Set.t]. 
*) 22 | val of_key_set : ('k key, 'cmp cmp) Set.t -> f:(('k key -> 'v)[@local]) -> ('k, 'v, 'cmp) t 23 | end 24 | 25 | type ('k, 'cmp) comparator = (module Comparator.S with type t = 'k and type comparator_witness = 'cmp) 26 | 27 | let to_comparator (type k cmp) ((module M) : (k, cmp) Comparator.Module.t) = M.comparator 28 | 29 | let of_comparator (type k cmp) comparator : (k, cmp) Comparator.Module.t = 30 | ( module struct 31 | type t = k 32 | 33 | type comparator_witness = cmp 34 | 35 | let comparator = comparator 36 | end ) 37 | 38 | module Using_comparator = struct 39 | include Map.Using_comparator 40 | 41 | let of_hashtbl_exn ~comparator hashtbl = 42 | match of_iteri ~comparator ~iteri:(Hashtbl.iteri hashtbl) with 43 | | `Ok map -> map 44 | | `Duplicate_key key -> 45 | Error.create "Map.of_hashtbl_exn: duplicate key" key comparator.sexp_of_t |> Error.raise 46 | 47 | let tree_of_hashtbl_exn ~comparator hashtbl = to_tree (of_hashtbl_exn ~comparator hashtbl) 48 | 49 | let key_set ~comparator t = 50 | Set.Using_comparator.of_sorted_array_unchecked ~comparator (List.to_array (keys t)) 51 | 52 | let key_set_of_tree ~comparator t = key_set ~comparator (of_tree ~comparator t) 53 | 54 | let of_key_set key_set ~f = 55 | of_sorted_array_unchecked ~comparator:(Set.comparator key_set) 56 | (Array.map (Set.to_array key_set) ~f:(fun key -> key, f key)) 57 | 58 | let tree_of_key_set key_set ~f = to_tree (of_key_set key_set ~f) 59 | end 60 | 61 | module Accessors = struct 62 | include ( 63 | Map.Using_comparator : 64 | Map.Accessors_generic 65 | with type ('a, 'b, 'c) access_options := ('a, 'b, 'c) Map.Without_comparator.t 66 | with type ('a, 'b, 'c) t := ('a, 'b, 'c) Map.t 67 | with type ('a, 'b, 'c) tree := ('a, 'b, 'c) Tree0.t 68 | with type 'k key := 'k 69 | with type 'c cmp := 'c ) 70 | 71 | let key_set t = Using_comparator.key_set t ~comparator:(Using_comparator.comparator t) 72 | end 73 | 74 | let key_set t = Using_comparator.key_set 
~comparator:(Using_comparator.comparator t) t 75 | 76 | let of_key_set = Using_comparator.of_key_set 77 | 78 | let hash_fold_direct = Using_comparator.hash_fold_direct 79 | 80 | let comparator = Using_comparator.comparator 81 | 82 | let comparator_s = Map.comparator_s 83 | 84 | type 'k key = 'k 85 | 86 | type 'c cmp = 'c 87 | 88 | include ( 89 | struct 90 | include Map 91 | 92 | let of_tree m = Map.Using_comparator.of_tree ~comparator:(to_comparator m) 93 | 94 | let to_tree = Map.Using_comparator.to_tree 95 | end : 96 | sig 97 | type ('a, 'b, 'c) t = ('a, 'b, 'c) Map.t 98 | 99 | include 100 | Map.Creators_and_accessors_generic 101 | with type ('a, 'b, 'c) create_options := ('a, 'b, 'c) Map.With_first_class_module.t 102 | with type ('a, 'b, 'c) access_options := ('a, 'b, 'c) Map.Without_comparator.t 103 | with type ('a, 'b, 'c) t := ('a, 'b, 'c) t 104 | with type ('a, 'b, 'c) tree := ('a, 'b, 'c) Tree0.t 105 | with type 'k key := 'k key 106 | with type 'c cmp := 'c cmp 107 | end ) 108 | 109 | module Empty_without_value_restriction = Using_comparator.Empty_without_value_restriction 110 | 111 | let merge_skewed = Map.merge_skewed 112 | 113 | let of_hashtbl_exn m t = Using_comparator.of_hashtbl_exn ~comparator:(to_comparator m) t 114 | 115 | module Creators (Key : Comparator.S1) : sig 116 | type ('a, 'b, 'c) t_ = ('a Key.t, 'b, Key.comparator_witness) t 117 | 118 | type ('a, 'b, 'c) tree = ('a, 'b, Key.comparator_witness) Tree0.t 119 | 120 | val t_of_sexp : (Sexp.t -> 'a Key.t) -> (Sexp.t -> 'b) -> Sexp.t -> ('a, 'b, _) t_ 121 | 122 | include 123 | Creators_generic 124 | with type ('a, 'b, 'c) t := ('a, 'b, 'c) t_ 125 | with type ('a, 'b, 'c) tree := ('a, 'b, 'c) tree 126 | with type 'a key := 'a Key.t 127 | with type 'a cmp := Key.comparator_witness 128 | with type ('a, 'b, 'c) create_options := ('a, 'b, 'c) Map.Without_comparator.t 129 | with type ('a, 'b, 'c) access_options := ('a, 'b, 'c) Map.Without_comparator.t 130 | end = struct 131 | let comparator = 
Key.comparator 132 | 133 | type ('a, 'b, 'c) t_ = ('a Key.t, 'b, Key.comparator_witness) t 134 | 135 | type ('a, 'b, 'c) tree = ('a, 'b, Key.comparator_witness) Tree0.t 136 | 137 | module M_empty = Empty_without_value_restriction (Key) 138 | 139 | let empty = M_empty.empty 140 | 141 | let of_tree tree = Using_comparator.of_tree ~comparator tree 142 | 143 | let singleton k v = Using_comparator.singleton ~comparator k v 144 | 145 | let of_sorted_array_unchecked array = Using_comparator.of_sorted_array_unchecked ~comparator array 146 | 147 | let of_sorted_array array = Using_comparator.of_sorted_array ~comparator array 148 | 149 | let of_increasing_iterator_unchecked ~len ~f = 150 | Using_comparator.of_increasing_iterator_unchecked ~comparator ~len ~f 151 | 152 | let of_increasing_sequence seq = Using_comparator.of_increasing_sequence ~comparator seq 153 | 154 | let of_sequence seq = Using_comparator.of_sequence ~comparator seq 155 | 156 | let of_sequence_or_error seq = Using_comparator.of_sequence_or_error ~comparator seq 157 | 158 | let of_sequence_exn seq = Using_comparator.of_sequence_exn ~comparator seq 159 | 160 | let of_sequence_multi seq = Using_comparator.of_sequence_multi ~comparator seq 161 | 162 | let of_sequence_fold seq ~init ~f = Using_comparator.of_sequence_fold ~comparator seq ~init ~f 163 | 164 | let of_sequence_reduce seq ~f = Using_comparator.of_sequence_reduce ~comparator seq ~f 165 | 166 | let of_list_with_key list ~get_key = Using_comparator.of_list_with_key ~comparator list ~get_key 167 | 168 | let of_list_with_key_or_error list ~get_key = 169 | Using_comparator.of_list_with_key_or_error ~comparator list ~get_key 170 | 171 | let of_list_with_key_exn list ~get_key = Using_comparator.of_list_with_key_exn ~comparator list ~get_key 172 | 173 | let of_list_with_key_multi list ~get_key = 174 | Using_comparator.of_list_with_key_multi ~comparator list ~get_key 175 | 176 | let of_alist alist = Using_comparator.of_alist ~comparator alist 177 | 178 | let 
of_alist_or_error alist = Using_comparator.of_alist_or_error ~comparator alist 179 | 180 | let of_alist_exn alist = Using_comparator.of_alist_exn ~comparator alist 181 | 182 | let of_hashtbl_exn hashtbl = Using_comparator.of_hashtbl_exn ~comparator hashtbl 183 | 184 | let of_alist_multi alist = Using_comparator.of_alist_multi ~comparator alist 185 | 186 | let of_alist_fold alist ~init ~f = Using_comparator.of_alist_fold ~comparator alist ~init ~f 187 | 188 | let of_alist_reduce alist ~f = Using_comparator.of_alist_reduce ~comparator alist ~f 189 | 190 | let of_iteri ~iteri = Using_comparator.of_iteri ~comparator ~iteri 191 | 192 | let of_iteri_exn ~iteri = Using_comparator.of_iteri_exn ~comparator ~iteri 193 | 194 | let t_of_sexp k_of_sexp v_of_sexp sexp = 195 | Using_comparator.t_of_sexp_direct ~comparator k_of_sexp v_of_sexp sexp 196 | 197 | let of_key_set key_set ~f = Using_comparator.of_key_set key_set ~f 198 | 199 | let map_keys t ~f = Using_comparator.map_keys ~comparator t ~f 200 | 201 | let map_keys_exn t ~f = Using_comparator.map_keys_exn ~comparator t ~f 202 | 203 | let transpose_keys t = Using_comparator.transpose_keys ~comparator t 204 | 205 | let of_list_with_key_fold list ~get_key ~init ~f = 206 | Using_comparator.of_list_with_key_fold ~comparator list ~get_key ~init ~f 207 | 208 | let of_list_with_key_reduce l ~get_key ~f = 209 | Using_comparator.of_list_with_key_reduce ~comparator l ~get_key ~f 210 | end 211 | 212 | module Make_tree_S1 (Key : Comparator.S1) = struct 213 | open Tree0 214 | 215 | let comparator = Key.comparator 216 | 217 | let sexp_of_t = sexp_of_t 218 | 219 | let t_of_sexp a b c = t_of_sexp_direct a b c ~comparator 220 | 221 | let empty = empty_without_value_restriction 222 | 223 | let of_tree tree = tree 224 | 225 | let singleton a = singleton a ~comparator 226 | 227 | let of_sorted_array_unchecked a = of_sorted_array_unchecked a ~comparator 228 | 229 | let of_sorted_array a = of_sorted_array a ~comparator 230 | 231 | let 
of_increasing_iterator_unchecked ~len ~f = of_increasing_iterator_unchecked ~len ~f ~comparator 232 | 233 | let of_increasing_sequence seq = of_increasing_sequence ~comparator seq 234 | 235 | let of_sequence s = of_sequence s ~comparator 236 | 237 | let of_sequence_or_error s = of_sequence_or_error s ~comparator 238 | 239 | let of_sequence_exn s = of_sequence_exn s ~comparator 240 | 241 | let of_sequence_multi s = of_sequence_multi s ~comparator 242 | 243 | let of_sequence_fold s ~init ~f = of_sequence_fold s ~init ~f ~comparator 244 | 245 | let of_sequence_reduce s ~f = of_sequence_reduce s ~f ~comparator 246 | 247 | let of_alist a = of_alist a ~comparator 248 | 249 | let of_alist_or_error a = of_alist_or_error a ~comparator 250 | 251 | let of_alist_exn a = of_alist_exn a ~comparator 252 | 253 | let of_hashtbl_exn a = Using_comparator.tree_of_hashtbl_exn a ~comparator 254 | 255 | let of_alist_multi a = of_alist_multi a ~comparator 256 | 257 | let of_alist_fold a ~init ~f = of_alist_fold a ~init ~f ~comparator 258 | 259 | let of_alist_reduce a ~f = of_alist_reduce a ~f ~comparator 260 | 261 | let of_list_with_key l ~get_key = of_list_with_key l ~get_key ~comparator 262 | 263 | let of_list_with_key_or_error l ~get_key = of_list_with_key_or_error l ~get_key ~comparator 264 | 265 | let of_list_with_key_exn l ~get_key = of_list_with_key_exn l ~get_key ~comparator 266 | 267 | let of_list_with_key_multi l ~get_key = of_list_with_key_multi l ~get_key ~comparator 268 | 269 | let of_iteri ~iteri = of_iteri ~iteri ~comparator 270 | 271 | let of_iteri_exn ~iteri = of_iteri_exn ~iteri ~comparator 272 | 273 | let of_key_set = Using_comparator.tree_of_key_set 274 | 275 | let to_tree t = t 276 | 277 | let invariants a = invariants a ~comparator 278 | 279 | let is_empty a = is_empty a 280 | 281 | let length a = length a 282 | 283 | let set a ~key ~data = set a ~key ~data ~comparator 284 | 285 | let add a ~key ~data = add a ~key ~data ~comparator 286 | 287 | let add_exn a ~key 
~data = add_exn a ~key ~data ~comparator 288 | 289 | let add_multi a ~key ~data = add_multi a ~key ~data ~comparator 290 | 291 | let remove_multi a b = remove_multi a b ~comparator 292 | 293 | let find_multi a b = find_multi a b ~comparator 294 | 295 | let change a b ~f = change a b ~f ~comparator 296 | 297 | let update a b ~f = update a b ~f ~comparator 298 | 299 | let find_exn a b = find_exn a b ~comparator 300 | 301 | let find a b = find a b ~comparator 302 | 303 | let remove a b = remove a b ~comparator 304 | 305 | let mem a b = mem a b ~comparator 306 | 307 | let iter_keys = iter_keys 308 | 309 | let iter = iter 310 | 311 | let iteri = iteri 312 | 313 | let iteri_until = iteri_until 314 | 315 | let iter2 a b ~f = iter2 a b ~f ~comparator 316 | 317 | let map = map 318 | 319 | let mapi = mapi 320 | 321 | let fold = fold 322 | 323 | let fold_until = fold_until 324 | 325 | let fold_right = fold_right 326 | 327 | let fold2 a b ~init ~f = fold2 a b ~init ~f ~comparator 328 | 329 | let filter_keys a ~f = filter_keys a ~f 330 | 331 | let filter a ~f = filter a ~f 332 | 333 | let filteri a ~f = filteri a ~f 334 | 335 | let filter_map a ~f = filter_map a ~f 336 | 337 | let filter_mapi a ~f = filter_mapi a ~f 338 | 339 | let partition_mapi t ~f = partition_mapi t ~f 340 | 341 | let partition_map t ~f = partition_map t ~f 342 | 343 | let partitioni_tf t ~f = partitioni_tf t ~f 344 | 345 | let partition_tf t ~f = partition_tf t ~f 346 | 347 | let combine_errors t = combine_errors t ~comparator 348 | 349 | let compare_direct a b c = compare_direct a b c ~comparator 350 | 351 | let equal a b c = equal a b c ~comparator 352 | 353 | let keys = keys 354 | 355 | let data = data 356 | 357 | let to_alist = to_alist 358 | 359 | let symmetric_diff a b ~data_equal = symmetric_diff a b ~data_equal ~comparator 360 | 361 | let fold_symmetric_diff a b ~data_equal ~init ~f = 362 | fold_symmetric_diff a b ~data_equal ~f ~init ~comparator 363 | 364 | let merge a b ~f = merge a b ~f 
~comparator 365 | 366 | let merge_skewed a b ~combine = merge_skewed a b ~combine ~comparator 367 | 368 | let min_elt = min_elt 369 | 370 | let min_elt_exn = min_elt_exn 371 | 372 | let max_elt = max_elt 373 | 374 | let max_elt_exn = max_elt_exn 375 | 376 | let for_all = for_all 377 | 378 | let for_alli = for_alli 379 | 380 | let exists = exists 381 | 382 | let existsi = existsi 383 | 384 | let count = count 385 | 386 | let counti = counti 387 | 388 | let split a b = split a b ~comparator 389 | 390 | let split_le_gt a b = split_le_gt a b ~comparator 391 | 392 | let split_lt_ge a b = split_lt_ge a b ~comparator 393 | 394 | let append ~lower_part ~upper_part = append ~lower_part ~upper_part ~comparator 395 | 396 | let subrange t ~lower_bound ~upper_bound = subrange t ~lower_bound ~upper_bound ~comparator 397 | 398 | let fold_range_inclusive t ~min ~max ~init ~f = fold_range_inclusive t ~min ~max ~init ~f ~comparator 399 | 400 | let range_to_alist t ~min ~max = range_to_alist t ~min ~max ~comparator 401 | 402 | let closest_key a b c = closest_key a b c ~comparator 403 | 404 | let nth = nth 405 | 406 | let nth_exn = nth_exn 407 | 408 | let rank a b = rank a b ~comparator 409 | 410 | let to_sequence ?order ?keys_greater_or_equal_to ?keys_less_or_equal_to t = 411 | to_sequence ~comparator ?order ?keys_greater_or_equal_to ?keys_less_or_equal_to t 412 | 413 | let binary_search t ~compare how v = binary_search ~comparator t ~compare how v 414 | 415 | let binary_search_segmented t ~segment_of how = binary_search_segmented ~comparator t ~segment_of how 416 | 417 | let binary_search_subrange t ~compare ~lower_bound ~upper_bound = 418 | binary_search_subrange ~comparator t ~compare ~lower_bound ~upper_bound 419 | 420 | module Make_applicative_traversals (A : Applicative.Lazy_applicative) = struct 421 | module Traversals = Make_applicative_traversals (A) 422 | 423 | let mapi = Traversals.mapi 424 | 425 | let filter_mapi = Traversals.filter_mapi 426 | end 427 | 428 | let key_set 
t = Using_comparator.key_set_of_tree ~comparator t 429 | 430 | let map_keys t ~f = map_keys t ~f ~comparator 431 | 432 | let map_keys_exn t ~f = map_keys_exn t ~f ~comparator 433 | 434 | let transpose_keys t = transpose_keys ~comparator ~comparator t 435 | end 436 | 437 | module Make_tree_plain (Key : sig 438 | type t [@@deriving sexp_of] 439 | 440 | include Comparator.S with type t := t 441 | end) = 442 | struct 443 | module Key_S1 = Comparator.S_to_S1 (Key) 444 | include Make_tree_S1 (Key_S1) 445 | 446 | type +'v t = (Key.t, 'v, Key.comparator_witness) Tree0.t 447 | 448 | let sexp_of_t sexp_of_v t = sexp_of_t Key.sexp_of_t sexp_of_v [%sexp_of: _] t 449 | 450 | module Provide_of_sexp 451 | (X : sig 452 | type t [@@deriving of_sexp] 453 | end 454 | with type t := Key.t) = 455 | struct 456 | let t_of_sexp v_of_sexp sexp = t_of_sexp X.t_of_sexp v_of_sexp sexp 457 | end 458 | end 459 | 460 | module Make_tree (Key : sig 461 | type t [@@deriving sexp] 462 | 463 | include Comparator.S with type t := t 464 | end) = 465 | struct 466 | include Make_tree_plain (Key) 467 | include Provide_of_sexp (Key) 468 | end 469 | 470 | module type Key_plain = sig 471 | type t [@@deriving sexp_of] 472 | 473 | val compare : t -> t -> int 474 | end 475 | 476 | module type Key = sig 477 | type t [@@deriving sexp] 478 | 479 | val compare : t -> t -> int 480 | end 481 | 482 | module Make_plain_using_comparator (Key : sig 483 | type t [@@deriving sexp_of] 484 | 485 | include Comparator.S with type t := t 486 | end) = 487 | struct 488 | module Key = Key 489 | module Key_S1 = Comparator.S_to_S1 (Key) 490 | include Creators (Key_S1) 491 | 492 | type key = Key.t 493 | 494 | type ('a, 'b, 'c) map = ('a, 'b, 'c) t 495 | 496 | type 'v t = (key, 'v, Key.comparator_witness) map 497 | 498 | include Accessors 499 | 500 | let compare cmpv t1 t2 = compare_direct cmpv t1 t2 501 | 502 | let sexp_of_t sexp_of_v t = Using_comparator.sexp_of_t Key.sexp_of_t sexp_of_v [%sexp_of: _] t 503 | 504 | module 
Provide_of_sexp 505 | (Key : sig 506 | type t [@@deriving of_sexp] 507 | end 508 | with type t := Key.t) = 509 | struct 510 | let t_of_sexp v_of_sexp sexp = t_of_sexp Key.t_of_sexp v_of_sexp sexp 511 | end 512 | 513 | module Provide_hash (Key' : Hasher.S with type t := Key.t) = struct 514 | let hash_fold_t (type a) hash_fold_data state (t : a t) = 515 | Using_comparator.hash_fold_direct Key'.hash_fold_t hash_fold_data state t 516 | end 517 | end 518 | 519 | module Make_plain (Key : Key_plain) = Make_plain_using_comparator (struct 520 | include Key 521 | include Comparator.Make (Key) 522 | end) 523 | 524 | module Make_using_comparator (Key_sexp : sig 525 | type t [@@deriving sexp] 526 | 527 | include Comparator.S with type t := t 528 | end) = 529 | struct 530 | include Make_plain_using_comparator (Key_sexp) 531 | module Key = Key_sexp 532 | include Provide_of_sexp (Key) 533 | 534 | module _ = struct 535 | include Tree0 536 | include Provide_of_sexp (Key) 537 | end 538 | end 539 | 540 | module Make (Key : Key) = Make_using_comparator (struct 541 | include Key 542 | include Comparator.Make (Key) 543 | end) 544 | 545 | module Tree = struct 546 | include Tree0 547 | 548 | let of_hashtbl_exn = Using_comparator.tree_of_hashtbl_exn 549 | 550 | let key_set = Using_comparator.key_set_of_tree 551 | 552 | let of_key_set = Using_comparator.tree_of_key_set 553 | end 554 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name tdigest) 3 | (public_name tdigest) 4 | (libraries 5 | base 6 | ) 7 | (preprocess (pps 8 | ppx_sexp_conv 9 | )) 10 | ) 11 | 12 | (env 13 | (dev 14 | (flags (:standard -warn-error -A)))) 15 | -------------------------------------------------------------------------------- /src/tdigest.ml: -------------------------------------------------------------------------------- 1 | open! 
Base 2 | module Map = Cmap 3 | open Option.Monad_infix 4 | 5 | type delta = 6 | | Merging of float 7 | | Discrete 8 | [@@deriving sexp] 9 | 10 | type k = 11 | | Manual 12 | | Automatic of float 13 | [@@deriving sexp] 14 | 15 | type cx = 16 | | Always 17 | | Growth of float 18 | [@@deriving sexp] 19 | 20 | let default_delta = Merging 0.01 21 | 22 | let default_k = Automatic 25.0 23 | 24 | let default_cx = Growth 1.1 25 | 26 | type settings = { 27 | delta: delta; 28 | k: k; 29 | cx: cx; 30 | k_delta: float option; 31 | } 32 | 33 | type centroid = { 34 | mean: float; 35 | cumn: float; 36 | mean_cumn: float; 37 | n: float; 38 | } 39 | 40 | let empty_centroid = { mean = 0.0; n = 0.0; cumn = 0.0; mean_cumn = 0.0 } 41 | 42 | type stats = { 43 | cumulates_count: int; 44 | compress_count: int; 45 | auto_compress_count: int; 46 | } 47 | [@@deriving sexp] 48 | 49 | let empty_stats = { cumulates_count = 0; compress_count = 0; auto_compress_count = 0 } 50 | 51 | type info = { 52 | count: int; 53 | size: int; 54 | cumulates_count: int; 55 | compress_count: int; 56 | auto_compress_count: int; 57 | } 58 | [@@deriving sexp] 59 | 60 | module type M = sig 61 | type 'a t 62 | 63 | val empty : 'a t 64 | 65 | module Map : sig 66 | val is_empty : 'a t -> bool 67 | 68 | val length : 'a t -> int 69 | 70 | val min_elt : 'a t -> (float * 'a) option 71 | 72 | val max_elt : 'a t -> (float * 'a) option 73 | 74 | val iter : 'a t -> f:('a -> unit) -> unit 75 | 76 | val map : 'a t -> f:('a -> 'b) -> 'b t 77 | 78 | val fold : 'a t -> init:'b -> f:(key:float -> data:'a -> 'b -> 'b) -> 'b 79 | 80 | val fold_right : 'a t -> init:'b -> f:(key:float -> data:'a -> 'b -> 'b) -> 'b 81 | 82 | val binary_search : 83 | 'a t -> 84 | compare:(key:float -> data:'a -> float -> int) -> 85 | Binary_searchable.Which_target_by_key.t -> 86 | float -> 87 | (float * 'a) option 88 | 89 | val add_exn : 'a t -> key:float -> data:'a -> 'a t 90 | 91 | val remove : 'a t -> float -> 'a t 92 | end 93 | end 94 | 95 | module Make 
(M : M) = struct 96 | open Float 97 | 98 | type t = { 99 | settings: settings; 100 | centroids: centroid M.t; 101 | mutable min: centroid option; 102 | mutable max: centroid option; 103 | n: float; 104 | last_cumulate: float; 105 | stats: stats; 106 | } 107 | 108 | let get_min = function 109 | | { min = Some _ as x; _ } -> x 110 | | { min = None; n = 0.0; _ } -> None 111 | | { min = None; _ } as td -> 112 | let min = M.Map.min_elt td.centroids >>| snd in 113 | td.min <- min; 114 | min 115 | 116 | let get_max = function 117 | | { max = Some _ as x; _ } -> x 118 | | { max = None; n = 0.0; _ } -> None 119 | | { max = None; _ } as td -> 120 | let max = M.Map.max_elt td.centroids >>| snd in 121 | td.max <- max; 122 | max 123 | 124 | let get_k_delta = function 125 | | Automatic k, Merging delta -> Some (k / delta) 126 | | _ -> None 127 | 128 | let create ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) () = 129 | let k = 130 | match k with 131 | | Manual -> k 132 | | Automatic x when is_positive x -> k 133 | | Automatic 0.0 -> 134 | invalid_arg 135 | "TDigest.create: k parameter cannot be zero, set to Tdigest.Manual to disable automatic \ 136 | compression." 137 | | Automatic x -> Printf.invalid_argf "TDigest k parameter must be positive, but was %f" x () 138 | in 139 | let cx = 140 | match cx with 141 | | Always -> cx 142 | | Growth x when is_positive x -> cx 143 | | Growth 0.0 -> 144 | invalid_arg 145 | "TDigest.create: cx parameter cannot be zero, set to Tdigest.Always to disable caching of \ 146 | cumulative totals." 
147 | | Growth x -> Printf.invalid_argf "TDigest.create: cx parameter must be positive, but was %f" x () 148 | in 149 | { 150 | settings = { delta; k; cx; k_delta = get_k_delta (k, delta) }; 151 | centroids = M.empty; 152 | min = None; 153 | max = None; 154 | n = 0.0; 155 | last_cumulate = 0.0; 156 | stats = empty_stats; 157 | } 158 | 159 | let is_empty { centroids; _ } = M.Map.is_empty centroids 160 | 161 | let info { centroids; n; stats; _ } = 162 | { 163 | count = to_int n; 164 | size = M.Map.length centroids; 165 | cumulates_count = stats.cumulates_count; 166 | compress_count = stats.compress_count; 167 | auto_compress_count = stats.auto_compress_count; 168 | } 169 | 170 | let find_nearest td mean = 171 | let gt = ref None in 172 | let lte = 173 | M.Map.binary_search td.centroids `Last_less_than_or_equal_to mean 174 | ~compare:(fun ~key ~data against -> 175 | let x = compare key against in 176 | if Int.is_positive x then gt := Some (key, data); 177 | x ) 178 | in 179 | match lte with 180 | | Some (k, v) when mean = k -> Some v 181 | | Some (k1, v1) -> ( 182 | match !gt with 183 | | None -> None 184 | | Some (k2, _v2) when mean - k1 < k2 - mean -> Some v1 185 | | Some (_k2, v2) -> Some v2 ) 186 | | None -> None 187 | 188 | let use_cache = function 189 | | { n; last_cumulate; settings = { cx = Growth cx; _ }; _ } when cx > n / last_cumulate -> true 190 | | _ -> false 191 | 192 | let cumulate td ~exact = 193 | if td.n = td.last_cumulate || ((not exact) && use_cache td) 194 | then td 195 | else ( 196 | let cumn = ref 0.0 in 197 | let centroids = 198 | M.Map.map td.centroids ~f:(fun data -> 199 | let updated = { data with mean_cumn = !cumn + (data.n / 2.); cumn = !cumn + data.n } in 200 | cumn := updated.cumn; 201 | updated ) 202 | in 203 | { 204 | td with 205 | centroids; 206 | min = None; 207 | max = None; 208 | n = !cumn; 209 | last_cumulate = !cumn; 210 | stats = { td.stats with cumulates_count = Int.succ td.stats.cumulates_count }; 211 | } ) 212 | 213 | let 
new_bounds ({ mean; _ } as added) = function 214 | | { n = 0.0; min = None; max = None; _ } -> Some added, Some added 215 | | { min = Some existing; max; _ } when mean < existing.mean -> Some added, max 216 | | { min; max = Some existing; _ } when mean > existing.mean -> min, Some added 217 | | { min; max; _ } -> min, max 218 | 219 | let new_centroid td ~mean ~n ~cumn = 220 | let data = { mean; cumn; n; mean_cumn = n / 2. } in 221 | let centroids = M.Map.add_exn td.centroids ~key:mean ~data in 222 | let min, max = new_bounds data td in 223 | { td with centroids; min; max; n = td.n + n } 224 | 225 | let add_weight td nearest ~mean ~n = 226 | let updated = 227 | { 228 | mean = 229 | ( if nearest.mean = mean 230 | then nearest.mean 231 | else nearest.mean + (n * (mean - nearest.mean) / (nearest.n + n)) ); 232 | cumn = nearest.cumn + n; 233 | mean_cumn = nearest.mean_cumn + (n / 2.); 234 | n = nearest.n + n; 235 | } 236 | in 237 | let centroids = 238 | M.Map.remove td.centroids nearest.mean |> M.Map.add_exn ~key:updated.mean ~data:updated 239 | in 240 | { td with centroids; n = td.n + n; min = None; max = None } 241 | 242 | let internal_digest td ~n ~mean = 243 | let nearest_is_boundary boundary nearest = 244 | Option.value_map boundary ~default:false ~f:(fun { mean; _ } -> mean = nearest.mean) 245 | in 246 | let td = 247 | match find_nearest td mean, td.settings.delta with 248 | | Some nearest, _ when nearest.mean = mean -> add_weight td nearest ~mean ~n 249 | | Some nearest, _ when nearest_is_boundary (get_min td) nearest -> 250 | new_centroid td ~mean ~n ~cumn:0.0 251 | | Some nearest, _ when nearest_is_boundary (get_max td) nearest -> 252 | new_centroid td ~mean ~n ~cumn:td.n 253 | | Some nearest, Discrete -> new_centroid td ~mean ~n ~cumn:nearest.cumn 254 | | Some nearest, Merging delta -> 255 | let p = nearest.mean_cumn / td.n in 256 | let max_n = round_down (4.0 * td.n * delta * p * (1.0 - p)) in 257 | if max_n - nearest.n >= n 258 | then add_weight td nearest 
~mean ~n 259 | else new_centroid td ~mean ~n ~cumn:nearest.cumn 260 | | None, _ -> new_centroid td ~mean ~n ~cumn:0.0 261 | in 262 | cumulate td ~exact:false 263 | 264 | let weights_of_td = function 265 | (* n is out of sync, must check centroids *) 266 | | { centroids; _ } when M.Map.is_empty centroids -> [||] 267 | | { centroids; _ } -> 268 | let arr = Array.create ~len:(M.Map.length centroids) empty_centroid in 269 | let _i = 270 | M.Map.fold centroids ~init:0 ~f:(fun ~key:_ ~data i -> 271 | arr.(i) <- data; 272 | Int.succ i ) 273 | in 274 | arr 275 | 276 | let weights_of_table table = 277 | let arr = Array.create ~len:(Hashtbl.length table) empty_centroid in 278 | let _i = 279 | Hashtbl.fold table ~init:0 ~f:(fun ~key:mean ~data:n i -> 280 | arr.(i) <- { empty_centroid with mean; n }; 281 | Int.succ i ) 282 | in 283 | arr 284 | 285 | let rebuild ~auto settings (stats : stats) arr = 286 | Array.permute arr; 287 | let blank = 288 | { 289 | settings; 290 | centroids = M.empty; 291 | min = None; 292 | max = None; 293 | n = 0.0; 294 | last_cumulate = 0.0; 295 | stats = 296 | { 297 | stats with 298 | compress_count = Int.succ stats.compress_count; 299 | auto_compress_count = (if auto then Int.succ else Fn.id) stats.auto_compress_count; 300 | }; 301 | } 302 | in 303 | let td = Array.fold arr ~init:blank ~f:(fun acc { mean; n; _ } -> internal_digest acc ~n ~mean) in 304 | cumulate td ~exact:true 305 | 306 | let digest ?(n = 1) td ~mean = 307 | let td = internal_digest td ~n:(of_int n) ~mean in 308 | match td.settings with 309 | | { k_delta = Some kd; _ } when M.Map.length td.centroids |> of_int > kd -> 310 | rebuild ~auto:true td.settings td.stats (weights_of_td td) 311 | | _ -> td 312 | 313 | let add ?(n = 1) ~data td = 314 | if Int.(n <= 0) then invalid_arg "Tdigest.add: n <= 0"; 315 | digest td ~n ~mean:data 316 | 317 | let add_list ?(n = 1) xs td = 318 | if Int.(n <= 0) then invalid_arg "Tdigest.add_list: n <= 0"; 319 | List.fold xs ~init:td ~f:(fun acc mean -> 
digest acc ~n ~mean) 320 | 321 | let compress ?delta td = 322 | match delta with 323 | | None -> rebuild ~auto:false td.settings td.stats (weights_of_td td) 324 | | Some delta -> 325 | let settings = td.settings in 326 | let updated = rebuild ~auto:false { td.settings with delta } td.stats (weights_of_td td) in 327 | { updated with settings } 328 | 329 | let to_string td = 330 | let buf = Bytes.create (M.Map.length td.centroids |> Int.( * ) 16) in 331 | let add_float pos ~data:f = 332 | let v = Int64.bits_of_float f in 333 | let rec loop pos = function 334 | | 8 -> pos 335 | | i -> 336 | Bytes.set buf pos Int64.(255L land shift_right v Int.(i * 8) |> to_int_exn |> Char.of_int_exn); 337 | (loop [@tailcall]) (Int.succ pos) (Int.succ i) 338 | in 339 | loop pos 0 340 | in 341 | let _pos = 342 | M.Map.fold td.centroids ~init:0 ~f:(fun ~key:_ ~data:{ mean; n; _ } pos -> 343 | add_float pos ~data:mean |> add_float ~data:n ) 344 | in 345 | td, Bytes.unsafe_to_string ~no_mutation_while_string_reachable:buf 346 | 347 | let parse_float str pos = 348 | let open Int64 in 349 | let next off = String.get str Int.(pos + off) |> Char.to_int |> of_int_exn in 350 | (String.get str pos |> Char.to_int |> of_int_exn) 351 | lor shift_left (next 1) 8 352 | lor shift_left (next 2) 16 353 | lor shift_left (next 3) 24 354 | lor shift_left (next 4) 32 355 | lor shift_left (next 5) 40 356 | lor shift_left (next 6) 48 357 | lor shift_left (next 7) 56 358 | |> float_of_bits 359 | 360 | let of_string ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) str = 361 | if Int.(String.length str % 16 <> 0) then invalid_arg "Tdigest.of_string: invalid string length"; 362 | let settings = { delta; k; cx; k_delta = get_k_delta (k, delta) } in 363 | let table = Hashtbl.create (module Float) in 364 | let rec loop = function 365 | | pos when Int.(pos = String.length str) -> () 366 | | pos -> 367 | let mean = parse_float str pos in 368 | let n = parse_float str Int.(pos + 8) in 369 | Hashtbl.update 
table mean ~f:(Option.value_map ~default:n ~f:(( + ) n)); 370 | (loop [@tailcall]) Int.(pos + 16) 371 | in 372 | loop 0; 373 | weights_of_table table |> rebuild ~auto:true settings empty_stats 374 | 375 | module Export = struct 376 | type td_t = t 377 | 378 | type settings = { 379 | delta: delta; 380 | k: k; 381 | cx: cx; 382 | } 383 | [@@deriving sexp] 384 | 385 | type t = { 386 | settings: settings; 387 | state: string; 388 | stats: stats; 389 | } 390 | [@@deriving sexp] 391 | 392 | let create ({ settings = { delta; k; cx; _ }; stats; _ } as td : td_t) = 393 | { settings = { delta; k; cx }; state = to_string td |> snd; stats } 394 | end 395 | 396 | let sexp_of_t td = Export.create td |> [%sexp_of: Export.t] 397 | 398 | let t_of_sexp sexp = 399 | let Export.{ settings = { delta; k; cx }; state; stats } = [%of_sexp: Export.t] sexp in 400 | { (of_string ~delta ~k ~cx state) with stats } 401 | 402 | let merge ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) tds = 403 | let settings = { delta; k; cx; k_delta = get_k_delta (k, delta) } in 404 | let table = Hashtbl.create (module Float) in 405 | List.iter tds ~f:(fun { centroids; _ } -> 406 | M.Map.iter centroids ~f:(fun { mean; n; _ } -> 407 | Hashtbl.update table mean ~f:(Option.value_map ~default:n ~f:(( + ) n)) ) ); 408 | weights_of_table table |> rebuild ~auto:true settings empty_stats 409 | 410 | type bounds = 411 | | Neither 412 | | Both of centroid * centroid 413 | | Equal of centroid 414 | | Lower of centroid 415 | | Upper of centroid 416 | 417 | let bounds td needle lens = 418 | let gt = ref None in 419 | let lte = 420 | M.Map.binary_search td.centroids `Last_less_than_or_equal_to needle 421 | ~compare:(fun ~key ~data against -> 422 | let x = compare (lens data) against in 423 | if Int.is_positive x then gt := Some (key, data); 424 | x ) 425 | in 426 | match lte with 427 | | Some (_k, v) when lens v = needle -> Equal v 428 | | Some (_k1, v1) -> ( 429 | match !gt with 430 | | Some (_k2, v2) -> Both 
(v1, v2) 431 | | None -> Lower v1 ) 432 | | None -> ( 433 | match get_min td with 434 | | Some v -> Upper v 435 | | None -> Neither ) 436 | 437 | let percentile td p = 438 | match td with 439 | | { n = 0.0; _ } -> td, None 440 | | td -> ( 441 | let td = cumulate td ~exact:true in 442 | let h = td.n * p in 443 | match bounds td h (fun { mean_cumn; _ } -> mean_cumn), td.settings.delta with 444 | | Lower x, _ 445 | |Upper x, _ 446 | |Equal x, _ -> 447 | td, Some x.mean 448 | | Both (lower, upper), Merging _ -> 449 | let num = 450 | lower.mean 451 | + ((h - lower.mean_cumn) * (upper.mean - lower.mean) / (upper.mean_cumn - lower.mean_cumn)) 452 | in 453 | td, Some num 454 | | Both (lower, _upper), Discrete when h <= lower.cumn -> td, Some lower.mean 455 | | Both (_lower, upper), Discrete -> td, Some upper.mean 456 | | Neither, _ -> td, None ) 457 | 458 | let percentiles td ps = List.fold_map ps ~init:td ~f:percentile 459 | 460 | let p_rank td p = 461 | match get_min td with 462 | | None -> td, None 463 | | Some v when p < v.mean -> td, Some 0.0 464 | | Some _ -> ( 465 | match get_max td with 466 | | None -> td, None 467 | | Some v when p > v.mean -> td, Some 1.0 468 | | Some _ -> ( 469 | let td = cumulate td ~exact:true in 470 | match bounds td p (fun { mean; _ } -> mean), td.settings.delta with 471 | | Both (lower, _), Discrete 472 | |Lower lower, Discrete 473 | |Equal lower, Discrete -> 474 | td, Some (lower.cumn / td.n) 475 | | Neither, Discrete 476 | |Upper _, Discrete -> 477 | td, None 478 | | Equal x, Merging _ -> td, Some (x.mean_cumn / td.n) 479 | | Both (lower, upper), Merging _ -> 480 | let num = 481 | lower.mean_cumn 482 | + ((p - lower.mean) * (upper.mean_cumn - lower.mean_cumn) / (upper.mean - lower.mean)) 483 | in 484 | td, Some (num / td.n) 485 | | _, Merging _ -> td, None ) ) 486 | 487 | let p_ranks td ps = List.fold_map ps ~init:td ~f:p_rank 488 | 489 | module Private = struct 490 | let centroids td = 491 | M.Map.fold_right td.centroids ~init:[] ~f:(fun 
~key:_ ~data:{ mean; n; _ } acc -> (mean, n) :: acc) 492 | 493 | let min td = get_min td >>| fun { mean; n; _ } -> mean, n 494 | 495 | let max td = get_max td >>| fun { mean; n; _ } -> mean, n 496 | end 497 | end 498 | 499 | module M = Make (struct 500 | type 'a t = (float, 'a, Float.comparator_witness) Map.t 501 | 502 | let empty = Map.empty (module Float) 503 | 504 | module Map = Map 505 | end) 506 | 507 | module Marshallable = Make (struct 508 | type 'a t = (float, 'a, Float.comparator_witness) Map.Tree.t 509 | 510 | module Map = Map.Make_tree (Float) 511 | 512 | let empty = Map.empty 513 | end) 514 | 515 | include M 516 | -------------------------------------------------------------------------------- /src/tdigest.mli: -------------------------------------------------------------------------------- 1 | open! Base 2 | 3 | (** 4 | [delta] is the compression factor, the max fraction of mass that can be owned by one centroid (bigger, up to 1.0, means more compression). 5 | [~delta:Discrete] switches off TDigest behavior and treats the distribution as discrete, with no merging and exact values reported. 6 | *) 7 | type delta = 8 | | Merging of float 9 | | Discrete 10 | [@@deriving sexp] 11 | 12 | (** 13 | [k] is a size threshold that triggers recompression as the TDigest grows during input. 14 | [~k:Manual] disables automatic recompression. 15 | *) 16 | type k = 17 | | Manual 18 | | Automatic of float 19 | [@@deriving sexp] 20 | 21 | (** 22 | [cx] (default: [1.1]) specifies how often to update cached cumulative totals used for quantile estimation during ingest. 23 | This is a tradeoff between performance and accuracy. 24 | [~cx:Always] will recompute cumulatives on every new datapoint, but the performance drops by 15-25x or even more depending on the size of the dataset. 25 | *) 26 | type cx = 27 | | Always 28 | | Growth of float 29 | [@@deriving sexp] 30 | 31 | (** 32 | [count]: sum of all [n] 33 | 34 | [size]: size of the internal B-Tree. 
Calling [Tdigest.compress] will usually reduce this size. 35 | 36 | [cumulates_count]: number of cumulate operations over the life of this Tdigest instance. 37 | 38 | [compress_count]: number of compression operations over the life of this Tdigest instance. 39 | 40 | [auto_compress_count]: number of compression operations over the life of this Tdigest instance that were not triggered by a manual call to [Tdigest.compress]. 41 | *) 42 | type info = { 43 | count: int; 44 | size: int; 45 | cumulates_count: int; 46 | compress_count: int; 47 | auto_compress_count: int; 48 | } 49 | [@@deriving sexp] 50 | 51 | module M : sig 52 | type t [@@deriving sexp] 53 | 54 | (** 55 | [Tdigest.create ?delta ?k ?cx ()] 56 | 57 | Allocate an empty Tdigest instance. 58 | 59 | [delta] (default: [0.01]) is the compression factor, the max fraction of mass that can be owned by one centroid (bigger, up to 1.0, means more compression). 60 | [~delta:Discrete] switches off TDigest behavior and treats the distribution as discrete, with no merging and exact values reported. 61 | 62 | [k] (default: [25]) is a size threshold that triggers recompression as the TDigest grows during input. 63 | [~k:Manual] disables automatic recompression. 64 | 65 | [cx] (default: [1.1]) specifies how often to update cached cumulative totals used for quantile estimation during ingest. 66 | This is a tradeoff between performance and accuracy. 67 | [~cx:Always] will recompute cumulatives on every new datapoint, but the performance drops by 15-25x or even more depending on the size of the dataset. 68 | *) 69 | val create : ?delta:delta -> ?k:k -> ?cx:cx -> unit -> t 70 | 71 | (** 72 | [Tdigest.is_empty td] returns [true] when the T-Digest does not contain any values. 73 | *) 74 | val is_empty : t -> bool 75 | 76 | (** 77 | [Tdigest.info td] returns a record with these fields: 78 | 79 | [count]: sum of all [n] 80 | 81 | [size]: size of the internal B-Tree. Calling [Tdigest.compress] will usually reduce this size. 
82 | 83 | [cumulates_count]: number of cumulate operations over the life of this Tdigest instance. 84 | 85 | [compress_count]: number of compression operations over the life of this Tdigest instance. 86 | 87 | [auto_compress_count]: number of compression operations over the life of this Tdigest instance that were not triggered by a manual call to [Tdigest.compress]. 88 | *) 89 | val info : t -> info 90 | 91 | (** 92 | [Tdigest.add ?n ~data td] 93 | 94 | Incorporate a value ([data]) having count [n] (default: [1]) into a new Tdigest. 95 | *) 96 | val add : ?n:int -> data:float -> t -> t 97 | 98 | (** 99 | [Tdigest.add_list ?n ll td] 100 | 101 | Incorporate a list of values each having count [n] (default: [1]) into a new Tdigest. 102 | *) 103 | val add_list : ?n:int -> float list -> t -> t 104 | 105 | (** 106 | [Tdigest.merge ?delta ?k ?cx tdigests] 107 | 108 | Efficiently combine multiple Tdigests into a new one. 109 | *) 110 | val merge : ?delta:delta -> ?k:k -> ?cx:cx -> t list -> t 111 | 112 | (** 113 | [Tdigest.p_rank td q] 114 | For a value [q] estimate the percentage ([0..1]) of values [<= q]. 115 | 116 | Returns a new Tdigest to reuse intermediate computations. 117 | *) 118 | val p_rank : t -> float -> t * float option 119 | 120 | (** 121 | Same as [Tdigest.p_rank] but for a list of values. 122 | 123 | Returns a new Tdigest to reuse intermediate computations. 124 | *) 125 | val p_ranks : t -> float list -> t * float option list 126 | 127 | (** 128 | [Tdigest.percentile td p] 129 | 130 | For a percentage [p] ([0..1]) estimate the smallest value [q] at which at least [p] percent of the values [<= q]. 131 | 132 | For discrete distributions, this selects q using the Nearest Rank Method 133 | [https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method] 134 | 135 | For continuous distributions, interpolates data values between count-weighted bracketing means. 136 | 137 | Returns a new Tdigest to reuse intermediate computations. 
138 | *) 139 | val percentile : t -> float -> t * float option 140 | 141 | (** 142 | Same as [Tdigest.percentile] but for a list of values. 143 | 144 | Returns a new Tdigest to reuse intermediate computations. 145 | *) 146 | val percentiles : t -> float list -> t * float option list 147 | 148 | (** 149 | [Tdigest.compress ?delta td] 150 | 151 | Manual recompression. Not guaranteed to reduce size further if too few values have been added since the last compression. 152 | 153 | [delta] (default: initial value passed to [Tdigest.create]) The compression level to use for this operation only. This does not alter the [delta] used by the Tdigest going forward. 154 | *) 155 | val compress : ?delta:delta -> t -> t 156 | 157 | (** 158 | [Tdigest.to_string td] 159 | 160 | Serialize the internal state into a binary string that can be stored or concatenated with other such binary strings. 161 | 162 | Use [Tdigest.of_string] to create a new Tdigest instance from it. 163 | 164 | Returns a new Tdigest to reuse intermediate computations. 165 | *) 166 | val to_string : t -> t * string 167 | 168 | (** 169 | [Tdigest.of_string ?delta ?k ?cx str] 170 | 171 | See [Tdigest.create] for the meaning of the optional parameters. 172 | 173 | Allocate a new Tdigest from a string or concatenation of strings originally created by [Tdigest.to_string]. 174 | *) 175 | val of_string : ?delta:delta -> ?k:k -> ?cx:cx -> string -> t 176 | 177 | (** For internal use *) 178 | module Private : sig 179 | (** For internal use *) 180 | val centroids : t -> (float * float) list 181 | 182 | (** For internal use *) 183 | val min : t -> (float * float) option 184 | 185 | (** For internal use *) 186 | val max : t -> (float * float) option 187 | end 188 | end 189 | 190 | (** A marshallable version of this library. Approximately 5x slower than the non-marshallable version. 
*) 191 | module Marshallable : sig 192 | type t 193 | 194 | include module type of M with type t := t 195 | end 196 | 197 | include module type of M 198 | -------------------------------------------------------------------------------- /tdigest.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Simon Grondin" 3 | authors: [ 4 | "Simon Grondin" 5 | "Will Welch" 6 | ] 7 | synopsis: "OCaml implementation of the T-Digest algorithm" 8 | description: """ 9 | The T-Digest is a data structure and algorithm for constructing an approximate distribution for a collection of real numbers presented as a stream. 10 | 11 | The T-Digest can estimate percentiles or quantiles extremely accurately even at the tails, while using a fraction of the space. 12 | 13 | Additionally, the T-Digest is concatenable, making it a good fit for distributed systems. The internal state of a T-Digest can be exported as a binary string, and the concatenation of any number of those strings can then be imported to form a new T-Digest. 
14 | """ 15 | license: "MIT" 16 | tags: [] 17 | homepage: "https://github.com/SGrondin/tdigest" 18 | dev-repo: "git://github.com/SGrondin/tdigest" 19 | doc: "https://github.com/SGrondin/tdigest" 20 | bug-reports: "https://github.com/SGrondin/tdigest/issues" 21 | depends: [ 22 | "ocaml" { >= "4.10.0" } 23 | "dune" { >= "1.9.0" } 24 | 25 | "base" { >= "v0.17.0" & < "v0.18.0" } 26 | "ppx_sexp_conv" { >= "v0.17.0" } 27 | 28 | "ppx_expect" { >= "v0.17.0" & with-test } 29 | "ppx_custom_printf" { >= "v0.17.0" & with-test } 30 | "ocamlformat" { = "0.25.1" & with-dev-setup } 31 | "ocaml-lsp-server" { with-dev-setup } 32 | ] 33 | build: ["dune" "build" "-p" name "-j" jobs] 34 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name test) 3 | (inline_tests) 4 | (libraries 5 | tdigest 6 | ) 7 | (preprocess (pps 8 | ppx_expect 9 | ppx_sexp_conv 10 | ppx_custom_printf 11 | )) 12 | (ocamlopt_flags -Oclassic) 13 | ) 14 | -------------------------------------------------------------------------------- /test/shared.ml: -------------------------------------------------------------------------------- 1 | open! 
Base 2 | 3 | let sprintf = Printf.sprintf 4 | 5 | let print_endline = Stdlib.print_endline 6 | 7 | let render_centroids ll = 8 | List.map ll ~f:(fun (mean, n) -> ("mean", mean), ("n", n)) 9 | |> [%sexp_of: ((string * float) * (string * float)) list] 10 | 11 | let check td = Tdigest.Private.centroids td |> render_centroids |> Sexp.to_string_hum |> print_endline 12 | 13 | let checkm td = 14 | Tdigest.Marshallable.Private.centroids td |> render_centroids |> Sexp.to_string_hum |> print_endline 15 | 16 | let check_size td = (Tdigest.info td).size |> sprintf !"%{sexp: int}" |> print_endline 17 | 18 | let check_min_max td = 19 | (Tdigest.Private.min td, Tdigest.Private.max td) 20 | |> sprintf !"%{sexp: (float * float) option * (float * float) option}" 21 | |> print_endline 22 | 23 | let check_p_rank p td = Tdigest.p_rank td p |> snd |> sprintf !"%{sexp: float option}" |> print_endline 24 | 25 | let check_percentile p td = 26 | Tdigest.percentile td p |> snd |> sprintf !"%{sexp: float option}" |> print_endline 27 | 28 | let check_p_ranks ps td = 29 | Tdigest.p_ranks td ps |> snd |> sprintf !"%{sexp: float option list}" |> print_endline 30 | 31 | let check_percentiles ps td = 32 | Tdigest.percentiles td ps |> snd |> sprintf !"%{sexp: float option list}" |> print_endline 33 | 34 | let identical_sexp ~received ~expected = 35 | if not (Sexp.equal received expected) 36 | then print_endline (sprintf !"Not identical.\nReceived: %{Sexp}\nExpected: %{Sexp}" received expected) 37 | else print_endline "Identical" 38 | 39 | let expected_centroids td = 40 | let received = Tdigest.Private.centroids td |> render_centroids in 41 | let expected = List.init 100 ~f:(fun i -> i * 10 |> Int.to_float, 1.) |> render_centroids in 42 | identical_sexp ~received ~expected 43 | -------------------------------------------------------------------------------- /test/test_discrete.ml: -------------------------------------------------------------------------------- 1 | open! 
Base 2 | open Shared 3 | 4 | let%expect_test "discrete digests" = 5 | (* consumes increasing-valued points *) 6 | let () = 7 | let xs = List.init 100 ~f:(fun i -> i * 10 |> Float.of_int) in 8 | let td = Tdigest.create ~delta:Tdigest.Discrete () |> Tdigest.add_list xs in 9 | expected_centroids td; 10 | [%expect {| Identical |}] 11 | in 12 | 13 | (* consumes decreasing-valued points *) 14 | let () = 15 | let xs = List.init 100 ~f:(fun i -> (99 - i) * 10 |> Float.of_int) in 16 | let td = Tdigest.create ~delta:Tdigest.Discrete () |> Tdigest.add_list xs in 17 | expected_centroids td; 18 | [%expect {| Identical |}] 19 | in 20 | 21 | (* consumes same-valued points into a single point *) 22 | Tdigest.create ~delta:Tdigest.Discrete () |> Fn.apply_n_times ~n:100 (Tdigest.add ~data:1000.) |> check; 23 | [%expect {| (((mean 1000) (n 100))) |}]; 24 | 25 | (* selects a run of duplicates containing the percentile *) 26 | Tdigest.create ~delta:Tdigest.Discrete () 27 | |> Tdigest.add_list 28 | [ 29 | 5.; 30 | 0.; 31 | 0.; 32 | 8.; 33 | 0.; 34 | 0.; 35 | 0.; 36 | 0.; 37 | 0.; 38 | 0.; 39 | 0.; 40 | 0.; 41 | 0.; 42 | 0.; 43 | 0.; 44 | 0.; 45 | 0.; 46 | 0.; 47 | 3.; 48 | 0.; 49 | 0.; 50 | 0.; 51 | 0.; 52 | 6.; 53 | 1.; 54 | 0.; 55 | 6.; 56 | 5.; 57 | 3.; 58 | 6.; 59 | 1.; 60 | 1.; 61 | 0.; 62 | 0.; 63 | 1.; 64 | 1.; 65 | 0.; 66 | 0.; 67 | 1.; 68 | 0.; 69 | ] 70 | |> check_percentile 0.5; 71 | [%expect {| (0) |}]; 72 | 73 | (* handles multiple duplicates *) 74 | Tdigest.create ~delta:Tdigest.Discrete () 75 | |> Fn.apply_n_times ~n:10 (fun td -> 76 | td |> Tdigest.add ~data:0. |> Tdigest.add ~data:1. |> Tdigest.add ~data:0.5 ) 77 | |> check; 78 | [%expect {| (((mean 0) (n 10)) ((mean 0.5) (n 10)) ((mean 1) (n 10))) |}] 79 | 80 | let%expect_test "discrete percentile ranks" = 81 | (* from a single point *) 82 | Tdigest.create ~delta:Tdigest.Discrete () 83 | |> Tdigest.add ~data:0. 
84 | |> check_p_ranks [ -1.5; 0.; 0.5; 1.; 1.5 ]; 85 | [%expect {| ((0) (1) (1) (1) (1)) |}]; 86 | 87 | (* from two points *) 88 | Tdigest.create ~delta:Tdigest.Discrete () 89 | |> Tdigest.add_list [ 0.; 1. ] 90 | |> check_p_ranks [ -1.5; 0.; 0.5; 1.; 1.5 ]; 91 | [%expect {| ((0) (0.5) (0.5) (1) (1)) |}]; 92 | 93 | (* from three points *) 94 | Tdigest.create ~delta:Tdigest.Discrete () 95 | |> Tdigest.add_list [ -1.; 0.; 1. ] 96 | |> check_p_ranks [ -1.5; -1.; -0.5; 0.; 0.5; 1.; 1.5 ]; 97 | [%expect 98 | {| 99 | ((0) (0.33333333333333331) (0.33333333333333331) (0.66666666666666663) 100 | (0.66666666666666663) (1) (1)) |}]; 101 | 102 | (* from three points is same as from multiples of those points *) 103 | let () = 104 | let ps = [ -1.5; -1.; -0.5; 0.; 0.5; 1.; 1.5 ] in 105 | let td1 = Tdigest.create ~delta:Tdigest.Discrete () |> Tdigest.add_list [ 0.; 1.; -1. ] in 106 | check_p_ranks ps td1; 107 | let td2 = td1 |> Tdigest.add_list [ 0.; 1.; -1. ] |> Tdigest.add_list [ 0.; 1.; -1. ] in 108 | check_p_ranks ps td2; 109 | 110 | [%expect 111 | {| 112 | ((0) (0.33333333333333331) (0.33333333333333331) (0.66666666666666663) 113 | (0.66666666666666663) (1) (1)) 114 | ((0) (0.33333333333333331) (0.33333333333333331) (0.66666666666666663) 115 | (0.66666666666666663) (1) (1)) |}] 116 | in 117 | 118 | (* from four points away from the origin *) 119 | Tdigest.create ~delta:Tdigest.Discrete () 120 | |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] 121 | |> check_p_ranks [ 9.; 10.; 11.; 12.; 13.; 14. ]; 122 | [%expect {| ((0) (0.25) (0.5) (0.75) (1) (1)) |}]; 123 | 124 | (* from four points is same as from multiples of those points *) 125 | let () = 126 | let ps = [ 9.; 10.; 11.; 12.; 13.; 14. ] in 127 | let td1 = Tdigest.create ~delta:Tdigest.Discrete () |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] in 128 | check_p_ranks ps td1; 129 | let td2 = td1 |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] |> Tdigest.add_list [ 10.; 11.; 12.; 13. 
] in 130 | check_p_ranks ps td2; 131 | [%expect {| 132 | ((0) (0.25) (0.5) (0.75) (1) (1)) 133 | ((0) (0.25) (0.5) (0.75) (1) (1)) |}] 134 | in 135 | () 136 | 137 | let%expect_test "discrete percentiles" = 138 | (* from a single point *) 139 | Tdigest.create ~delta:Tdigest.Discrete () |> Tdigest.add ~data:0. |> check_percentiles [ 0.; 0.5; 1. ]; 140 | [%expect {| ((0) (0) (0)) |}]; 141 | 142 | (* from two points *) 143 | Tdigest.create ~delta:Tdigest.Discrete () 144 | |> Tdigest.add_list [ 0.; 10. ] 145 | |> check_percentiles [ 0.; 1 // 4; 1 // 2; 3 // 4; 1. ]; 146 | [%expect {| ((0) (0) (0) (10) (10)) |}]; 147 | 148 | (* from three points *) 149 | Tdigest.create ~delta:Tdigest.Discrete () 150 | |> Tdigest.add_list [ 0.; 5.; 10. ] 151 | |> check_percentiles [ 0.; 1 // 4; Float.(1. / 2.9); 1 // 2; 2 // 3; 3 // 4; 1. ]; 152 | [%expect {| ((0) (0) (5) (5) (5) (10) (10)) |}]; 153 | 154 | (* from four points away from the origin *) 155 | Tdigest.create ~delta:Tdigest.Discrete () 156 | |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] 157 | |> check_percentiles [ 0.; 1 // 4; 1 // 2; 3 // 4; 1. ]; 158 | [%expect {| ((10) (10) (11) (12) (13)) |}] 159 | -------------------------------------------------------------------------------- /test/test_tdigest.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | open Shared 3 | 4 | let%expect_test "T-Digests in which each point becomes a centroid" = 5 | (* consumes a point *) 6 | Tdigest.create () |> Tdigest.add ~data:0. |> check; 7 | [%expect {| (((mean 0) (n 1))) |}]; 8 | 9 | (* consumes two points *) 10 | Tdigest.create () |> Tdigest.add_list [ 0.; 1. ] |> check; 11 | [%expect {| (((mean 0) (n 1)) ((mean 1) (n 1))) |}]; 12 | 13 | (* consumes three points *) 14 | Tdigest.create () |> Tdigest.add_list [ 0.; 1.; -1. 
] |> check; 15 | [%expect {| (((mean -1) (n 1)) ((mean 0) (n 1)) ((mean 1) (n 1))) |}]; 16 | 17 | (* consumes increasing-valued points *) 18 | let () = 19 | let td = 20 | let xs = List.init 100 ~f:(fun i -> i * 10 |> Float.of_int) in 21 | Tdigest.create ~delta:(Tdigest.Merging 0.001) ~k:Tdigest.Manual () |> Tdigest.add_list xs 22 | in 23 | expected_centroids td; 24 | [%expect {| Identical |}] 25 | in 26 | 27 | (* consumes decreasing-valued points *) 28 | let () = 29 | let td = 30 | let xs = List.init 100 ~f:(fun i -> (99 - i) * 10 |> Float.of_int) in 31 | Tdigest.create ~delta:(Tdigest.Merging 0.001) ~k:Tdigest.Manual () |> Tdigest.add_list xs 32 | in 33 | expected_centroids td; 34 | [%expect {| Identical |}] 35 | in 36 | () 37 | 38 | let%expect_test "T-Digests in which points are merged into centroids" = 39 | (* consumes same-valued points into a single point *) 40 | Tdigest.create () |> Fn.apply_n_times ~n:100 (Tdigest.add ~data:1000.) |> check; 41 | [%expect {| (((mean 1000) (n 100))) |}]; 42 | 43 | (* handles multiple duplicates *) 44 | Tdigest.create ~delta:(Tdigest.Merging 1.) ~k:Tdigest.Manual ~cx:Tdigest.Always () 45 | |> Fn.apply_n_times ~n:10 (fun td -> 46 | td |> Tdigest.add ~data:0. |> Tdigest.add ~data:1. 
|> Tdigest.add ~data:0.5 ) 47 | |> check; 48 | [%expect {| (((mean 0) (n 10)) ((mean 0.5) (n 10)) ((mean 1) (n 10))) |}] 49 | 50 | let%expect_test "compress" = 51 | (* compresses points and preserves bounds *) 52 | let () = 53 | let xs = List.init 100 ~f:(fun i -> i * 10 |> Float.of_int) in 54 | let td = Tdigest.create ~delta:(Tdigest.Merging 0.001) ~k:Tdigest.Manual () |> Tdigest.add_list xs in 55 | (* must be 100 *) 56 | check_size td; 57 | 58 | let td = Tdigest.compress ~delta:(Tdigest.Merging 0.1) td in 59 | (* must be < 100 *) 60 | check_size td; 61 | check_min_max td; 62 | [%expect {| 63 | 100 64 | 44 65 | (((0 1)) ((990 1))) |}] 66 | in 67 | 68 | (* K automatically compresses during ingest *) 69 | let () = 70 | let td = 71 | Array.init 10_000 ~f:(fun i -> i * 10 |> Float.of_int) 72 | |> Array.fold ~init:(Tdigest.create ()) ~f:(fun td x -> Tdigest.add td ~data:x) 73 | in 74 | (* must be < 10_000 *) 75 | check_size td; 76 | check_min_max td; 77 | [%expect {| 78 | 2132 79 | (((0 1)) ((99990 1))) |}] 80 | in 81 | () 82 | 83 | let%expect_test "percentile ranks" = 84 | (* reports None when given no points *) 85 | Tdigest.create () |> check_p_rank 1.; 86 | [%expect {| () |}]; 87 | 88 | (* from a single point *) 89 | Tdigest.create () |> Tdigest.add ~data:0. |> check_p_ranks [ -0.5; 0.; 0.5; 1.; 1.5 ]; 90 | [%expect {| ((0) (0.5) (1) (1) (1)) |}]; 91 | 92 | (* from three points *) 93 | Tdigest.create () 94 | |> Tdigest.add_list [ -1.; 0.; 1. ] 95 | |> check_p_ranks [ -1.5; -1.0; -0.5; 0.; 0.5; 1.0; 1.5 ]; 96 | [%expect 97 | {| 98 | ((0) (0.16666666666666666) (0.33333333333333331) (0.5) (0.66666666666666663) 99 | (0.83333333333333337) (1)) |}]; 100 | 101 | (* from three points is same as from multiples of those points *) 102 | let () = 103 | let ps = [ -1.5; -1.0; -0.5; 0.; 0.5; 1.0; 1.5 ] in 104 | let td = Tdigest.create () |> Tdigest.add_list [ 0.; 1.; -1. ] in 105 | check_percentiles ps td; 106 | let td = td |> Tdigest.add_list [ 0.; 1.; -1. 
] |> Tdigest.add_list [ 0.; 1.; -1. ] in 107 | check_percentiles ps td; 108 | [%expect {| 109 | ((-1) (-1) (-1) (-1) (0) (1) (1)) 110 | ((-1) (-1) (-1) (-1) (0) (1) (1)) |}] 111 | in 112 | 113 | (* from four points away from the origin *) 114 | let () = 115 | Tdigest.create () 116 | |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] 117 | |> check_p_ranks [ 9.; 10.; 11.; 12.; 13.; 14. ]; 118 | [%expect {| ((0) (0.125) (0.375) (0.625) (0.875) (1)) |}] 119 | in 120 | 121 | (* from four points is same as from multiples of those points *) 122 | let () = 123 | let ps = [ 9.; 10.; 11.; 12.; 13.; 14. ] in 124 | let td = 125 | Tdigest.create ~delta:(Tdigest.Merging 0.) ~k:Tdigest.Manual () 126 | |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] 127 | in 128 | check_p_ranks ps td; 129 | let td = td |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] in 130 | check_p_ranks ps td; 131 | [%expect 132 | {| 133 | ((0) (0.125) (0.375) (0.625) (0.875) (1)) 134 | ((0) (0.125) (0.375) (0.625) (0.875) (1)) |}] 135 | in 136 | 137 | (* from lots of uniformly distributed points *) 138 | let () = 139 | let td = 140 | Tdigest.create () 141 | |> Fn.apply_n_times ~n:100_000 (fun td -> Tdigest.add td ~data:(Random.float 1.)) 142 | |> Tdigest.compress 143 | in 144 | let _i, max_err, _td = 145 | Fn.apply_n_times ~n:100 146 | (fun (i, max_err, td) -> 147 | let td, q = Tdigest.p_rank td i |> fun (x, y) -> x, Option.value_exn y in 148 | let m = Float.(max max_err (i - q |> abs)) in 149 | Float.(i + 0.01), m, td) 150 | (0.01, 0.0, td) 151 | in 152 | Float.to_string max_err |> print_endline; 153 | (* must be < 0.01 *) 154 | [%expect {| 0.0038269003338100016 |}] 155 | in 156 | 157 | (* from an exact match *) 158 | let () = 159 | Tdigest.create ~delta:(Tdigest.Merging 0.001) ~k:Tdigest.Manual () 160 | |> Fn.apply_n_times ~n:10 (Tdigest.add_list [ 10.; 20.; 30. 
]) 161 | |> check_p_rank 20.; 162 | [%expect {| (0.5) |}] 163 | in 164 | () 165 | 166 | let%expect_test "percentiles" = 167 | (* reports None when given no points *) 168 | Tdigest.create () |> check_percentile 0.5; 169 | [%expect {| () |}]; 170 | 171 | (* from a single point *) 172 | Tdigest.create () |> Tdigest.add ~data:0. |> check_percentiles [ 0.; 0.5; 1. ]; 173 | [%expect {| ((0) (0) (0)) |}]; 174 | 175 | (* from two points *) 176 | Tdigest.create () 177 | |> Tdigest.add_list [ 0.; 1. ] 178 | |> check_percentiles [ -1 // 4; 0.; 1 // 4; 1 // 2; 5 // 8; 3 // 4; 1.; 1.25 ]; 179 | [%expect {| ((0) (0) (0) (0.5) (0.75) (1) (1) (1)) |}]; 180 | 181 | (* from three points *) 182 | Tdigest.create () 183 | |> Tdigest.add_list [ 0.; 0.5; 1. ] 184 | |> check_percentiles [ 0.; 1 // 4; 1 // 2; 3 // 4; 1. ]; 185 | [%expect {| ((0) (0.125) (0.5) (0.875) (1)) |}]; 186 | 187 | (* from four points *) 188 | Tdigest.create () 189 | |> Tdigest.add_list [ 10.; 11.; 12.; 13. ] 190 | |> check_percentiles [ 0.; 1 // 4; 1 // 2; 3 // 4; 1. ]; 191 | [%expect {| ((10) (10.5) (11.5) (12.5) (13)) |}]; 192 | 193 | (* from lots of uniformly distributed points *) 194 | let () = 195 | let td = 196 | Tdigest.create () 197 | |> Fn.apply_n_times ~n:100_000 (fun td -> Tdigest.add td ~data:(Random.float 1.)) 198 | |> Tdigest.compress 199 | in 200 | let _i, max_err, _td = 201 | Fn.apply_n_times ~n:100 202 | (fun (i, max_err, td) -> 203 | let td, q = Tdigest.p_rank td i |> fun (x, y) -> x, Option.value_exn y in 204 | let m = Float.(max max_err (i - q |> abs)) in 205 | Float.(i + 0.01), m, td) 206 | (0.01, 0.0, td) 207 | in 208 | Float.to_string max_err |> print_endline; 209 | (* must be < 0.01 *) 210 | [%expect {| 0.0038269003338100016 |}] 211 | in 212 | () 213 | 214 | let%expect_test "serialization" = 215 | (* identical after recreating (to_string/of_string) *) 216 | let () = 217 | let xs = List.init 10 ~f:(fun _i -> Random.float 1.) 
in 218 | let td = Tdigest.create () |> Tdigest.add_list xs in 219 | let td1, export = Tdigest.to_string td in 220 | if String.length export <> 160 then failwith "export length <> 160"; 221 | let td2 = Tdigest.of_string export in 222 | check td1; 223 | check td2; 224 | [%expect 225 | {| 226 | (((mean 0.0278013320652886) (n 1)) ((mean 0.088701617893937754) (n 1)) 227 | ((mean 0.25409775932960554) (n 1)) ((mean 0.29053618718083241) (n 1)) 228 | ((mean 0.48715518185429985) (n 1)) ((mean 0.55401752229333867) (n 1)) 229 | ((mean 0.63535141231675607) (n 1)) ((mean 0.68236915472644311) (n 1)) 230 | ((mean 0.77208758810861389) (n 1)) ((mean 0.78255046724687982) (n 1))) 231 | (((mean 0.0278013320652886) (n 1)) ((mean 0.088701617893937754) (n 1)) 232 | ((mean 0.25409775932960554) (n 1)) ((mean 0.29053618718083241) (n 1)) 233 | ((mean 0.48715518185429985) (n 1)) ((mean 0.55401752229333867) (n 1)) 234 | ((mean 0.63535141231675607) (n 1)) ((mean 0.68236915472644311) (n 1)) 235 | ((mean 0.77208758810861389) (n 1)) ((mean 0.78255046724687982) (n 1))) |}] 236 | in 237 | 238 | (* identical after recreating (sexp) *) 239 | let () = 240 | let xs = List.init 10 ~f:(fun _i -> Random.float 1.) in 241 | let td1 = Tdigest.create () |> Tdigest.add_list xs in 242 | let td2 = [%sexp_of: Tdigest.t] td1 |> [%of_sexp: Tdigest.t] in 243 | print_endline (sprintf !"%{sexp#hum: Tdigest.t}" td1); 244 | print_endline (sprintf !"%{sexp#hum: Tdigest.t}" td2); 245 | check td1; 246 | check td2; 247 | [%expect 248 | {| 249 | ((settings ((delta (Merging 0.01)) (k (Automatic 25)) (cx (Growth 1.1)))) 250 | (state 251 | "E\160\000 Random.float 1.) 
in 272 | let td1 = Tdigest.Marshallable.create () |> Tdigest.Marshallable.add_list xs in 273 | let td2 : Tdigest.Marshallable.t = 274 | Stdlib.Marshal.to_string td1 [] |> fun s -> 275 | print_endline (sprintf "Length: %d" (String.length s)); 276 | Stdlib.Marshal.from_string s 0 277 | in 278 | print_endline (sprintf !"%{sexp#hum: Tdigest.Marshallable.t}" td1); 279 | print_endline (sprintf !"%{sexp#hum: Tdigest.Marshallable.t}" td2); 280 | checkm td1; 281 | checkm td2; 282 | [%expect 283 | {| 284 | Length: 531 285 | ((settings ((delta (Merging 0.01)) (k (Automatic 25)) (cx (Growth 1.1)))) 286 | (state 287 | "q\13752\204H\161?\000\000\000\000\000\000\240?)'\237\200\164\019\197?\000\000\000\000\000\000\240?\177\177\131SS\168\208?\000\000\000\000\000\000\240?\167\144'\003}/\224?\000\000\000\000\000\000\240?\182\188\187h\203o\225?\000\000\000\000\000\000\240?\133\132\245Gqp\228?\000\000\000\000\000\000\240?\130\246\223Pj\145\230?\000\000\000\000\000\000\240?\178 Tdigest.add_list (xs1 @ xs2) in 311 | let td2 = 312 | let a = Tdigest.create () |> Tdigest.add_list xs1 in 313 | let b = Tdigest.create () |> Tdigest.add_list xs2 in 314 | Tdigest.merge [ a; b ] 315 | in 316 | let ps = [ 0.0; 0.25; 0.50; 0.75; 1.0 ] in 317 | check_percentiles ps td1; 318 | check_percentiles ps td2; 319 | [%expect {| 320 | ((1) (3) (3.75) (6.75) (9)) 321 | ((1) (3) (3.75) (6.75) (9)) |}] 322 | 323 | let%expect_test "is_empty" = 324 | let xs = [ 3.0; 4.0; 3.5; 7.0 ] in 325 | let td = Tdigest.create () in 326 | Tdigest.is_empty td |> Bool.to_string |> print_endline; 327 | let td = Tdigest.add_list xs td in 328 | Tdigest.is_empty td |> Bool.to_string |> print_endline; 329 | [%expect {| 330 | true 331 | false |}] 332 | --------------------------------------------------------------------------------