├── .dockerignore
├── .gitignore
├── lib
    ├── infix_pair.ml
    ├── suite.ml
    ├── int_ext.ocaml_ge_4_13.ml
    ├── barrier.mli
    ├── int_ext.ocaml_lt_4_13.ml
    ├── map_ext.ml
    ├── set_ext.ml
    ├── ordered.ml
    ├── trend.ml
    ├── multicore_bench.ml
    ├── unit_of_rate.ml
    ├── unit_of_time.ml
    ├── finally.ml
    ├── json.ml
    ├── option_ext.ml
    ├── dune
    ├── list_ext.ml
    ├── metric.ml
    ├── data.ml
    ├── barrier.ml
    ├── countdown.ml
    ├── util.ml
    ├── cmd.ml
    ├── multicore_bench.mli
    └── times.ml
├── .ocamlformat
├── dune
├── .gitattributes
├── Makefile
├── .prettierrc
├── bench.Dockerfile
├── bench
    ├── dune
    ├── main.ml
    ├── bench_stack.ml
    ├── bench_queue.ml
    ├── bench_unix.ml
    ├── bench_ref_mutex.ml
    ├── bench_incr.ml
    ├── bench_hashtbl.ml
    ├── bench_atomic.ml
    ├── bench_ref.ml
    └── bench_bounded_q.ml
├── HACKING.md
├── LICENSE.md
├── CHANGES.md
├── multicore-bench.opam
├── dune-project
├── update-gh-pages-for-tag
├── .github
    └── workflows
    │   └── workflow.yml
└── README.md


/.dockerignore:
--------------------------------------------------------------------------------
1 | _build
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.install
2 | _build
3 | tmp
4 | 


--------------------------------------------------------------------------------
/lib/infix_pair.ml:
--------------------------------------------------------------------------------
1 | type ('a, 'b) t = ( :: ) of 'a * 'b
2 | 


--------------------------------------------------------------------------------
/lib/suite.ml:
--------------------------------------------------------------------------------
1 | type t = budgetf:float -> Yojson.Safe.t list
2 | 


--------------------------------------------------------------------------------
/lib/int_ext.ocaml_ge_4_13.ml:
--------------------------------------------------------------------------------
1 | let min = Int.min
2 | let max = Int.max
3 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | profile = default
2 | version = 0.27.0
3 | 
4 | exp-grouping=preserve
5 | 


--------------------------------------------------------------------------------
/dune:
--------------------------------------------------------------------------------
1 | (mdx
2 |  (libraries multicore-bench multicore-magic)
3 |  (files README.md))
4 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # To work around MDX issues
2 | *.md text eol=lf
3 | *.mli text eol=lf
4 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: bench
2 | 
3 | bench:
4 | 	@dune exec --release -- bench/main.exe -budget 1
5 | 


--------------------------------------------------------------------------------
/lib/barrier.mli:
--------------------------------------------------------------------------------
1 | type t
2 | 
3 | val make : int -> t
4 | val await : t -> unit
5 | val poison : t -> exn -> Printexc.raw_backtrace -> unit
6 | 


--------------------------------------------------------------------------------
/lib/int_ext.ocaml_lt_4_13.ml:
--------------------------------------------------------------------------------
1 | let min (x : int) (y : int) = if x < y then x else y
2 | let max (x : int) (y : int) = if x < y then y else x
3 | 


--------------------------------------------------------------------------------
/lib/map_ext.ml:
--------------------------------------------------------------------------------
1 | let make (type t) (compare : t -> _) =
2 |   let (module Elt) = Ordered.make compare in
3 |   (module Map.Make (Elt) : Map.S with type key = t)
4 | 


--------------------------------------------------------------------------------
/lib/set_ext.ml:
--------------------------------------------------------------------------------
1 | let make (type t) (compare : t -> _) =
2 |   let (module Elt) = Ordered.make compare in
3 |   (module Set.Make (Elt) : Set.S with type elt = t)
4 | 


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "arrowParens": "avoid",
3 |   "bracketSpacing": false,
4 |   "printWidth": 80,
5 |   "semi": false,
6 |   "singleQuote": true,
7 |   "proseWrap": "always"
8 | }
9 | 


--------------------------------------------------------------------------------
/lib/ordered.ml:
--------------------------------------------------------------------------------
1 | let make (type t) (compare : t -> _) =
2 |   (module struct
3 |     type nonrec t = t
4 | 
5 |     let compare = compare
6 |   end : Set.OrderedType
7 |     with type t = t)
8 | 


--------------------------------------------------------------------------------
/lib/trend.ml:
--------------------------------------------------------------------------------
1 | type t = [ `Lower_is_better | `Higher_is_better ]
2 | 
3 | let to_json = function
4 |   | `Lower_is_better -> `String "lower-is-better"
5 |   | `Higher_is_better -> `String "higher-is-better"
6 | 


--------------------------------------------------------------------------------
/lib/multicore_bench.ml:
--------------------------------------------------------------------------------
 1 | module Trend = Trend
 2 | module Metric = Metric
 3 | module Unit_of_rate = Unit_of_rate
 4 | module Unit_of_time = Unit_of_time
 5 | module Times = Times
 6 | module Suite = Suite
 7 | module Cmd = Cmd
 8 | module Countdown = Countdown
 9 | module Util = Util
10 | 


--------------------------------------------------------------------------------
/lib/unit_of_rate.ml:
--------------------------------------------------------------------------------
 1 | type t = [ `_1 | `k | `M | `G ]
 2 | 
 3 | let to_divisor = function
 4 |   | `_1 -> 1.0
 5 |   | `k -> 1_000.0
 6 |   | `M -> 1_000_000.0
 7 |   | `G -> 1_000_000_000.0
 8 | 
 9 | let to_mnemonic = function
10 |   | `_1 -> "1/s"
11 |   | `k -> "k/s"
12 |   | `M -> "M/s"
13 |   | `G -> "G/s"
14 | 


--------------------------------------------------------------------------------
/lib/unit_of_time.ml:
--------------------------------------------------------------------------------
 1 | type t = [ `s | `ms | `mus | `ns ]
 2 | 
 3 | let to_multiplier = function
 4 |   | `s -> 1.0
 5 |   | `ms -> 1_000.0
 6 |   | `mus -> 1_000_000.0
 7 |   | `ns -> 1_000_000_000.0
 8 | 
 9 | let to_mnemonic = function
10 |   | `s -> "s"
11 |   | `ms -> "ms"
12 |   | `mus -> "μs"
13 |   | `ns -> "ns"
14 | 


--------------------------------------------------------------------------------
/lib/finally.ml:
--------------------------------------------------------------------------------
 1 | let[@inline never] finally release acquire scope =
 2 |   let x = acquire () in
 3 |   match scope x with
 4 |   | y ->
 5 |       release x;
 6 |       y
 7 |   | exception exn ->
 8 |       let bt = Printexc.get_raw_backtrace () in
 9 |       release x;
10 |       Printexc.raise_with_backtrace exn bt
11 | 
12 | external ( let@ ) : ('a -> 'b) -> 'a -> 'b = "%apply"
13 | 


--------------------------------------------------------------------------------
/lib/json.ml:
--------------------------------------------------------------------------------
 1 | open Option_ext.Syntax
 2 | 
 3 | type t = Yojson.Safe.t
 4 | 
 5 | let as_assoc = function `Assoc assoc -> Some assoc | (_ : t) -> None
 6 | let prop key = as_assoc >=> List.assoc_opt key
 7 | let as_list = function `List list -> Some list | (_ : t) -> None
 8 | let as_string = function `String string -> Some string | (_ : t) -> None
 9 | let as_float = function `Float float -> Some float | (_ : t) -> None
10 | 


--------------------------------------------------------------------------------
/bench.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ocaml/opam:debian-ocaml-5.3
 2 | WORKDIR /bench-dir
 3 | RUN sudo ln -sf /usr/bin/opam-2.1 /usr/bin/opam
 4 | RUN sudo chown opam .
 5 | COPY *.opam ./
 6 | RUN opam remote add origin https://github.com/ocaml/opam-repository.git && \
 7 |     opam update
 8 | RUN opam pin -yn --with-version=dev .
 9 | RUN opam install -y --deps-only --with-test .
10 | COPY . ./
11 | RUN opam exec -- dune build --release bench/main.exe
12 | 


--------------------------------------------------------------------------------
/bench/dune:
--------------------------------------------------------------------------------
 1 | (* -*- tuareg -*- *)
 2 | 
 3 | let maybe_domain_shims_and_threads =
 4 |   if Jbuild_plugin.V1.ocaml_version < "5" then "domain_shims threads.posix"
 5 |   else ""
 6 | 
 7 | let () =
 8 |   Jbuild_plugin.V1.send
 9 |   @@ {|
10 | 
11 | (test
12 |  (name main)
13 |  (action
14 |   (run %{test} -brief))
15 |  (libraries
16 |   multicore-bench
17 |   backoff
18 |   unix
19 |   multicore-magic |}
20 |   ^ maybe_domain_shims_and_threads ^ {| ))
21 | |}
22 | 


--------------------------------------------------------------------------------
/HACKING.md:
--------------------------------------------------------------------------------
 1 | ### Formatting
 2 | 
 3 | This project uses [ocamlformat](https://github.com/ocaml-ppx/ocamlformat) (for
 4 | OCaml) and [prettier](https://prettier.io/) (for Markdown).
 5 | 
 6 | ### To make a new release
 7 | 
 8 | 1. Update [CHANGES.md](CHANGES.md).
 9 | 2. Run `dune-release tag VERSION` to create a tag for the new `VERSION`.
10 | 3. Run `dune-release` to publish the new `VERSION`.
11 | 4. Run `./update-gh-pages-for-tag VERSION` to update the online documentation.
12 | 


--------------------------------------------------------------------------------
/bench/main.ml:
--------------------------------------------------------------------------------
 1 | let benchmarks =
 2 |   [
 3 |     ("Ref with [@poll error]", Bench_ref.run_suite);
 4 |     ("Ref with Mutex", Bench_ref_mutex.run_suite);
 5 |     ("Atomic", Bench_atomic.run_suite);
 6 |     ("Hashtbl", Bench_hashtbl.run_suite);
 7 |     ("Queue", Bench_queue.run_suite);
 8 |     ("Stack", Bench_stack.run_suite);
 9 |     ("Unix", Bench_unix.run_suite);
10 |     ("Atomic incr", Bench_incr.run_suite);
11 |     ("Bounded_q", Bench_bounded_q.run_suite);
12 |   ]
13 | 
14 | let () = Multicore_bench.Cmd.run ~benchmarks ()
15 | 


--------------------------------------------------------------------------------
/lib/option_ext.ml:
--------------------------------------------------------------------------------
 1 | let pair x y = match (x, y) with Some x, Some y -> Some (x, y) | _ -> None
 2 | 
 3 | module Syntax = struct
 4 |   let ( & ) l r x =
 5 |     match l x with
 6 |     | None -> None
 7 |     | Some l -> begin
 8 |         match r x with None -> None | Some r -> Some Infix_pair.(l :: r)
 9 |       end
10 | 
11 |   let ( let* ) = Option.bind
12 |   let ( >>= ) = Option.bind
13 |   let ( >=> ) f g x = f x >>= g
14 |   let ( let+ ) x f = Option.map f x
15 |   let ( >>+ ) = ( let+ )
16 |   let ( >+> ) f g x = f x >>+ g
17 |   let pure = Option.some
18 |   let ( and* ) = pair
19 |   let ( and+ ) = pair
20 | end
21 | 


--------------------------------------------------------------------------------
/bench/bench_stack.ml:
--------------------------------------------------------------------------------
 1 | open Multicore_bench
 2 | module Stack = Stdlib.Stack
 3 | 
 4 | let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () =
 5 |   let t = Stack.create () in
 6 | 
 7 |   let op push =
 8 |     if push then Stack.push (ref push) t else Stack.pop_opt t |> ignore
 9 |   in
10 | 
11 |   let init _ =
12 |     assert (Stack.is_empty t);
13 |     Util.generate_push_and_pop_sequence n_msgs
14 |   in
15 |   let work _ bits = Util.Bits.iter op bits in
16 | 
17 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
18 |   |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain"
19 | 
20 | let run_suite ~budgetf = run_one_domain ~budgetf ()
21 | 


--------------------------------------------------------------------------------
/bench/bench_queue.ml:
--------------------------------------------------------------------------------
 1 | open Multicore_bench
 2 | module Queue = Stdlib.Queue
 3 | 
 4 | let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () =
 5 |   let t = Queue.create () in
 6 | 
 7 |   let op push =
 8 |     if push then Queue.push (ref push) t else Queue.take_opt t |> ignore
 9 |   in
10 | 
11 |   let init _ =
12 |     assert (Queue.is_empty t);
13 |     Util.generate_push_and_pop_sequence n_msgs
14 |   in
15 |   let work _ bits = Util.Bits.iter op bits in
16 | 
17 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
18 |   |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain"
19 | 
20 | let run_suite ~budgetf = run_one_domain ~budgetf ()
21 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2024, Vesa Karvonen <vesa.a.j.k@gmail.com>
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any
 4 | purpose with or without fee is hereby granted, provided that the above
 5 | copyright notice and this permission notice appear in all copies.
 6 | 
 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 | 


--------------------------------------------------------------------------------
/lib/dune:
--------------------------------------------------------------------------------
 1 | (* -*- tuareg -*- *)
 2 | 
 3 | let maybe_domain_shims =
 4 |   if Jbuild_plugin.V1.ocaml_version < "5" then "domain_shims" else ""
 5 | 
 6 | let () =
 7 |   Jbuild_plugin.V1.send
 8 |   @@ {|
 9 | 
10 | (rule
11 |  (enabled_if
12 |   (< %{ocaml_version} 4.13.0))
13 |  (action
14 |   (copy int_ext.ocaml_lt_4_13.ml int_ext.ml)))
15 | 
16 | (rule
17 |  (enabled_if
18 |   (>= %{ocaml_version} 4.13.0))
19 |  (action
20 |   (copy int_ext.ocaml_ge_4_13.ml int_ext.ml)))
21 | 
22 | (library
23 |  (public_name multicore-bench)
24 |  (name multicore_bench)
25 |  (libraries
26 |   backoff
27 |   multicore-magic
28 |   domain-local-await
29 |   mtime
30 |   mtime.clock.os
31 |   yojson
32 |   str |}
33 |   ^ maybe_domain_shims
34 |   ^ {| ))
35 | 
36 | (mdx
37 |  (libraries multicore-bench)
38 |  (files multicore_bench.mli))
39 | |}
40 | 


--------------------------------------------------------------------------------
/lib/list_ext.ml:
--------------------------------------------------------------------------------
 1 | let default_duplicate _ _ = invalid_arg "duplicate key"
 2 | let default_missing _ _ = None
 3 | 
 4 | let zip_by (type k) ?(duplicate = default_duplicate)
 5 |     ?(missing = default_missing) (compare : k -> _) key_of xs ys =
 6 |   let (module M) = Map_ext.make compare in
 7 |   let to_map xs =
 8 |     xs
 9 |     |> List.fold_left
10 |          (fun m x ->
11 |            m
12 |            |> M.update (key_of x) @@ function
13 |               | None -> Some x
14 |               | Some y -> duplicate x y)
15 |          M.empty
16 |   in
17 |   M.merge
18 |     (fun _ x y ->
19 |       match (x, y) with
20 |       | Some x, Some y -> Some (x, y)
21 |       | Some x, None -> missing `R x
22 |       | None, Some y -> missing `L y
23 |       | None, None -> None)
24 |     (to_map xs) (to_map ys)
25 |   |> M.bindings |> List.map snd
26 | 


--------------------------------------------------------------------------------
/lib/metric.ml:
--------------------------------------------------------------------------------
 1 | type t = Yojson.Safe.t
 2 | 
 3 | let a_non_breaking_space = " "
 4 | 
 5 | let to_nonbreaking s =
 6 |   s |> String.split_on_char ' ' |> String.concat a_non_breaking_space
 7 | 
 8 | let name ~metric ~config = to_nonbreaking (metric ^ "/" ^ config)
 9 | 
10 | let make ~metric ~config ?units ?trend ?description
11 |     (value : [< `Float of float ]) =
12 |   let[@inline] ( @: ) x_opt xs =
13 |     match x_opt with None -> xs | Some x -> x :: xs
14 |   in
15 |   `Assoc
16 |     (Some ("name", `String (name ~metric ~config))
17 |     @: Some ("value", (value :> t))
18 |     @: Option.map (fun units -> ("units", `String units)) units
19 |     @: Option.map (fun trend -> ("trend", Trend.to_json trend)) trend
20 |     @: Option.map
21 |          (fun description -> ("description", `String description))
22 |          description
23 |     @: [])
24 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | ## 0.1.7
 2 | 
 3 | - Ported to OCaml 4.12 (@polytypic)
 4 | - Added scalable `Countdown` counter (@polytypic)
 5 | 
 6 | ## 0.1.6
 7 | 
 8 | - Fail when benchmark results have duplicates (@polytypic)
 9 | 
10 | ## 0.1.5
11 | 
12 | - Improved error handling (@polytypic)
13 | 
14 | ## 0.1.4
15 | 
16 | - Automatically filter benchmarks by given diff base file (@polytypic)
17 | - Randomize suites to expose variation from effects on runtime (@polytypic)
18 | 
19 | ## 0.1.3
20 | 
21 | - Add `Metric.make` to allow ad hoc metrics (@polytypic)
22 | 
23 | ## 0.1.2
24 | 
25 | - Add `-brief` to show results in a concise human readable format (@polytypic)
26 | - Add support for `wrap`ping the work without timing `wrap` itself (@polytypic)
27 | - Add `-diff base.json` switch to diff against base results from file
28 |   (@polytypic)
29 | 
30 | ## 0.1.1
31 | 
32 | - Add debug flag/mode to print progress information (@polytypic)
33 | - Fix to use same start time on all domains (@polytypic)
34 | 
35 | ## 0.1.0
36 | 
37 | - Initial version of multicore-bench (@polytypic)
38 | 


--------------------------------------------------------------------------------
/multicore-bench.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | synopsis:
 4 |   "Framework for writing multicore benchmark executables to run on current-bench"
 5 | maintainer: ["Vesa Karvonen <vesa.a.j.k@gmail.com>"]
 6 | authors: ["Vesa Karvonen <vesa.a.j.k@gmail.com>"]
 7 | license: "ISC"
 8 | homepage: "https://github.com/ocaml-multicore/multicore-bench"
 9 | bug-reports: "https://github.com/ocaml-multicore/multicore-bench/issues"
10 | depends: [
11 |   "dune" {>= "3.14"}
12 |   "domain-local-await" {>= "1.0.1"}
13 |   "multicore-magic" {>= "2.1.0"}
14 |   "mtime" {>= "2.0.0"}
15 |   "yojson" {>= "2.1.0"}
16 |   "domain_shims" {>= "0.1.0"}
17 |   "backoff" {>= "0.1.0"}
18 |   "mdx" {>= "2.4.0" & with-test}
19 |   "sherlodoc" {>= "0.2" & with-doc}
20 |   "odoc" {>= "2.4.1" & with-doc}
21 |   "ocaml" {>= "4.12.0"}
22 | ]
23 | build: [
24 |   ["dune" "subst"] {dev}
25 |   [
26 |     "dune"
27 |     "build"
28 |     "-p"
29 |     name
30 |     "-j"
31 |     jobs
32 |     "@install"
33 |     "@runtest" {with-test}
34 |     "@doc" {with-doc}
35 |   ]
36 | ]
37 | dev-repo: "git+https://github.com/ocaml-multicore/multicore-bench.git"
38 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 3.14)
 2 | 
 3 | (name multicore-bench)
 4 | 
 5 | (generate_opam_files true)
 6 | 
 7 | (implicit_transitive_deps false)
 8 | 
 9 | (authors "Vesa Karvonen <vesa.a.j.k@gmail.com>")
10 | 
11 | (maintainers "Vesa Karvonen <vesa.a.j.k@gmail.com>")
12 | 
13 | (source
14 |  (github ocaml-multicore/multicore-bench))
15 | 
16 | (homepage "https://github.com/ocaml-multicore/multicore-bench")
17 | 
18 | (license ISC)
19 | 
20 | (using mdx 0.4)
21 | 
22 | (package
23 |  (name multicore-bench)
24 |  (synopsis
25 |   "Framework for writing multicore benchmark executables to run on current-bench")
26 |  (depends
27 |   (domain-local-await
28 |    (>= 1.0.1))
29 |   (multicore-magic
30 |    (>= 2.1.0))
31 |   (mtime
32 |    (>= 2.0.0))
33 |   (yojson
34 |    (>= 2.1.0))
35 |   (domain_shims
36 |    (>= 0.1.0))
37 |   (backoff
38 |    (>= 0.1.0))
39 |   ;; Test dependencies
40 |   (mdx
41 |    (and
42 |     (>= 2.4.0)
43 |     :with-test))
44 |   ;; Documentation dependencies
45 |   (sherlodoc
46 |    (and
47 |     (>= 0.2)
48 |     :with-doc))
49 |   (odoc
50 |    (and
51 |     (>= 2.4.1)
52 |     :with-doc))
53 |   ;; OCaml version
54 |   (ocaml
55 |    (>= 4.12.0))))
56 | 


--------------------------------------------------------------------------------
/bench/bench_unix.ml:
--------------------------------------------------------------------------------
 1 | open Multicore_bench
 2 | 
 3 | let run_one ~budgetf ~n_domains () =
 4 |   let block_size = 4096 in
 5 |   let n_blocks = 16 in
 6 | 
 7 |   let init _ =
 8 |     let inn, out = Unix.pipe ~cloexec:true () in
 9 |     (inn, out, Bytes.create block_size, Bytes.create 1)
10 |   in
11 |   let work _ (inn, out, block, byte) =
12 |     for _ = 1 to n_blocks do
13 |       let n = Unix.write out block 0 block_size in
14 |       assert (n = block_size);
15 |       for _ = 1 to block_size do
16 |         let n : int = Unix.read inn byte 0 1 in
17 |         assert (n = 1)
18 |       done
19 |     done;
20 |     Unix.close inn;
21 |     Unix.close out
22 |   in
23 | 
24 |   let config =
25 |     Printf.sprintf "%d worker%s" n_domains (if n_domains = 1 then "" else "s")
26 |   in
27 |   Times.record ~budgetf ~n_domains ~n_warmups:1 ~n_runs_min:1 ~init ~work ()
28 |   |> Times.to_thruput_metrics
29 |        ~n:(block_size * n_blocks * n_domains)
30 |        ~singular:"blocking read" ~config
31 | 
32 | let run_suite ~budgetf =
33 |   [ 1; 2; 4 ]
34 |   |> List.concat_map @@ fun n_domains ->
35 |      if Sys.win32 || Domain.recommended_domain_count () < n_domains then []
36 |      else run_one ~budgetf ~n_domains ()
37 | 


--------------------------------------------------------------------------------
/lib/data.ml:
--------------------------------------------------------------------------------
 1 | open Option_ext.Syntax
 2 | 
 3 | module Trend = struct
 4 |   type t = [ `Lower_is_better | `Higher_is_better ]
 5 | 
 6 |   let parse =
 7 |     Json.as_string >=> function
 8 |     | "lower-is-better" -> Some `Lower_is_better
 9 |     | "higher-is-better" -> Some `Higher_is_better
10 |     | _ -> None
11 | end
12 | 
13 | module Metric = struct
14 |   type units = string
15 | 
16 |   type t = {
17 |     name : string;
18 |     value : float;
19 |     units : units;
20 |     trend : Trend.t;
21 |     description : string;
22 |   }
23 | 
24 |   let parse =
25 |     (Json.prop "name" >=> Json.as_string
26 |     & Json.prop "value" >=> Json.as_float
27 |     & Json.prop "units" >=> Json.as_string
28 |     & Json.prop "trend" >=> Trend.parse
29 |     & Json.prop "description" >=> Json.as_string)
30 |     >+> fun (name :: value :: units :: trend :: description) ->
31 |     { name; value; units; trend; description }
32 | 
33 |   let name x = x.name
34 | end
35 | 
36 | module Benchmark = struct
37 |   type t = { name : string; metrics : Metric.t list }
38 | 
39 |   let parse =
40 |     (Json.prop "name" >=> Json.as_string
41 |     & Json.prop "metrics" >=> Json.as_list >+> List.filter_map Metric.parse)
42 |     >+> fun (name :: metrics) -> { name; metrics }
43 | 
44 |   let name x = x.name
45 | end
46 | 
47 | module Results = struct
48 |   type t = Benchmark.t list
49 | 
50 |   let parse =
51 |     Json.prop "results" >=> Json.as_list >+> List.filter_map Benchmark.parse
52 | end
53 | 


--------------------------------------------------------------------------------
/update-gh-pages-for-tag:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -xeuo pipefail
 4 | 
 5 | TMP=tmp
 6 | NAME=multicore-bench
 7 | MAIN=doc
 8 | GIT="git@github.com:ocaml-multicore/$NAME.git"
 9 | DOC="_build/default/_doc/_html"
10 | GH_PAGES=gh-pages
11 | 
12 | TAG="$1"
13 | 
14 | if ! [ -e $NAME.opam ] || [ $# -ne 1 ] || \
15 |      { [ "$TAG" != main ] && ! [ "$(git tag -l "$TAG")" ]; }; then
16 |   CMD="${0##*/}"
17 |   cat << EOF
18 | Usage: $CMD tag-name-or-main
19 | 
20 | This script
21 | - clones the repository into a temporary directory ($TMP/$NAME),
22 | - builds the documentation for the specified tag or main,
23 | - updates $GH_PAGES branch with the documentation in directory for the tag,
24 | - prompts whether to also update the main documentation in $MAIN directory, and
25 | - prompts whether to push changes to $GH_PAGES.
26 | 
27 | EOF
28 |   exit 1
29 | fi
30 | 
31 | opam install sherlodoc
32 | 
33 | mkdir $TMP
34 | cd $TMP
35 | 
36 | git clone $GIT
37 | cd $NAME
38 | 
39 | git checkout "$TAG"
40 | dune build @doc --root=.
41 | 
42 | git checkout $GH_PAGES
43 | if [ "$TAG" != main ]; then
44 |   echo "Updating the $TAG doc."
45 |   if [ -e "$TAG" ]; then
46 |     git rm -rf "$TAG"
47 |   fi
48 |   cp -r $DOC "$TAG"
49 |   git add "$TAG"
50 | fi
51 | 
52 | read -p "Update the main doc? (y/N) " -n 1 -r
53 | echo
54 | if [[ $REPLY =~ ^[Yy]$ ]]; then
55 |   if [ -e $MAIN ]; then
56 |     git rm -rf $MAIN
57 |   fi
58 |   cp -r $DOC $MAIN
59 |   git add $MAIN
60 | else
61 |   echo "Skipped main doc update."
62 | fi
63 | 
64 | git commit -m "Update $NAME doc for $TAG"
65 | 
66 | read -p "Push changes to $GH_PAGES? (y/N) " -n 1 -r
67 | echo
68 | if ! [[ $REPLY =~ ^[Yy]$ ]]; then
69 |   echo "Leaving $TMP for you to examine."
70 |   exit 1
71 | fi
72 | 
73 | git push
74 | 
75 | cd ..
76 | cd ..
77 | rm -rf $TMP
78 | 


--------------------------------------------------------------------------------
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
 1 | name: build-and-test
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build-windows:
11 |     strategy:
12 |       fail-fast: false
13 |       matrix:
14 |         ocaml-compiler:
15 |           - ocaml.5.2.1,ocaml-option-mingw
16 |           - ocaml.5.3.0,ocaml-option-mingw
17 | 
18 |     runs-on: windows-latest
19 | 
20 |     env:
21 |       QCHECK_MSG_INTERVAL: '60'
22 | 
23 |     steps:
24 |       - name: Check out code
25 |         uses: actions/checkout@v3
26 | 
27 |       - name: Set up OCaml
28 |         uses: ocaml/setup-ocaml@v3
29 |         with:
30 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
31 |           opam-repositories: |
32 |             dra27: https://github.com/dra27/opam-repository.git#windows-5.0
33 |             default: https://github.com/ocaml-opam/opam-repository-mingw.git#sunset
34 |             standard: https://github.com/ocaml/opam-repository.git
35 | 
36 |       - name: Install dependencies
37 |         run: opam install . --deps-only --with-test
38 | 
39 |       - name: Build
40 |         run: opam exec -- dune build
41 | 
42 |       - name: Test
43 |         run: opam exec -- dune runtest
44 | 
45 |   build-on-lower-bound:
46 |     strategy:
47 |       fail-fast: false
48 |       matrix:
49 |         os:
50 |           - ubuntu-latest
51 |         ocaml-compiler:
52 |           - 4.12.x
53 | 
54 |     runs-on: ${{ matrix.os }}
55 | 
56 |     steps:
57 |       - name: Check out code
58 |         uses: actions/checkout@v3
59 | 
60 |       - name: Set up OCaml
61 |         uses: ocaml/setup-ocaml@v3
62 |         with:
63 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
64 | 
65 |       - name: Install dependencies
66 |         run: opam install . --deps-only --with-test
67 | 
68 |       - name: Build
69 |         run: opam exec -- dune build
70 | 
71 |       - name: Test
72 |         run: opam exec -- dune runtest
73 | 


--------------------------------------------------------------------------------
/lib/barrier.ml:
--------------------------------------------------------------------------------
 1 | (** This barrier is designed to take a single cache line (or word) and to return
 2 |     with the participating domains synchronized as precisely as possible. *)
 3 | 
 4 | type t = Obj.t Atomic.t
 5 | 
 6 | let bits = (Sys.int_size - 1) / 2
 7 | let mask = (1 lsl bits) - 1
 8 | let one = 1 lsl bits
 9 | 
10 | let make total : t =
11 |   if total <= 0 || mask < total then invalid_arg "Barrier: out of bounds";
12 |   Atomic.make (Obj.repr total) |> Multicore_magic.copy_as_padded
13 | 
14 | let rec fad (t : t) (n : int) backoff =
15 |   let before = Atomic.get t in
16 |   if Obj.is_int before then begin
17 |     let state = Obj.obj before in
18 |     let after = Obj.repr (state + n) in
19 |     if Atomic.compare_and_set t before after then state
20 |     else fad t n (Backoff.once backoff)
21 |   end
22 |   else
23 |     let exn, bt = Obj.obj before in
24 |     Printexc.raise_with_backtrace exn bt
25 | 
26 | let rec set (t : t) (n : int) backoff =
27 |   let before = Atomic.get t in
28 |   if Obj.is_int before then begin
29 |     if not (Atomic.compare_and_set t before (Obj.repr n)) then
30 |       set t n (Backoff.once backoff)
31 |   end
32 |   else
33 |     let exn, bt = Obj.obj before in
34 |     Printexc.raise_with_backtrace exn bt
35 | 
36 | let get (t : t) : int =
37 |   let before = Atomic.get t in
38 |   if Obj.is_int before then Obj.obj before
39 |   else
40 |     let exn, bt = Obj.obj before in
41 |     Printexc.raise_with_backtrace exn bt
42 | 
43 | let await (t : t) =
44 |   let state = fad t one Backoff.default in
45 |   let total = state land mask in
46 |   if state lsr bits = total - 1 then
47 |     set t (total - (total lsl bits)) Backoff.default;
48 | 
49 |   while 0 < get t do
50 |     Domain.cpu_relax ()
51 |   done;
52 | 
53 |   fad t one Backoff.default |> ignore;
54 |   while get t < 0 do
55 |     Domain.cpu_relax ()
56 |   done
57 | 
58 | let rec poison t exn bt =
59 |   let before = Atomic.get t in
60 |   if Obj.is_int before then
61 |     let after = Obj.repr (exn, bt) in
62 |     if not (Atomic.compare_and_set t before after) then poison t exn bt
63 | 


--------------------------------------------------------------------------------
/bench/bench_ref_mutex.ml:
--------------------------------------------------------------------------------
 1 | open Multicore_bench
 2 | 
 3 | module Ref = struct
 4 |   type 'a t = 'a ref
 5 | 
 6 |   let make = ref
 7 | 
 8 |   let[@inline] compare_and_set x before after =
 9 |     !x == before
10 |     && begin
11 |          x := after;
12 |          true
13 |        end
14 | 
15 |   let[@inline] exchange x after =
16 |     let before = !x in
17 |     x := after;
18 |     before
19 | end
20 | 
21 | type t = Op : string * 'a * ('a Ref.t -> _) * ('a Ref.t -> _) -> t
22 | 
23 | (** For some reason allocating the mutex inside [run_one] tends to cause
24 |     performance hiccups, i.e. some operations appear to be 10x slower than
25 |     others, which doesn't make sense. So, we allocate the mutex here. *)
26 | let mutex = Mutex.create ()
27 | 
28 | let run_one ~budgetf ?(n_iter = 250 * Util.iter_factor)
29 |     (Op (name, value, op1, op2)) =
30 |   let loc = Ref.make value in
31 | 
32 |   let init _ = () in
33 |   let work _ () =
34 |     let rec loop i =
35 |       if i > 0 then begin
36 |         Mutex.lock mutex;
37 |         op1 loc |> ignore;
38 |         Mutex.unlock mutex;
39 |         Mutex.lock mutex;
40 |         op2 loc |> ignore;
41 |         Mutex.unlock mutex;
42 |         loop (i - 2)
43 |       end
44 |     in
45 |     loop n_iter
46 |   in
47 | 
48 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
49 |   |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name
50 | 
51 | let run_suite ~budgetf =
52 |   [
53 |     (let get x = !x in
54 |      Op ("get", 42, get, get));
55 |     (let incr x = x := !x + 1 in
56 |      Op ("incr", 0, incr, incr));
57 |     (let push x = x := 101 :: !x
58 |      and pop x = match !x with [] -> () | _ :: xs -> x := xs in
59 |      Op ("push & pop", [], push, pop));
60 |     (let cas01 x = Ref.compare_and_set x 0 1
61 |      and cas10 x = Ref.compare_and_set x 1 0 in
62 |      Op ("cas int", 0, cas01, cas10));
63 |     (let xchg1 x = Ref.exchange x 1 and xchg0 x = Ref.exchange x 0 in
64 |      Op ("xchg int", 0, xchg1, xchg0));
65 |     (let swap x =
66 |        let l, r = !x in
67 |        x := (r, l)
68 |      in
69 |      Op ("swap", (4, 2), swap, swap));
70 |   ]
71 |   |> List.concat_map @@ run_one ~budgetf
72 | 


--------------------------------------------------------------------------------
/lib/countdown.ml:
--------------------------------------------------------------------------------
 1 | module Atomic = Multicore_magic.Transparent_atomic
 2 | 
 3 | type t = int Atomic.t array
 4 | 
 5 | let create ~n_domains () =
 6 |   if n_domains < 1 then invalid_arg "n_domains < 1";
 7 |   let ceil_pow_2_minus_1 n =
 8 |     let open Nativeint in
 9 |     let n = of_int n in
10 |     let n = logor n (shift_right_logical n 1) in
11 |     let n = logor n (shift_right_logical n 2) in
12 |     let n = logor n (shift_right_logical n 4) in
13 |     let n = logor n (shift_right_logical n 8) in
14 |     let n = logor n (shift_right_logical n 16) in
15 |     to_int (if Sys.int_size > 32 then logor n (shift_right_logical n 32) else n)
16 |   in
17 |   let n = ceil_pow_2_minus_1 n_domains in
18 |   let atomics = Array.init n_domains (fun _ -> Atomic.make_contended 0) in
19 |   Array.init n @@ fun i -> Array.unsafe_get atomics (i mod n_domains)
20 | 
21 | let rec arity t i =
22 |   if i < Array.length t && Array.unsafe_get t i != Array.unsafe_get t 0 then
23 |     arity t (i + 1)
24 |   else i
25 | 
26 | let[@inline] arity t = arity t 1
27 | 
28 | let non_atomic_set t count =
29 |   if count < 0 then invalid_arg "count < 0";
30 |   let n = arity t in
31 |   let d = count / n in
32 |   let j = count - (n * d) in
33 |   for i = 0 to n - 1 do
34 |     Atomic.set (Array.unsafe_get t i) (d + Bool.to_int (i < j))
35 |   done
36 | 
37 | let rec get t count i =
38 |   if i < Array.length t && Array.unsafe_get t i != Array.unsafe_get t 0 then
39 |     get t (count + Int_ext.max 0 (Atomic.get (Array.unsafe_get t i))) (i + 1)
40 |   else count
41 | 
42 | let[@inline] get t = get t (Int_ext.max 0 (Atomic.get (Array.unsafe_get t 0))) 1
43 | 
44 | let rec alloc t ~batch i =
45 |   if i < Array.length t then
46 |     let c = Array.unsafe_get t i in
47 |     if 0 < Atomic.get c then
48 |       let n = Atomic.fetch_and_add c (-batch) in
49 |       if 0 < n then Int_ext.min n batch else alloc t ~batch (i + 1)
50 |     else alloc t ~batch (i + 1)
51 |   else 0
52 | 
53 | let[@inline] alloc t ~domain_index ~batch =
54 |   let c = Array.unsafe_get t domain_index in
55 |   if 0 < Atomic.get c then
56 |     let n = Atomic.fetch_and_add c (-batch) in
57 |     if 0 < n then Int_ext.min n batch else alloc t ~batch 0
58 |   else alloc t ~batch 0
59 | 


--------------------------------------------------------------------------------
/lib/util.ml:
--------------------------------------------------------------------------------
 1 | let iter_factor =
 2 |   let factor b = if b then 10 else 1 in
 3 |   factor (64 <= Sys.word_size)
 4 |   * factor (Sys.backend_type = Native)
 5 |   * factor (1 < Domain.recommended_domain_count ())
 6 | 
 7 | let rec alloc ?(batch = 1000) counter =
 8 |   let n = Atomic.get counter in
 9 |   if n = 0 then 0
10 |   else
11 |     let batch = Int_ext.min n batch in
12 |     if Atomic.compare_and_set counter n (n - batch) then batch
13 |     else alloc ~batch counter
14 | 
15 | let cross xs ys =
16 |   xs |> List.concat_map @@ fun x -> ys |> List.map @@ fun y -> (x, y)
17 | 
18 | module Bits = struct
19 |   type t = { mutable bytes : Bytes.t; mutable length : int }
20 | 
21 |   let create () = { bytes = Bytes.create 1; length = 0 }
22 | 
23 |   let push t bool =
24 |     let capacity = Bytes.length t.bytes lsl 3 in
25 |     if t.length == capacity then
26 |       t.bytes <- Bytes.extend t.bytes 0 (capacity lsr 3);
27 |     let byte_i = t.length lsr 3 in
28 |     let mask = 1 lsl (t.length land 7) in
29 |     t.length <- t.length + 1;
30 |     let byte = Char.code (Bytes.unsafe_get t.bytes byte_i) in
31 |     let byte = if bool then byte lor mask else byte land lnot mask in
32 |     Bytes.unsafe_set t.bytes byte_i (Char.chr byte)
33 | 
34 |   let length t = t.length
35 | 
36 |   let iter fn t =
37 |     let i = ref 0 in
38 |     let n = t.length in
39 |     while !i < n do
40 |       let ix = !i in
41 |       i := !i + 8;
42 |       let byte = Char.code (Bytes.unsafe_get t.bytes (ix lsr 3)) in
43 |       let n = n - ix in
44 |       fn (0 <> byte land 1);
45 |       if 1 < n then fn (0 <> byte land 2);
46 |       if 2 < n then fn (0 <> byte land 4);
47 |       if 3 < n then fn (0 <> byte land 8);
48 |       if 4 < n then fn (0 <> byte land 16);
49 |       if 5 < n then fn (0 <> byte land 32);
50 |       if 6 < n then fn (0 <> byte land 64);
51 |       if 7 < n then fn (0 <> byte land 128)
52 |     done
53 | end
54 | 
55 | let generate_push_and_pop_sequence ?(state = Random.State.make_self_init ())
56 |     n_msgs =
57 |   let bits = Bits.create () in
58 |   let rec loop length n_push n_pop =
59 |     if 0 < n_push || 0 < n_pop then begin
60 |       let push = Random.State.bool state && 0 < n_push in
61 |       Bits.push bits push;
62 |       loop
63 |         (if push then length + 1 else if 0 < length then length - 1 else length)
64 |         (n_push - Bool.to_int push)
65 |         (n_pop - Bool.to_int ((not push) && 0 < length))
66 |     end
67 |     else length
68 |   in
69 |   let length = loop 0 n_msgs n_msgs in
70 |   assert (length = 0);
71 |   bits
72 | 


--------------------------------------------------------------------------------
/bench/bench_incr.ml:
--------------------------------------------------------------------------------
 1 | open Multicore_bench
 2 | 
 3 | let run_one ~budgetf ~n_domains ~approach () =
 4 |   let counter = Atomic.make 0 |> Multicore_magic.copy_as_padded in
 5 | 
 6 |   let n_ops = 500 * Util.iter_factor / n_domains in
 7 | 
 8 |   let n_ops_todo = Countdown.create ~n_domains () in
 9 | 
10 |   let init _ = Countdown.non_atomic_set n_ops_todo n_ops in
11 |   let work domain_index () =
12 |     match approach with
13 |     | `Cas ->
14 |         let rec work () =
15 |           let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in
16 |           if n <> 0 then
17 |             let rec loop n =
18 |               if 0 < n then begin
19 |                 let v = Atomic.get counter in
20 |                 let success = Atomic.compare_and_set counter v (v + 1) in
21 |                 loop (n - Bool.to_int success)
22 |               end
23 |               else work ()
24 |             in
25 |             loop n
26 |         in
27 |         work ()
28 |     | `Cas_backoff ->
29 |         let rec work () =
30 |           let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in
31 |           if n <> 0 then
32 |             let rec loop backoff n =
33 |               if 0 < n then begin
34 |                 let v = Atomic.get counter in
35 |                 if Atomic.compare_and_set counter v (v + 1) then
36 |                   loop Backoff.default (n - 1)
37 |                 else loop (Backoff.once backoff) n
38 |               end
39 |               else work ()
40 |             in
41 |             loop Backoff.default n
42 |         in
43 |         work ()
44 |     | `Incr ->
45 |         let rec work () =
46 |           let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in
47 |           if n <> 0 then
48 |             let rec loop n =
49 |               if 0 < n then begin
50 |                 Atomic.incr counter;
51 |                 loop (n - 1)
52 |               end
53 |               else work ()
54 |             in
55 |             loop n
56 |         in
57 |         work ()
58 |   in
59 | 
60 |   let config =
61 |     Printf.sprintf "%s, %d domains"
62 |       (match approach with
63 |       | `Cas -> "CAS"
64 |       | `Cas_backoff -> "CAS with backoff"
65 |       | `Incr -> "Incr")
66 |       n_domains
67 |   in
68 |   Times.record ~budgetf ~n_domains ~init ~work ()
69 |   |> Times.to_thruput_metrics ~n:n_ops ~singular:"op" ~config
70 | 
71 | let run_suite ~budgetf =
72 |   Util.cross [ `Cas; `Cas_backoff; `Incr ] [ 1; 2; 4; 8 ]
73 |   |> List.concat_map @@ fun (approach, n_domains) ->
74 |      if Domain.recommended_domain_count () < n_domains then []
75 |      else run_one ~budgetf ~n_domains ~approach ()
76 | 


--------------------------------------------------------------------------------
/bench/bench_hashtbl.ml:
--------------------------------------------------------------------------------
  1 | open Multicore_bench
  2 | 
  3 | module Int = struct
  4 |   include Int
  5 | 
  6 |   let hash = Fun.id
  7 | end
  8 | 
  9 | module Htbl = Hashtbl.Make (Int)
 10 | 
 11 | let mutex = Mutex.create ()
 12 | 
 13 | let run_one ~budgetf ~n_domains ~use_mutex ?(n_keys = 1000) ~percent_mem
 14 |     ?(percent_add = (100 - percent_mem + 1) / 2) ?(prepopulate = true) () =
 15 |   let limit_mem = percent_mem in
 16 |   let limit_add = percent_mem + percent_add in
 17 | 
 18 |   assert (0 <= limit_mem && limit_mem <= 100);
 19 |   assert (limit_mem <= limit_add && limit_add <= 100);
 20 | 
 21 |   let t = Htbl.create n_keys in
 22 | 
 23 |   if prepopulate then
 24 |     for _ = 1 to n_keys do
 25 |       let value = Random.bits () in
 26 |       let key = value mod n_keys in
 27 |       Htbl.replace t key value
 28 |     done;
 29 | 
 30 |   let n_ops = (if use_mutex then 100 else 400) * Util.iter_factor in
 31 |   let n_ops = (100 + percent_mem) * n_ops / 100 in
 32 | 
 33 |   let n_ops_todo = Countdown.create ~n_domains () in
 34 | 
 35 |   let init _ =
 36 |     Countdown.non_atomic_set n_ops_todo n_ops;
 37 |     Random.State.make_self_init ()
 38 |   in
 39 |   let work_no_mutex domain_index state =
 40 |     let rec work () =
 41 |       let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in
 42 |       if n <> 0 then
 43 |         let rec loop n =
 44 |           if 0 < n then
 45 |             let value = Random.State.bits state in
 46 |             let op = (value asr 20) mod 100 in
 47 |             let key = value mod n_keys in
 48 |             if op < percent_mem then begin
 49 |               begin
 50 |                 match Htbl.find t key with _ -> () | exception Not_found -> ()
 51 |               end;
 52 |               loop (n - 1)
 53 |             end
 54 |             else if op < limit_add then begin
 55 |               Htbl.replace t key value;
 56 |               loop (n - 1)
 57 |             end
 58 |             else begin
 59 |               Htbl.remove t key;
 60 |               loop (n - 1)
 61 |             end
 62 |           else work ()
 63 |         in
 64 |         loop n
 65 |     in
 66 |     work ()
 67 |   in
 68 |   let work_mutex domain_index state =
 69 |     let rec work () =
 70 |       let n = Countdown.alloc n_ops_todo ~domain_index ~batch:100 in
 71 |       if n <> 0 then
 72 |         let rec loop n =
 73 |           if 0 < n then
 74 |             let value = Random.State.bits state in
 75 |             let op = (value asr 20) mod 100 in
 76 |             let key = value mod n_keys in
 77 |             if op < percent_mem then begin
 78 |               Mutex.lock mutex;
 79 |               begin
 80 |                 match Htbl.find t key with _ -> () | exception Not_found -> ()
 81 |               end;
 82 |               Mutex.unlock mutex;
 83 |               loop (n - 1)
 84 |             end
 85 |             else if op < limit_add then begin
 86 |               Mutex.lock mutex;
 87 |               Htbl.replace t key value;
 88 |               Mutex.unlock mutex;
 89 |               loop (n - 1)
 90 |             end
 91 |             else begin
 92 |               Mutex.lock mutex;
 93 |               Htbl.remove t key;
 94 |               Mutex.unlock mutex;
 95 |               loop (n - 1)
 96 |             end
 97 |           else work ()
 98 |         in
 99 |         loop n
100 |     in
101 |     work ()
102 |   in
103 | 
104 |   let config =
105 |     let percent_mem = Printf.sprintf "%d%% reads" percent_mem in
106 |     if use_mutex then
107 |       Printf.sprintf "%d worker%s, %s" n_domains
108 |         (if n_domains = 1 then "" else "s")
109 |         percent_mem
110 |     else Printf.sprintf "one domain, %s" percent_mem
111 |   in
112 |   let work = if use_mutex then work_mutex else work_no_mutex in
113 |   Times.record ~budgetf ~n_domains ~init ~work ()
114 |   |> Times.to_thruput_metrics ~n:n_ops ~singular:"operation" ~config
115 | 
116 | let run_suite ~budgetf =
117 |   ([ 10; 50; 90 ]
118 |   |> List.concat_map @@ fun percent_mem ->
119 |      run_one ~budgetf ~n_domains:1 ~use_mutex:false ~percent_mem ())
120 |   @ (Util.cross [ 10; 50; 90 ] [ 1; 2; 4; 8 ]
121 |     |> List.concat_map @@ fun (percent_mem, n_domains) ->
122 |        run_one ~budgetf ~n_domains ~use_mutex:true ~percent_mem ())
123 | 


--------------------------------------------------------------------------------
/bench/bench_atomic.ml:
--------------------------------------------------------------------------------
  1 | open Multicore_bench
  2 | 
  3 | module Atomic = struct
  4 |   include Stdlib.Atomic
  5 | 
  6 |   let rec modify ?(backoff = Backoff.default) x f =
  7 |     let before = Atomic.get x in
  8 |     let after = f before in
  9 |     if not (Atomic.compare_and_set x before after) then
 10 |       modify ~backoff:(Backoff.once backoff) x f
 11 | end
 12 | 
 13 | type _ op =
 14 |   | Get : int op
 15 |   | Incr : int op
 16 |   | Push_and_pop : int list op
 17 |   | Cas_int : int op
 18 |   | Xchg_int : int op
 19 |   | Swap : (int * int) op
 20 | 
 21 | let run_one (type a) ~budgetf ?(n_iter = 500 * Util.iter_factor) (op : a op) =
 22 |   let name, extra, (value : a) =
 23 |     match op with
 24 |     | Get -> ("get", 10, 42)
 25 |     | Incr -> ("incr", 1, 0)
 26 |     | Push_and_pop -> ("push & pop", 2, [])
 27 |     | Cas_int -> ("cas int", 1, 0)
 28 |     | Xchg_int -> ("xchg int", 1, 0)
 29 |     | Swap -> ("swap", 1, (4, 2))
 30 |   in
 31 | 
 32 |   let n_iter = n_iter * extra in
 33 | 
 34 |   let loc = Atomic.make value in
 35 | 
 36 |   let init _ = () in
 37 |   let work _ () =
 38 |     match op with
 39 |     | Get ->
 40 |         let rec loop i =
 41 |           if i > 0 then begin
 42 |             let a =
 43 |               Atomic.get (Sys.opaque_identity loc)
 44 |               land Atomic.get (Sys.opaque_identity loc)
 45 |             and b =
 46 |               Atomic.get (Sys.opaque_identity loc)
 47 |               land Atomic.get (Sys.opaque_identity loc)
 48 |             and c =
 49 |               Atomic.get (Sys.opaque_identity loc)
 50 |               land Atomic.get (Sys.opaque_identity loc)
 51 |             and d =
 52 |               Atomic.get (Sys.opaque_identity loc)
 53 |               land Atomic.get (Sys.opaque_identity loc)
 54 |             in
 55 |             loop (i - 8 + (a - b) + (c - d))
 56 |           end
 57 |         in
 58 |         loop n_iter
 59 |     | Incr ->
 60 |         let rec loop i =
 61 |           if i > 0 then begin
 62 |             Atomic.incr loc;
 63 |             Atomic.incr loc;
 64 |             Atomic.incr loc;
 65 |             Atomic.incr loc;
 66 |             Atomic.incr loc;
 67 |             Atomic.incr loc;
 68 |             loop (i - 6)
 69 |           end
 70 |         in
 71 |         loop n_iter
 72 |     | Push_and_pop ->
 73 |         let[@inline] push x = Atomic.modify x (fun xs -> 101 :: xs)
 74 |         and[@inline] pop x =
 75 |           Atomic.modify x (function [] -> [] | _ :: xs -> xs)
 76 |         in
 77 |         let rec loop i =
 78 |           if i > 0 then begin
 79 |             push loc;
 80 |             pop loc |> ignore;
 81 |             push loc;
 82 |             pop loc |> ignore;
 83 |             loop (i - 4)
 84 |           end
 85 |         in
 86 |         loop n_iter
 87 |     | Cas_int ->
 88 |         let rec loop i =
 89 |           if i > 0 then begin
 90 |             Atomic.compare_and_set loc 0 1 |> ignore;
 91 |             Atomic.compare_and_set loc 1 0 |> ignore;
 92 |             Atomic.compare_and_set loc 0 1 |> ignore;
 93 |             Atomic.compare_and_set loc 1 0 |> ignore;
 94 |             Atomic.compare_and_set loc 0 1 |> ignore;
 95 |             Atomic.compare_and_set loc 1 0 |> ignore;
 96 |             loop (i - 6)
 97 |           end
 98 |         in
 99 |         loop n_iter
100 |     | Xchg_int ->
101 |         let rec loop i =
102 |           if i > 0 then begin
103 |             Atomic.exchange loc 1 |> ignore;
104 |             Atomic.exchange loc 0 |> ignore;
105 |             Atomic.exchange loc 1 |> ignore;
106 |             Atomic.exchange loc 0 |> ignore;
107 |             Atomic.exchange loc 1 |> ignore;
108 |             Atomic.exchange loc 0 |> ignore;
109 |             loop (i - 6)
110 |           end
111 |         in
112 |         loop n_iter
113 |     | Swap ->
114 |         let[@inline] swap x = Atomic.modify x (fun (x, y) -> (y, x)) in
115 |         let rec loop i =
116 |           if i > 0 then begin
117 |             swap loc;
118 |             swap loc;
119 |             swap loc;
120 |             swap loc;
121 |             swap loc;
122 |             swap loc;
123 |             loop (i - 6)
124 |           end
125 |         in
126 |         loop n_iter
127 |   in
128 | 
129 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
130 |   |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name
131 | 
132 | let run_suite ~budgetf =
133 |   [
134 |     run_one ~budgetf Get;
135 |     run_one ~budgetf Incr;
136 |     run_one ~budgetf Push_and_pop;
137 |     run_one ~budgetf Cas_int;
138 |     run_one ~budgetf Xchg_int;
139 |     run_one ~budgetf Swap;
140 |   ]
141 |   |> List.concat
142 | 


--------------------------------------------------------------------------------
/bench/bench_ref.ml:
--------------------------------------------------------------------------------
  1 | open Multicore_bench
  2 | 
  3 | module Ref = struct
  4 |   type 'a t = 'a ref
  5 | 
  6 |   let make = ref
  7 |   let get = ( ! )
  8 |   let[@poll error] [@inline never] incr x = x := !x + 1
  9 | 
 10 |   let[@poll error] [@inline never] compare_and_set x before after =
 11 |     !x == before
 12 |     && begin
 13 |          x := after;
 14 |          true
 15 |        end
 16 | 
 17 |   let[@poll error] [@inline never] exchange x after =
 18 |     let before = !x in
 19 |     x := after;
 20 |     before
 21 | 
 22 |   let rec modify ?(backoff = Backoff.default) x f =
 23 |     let before = get x in
 24 |     let after = f before in
 25 |     if not (compare_and_set x before after) then
 26 |       modify ~backoff:(Backoff.once backoff) x f
 27 | end
 28 | 
 29 | type _ op =
 30 |   | Get : int op
 31 |   | Incr : int op
 32 |   | Push_and_pop : int list op
 33 |   | Cas_int : int op
 34 |   | Xchg_int : int op
 35 |   | Swap : (int * int) op
 36 | 
 37 | let run_one (type a) ~budgetf ?(n_iter = 500 * Util.iter_factor) (op : a op) =
 38 |   let name, extra, (value : a) =
 39 |     match op with
 40 |     | Get -> ("get", 10, 42)
 41 |     | Incr -> ("incr", 1, 0)
 42 |     | Push_and_pop -> ("push & pop", 2, [])
 43 |     | Cas_int -> ("cas int", 1, 0)
 44 |     | Xchg_int -> ("xchg int", 1, 0)
 45 |     | Swap -> ("swap", 1, (4, 2))
 46 |   in
 47 | 
 48 |   let n_iter = n_iter * extra in
 49 | 
 50 |   let loc = Ref.make value in
 51 | 
 52 |   let init _ = () in
 53 |   let work _ () =
 54 |     match op with
 55 |     | Get ->
 56 |         let rec loop i =
 57 |           if i > 0 then begin
 58 |             let a =
 59 |               Ref.get (Sys.opaque_identity loc)
 60 |               land Ref.get (Sys.opaque_identity loc)
 61 |             and b =
 62 |               Ref.get (Sys.opaque_identity loc)
 63 |               land Ref.get (Sys.opaque_identity loc)
 64 |             and c =
 65 |               Ref.get (Sys.opaque_identity loc)
 66 |               land Ref.get (Sys.opaque_identity loc)
 67 |             and d =
 68 |               Ref.get (Sys.opaque_identity loc)
 69 |               land Ref.get (Sys.opaque_identity loc)
 70 |             in
 71 |             loop (i - 8 + (a - b) + (c - d))
 72 |           end
 73 |         in
 74 |         loop n_iter
 75 |     | Incr ->
 76 |         let rec loop i =
 77 |           if i > 0 then begin
 78 |             Ref.incr loc;
 79 |             Ref.incr loc;
 80 |             Ref.incr loc;
 81 |             Ref.incr loc;
 82 |             Ref.incr loc;
 83 |             Ref.incr loc;
 84 |             loop (i - 6)
 85 |           end
 86 |         in
 87 |         loop n_iter
 88 |     | Push_and_pop ->
 89 |         let[@inline] push x = Ref.modify x (fun xs -> 101 :: xs)
 90 |         and[@inline] pop x =
 91 |           Ref.modify x (function [] -> [] | _ :: xs -> xs)
 92 |         in
 93 |         let rec loop i =
 94 |           if i > 0 then begin
 95 |             push loc;
 96 |             pop loc |> ignore;
 97 |             push loc;
 98 |             pop loc |> ignore;
 99 |             loop (i - 4)
100 |           end
101 |         in
102 |         loop n_iter
103 |     | Cas_int ->
104 |         let rec loop i =
105 |           if i > 0 then begin
106 |             Ref.compare_and_set loc 0 1 |> ignore;
107 |             Ref.compare_and_set loc 1 0 |> ignore;
108 |             Ref.compare_and_set loc 0 1 |> ignore;
109 |             Ref.compare_and_set loc 1 0 |> ignore;
110 |             Ref.compare_and_set loc 0 1 |> ignore;
111 |             Ref.compare_and_set loc 1 0 |> ignore;
112 |             loop (i - 6)
113 |           end
114 |         in
115 |         loop n_iter
116 |     | Xchg_int ->
117 |         let rec loop i =
118 |           if i > 0 then begin
119 |             Ref.exchange loc 1 |> ignore;
120 |             Ref.exchange loc 0 |> ignore;
121 |             Ref.exchange loc 1 |> ignore;
122 |             Ref.exchange loc 0 |> ignore;
123 |             Ref.exchange loc 1 |> ignore;
124 |             Ref.exchange loc 0 |> ignore;
125 |             loop (i - 6)
126 |           end
127 |         in
128 |         loop n_iter
129 |     | Swap ->
130 |         let[@inline] swap x = Ref.modify x (fun (x, y) -> (y, x)) in
131 |         let rec loop i =
132 |           if i > 0 then begin
133 |             swap loc;
134 |             swap loc;
135 |             swap loc;
136 |             swap loc;
137 |             swap loc;
138 |             swap loc;
139 |             loop (i - 6)
140 |           end
141 |         in
142 |         loop n_iter
143 |   in
144 | 
145 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
146 |   |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name
147 | 
148 | let run_suite ~budgetf =
149 |   [
150 |     run_one ~budgetf Get;
151 |     run_one ~budgetf Incr;
152 |     run_one ~budgetf Push_and_pop;
153 |     run_one ~budgetf Cas_int;
154 |     run_one ~budgetf Xchg_int;
155 |     run_one ~budgetf Swap;
156 |   ]
157 |   |> List.concat
158 | 


--------------------------------------------------------------------------------
/bench/bench_bounded_q.ml:
--------------------------------------------------------------------------------
  1 | open Multicore_bench
  2 | module Queue = Stdlib.Queue
  3 | 
  4 | module Bounded_q : sig
  5 |   type 'a t
  6 | 
  7 |   val create : ?capacity:int -> unit -> 'a t
  8 |   val is_empty : 'a t -> bool
  9 |   val push : 'a t -> 'a -> unit
 10 |   val pop : 'a t -> 'a
 11 |   val pop_opt : 'a t -> 'a option
 12 | end = struct
 13 |   type 'a t = {
 14 |     mutex : Mutex.t;
 15 |     queue : 'a Queue.t;
 16 |     capacity : int;
 17 |     not_empty : Condition.t;
 18 |     not_full : Condition.t;
 19 |   }
 20 | 
 21 |   let create ?(capacity = Int.max_int) () =
 22 |     if capacity < 0 then invalid_arg "negative capacity"
 23 |     else
 24 |       let mutex = Mutex.create ()
 25 |       and queue = Queue.create () |> Multicore_magic.copy_as_padded
 26 |       and not_empty = Condition.create ()
 27 |       and not_full = Condition.create () in
 28 |       { mutex; queue; capacity; not_empty; not_full }
 29 |       |> Multicore_magic.copy_as_padded
 30 | 
 31 |   let is_empty t =
 32 |     Mutex.lock t.mutex;
 33 |     let result = Queue.is_empty t.queue in
 34 |     Mutex.unlock t.mutex;
 35 |     result
 36 | 
 37 |   let is_full_unsafe t = t.capacity <= Queue.length t.queue
 38 | 
 39 |   let push t x =
 40 |     let was_full = ref false in
 41 |     Mutex.lock t.mutex;
 42 |     match
 43 |       while is_full_unsafe t do
 44 |         was_full := true;
 45 |         Condition.wait t.not_full t.mutex
 46 |       done
 47 |     with
 48 |     | () ->
 49 |         Queue.push x t.queue;
 50 |         let n = Queue.length t.queue in
 51 |         Mutex.unlock t.mutex;
 52 |         if n = 1 then Condition.signal t.not_empty;
 53 |         if !was_full && n < t.capacity then Condition.signal t.not_full
 54 |     | exception exn ->
 55 |         Mutex.unlock t.mutex;
 56 |         raise exn
 57 | 
 58 |   let pop t =
 59 |     let was_empty = ref false in
 60 |     Mutex.lock t.mutex;
 61 |     match
 62 |       while Queue.length t.queue = 0 do
 63 |         was_empty := true;
 64 |         Condition.wait t.not_empty t.mutex
 65 |       done
 66 |     with
 67 |     | () ->
 68 |         let n = Queue.length t.queue in
 69 |         let elem = Queue.pop t.queue in
 70 |         Mutex.unlock t.mutex;
 71 |         if n = t.capacity then Condition.signal t.not_full;
 72 |         if !was_empty && 1 < n then Condition.signal t.not_empty;
 73 |         elem
 74 |     | exception exn ->
 75 |         Mutex.unlock t.mutex;
 76 |         raise exn
 77 | 
 78 |   let pop_opt t =
 79 |     Mutex.lock t.mutex;
 80 |     let n = Queue.length t.queue in
 81 |     let elem_opt = Queue.take_opt t.queue in
 82 |     Mutex.unlock t.mutex;
 83 |     if n = t.capacity then Condition.signal t.not_full;
 84 |     elem_opt
 85 | end
 86 | 
 87 | let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () =
 88 |   let t = Bounded_q.create () in
 89 | 
 90 |   let op push =
 91 |     if push then Bounded_q.push t (ref push) else Bounded_q.pop_opt t |> ignore
 92 |   in
 93 | 
 94 |   let init _ =
 95 |     assert (Bounded_q.is_empty t);
 96 |     Util.generate_push_and_pop_sequence n_msgs
 97 |   in
 98 |   let work _ bits = Util.Bits.iter op bits in
 99 | 
100 |   Times.record ~budgetf ~n_domains:1 ~init ~work ()
101 |   |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain"
102 | 
103 | let run_one ~budgetf ~n_adders ~n_takers ?(n_msgs = 50 * Util.iter_factor) () =
104 |   let n_domains = n_adders + n_takers in
105 | 
106 |   let t = Bounded_q.create () in
107 | 
108 |   let n_msgs_to_take = Countdown.create ~n_domains:n_takers () in
109 |   let n_msgs_to_add = Countdown.create ~n_domains:n_adders () in
110 | 
111 |   let init _ =
112 |     assert (Bounded_q.is_empty t);
113 |     Countdown.non_atomic_set n_msgs_to_take n_msgs;
114 |     Countdown.non_atomic_set n_msgs_to_add n_msgs
115 |   in
116 |   let work i () =
117 |     if i < n_adders then
118 |       let domain_index = i in
119 |       let rec work () =
120 |         let n = Countdown.alloc n_msgs_to_add ~domain_index ~batch:100 in
121 |         if 0 < n then begin
122 |           for i = 1 to n do
123 |             Bounded_q.push t (ref i)
124 |           done;
125 |           work ()
126 |         end
127 |       in
128 |       work ()
129 |     else
130 |       let domain_index = i - n_adders in
131 |       let rec work () =
132 |         let n = Countdown.alloc n_msgs_to_take ~domain_index ~batch:100 in
133 |         if n <> 0 then begin
134 |           for _ = 1 to n do
135 |             ignore (Bounded_q.pop t)
136 |           done;
137 |           work ()
138 |         end
139 |       in
140 |       work ()
141 |   in
142 | 
143 |   let config =
144 |     let format role n =
145 |       Printf.sprintf "%d %s%s" n role (if n = 1 then "" else "s")
146 |     in
147 |     Printf.sprintf "%s, %s" (format "adder" n_adders) (format "taker" n_takers)
148 |   in
149 | 
150 |   Times.record ~budgetf ~n_domains ~init ~work ()
151 |   |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config
152 | 
153 | let run_suite ~budgetf =
154 |   run_one_domain ~budgetf ()
155 |   @ (Util.cross [ 1; 2; 4 ] [ 1; 2; 4 ]
156 |     |> List.concat_map @@ fun (n_adders, n_takers) ->
157 |        if Domain.recommended_domain_count () < n_adders + n_takers then []
158 |        else run_one ~budgetf ~n_adders ~n_takers ())
159 | 


--------------------------------------------------------------------------------
/lib/cmd.ml:
--------------------------------------------------------------------------------
  1 | open Data
  2 | 
  3 | type output = [ `JSON | `Brief | `Diff of string ]
  4 | 
  5 | let worse_colors = [| 196; 197; 198; 199; 200; 201 |]
  6 | let better_colors = [| 46; 47; 48; 49; 50; 51 |]
  7 | 
  8 | let replace_non_breaking_spaces =
  9 |   let a_non_breaking_space = Str.regexp " " in
 10 |   Str.global_substitute a_non_breaking_space (fun _ -> " ")
 11 | 
 12 | let duplicate kind name x _ =
 13 |   failwith
 14 |     (Printf.sprintf "Duplicate %s: %s" kind
 15 |        (name x |> replace_non_breaking_spaces))
 16 | 
 17 | let print_diff base next =
 18 |   List_ext.zip_by
 19 |     ~duplicate:(duplicate "benchmark" Benchmark.name)
 20 |     String.compare Benchmark.name base next
 21 |   |> List.iter @@ fun ((base : Benchmark.t), (next : Benchmark.t)) ->
 22 |      Printf.printf "%s:\n" base.name;
 23 |      let zipped =
 24 |        List_ext.zip_by
 25 |          ~duplicate:(duplicate "metric" Metric.name)
 26 |          String.compare Metric.name base.metrics next.metrics
 27 |      in
 28 |      let extreme_of join trend =
 29 |        List.fold_left
 30 |          (fun acc ((base : Metric.t), (next : Metric.t)) ->
 31 |            if trend <> base.trend || trend <> next.trend then acc
 32 |            else join acc (next.value /. base.value))
 33 |          1.0 zipped
 34 |      in
 35 |      let min_higher = extreme_of Float.min `Higher_is_better in
 36 |      let max_higher = extreme_of Float.max `Higher_is_better in
 37 |      let min_lower = extreme_of Float.min `Lower_is_better in
 38 |      let max_lower = extreme_of Float.max `Lower_is_better in
 39 |      zipped
 40 |      |> List.iter @@ fun ((base : Metric.t), (next : Metric.t)) ->
 41 |         Printf.printf "  %s:\n" base.name;
 42 |         if
 43 |           base.trend <> next.trend || base.units <> next.units
 44 |           || Float.equal base.value next.value
 45 |         then Printf.printf "    %.2f %s\n" next.value next.units
 46 |         else
 47 |           let times = next.value /. base.value in
 48 |           let colors, extreme =
 49 |             if next.trend = `Higher_is_better then
 50 |               if times < 1.0 then (worse_colors, min_higher)
 51 |               else (better_colors, max_higher)
 52 |             else if 1.0 < times then (worse_colors, max_lower)
 53 |             else (better_colors, min_lower)
 54 |           in
 55 |           let range = Float.abs (extreme -. 1.0) in
 56 |           let color =
 57 |             colors.(Float.to_int
 58 |                       (Float.round
 59 |                          (Float.of_int (Array.length colors - 1)
 60 |                          *. Float.abs (extreme -. times)
 61 |                          /. range)))
 62 |           in
 63 |           Printf.printf
 64 |             "    %.2f %s = \x1b[1;38;5;%dm%.2f\x1b\x1b[0;39;49m x %.2f %s\n"
 65 |             next.value next.units color times base.value base.units
 66 | 
 67 | let run_benchmark ~budgetf ~debug (name, fn) =
 68 |   if debug then
 69 |     (* I wish there was a way to tell dune not to capture stderr. *)
 70 |     Printf.printf "Running: %s\n%!" name;
 71 |   `Assoc [ ("name", `String name); ("metrics", `List (fn ~budgetf)) ]
 72 | 
 73 | let name_of = function
 74 |   | `Assoc (("name", `String name) :: _) -> name
 75 |   | _ -> failwith "bug"
 76 | 
 77 | let build_filter = function
 78 |   | [] -> Fun.const true
 79 |   | filters -> begin
 80 |       let regexps = filters |> List.map Str.regexp in
 81 |       fun (name, _) ->
 82 |         regexps
 83 |         |> List.exists @@ fun regexp ->
 84 |            match Str.search_forward regexp name 0 with
 85 |            | _ -> true
 86 |            | exception Not_found -> false
 87 |     end
 88 | 
 89 | let shuffle xs =
 90 |   let xs = Array.of_list xs in
 91 |   let state = Random.State.make_self_init () in
 92 |   let n = Array.length xs in
 93 |   for i = 0 to n - 2 do
 94 |     let j = Random.State.int state (n - i) + i in
 95 |     let t = xs.(i) in
 96 |     xs.(i) <- xs.(j);
 97 |     xs.(j) <- t
 98 |   done;
 99 |   Array.to_list xs
100 | 
101 | let run ~benchmarks ?(budgetf = 0.025) ?(filters = []) ?(debug = false)
102 |     ?(output = `JSON) ?(argv = Sys.argv) ?(flush = true) ?(randomize = true) ()
103 |     =
104 |   let budgetf = ref budgetf in
105 |   let filters = ref filters in
106 |   let debug = ref debug in
107 |   let output = ref output in
108 |   let randomize = ref randomize in
109 | 
110 |   let rec specs =
111 |     [
112 |       ("-budget", Arg.Set_float budgetf, "seconds\t  Budget for a benchmark");
113 |       ( "-debug",
114 |         Arg.Set debug,
115 |         "\t  Print progress information to help debugging" );
116 |       ( "-diff",
117 |         Arg.String (fun path -> output := `Diff path),
118 |         "path.json\t  Show diff against specified base results" );
119 |       ( "-brief",
120 |         Arg.Unit (fun () -> output := `Brief),
121 |         "\t  Show brief human readable results." );
122 |       ("-help", Unit help, "\t  Show this help message");
123 |       ("--help", Unit help, "\t  Show this help message");
124 |     ]
125 |   and help () =
126 |     Arg.usage (Arg.align specs)
127 |       (Printf.sprintf
128 |          "\n\
129 |           Usage: %s <option>* filter*\n\n\
130 |           The filters are regular expressions for selecting benchmarks to run.\n\n\
131 |           Benchmarks:\n\n\
132 |           %s\n\n\
133 |           Options:\n"
134 |          (Filename.basename argv.(0))
135 |          (benchmarks
136 |          |> List.map (fun (name, _) -> "  " ^ name)
137 |          |> String.concat "\n"));
138 |     exit 1
139 |   in
140 |   Arg.parse_argv argv specs (fun filter -> filters := filter :: !filters) "";
141 | 
142 |   if !budgetf < 0.0 || 60.0 *. 60.0 < !budgetf then
143 |     invalid_arg "budgetf out of range";
144 | 
145 |   let base_results =
146 |     match !output with
147 |     | `Diff fname -> begin
148 |         match Results.parse (Yojson.Safe.from_file fname) with
149 |         | None -> []
150 |         | Some results -> results
151 |       end
152 |     | `JSON | `Brief -> []
153 |   in
154 | 
155 |   let benchmark_jsons =
156 |     benchmarks
157 |     |> List.filter (build_filter !filters)
158 |     |> begin
159 |          match base_results with
160 |          | [] -> Fun.id
161 |          | results ->
162 |              let (module S) = Set_ext.make String.compare in
163 |              let names = results |> List.map Benchmark.name |> S.of_list in
164 |              List.filter (fun (name, _) -> S.mem name names)
165 |        end
166 |     |> (if !randomize then shuffle else Fun.id)
167 |     |> List.map (run_benchmark ~debug:!debug ~budgetf:!budgetf)
168 |     |> List.sort @@ fun l r -> String.compare (name_of l) (name_of r)
169 |   in
170 | 
171 |   let results_json = `Assoc [ ("results", `List benchmark_jsons) ] in
172 |   let results =
173 |     lazy
174 |       (match Results.parse results_json with
175 |       | None -> []
176 |       | Some results -> results)
177 |   in
178 | 
179 |   begin
180 |     match !output with
181 |     | `JSON ->
182 |         Yojson.Safe.pretty_print ~std:true Format.std_formatter results_json
183 |     | `Brief -> print_diff (Lazy.force results) (Lazy.force results)
184 |     | `Diff _ -> print_diff base_results (Lazy.force results)
185 |   end;
186 | 
187 |   if flush then Format.print_flush ()
188 | 


--------------------------------------------------------------------------------
/lib/multicore_bench.mli:
--------------------------------------------------------------------------------
  1 | (** Multicore bench is a framework for writing multicore benchmark executables
  2 |     to run on {{:https://github.com/ocurrent/current-bench}current-bench}.
  3 | 
  4 |     To use the framework one typically opens it
  5 | 
  6 |     {[
  7 |       open Multicore_bench
  8 |     ]}
  9 | 
 10 |     which brings a number of submodules into scope. *)
 11 | 
 12 | module Trend : sig
 13 |   (** Dealing with trends. *)
 14 | 
 15 |   type t = [ `Lower_is_better | `Higher_is_better ]
 16 |   (** Whether a lower or higher value is better. *)
 17 | end
 18 | 
 19 | module Metric : sig
 20 |   (** Dealing with benchmark metrics. *)
 21 | 
 22 |   type t
 23 |   (** Represents a metric. *)
 24 | 
 25 |   val make :
 26 |     metric:string ->
 27 |     config:string ->
 28 |     ?units:string ->
 29 |     ?trend:[< Trend.t ] ->
 30 |     ?description:string ->
 31 |     [< `Float of float ] ->
 32 |     t
 33 |   (** [make ~metric ~config value] constructs a metric with given specification.
 34 |   *)
 35 | end
 36 | 
 37 | module Unit_of_rate : sig
 38 |   (** Dealing with units of rate. *)
 39 | 
 40 |   type t =
 41 |     [ `_1  (** 1/s *)
 42 |     | `k  (** 10{^ 3}/s or k/s *)
 43 |     | `M  (** 10{^ 6}/s or M/s *)
 44 |     | `G  (** 10{^ 9}/s or G/s *) ]
 45 |   (** Represents a unit of rate, i.e. how many per second. *)
 46 | 
 47 |   val to_divisor : [< t ] -> float
 48 |   (** [to_divisor t] converts the unit of rate [t] to a divisor. *)
 49 | 
 50 |   val to_mnemonic : [< t ] -> string
 51 |   (** [to_mnemonic t] returns a human readable mnemonic for the unit of rate
 52 |       [t]. *)
 53 | end
 54 | 
 55 | module Unit_of_time : sig
 56 |   (** Dealing with units of time. *)
 57 | 
 58 |   type t =
 59 |     [ `s  (** seconds *)
 60 |     | `ms  (** milliseconds *)
 61 |     | `mus  (** microseconds *)
 62 |     | `ns  (** nanoseconds *) ]
 63 |   (** Represents a unit of time. *)
 64 | 
 65 |   val to_multiplier : [< t ] -> float
 66 |   (** [to_multiplier t] converts the unit of time [t] to a multiplier. *)
 67 | 
 68 |   val to_mnemonic : [< t ] -> string
 69 |   (** [to_mnemonic t] returns a human readable mnemonic for the unit of time
 70 |       [t]. *)
 71 | end
 72 | 
 73 | module Times : sig
 74 |   (** Recording timings of benchmarks running on multiple domains in parallel
 75 |       and producing metrics from the recorded timings. *)
 76 | 
 77 |   type t
 78 |   (** Represents a record of elapsed times of multiple runs of a benchmark
 79 |       running on multiple domains. *)
 80 | 
 81 |   val record :
 82 |     budgetf:float ->
 83 |     n_domains:int ->
 84 |     ?ensure_multi_domain:bool ->
 85 |     ?domain_local_await:[< `Busy_wait | `Neglect > `Busy_wait ] ->
 86 |     ?n_warmups:int ->
 87 |     ?n_runs_min:int ->
 88 |     ?n_runs_max:int ->
 89 |     ?before:(unit -> unit) ->
 90 |     init:(int -> 's) ->
 91 |     ?wrap:(int -> 's -> (unit -> unit) -> unit) ->
 92 |     work:(int -> 's -> unit) ->
 93 |     ?after:(unit -> unit) ->
 94 |     unit ->
 95 |     t
 96 |   (** [record ~budgetf ~n_domains ~init ~work ()] essentially repeatedly runs
 97 |       [let x = init i in wrap i x (fun () -> .. work i x ..)] on specified
 98 |       number of domains, [i ∊ [0, n_domains-1]], and records the times that
 99 |       calls of [work] take. The calls of [work] are synchronized to start as
100 |       simultaneously as possible.
101 | 
102 |       Optional arguments:
103 | 
104 |       - [~ensure_multi_domain]: Whether to run an extra busy untimed domain when
105 |         [n_domains] is [1]. Doing so prevents the OCaml runtime from using
106 |         specialized runtime implementations. Defaults to [true].
107 | 
108 |       - [~domain_local_await]: Specifies whether and how to configure
109 |         {{:https://github.com/ocaml-multicore/domain-local-await/}domain-local-await}
110 |         or DLA. [`Neglect] does not reconfigure DLA. [`Busy_wait] configures DLA
111 |         to use a busy-wait implementation, which prevents domains from going to
112 |         sleep. Defaults to [`Busy_wait].
113 | 
114 |       - [~n_warmups]: Specifies the number of warmup runs to perform before the
115 |         actual measurements. Defaults to [3].
116 | 
117 |       - [~n_runs_min]: Specifies the minimum number of timed runs. The upper
118 |         bound is determined dynamically based on [budgetf]. Defaults to [7].
119 | 
120 |       - [~n_runs_max]: Specifies the maximum number of timed runs. Defaults to
121 |         [1023].
122 | 
123 |       - [~before]: Specifies an action to run on one domain before [init].
124 | 
125 |       - [~after]: Specifies an action to run on one domain after [work]. *)
126 | 
127 |   val to_thruput_metrics :
128 |     n:int ->
129 |     singular:string ->
130 |     ?plural:string ->
131 |     config:string ->
132 |     ?unit_of_time:Unit_of_time.t ->
133 |     ?unit_of_rate:Unit_of_rate.t ->
134 |     t ->
135 |     Metric.t list
136 |   (** [to_thruput_metrics ~n ~singular ~config times] produces a pair of metrics
137 |       from the recorded [times] where one metric is for the time a single
138 |       operation takes and the other is the thruput of operations over all
139 |       domains.
140 | 
141 |       Optional arguments:
142 | 
143 |       - [~plural]: Plural for the operation. Defaults to [singular ^ "s"].
144 | 
145 |       - [~unit_of_time]: Unit of time for the duration of a single operation.
146 |         Defaults to [`ns].
147 | 
148 |       - [~unit_of_rate]: Unit of rate for the number of operations per second.
149 |         Defaults to [`M]. *)
150 | end
151 | 
152 | module Suite : sig
153 |   (** Dealing with benchmark suites. *)
154 | 
155 |   type t = budgetf:float -> Metric.t list
156 |   (** Represents a benchmark suite, i.e. a function that produces a list of
157 |       metric outputs for
158 |       {{:https://github.com/ocurrent/current-bench}current-bench}. *)
159 | end
160 | 
161 | module Cmd : sig
162 |   (** Command line interface for a benchmark executable. *)
163 | 
164 |   type output =
165 |     [ `JSON
166 |       (** [`JSON] gives the JSON output for
167 |           {{:https://github.com/ocurrent/current-bench} current-bench}. *)
168 |     | `Brief  (** [`Brief] gives concise human readable output. *)
169 |     | `Diff of string
170 |       (** [`Diff "path.json"] gives concise human readable diff against results
171 |           stored in specified [path.json] file. *) ]
172 |   (** Specifies the output format. *)
173 | 
174 |   val run :
175 |     benchmarks:(string * Suite.t) list ->
176 |     ?budgetf:float ->
177 |     ?filters:string list ->
178 |     ?debug:bool ->
179 |     ?output:output ->
180 |     ?argv:string array ->
181 |     ?flush:bool ->
182 |     ?randomize:bool ->
183 |     unit ->
184 |     unit
185 |   (** [run ~benchmarks ()] interprets command line arguments and runs the
186 |       benchmarks suites based on the arguments.
187 | 
188 |       Optional arguments:
189 | 
190 |       - [~budgetf]: A budget (usually) in seconds passed to each benchmark
191 |         suite. This defaults to a small number so that a benchmark suite can be
192 |         used as a test.
193 | 
194 |       - [~filters]: A list of regular expressions to match names of benchmark
195 |         suites. If any regular expression matches the name of benchmark, then
196 |         that benchmark will be run. Defaults to [[]].
197 | 
198 |       - [~debug]: Print progress information to help debugging. Defaults to
199 |         [false].
200 | 
201 |       - [~output]: Output mode. Defaults to [`JSON].
202 | 
203 |       - [~argv]: Array of command line arguments. Defaults to [Sys.argv].
204 | 
205 |       - [~flush]: Whether to flush the standard output after writing it.
206 |         Defaults to [true].
207 | 
208 |       - [~randomize]: Whether to randomize the order of suites or not. Defaults
209 |         to [true].
210 | 
211 |       Command line arguments take precedence over the optional arguments. In
212 |       other words, you can specify the optional arguments to give defaults for
213 |       the benchmark executable. *)
214 | end
215 | 
216 | module Countdown : sig
217 |   (** Scalable low-level countdown. *)
218 | 
219 |   type t
220 |   (** Represents a countdown counter. *)
221 | 
222 |   val create : n_domains:int -> unit -> t
223 |   (** [create ~n_domains ()] returns a new countdown counter with initial value
224 |       of [0]. *)
225 | 
226 |   val non_atomic_set : t -> int -> unit
227 |   (** [non_atomic_set countdown count] sets the [count] of the [countdown].
228 | 
229 |       ⚠️ This operation is not atomic. However, it is safe to call
230 |       [non_atomic_set] with the same [countdown] and [count] in parallel,
231 |       because the [countdown] will be initialized deterministically. *)
232 | 
233 |   val get : t -> int
234 |   (** [get countdown] returns the count of the [countdown]. *)
235 | 
236 |   val alloc : t -> domain_index:int -> batch:int -> int
237 |   (** [alloc countdown ~domain_index ~batch] tries to reduce the count of the
238 |       [countdown] by at most [batch] (which must be positive) and returns the
239 |       number by which the count was reduced or [0] in case the count was already
240 |       [0]. *)
241 | end
242 | 
243 | module Util : sig
244 |   (** Utilities for creating benchmarks.
245 | 
246 |       ⚠️ In the future we expect to regroup these utilities under different
247 |       modules and deprecate them in this module. *)
248 | 
249 |   val iter_factor : int
250 |   (** A multiplier depending various factors such as whether we are running on a
251 |       32- or 64-bit machine (1x/10x), bytecode or native (1x/10x), and whether
252 |       we are running on single-core or multicore OCaml (1x/10x). *)
253 | 
254 |   val alloc : ?batch:int -> int Atomic.t -> int
255 |   (** [alloc ~batch n] tries to decrement the specified atomic variable [n] by
256 |       at most the optional amount [~batch] and not beyond [n] having value [0].
257 |       Returns the amount by which [n] was decremented, which is [0] only in case
258 |       [n] is [0]. *)
259 | 
260 |   val cross : 'a list -> 'b list -> ('a * 'b) list
261 |   (** [cross xs ys] returns a list formed by pairing each element of [xs] with
262 |       each element of [ys].
263 | 
264 |       For example:
265 |       {[
266 |         # Util.cross [1; 2; 3] ["a"; "b"]
267 |         - : (int * string) list =
268 |         [(1, "a"); (1, "b"); (2, "a"); (2, "b"); (3, "a"); (3, "b")]
269 |       ]} *)
270 | 
271 |   module Bits : sig
272 |     (** A minimalistic bitset data structure. *)
273 | 
274 |     type t
275 |     (** Represents a bitset. *)
276 | 
277 |     val create : unit -> t
278 |     (** [create ()] returns a new zero length bitset. *)
279 | 
280 |     val push : t -> bool -> unit
281 |     (** [push bs b] adds the bit [b] to the end of the bitset [bs]. *)
282 | 
283 |     val iter : (bool -> unit) -> t -> unit
284 |     (** [iter action bs] calls the [action] for each bit in the bitset [bs]. *)
285 |   end
286 | 
287 |   val generate_push_and_pop_sequence : ?state:Random.State.t -> int -> Bits.t
288 |   (** [generate_push_and_pop_sequence n] generates a bitset where each [true]
289 |       bit represents a "push" operation and each [false] bit represents a
290 |       "try_pop" operation. Performing the operations on an initially empty
291 |       dispenser leaves the dispenser empty. The sequence may include "try_pop"
292 |       operations at points where the dispenser will be empty. *)
293 | end
294 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [API reference](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/index.html)
  2 | &middot;
  3 | [Benchmarks](https://bench.ci.dev/ocaml-multicore/multicore-bench/branch/main?worker=pascal&image=bench.Dockerfile)
  4 | 
  5 | # Multicore-bench
  6 | 
  7 | Multicore bench is a framework for writing multicore benchmark executables to
  8 | run locally on your computer and on
  9 | [current-bench](https://github.com/ocurrent/current-bench).
 10 | 
 11 | Benchmarking multicore algorithms tends to require a certain amount of setup,
 12 | such as spawning domains, synchronizing them before work, timing the work,
 13 | collecting the times, and joining domains, that this framework tries to take
 14 | care of for you as conveniently as possible. Furthermore, benchmarking multicore
 15 | algorithms in OCaml also involves a number of pitfalls related to how the OCaml
 16 | runtime works. For example, when only a single domain is running, several
 17 | operations provided by the OCaml runtime use specialized implementations that
 18 | take advantage of the fact that there is only a single domain running. In most
 19 | cases, when trying to benchmark multicore algorithms, you don't actually want to
 20 | measure those specialized runtime implementations.
 21 | 
 22 | The design of multicore bench is considered **_experimental_**. We are planning
 23 | to improve the design along with
 24 | [current-bench](https://github.com/ocurrent/current-bench) in the future to
 25 | allow more useful benchmarking experience.
 26 | 
 27 | ## Crash course to [current-bench](https://github.com/ocurrent/current-bench)
 28 | 
 29 | Note that, at the time of writing this,
 30 | [current-bench](https://github.com/ocurrent/current-bench) is work in progress
 31 | and does not accept enrollment for community projects. However, assuming you
 32 | have access to it, to run multicore benchmarks with
 33 | [current-bench](https://github.com/ocurrent/current-bench) a number of things
 34 | need to be setup:
 35 | 
 36 | - You will need a [Makefile](Makefile) with a `bench` target at the root of the
 37 |   project. The [current-bench](https://github.com/ocurrent/current-bench)
 38 |   service will run your benchmarks through that.
 39 | 
 40 | - You likely also want to have a [bench.Dockerfile](bench.Dockerfile) and
 41 |   [.dockerignore](.dockerignore) at the root of the project. Make sure that the
 42 |   Dockerfile is layered such that it will pickup opam updates when desired while
 43 |   also avoiding unnecessary work during rebuilds.
 44 | 
 45 | - You will also need the benchmarks and that is where this framework may help.
 46 |   You can find examples of multicore benchmarks from the
 47 |   [Saturn](https://github.com/ocaml-multicore/saturn/tree/main/bench),
 48 |   [Kcas](https://github.com/ocaml-multicore/kcas/tree/main/bench), and
 49 |   [Picos](https://github.com/ocaml-multicore/picos/tree/main/bench) projects and
 50 |   from the [bench](bench) directory of this repository.
 51 | 
 52 | For multicore benchmarks you will also need to have
 53 | [current-bench](https://github.com/ocurrent/current-bench) configured to use a
 54 | multicore machine, which currently needs to be done by the
 55 | [current-bench](https://github.com/ocurrent/current-bench) maintainers.
 56 | 
 57 | ## Example: Benchmarking `Atomic.incr` under contention
 58 | 
 59 | Let's look at a simple example with detailed comments of how one might benchmark
 60 | `Atomic.incr` under contention.
 61 | 
 62 | Note that this example is written here as a
 63 | [MDX](https://github.com/realworldocaml/mdx) document or test. Normally you
 64 | would write a benchmark as a command line executable and would likely compile it
 65 | in release mode with a native compiler.
 66 | 
 67 | We first open the
 68 | [`Multicore_bench`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/index.html)
 69 | module:
 70 | 
 71 | ```ocaml
 72 | # open Multicore_bench
 73 | ```
 74 | 
 75 | This brings into scope multiple modules including
 76 | [`Suite`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Suite/index.html),
 77 | [`Util`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Util/index.html),
 78 | [`Times`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Times/index.html),
 79 | and
 80 | [`Cmd`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html)
 81 | that we used below.
 82 | 
 83 | Typically one would divide a benchmark executable into benchmark suites for
 84 | different algorithms and data structures. To illustrate that pattern, let's
 85 | create a module `Bench_atomic` for our benchmarks suite on atomics:
 86 | 
 87 | ```ocaml
 88 | # module Bench_atomic : sig
 89 |     (* The entrypoint to a suite is basically a function.  There is a type
 90 |        alias for the signature. *)
 91 |     val run_suite : Suite.t
 92 |   end = struct
 93 |     (* [run_one] runs a single benchmark with the given budget and number of
 94 |        domains. *)
 95 |     let run_one ~budgetf ~n_domains () =
 96 |       (* We scale the number of operations using [Util.iter_factor], which
 97 |          depends on various factors such as whether we are running on a 32- or
 98 |          64-bit machine, using a native or bytecode compiler, and whether we are
 99 |          running on multicore OCaml.  The idea is to make it possible to use the
100 |          benchmark executable as a test that can be run even on slow CI
101 |          machines. *)
102 |       let n = 10 * Util.iter_factor in
103 | 
104 |       (* In this example, [atomic] is the data structure we are benchmarking. *)
105 |       let atomic =
106 |         Atomic.make 0
107 |         |> Multicore_magic.copy_as_padded
108 |         (* We explicitly pad the [atomic] to avoid false sharing.  With false
109 |            sharing measurements are likely to have a lot of noise that makes
110 |            it difficult to get useful results. *)
111 |       in
112 | 
113 |       (* We store the number of operations to perform in a scalable countdown
114 |          counter.  The idea is that we want all the workers or domains to work
115 |          at the same time as much as possible, because we want to measure
116 |          performance under contention.  So, instead of e.g. simply having each
117 |          domain run a fixed count loop, which could lead to some domains
118 |          finishing well before others, we let the number of operations performed
119 |          by each domain vary. *)
120 |       let n_ops_to_do =
121 |         Countdown.create ~n_domains ()
122 |       in
123 | 
124 |       (* [init] is called on each domain before [work].  The return value of
125 |          [init] is passed to [work]. *)
126 |       let init _domain_index =
127 |         (* It doesn't matter that we set the countdown counter multiple times.
128 |            We could also use a [before] callback to do setup before [work]. *)
129 |         Countdown.non_atomic_set n_ops_to_do n
130 |       in
131 | 
132 |       (* [work] is called on each domain and the time it takes is recorded.
133 |          The second argument comes from [init]. *)
134 |       let work domain_index () =
135 |         (* Because we are benchmarking operations that take a very small amount
136 |            of time, we run our own loop to perform the operations.  This has
137 |            pros and cons.  One con is that the loop overhead will be part of the
138 |            measurement, which is something to keep in mind when interpreting the
139 |            results.  One pro is that this gives more flexibility in various
140 |            ways. *)
141 |         let rec work () =
142 |           (* We try to allocate some number of operations to perform. *)
143 |           let n = Countdown.alloc n_ops_to_do ~domain_index ~batch:100 in
144 |           (* If we got zero, then we should stop. *)
145 |           if n <> 0 then begin
146 |             (* Otherwise we perform the operations and try again. *)
147 |             for _=1 to n do
148 |               Atomic.incr atomic
149 |             done;
150 |             work ()
151 |           end
152 |         in
153 |         work ()
154 |       in
155 | 
156 |       (* [config] is a name for the configuration of the benchmark.  In this
157 |          case we distinguish by the number of workers or domains. *)
158 |       let config =
159 |         Printf.sprintf "%d worker%s" n_domains
160 |           (if n_domains = 1 then "" else "s")
161 |       in
162 | 
163 |       (* [Times.record] does the heavy lifting to spawn domains and measure
164 |          the time [work] takes on them. *)
165 |       let times = Times.record ~budgetf ~n_domains ~init ~work () in
166 | 
167 |       (* [Times.to_thruput_metrics] takes the measurements and produces both a
168 |          metric for the time of a single operation and for the total thruput
169 |          over all the domains. *)
170 |       Times.to_thruput_metrics ~n ~singular:"incr" ~config times
171 | 
172 |     (* [run_suite] runs the benchmarks in this suite with the given budget. *)
173 |     let run_suite ~budgetf =
174 |       (* In this case we run the benchmark with various number of domains. We
175 |          use [concat_map] to collect the results as a flat list of outputs. *)
176 |       [ 1; 2; 4; 8 ]
177 |       |> List.concat_map @@ fun n_domains ->
178 |          run_one ~budgetf ~n_domains ()
179 |   end
180 | module Bench_atomic : sig val run_suite : Suite.t end
181 | ```
182 | 
183 | We then collect all the suites into an association list. The association list
184 | has a name and entry point for each suite:
185 | 
186 | ```ocaml
187 | # let benchmarks = [
188 |     ("Atomic", Bench_atomic.run_suite)
189 |   ]
190 | val benchmarks : (string * Suite.t) list = [("Atomic", <fun>)]
191 | ```
192 | 
193 | Usually the list of benchmarks is in the main module of the benchmark executable
194 | along with an invocation of
195 | [`Cmd.run`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html#val-run):
196 | 
197 | ```ocaml non-deterministic
198 | # Cmd.run ~benchmarks ~argv:[||] ()
199 | {
200 |   "results": [
201 |     {
202 |       "name": "Atomic",
203 |       "metrics": [
204 |         {
205 |           "name": "time per incr/1 worker",
206 |           "value": 11.791,
207 |           "units": "ns",
208 |           "trend": "lower-is-better",
209 |           "description": "Time to process one incr",
210 |           "#best": 9.250000000000002,
211 |           "#mean": 12.149960000000002,
212 |           "#median": 11.791,
213 |           "#sd": 1.851061543655424,
214 |           "#runs": 25
215 |         },
216 |         {
217 |           "name": "incrs over time/1 worker",
218 |           "value": 84.81044864727335,
219 |           "units": "M/s",
220 |           "trend": "higher-is-better",
221 |           "description": "Total number of incrs processed",
222 |           "#best": 108.1081081081081,
223 |           "#mean": 84.25129565093134,
224 |           "#median": 84.81044864727335,
225 |           "#sd": 12.911113376793846,
226 |           "#runs": 25
227 |         },
228 |         // ...
229 |       ]
230 |     }
231 |   ]
232 | }
233 | - : unit = ()
234 | ```
235 | 
236 | By default
237 | [`Cmd.run`](https://ocaml-multicore.github.io/multicore-bench/doc/multicore-bench/Multicore_bench/Cmd/index.html#val-run)
238 | interprets command line arguments from
239 | [`Sys.argv`](https://v2.ocaml.org/api/Sys.html#VALargv). Unlike what one would
240 | typically do, we explicitly specify `~argv:[||]`, because this code is being run
241 | through the [MDX](https://github.com/realworldocaml/mdx) tool.
242 | 
243 | Note that the output above is just a sample. The timings are non-deterministic
244 | and will slightly vary from one run of the benchmark to another even on a single
245 | computer.
246 | 


--------------------------------------------------------------------------------
/lib/times.ml:
--------------------------------------------------------------------------------
  1 | open Finally
  2 | 
  3 | type t = { inverted : bool; times_per_domain : Float.Array.t array; runs : int }
  4 | 
  5 | let with_busy_wait () =
  6 |   let open struct
  7 |     type state = Init | Released | Awaiting of { mutable released : bool }
  8 |   end in
  9 |   let state = Atomic.make Init in
 10 |   let release () =
 11 |     if Multicore_magic.fenceless_get state != Released then
 12 |       match Atomic.exchange state Released with
 13 |       | Awaiting r -> r.released <- true
 14 |       | _ -> ()
 15 |   in
 16 |   let await () =
 17 |     if Multicore_magic.fenceless_get state != Released then
 18 |       let awaiting = Awaiting { released = false } in
 19 |       if Atomic.compare_and_set state Init awaiting then
 20 |         match awaiting with
 21 |         | Awaiting r ->
 22 |             (* Avoid sleeping *)
 23 |             while not r.released do
 24 |               Domain.cpu_relax ()
 25 |             done
 26 |         | _ -> ()
 27 |   in
 28 |   Domain_local_await.{ release; await }
 29 | 
 30 | let wrap _ _ action = action ()
 31 | 
 32 | (** ⚠️ This function is written in a very low level manner to avoid memore use,
 33 |     allocations, and false sharing as much as possible during a run of [work] as
 34 |     those can cause undesirable noise. *)
 35 | let record (type a) ~budgetf ~n_domains ?(ensure_multi_domain = true)
 36 |     ?(domain_local_await = `Busy_wait) ?(n_warmups = 3) ?(n_runs_min = 7)
 37 |     ?(n_runs_max = 1023) ?(before = Fun.id) ~init ?(wrap = wrap) ~work
 38 |     ?(after = Fun.id) () =
 39 |   Gc.full_major ();
 40 |   let open struct
 41 |     type shared = {
 42 |       barrier : Barrier.t;
 43 |       start_earliest : Mtime.Span.t Atomic.t;
 44 |       work : int -> a -> unit;
 45 |       wrap : int -> a -> (unit -> unit) -> unit;
 46 |       results : Float.Array.t Array.t;
 47 |       budget_start : Mtime.Span.t;
 48 |       before : unit -> unit;
 49 |       init : int -> a;
 50 |       after : unit -> unit;
 51 |       n_warmups : int;
 52 |       n_runs_min : int;
 53 |       budgetf : float;
 54 |       mutable budget_used : bool;
 55 |       mutable exit : bool;
 56 |       mutable runs : int;
 57 |     }
 58 |   end in
 59 |   let s =
 60 |     {
 61 |       barrier = Barrier.make n_domains;
 62 |       start_earliest =
 63 |         Atomic.make Mtime.Span.zero |> Multicore_magic.copy_as_padded;
 64 |       work = Multicore_magic.copy_as_padded work;
 65 |       wrap;
 66 |       results =
 67 |         Array.init n_domains (fun _ ->
 68 |             Float.Array.create (Int_ext.max n_runs_min n_runs_max));
 69 |       budget_start = Mtime_clock.elapsed ();
 70 |       before;
 71 |       init;
 72 |       after;
 73 |       n_warmups;
 74 |       n_runs_min;
 75 |       budgetf;
 76 |       budget_used = false;
 77 |       exit = false;
 78 |       runs = 0;
 79 |     }
 80 |   in
 81 |   begin
 82 |     let@ _extra_domain =
 83 |       finally (Option.iter Domain.join) @@ fun () ->
 84 |       if n_domains = 1 && ensure_multi_domain then
 85 |         Some
 86 |           ( Domain.spawn @@ fun () ->
 87 |             while not s.exit do
 88 |               Domain.cpu_relax ()
 89 |             done )
 90 |       else None
 91 |     in
 92 |     let main i =
 93 |       try
 94 |         let benchmark () =
 95 |           let open struct
 96 |             type local = {
 97 |               domain_i : int;
 98 |               mutable stop_current : Mtime.Span.t;
 99 |               mutable state : a;
100 |             }
101 |           end in
102 |           let l =
103 |             Multicore_magic.copy_as_padded
104 |               {
105 |                 domain_i = i;
106 |                 stop_current = Mtime.Span.zero;
107 |                 state = Obj.magic ();
108 |               }
109 |           in
110 |           let doit =
111 |             Multicore_magic.copy_as_padded @@ fun () ->
112 |             Barrier.await s.barrier;
113 |             if Multicore_magic.fenceless_get s.start_earliest == Mtime.Span.zero
114 |             then begin
115 |               let start_current = Mtime_clock.elapsed () in
116 |               if
117 |                 Multicore_magic.fenceless_get s.start_earliest
118 |                 == Mtime.Span.zero
119 |               then
120 |                 Atomic.compare_and_set s.start_earliest Mtime.Span.zero
121 |                   start_current
122 |                 |> ignore
123 |             end;
124 |             s.work l.domain_i l.state;
125 |             l.stop_current <- Mtime_clock.elapsed ()
126 |           in
127 |           (* warmup runs *)
128 |           for _ = 1 to s.n_warmups do
129 |             if l.domain_i = 0 then begin
130 |               Multicore_magic.fenceless_set s.start_earliest Mtime.Span.zero;
131 |               s.before ();
132 |               Gc.major ()
133 |             end;
134 |             Barrier.await s.barrier;
135 |             l.state <- s.init l.domain_i;
136 |             s.wrap l.domain_i l.state doit;
137 |             Barrier.await s.barrier;
138 |             l.state <- Obj.magic ();
139 |             if l.domain_i = 0 then s.after ();
140 |             Barrier.await s.barrier
141 |           done;
142 |           (* timed runs *)
143 |           while s.runs < s.n_runs_min || not s.budget_used do
144 |             if l.domain_i = 0 then begin
145 |               Multicore_magic.fenceless_set s.start_earliest Mtime.Span.zero;
146 |               s.before ();
147 |               Gc.major ()
148 |             end;
149 |             Barrier.await s.barrier;
150 |             l.state <- s.init l.domain_i;
151 |             s.wrap l.domain_i l.state doit;
152 |             Barrier.await s.barrier;
153 |             l.state <- Obj.magic ();
154 |             Float.Array.set s.results.(l.domain_i) s.runs
155 |               (Mtime.Span.to_float_ns
156 |                  (Mtime.Span.abs_diff l.stop_current
157 |                     (Multicore_magic.fenceless_get s.start_earliest))
158 |               *. (1. /. 1_000_000_000.0));
159 |             l.stop_current <- Mtime.Span.zero;
160 |             Barrier.await s.barrier;
161 |             if l.domain_i = 0 then begin
162 |               s.after ();
163 |               s.runs <- s.runs + 1;
164 |               if
165 |                 let budget_stop = Mtime_clock.elapsed () in
166 |                 let elapsedf =
167 |                   Mtime.Span.to_float_ns
168 |                     (Mtime.Span.abs_diff budget_stop s.budget_start)
169 |                   *. (1. /. 1_000_000_000.0)
170 |                 in
171 |                 s.budgetf < elapsedf
172 |                 || Float.Array.length s.results.(l.domain_i) <= s.runs
173 |               then s.budget_used <- true
174 |             end;
175 |             Barrier.await s.barrier
176 |           done
177 |         in
178 |         match domain_local_await with
179 |         | `Busy_wait ->
180 |             Domain_local_await.using ~prepare_for_await:with_busy_wait
181 |               ~while_running:benchmark
182 |         | `Neglect -> benchmark ()
183 |       with exn ->
184 |         let bt = Printexc.get_raw_backtrace () in
185 |         Barrier.poison s.barrier exn bt;
186 |         Printexc.raise_with_backtrace exn bt
187 |     in
188 |     let rec spawn i =
189 |       let i = i - 1 in
190 |       if i = 0 then main 0
191 |       else
192 |         let bt_status = Printexc.backtrace_status () in
193 |         let@ _domain =
194 |           finally (fun domain ->
195 |               match Domain.join domain with
196 |               | None -> ()
197 |               | Some (exn, bt) -> Printexc.raise_with_backtrace exn bt)
198 |           @@ fun () ->
199 |           Domain.spawn @@ fun () ->
200 |           Printexc.record_backtrace bt_status;
201 |           match main i with
202 |           | () -> None
203 |           | exception exn -> Some (exn, Printexc.get_raw_backtrace ())
204 |         in
205 |         spawn i
206 |     in
207 |     match spawn n_domains with
208 |     | () -> s.exit <- true
209 |     | exception exn ->
210 |         s.exit <- true;
211 |         raise exn
212 |   end;
213 |   let times_per_domain =
214 |     s.results |> Array.map @@ fun times -> Float.Array.sub times 0 s.runs
215 |   in
216 |   { inverted = false; times_per_domain; runs = s.runs }
217 | 
218 | let average { inverted; times_per_domain; runs } =
219 |   let domains = Array.length times_per_domain in
220 |   let n = Float.Array.length times_per_domain.(0) in
221 |   let times = Float.Array.create n in
222 |   for run_i = 0 to n - 1 do
223 |     Float.Array.set times run_i 0.0;
224 |     for domain_i = 0 to domains - 1 do
225 |       Float.Array.set times run_i
226 |         (Float.Array.get times run_i
227 |         +. Float.Array.get times_per_domain.(domain_i) run_i)
228 |     done;
229 |     Float.Array.set times run_i
230 |       (Float.Array.get times run_i /. Float.of_int domains)
231 |   done;
232 |   { inverted; times_per_domain = [| times |]; runs }
233 | 
234 | let invert { inverted; times_per_domain; runs } =
235 |   {
236 |     inverted = not inverted;
237 |     times_per_domain =
238 |       Array.map (Float.Array.map (fun v -> 1.0 /. v)) times_per_domain;
239 |     runs;
240 |   }
241 | 
242 | module Stats = struct
243 |   type t = {
244 |     mean : float;
245 |     median : float;
246 |     sd : float;
247 |     inverted : bool;
248 |     best : float;
249 |     runs : int;
250 |   }
251 | 
252 |   let scale factor t =
253 |     {
254 |       t with
255 |       mean = t.mean *. factor;
256 |       median = t.median *. factor;
257 |       sd = t.sd *. factor;
258 |       best = t.best *. factor;
259 |     }
260 | 
261 |   let mean_of times =
262 |     Float.Array.fold_left ( +. ) 0.0 times
263 |     /. Float.of_int (Float.Array.length times)
264 | 
265 |   let sd_of times mean =
266 |     Float.sqrt
267 |       (mean_of
268 |          (Float.Array.map
269 |             (fun v ->
270 |               let d = v -. mean in
271 |               d *. d)
272 |             times))
273 | 
274 |   let median_of times =
275 |     Float.Array.sort Float.compare times;
276 |     let n = Float.Array.length times in
277 |     if n land 1 = 0 then
278 |       (Float.Array.get times ((n asr 1) - 1) +. Float.Array.get times (n asr 1))
279 |       /. 2.0
280 |     else Float.Array.get times (n asr 1)
281 | 
282 |   let of_times { inverted; times_per_domain; runs } =
283 |     let domains = Array.length times_per_domain in
284 |     let n = Float.Array.length times_per_domain.(0) in
285 |     let times = Float.Array.create n in
286 |     for run_i = 0 to n - 1 do
287 |       Float.Array.set times run_i 0.0;
288 |       for domain_i = 0 to domains - 1 do
289 |         Float.Array.set times run_i
290 |           (Float.Array.get times run_i
291 |           +. Float.Array.get times_per_domain.(domain_i) run_i)
292 |       done
293 |     done;
294 |     let mean = mean_of times in
295 |     let sd = sd_of times mean in
296 |     let median = median_of times in
297 |     let best =
298 |       if inverted then Float.Array.fold_left Float.max Float.min_float times
299 |       else Float.Array.fold_left Float.min Float.max_float times
300 |     in
301 |     { mean; sd; median; inverted; best; runs }
302 | 
303 |   let to_json ~metric ~config ~description ~units t =
304 |     let trend =
305 |       if t.inverted then `String "higher-is-better"
306 |       else `String "lower-is-better"
307 |     in
308 |     [
309 |       `Assoc
310 |         [
311 |           ("name", `String (Metric.name ~metric ~config));
312 |           ("value", `Float t.median);
313 |           ("units", `String units);
314 |           ("trend", trend);
315 |           ("description", `String description);
316 |           ("#best", `Float t.best);
317 |           ("#mean", `Float t.mean);
318 |           ("#median", `Float t.median);
319 |           ("#sd", `Float t.sd);
320 |           ("#runs", `Int t.runs);
321 |         ];
322 |     ]
323 | end
324 | 
325 | let to_thruput_metrics ~n ~singular ?(plural = singular ^ "s") ~config
326 |     ?(unit_of_time = `ns) ?(unit_of_rate = `M) times =
327 |   List.concat
328 |     [
329 |       times |> Stats.of_times
330 |       |> Stats.scale (Unit_of_time.to_multiplier unit_of_time /. Float.of_int n)
331 |       |> Stats.to_json
332 |            ~metric:(Printf.sprintf "time per %s" singular)
333 |            ~config
334 |            ~description:(Printf.sprintf "Time to process one %s" singular)
335 |            ~units:(Unit_of_time.to_mnemonic unit_of_time);
336 |       times |> average |> invert |> Stats.of_times
337 |       |> Stats.scale (Float.of_int n /. Unit_of_rate.to_divisor unit_of_rate)
338 |       |> Stats.to_json
339 |            ~metric:(Printf.sprintf "%s over time" plural)
340 |            ~config
341 |            ~description:(Printf.sprintf "Total number of %s processed" plural)
342 |            ~units:(Unit_of_rate.to_mnemonic unit_of_rate);
343 |     ]
344 | 


--------------------------------------------------------------------------------