├── .gitignore
├── web
    ├── Home.jpg
    ├── Download.jpg
    ├── License.jpg
    ├── Summary.jpg
    ├── Documentation.jpg
    ├── OCamlBenchmark.jpg
    └── index.html
├── src
    ├── dune
    ├── benchmark.mli
    └── benchmark.ml
├── tests
    ├── dune
    ├── long_run.ml
    └── tree.ml
├── examples
    ├── dune
    ├── test.pl
    ├── regexps.out
    ├── let_try.ml
    ├── try_if.ml
    ├── loops.ml
    ├── func_record.ml
    ├── match_array.ml
    ├── numbers.out
    ├── Makefile
    ├── iter.ml
    ├── numbers.ml
    ├── regexps.ml
    ├── composition.ml
    └── ar_ba.ml
├── Makefile
├── dune-project
├── .github
    └── workflows
    │   ├── gh-pages.yml
    │   └── main.yml
├── README.md
├── benchmark.opam
├── CHANGES.md
└── LICENSE.md


/.gitignore:
--------------------------------------------------------------------------------
1 | #							-*-conf-unix-*-
2 | _build
3 | .merlin
4 | *.install
5 | IDEAS*


--------------------------------------------------------------------------------
/web/Home.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Home.jpg


--------------------------------------------------------------------------------
/web/Download.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Download.jpg


--------------------------------------------------------------------------------
/web/License.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/License.jpg


--------------------------------------------------------------------------------
/web/Summary.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Summary.jpg


--------------------------------------------------------------------------------
/web/Documentation.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Documentation.jpg


--------------------------------------------------------------------------------
/web/OCamlBenchmark.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/OCamlBenchmark.jpg


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
1 | 
2 | (library
3 |  (name        benchmark)
4 |  (public_name benchmark)
5 |  (libraries   unix)
6 |  (synopsis "Benchmark running times of code"))
7 | 


--------------------------------------------------------------------------------
/tests/dune:
--------------------------------------------------------------------------------
 1 | (executables
 2 |  (names     long_run tree)
 3 |  (libraries benchmark unix))
 4 | 
 5 | (alias
 6 |  (name tests)
 7 |  (deps long_run.exe tree.exe))
 8 | 
 9 | (rule
10 |  (alias runtest)
11 |  (deps long_run.exe tree.exe)
12 |  (action (progn
13 |           (run %{dep:long_run.exe})
14 |           (run %{dep:tree.exe}))))
15 | 


--------------------------------------------------------------------------------
/examples/dune:
--------------------------------------------------------------------------------
 1 | 
 2 | (executables
 3 |  (names ar_ba composition iter let_try loops
 4 |         match_array numbers regexps try_if func_record)
 5 |  (libraries benchmark bigarray str re))
 6 | 
 7 | (alias
 8 |  (name examples)
 9 |  (deps ar_ba.exe composition.exe iter.exe let_try.exe loops.exe
10 |        match_array.exe numbers.exe regexps.exe try_if.exe func_record.exe))
11 | 


--------------------------------------------------------------------------------
/tests/long_run.ml:
--------------------------------------------------------------------------------
 1 | (* Test that the test is performed correctly even when the test
 2 |    execution time exceeds the max throughput time. *)
 3 | 
 4 | open Benchmark
 5 | 
 6 | let long () =
 7 |   let s = ref 0. in
 8 |   for _ = 1 to 100_000 do
 9 |     for _ = 1 to 2_000 do
10 |       s := !s +. 1.
11 |     done
12 |   done
13 | 
14 | let () =
15 |   let t = throughputN 1 ["long", long, ()] in
16 |   tabulate t
17 | 


--------------------------------------------------------------------------------
/examples/test.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use lib qw(.);
 4 | use Benchmark qw(timethese cmpthese);
 5 | 
 6 | sub f {
 7 |     my $g=0;
 8 |     foreach (0..$_[0]) { $g = 2.0 * 2.0 * 2.0 * 2.0 };
 9 |     $g;
10 | };
11 | 
12 | $res = timethese(10, { foo => sub {f(1000000)},
13 |                        bar => sub {f(2000000)},
14 |                        baz => sub {f(3000000)}, });
15 | print "\n";
16 | cmpthese $res;
17 | 
18 | 


--------------------------------------------------------------------------------
/tests/tree.ml:
--------------------------------------------------------------------------------
 1 | let () =
 2 |   let open Benchmark.Tree in
 3 |   "" @> lazy (let create() = Array.init 1_000_000 (fun i -> i) in
 4 |               Benchmark.latency1 18L create () )
 5 |   |> register;
 6 | 
 7 |   "map" @> lazy (let a = Array.init 1_000_000 (fun i -> i) in
 8 |                  let f x = x + 1 in
 9 |                  Benchmark.latency1 18L (Array.map f) a )
10 |   |> register;
11 | 
12 |   "sort"
13 |   @> lazy (let a = Array.init 1_000_000 (fun i -> -i) in
14 |            Benchmark.latency1 18L (Array.sort compare) a )
15 |  |> register;
16 | 
17 |   "sort.add"
18 |   @> lazy (Benchmark.latency1 18L (fun x -> x + 1) 1)
19 |   |> register
20 | 
21 | let () =
22 |   Benchmark.Tree.run_global ()
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PKGVERSION = $(shell git describe --always)
 2 | PKGTARBALL = benchmark-$(PKGVERSION).tbz
 3 | 
 4 | DUNE_OPTS?=
 5 | 
 6 | all build byte native:
 7 | 	dune build $(DUNE_OPTS) @install @tests @examples
 8 | 
 9 | install uninstall:
10 | 	dune $@
11 | 
12 | doc:
13 | 	sed -e 's/%%VERSION%%/$(PKGVERSION)/' benchmark.mli \
14 | 	  > _build/default/benchmark.mli
15 | 	dune build $(DUNE_OPTS) @doc
16 | 	@echo '.def { background: #f0f0f0; }' \
17 | 	  >> _build/default/_doc/_html/odoc.css
18 | 
19 | lint:
20 | 	opam lint benchmark.opam
21 | 
22 | clean:
23 | 	dune clean
24 | 
25 | WATCH?=@install
26 | watch:
27 | 	dune build $(DUNE_OPTS) $(WATCH) -w
28 | 
29 | 
30 | .PHONY: all build byte native install uninstall doc lint clean watch
31 | 


--------------------------------------------------------------------------------
/examples/regexps.out:
--------------------------------------------------------------------------------
 1 | Pcre matches: 012345678 0123456789
 2 | Str  matches: 012345678 0123456789
 3 | 
 4 | Throughputs for pcre match, str match ...
 5 | pcre match:  6 WALL ( 5.40 usr +  0.06 sys =  5.46 CPU) @ 11.54/s (n=63)
 6 |  str match:  5 WALL ( 4.99 usr +  0.06 sys =  5.05 CPU) @  8.71/s (n=44)
 7 | 
 8 |              Rate  str match pcre match
 9 |  str match 8.71/s         --       -24%
10 | pcre match 11.5/s        32%         --
11 | 
12 | Latencies for 100 iterations of pcre match, str match ...
13 | pcre match:  9 WALL ( 8.47 usr +  0.17 sys =  8.64 CPU) @ 11.57/s (n=100)
14 |  str match: 12 WALL (11.30 usr +  0.17 sys = 11.47 CPU) @  8.72/s (n=100)
15 | 
16 |              Rate  str match pcre match
17 |  str match 8.72/s         --       -25%
18 | pcre match 11.6/s        33%         --
19 | 


--------------------------------------------------------------------------------
/examples/let_try.ml:
--------------------------------------------------------------------------------
 1 | (* Compare two possible implementations of let try x = ... with ... *)
 2 | 
 3 | let k x = if x >= 0 then x else failwith "x < 0"
 4 | 
 5 | let f a =
 6 |   let sgn s x =
 7 |     let y = (try Some(k x) with _ -> None) in
 8 |     match y with
 9 |     | None -> s
10 |     | Some y -> s + y in
11 |   ignore(Array.fold_left sgn 0 a)
12 | 
13 | let g a =
14 |   let sgn s x =
15 |     (try
16 |        let y = k x in
17 |        (fun () -> s + y)
18 |      with _ ->
19 |        (fun () -> s)
20 |     )() in
21 |   ignore(Array.fold_left sgn 0 a)
22 | 
23 | 
24 | open Benchmark
25 | 
26 | let () =
27 |   let a = Array.init 1000 (fun _ -> Random.int 2 - 1) in
28 |   let res = throughputN ~repeat:5 1 [("Some", f, a);
29 |                                      ("()->", g, a); ] in
30 |   tabulate res
31 | 


--------------------------------------------------------------------------------
/examples/try_if.ml:
--------------------------------------------------------------------------------
 1 | open Bigarray
 2 | 
 3 | let n = 100
 4 | 
 5 | let a = Array1.create float64 fortran_layout n
 6 | 
 7 | (* Base case: no test *)
 8 | let f0 () =
 9 |   for i = 1 to n do
10 |     let x = a.{i} in
11 |     ignore(x)
12 |   done;
13 |   ignore(0.)
14 | 
15 | let f1 () =
16 |   for i = 1 to n+1 do
17 |     let x = try a.{i} with _ -> 0. in
18 |     ignore(x)
19 |   done
20 | 
21 | let f2 () =
22 |   for i = 1 to n+1 do
23 |     let x = if i <= n then a.{i} else 0. in
24 |     ignore(x)
25 |   done
26 | 
27 | open Benchmark
28 | 
29 | let () =
30 |   let res = throughputN ~repeat:5 3 [("no test", f0, ());
31 |                                      ("try", f1, ());
32 |                                      ("if", f2, ())  ] in
33 |   print_endline "Bigarray bound checking:";
34 |   tabulate res;
35 |   print_gc res
36 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 2.0)
 2 | (generate_opam_files true)
 3 | 
 4 | (name benchmark)
 5 | (version 1.7)
 6 | (homepage "https://github.com/Chris00/ocaml-benchmark")
 7 | (source (github Chris00/ocaml-benchmark))
 8 | (license "LGPL-3.0 with OCaml linking exception")
 9 | (maintainers "Christophe Troestler <Christophe.Troestler@umons.ac.be>")
10 | (authors "Christophe Troestler <Christophe.Troestler@umons.ac.be>"
11 |           "Doug Bagley" "c-cube")
12 | 
13 | (package
14 |   (name benchmark)
15 |   (synopsis "Benchmark running times of code")
16 |   (description "This module provides a set of tools to measure the running times of
17 | your functions and to easily compare the results.  A statistical test
18 | is used to determine whether the results truly differ.")
19 |   (depends
20 |     (ocaml (>= 4.03))
21 |     base-unix
22 |     (dune (>= 2.0))))
23 | 


--------------------------------------------------------------------------------
/.github/workflows/gh-pages.yml:
--------------------------------------------------------------------------------
 1 | name: github pages
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master # Set a branch name to trigger deployment
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     name: Deploy doc
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@main
14 | 
15 |       - name: Use OCaml
16 |         uses: ocaml/setup-ocaml@v3
17 |         with:
18 |           ocaml-compiler: '5.2'
19 |           dune-cache: true
20 |           allow-prerelease-opam: true
21 | 
22 |       - run: opam install odig benchmark
23 | 
24 |       - run: opam exec -- odig odoc --cache-dir=_doc/ benchmark
25 | 
26 |       - name: Deploy
27 |         uses: peaceiris/actions-gh-pages@v3
28 |         with:
29 |           github_token: ${{ secrets.GITHUB_TOKEN }}
30 |           publish_dir: ./_doc/html
31 |           destination_dir: .
32 |           enable_jekyll: false
33 | 


--------------------------------------------------------------------------------
/examples/loops.ml:
--------------------------------------------------------------------------------
 1 | open Benchmark
 2 | 
 3 | (* Test for the speed of recusion w.r.t. imperative loops to access
 4 |    arrays fo floats. *)
 5 | 
 6 | let rec_loop (a : float array) =
 7 |   let rec loop i =
 8 |     if i < Array.length a then begin
 9 |       a.(i) <- a.(i) +. 1.;
10 |       loop (i + 1)
11 |     end in
12 |   loop 0
13 | 
14 | let rec_loop2 (a : float array) =
15 |   let len = Array.length a in
16 |   let rec loop i =
17 |     if i < len then begin
18 |       a.(i) <- a.(i) +. 1.;
19 |       loop (i + 1)
20 |     end in
21 |   loop 0
22 | 
23 | let for_loop (a : float array) =
24 |   for i = 0 to Array.length a - 1 do
25 |     a.(i) <- a.(i) +. 1.
26 |   done
27 | 
28 | let () =
29 |   let a = Array.make 100 1. in
30 |   let res = throughputN ~repeat:5 1
31 |               [("rec", rec_loop, a);
32 |                ("rec2", rec_loop2, a);
33 |                ("for", for_loop, a); ] in
34 |   tabulate res;
35 |   print_gc res
36 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   run:
11 |     name: build  # build+test on various versions of OCaml, on linux
12 |     timeout-minutes: 15
13 |     strategy:
14 |       fail-fast: true
15 |       matrix:
16 |         os:
17 |             - ubuntu-latest
18 |             #- windows-latest
19 |         ocaml-compiler:
20 |             - '4.03'
21 |             - '4.14'
22 |             - '5.2'
23 | 
24 |     runs-on: ${{ matrix.os }}
25 |     steps:
26 |       - uses: actions/checkout@main
27 |       - name: Use OCaml ${{ matrix.ocaml-compiler }}
28 |         uses: ocaml/setup-ocaml@v3
29 |         with:
30 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
31 |           dune-cache: true
32 |           allow-prerelease-opam: true
33 | 
34 |       - run: opam install -t --deps-only benchmark
35 |       - run: opam exec -- dune build --profile=release --force @install @runtest
36 | 
37 | 


--------------------------------------------------------------------------------
/examples/func_record.ml:
--------------------------------------------------------------------------------
 1 | module F(E : sig
 2 |   val f : float -> float
 3 |   val g : float -> float
 4 | end) =
 5 | struct
 6 |   let h x = 1. +. E.f x +. E.g x
 7 | end
 8 | 
 9 | module A = F(struct let f x = x +. 1.  let g x = 2. *. x end)
10 | 
11 | 
12 | type env = { f : float -> float;  g : float -> float }
13 | 
14 | let h_rec e x = 1. +. e.f x +. e.g x
15 | 
16 | 
17 | let h_fun f g x = 1. +. f x +. g x
18 | 
19 | let f x = x +. 1.
20 | let g x = 2. *. x
21 | 
22 | let h x = 1. +. f x +. g x
23 | 
24 | 
25 | open Benchmark
26 | 
27 | let () =
28 |   let res = throughputN ~repeat:3 3
29 |     [("functor", (fun () -> A.h 1.), ());
30 |      ("record", (fun () -> h_rec { f = f; g = g } 1.), ());
31 |      ("fun arg", (fun () -> h_fun f g 1.), ());
32 |      ("no arg", (fun () -> h 1.), ());
33 |     ] in
34 |   print_endline "Functor vesus records vesus passing as arg:";
35 |   tabulate res;
36 |   print_gc res
37 | 
38 | (* Local Variables: *)
39 | (* compile-command: "make -k -C .." *)
40 | (* End: *)
41 | 


--------------------------------------------------------------------------------
/examples/match_array.ml:
--------------------------------------------------------------------------------
 1 | (* This is a typical problem where the functions are so fast (on a
 2 |    2Ghz machine) that it takes way too long to get results.  Thus a
 3 |    wrapping in a loop is done. *)
 4 | 
 5 | let n = 100
 6 | 
 7 | let string_of_month1 =
 8 |   let month = [| "Jan"; "Feb"; "Mar"; "Apr"; "May"; "Jun"; "Jul"; "Aug";
 9 | 		 "Sep"; "Oct"; "Nov"; "Dec" |] in
10 |   fun i -> Array.unsafe_get month i
11 | 
12 | let f1 () =
13 |   for _ = 1 to n do ignore(string_of_month1 7) done
14 | 
15 | let string_of_month2 = function
16 |   | 0 -> "Jan"
17 |   | 1 -> "Feb"
18 |   | 2 -> "Mar"
19 |   | 3 -> "Apr"
20 |   | 4 -> "May"
21 |   | 5 -> "Jun"
22 |   | 6 -> "Jul"
23 |   | 7 -> "Aug"
24 |   | 8 -> "Sep"
25 |   | 9 -> "Oct"
26 |   | 10 -> "Nov"
27 |   | 11 -> "Dec"
28 |   | _ -> failwith "h"
29 | 
30 | let f2 () =
31 |   for _ = 1 to n do ignore(string_of_month2 7) done
32 | 
33 | 
34 | open Benchmark
35 | 
36 | let () =
37 |   let res = throughputN 3 ~repeat:5 [ ("arr", f1, ());
38 | 				      ("pat", f2, ()); ] in
39 |   tabulate res;
40 |   print_gc res
41 | 
42 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build and Test](https://github.com/Chris00/ocaml-benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Chris00/ocaml-benchmark/actions/workflows/main.yml)
 2 | 
 3 | Benchmark — measure/compare run-time of OCaml functions
 4 | =======================================================
 5 | 
 6 | Benchmark provides functions to measure and compare the run-time of
 7 | functions.  It is inspired by the Perl module of the same name.
 8 | 
 9 | 
10 | Installation
11 | ------------
12 | 
13 | The easier way to install it is by using opam:
14 | 
15 |     opam install benchmark
16 | 
17 | If you use the development version of this project, install [Dune][]
18 | and issue
19 | 
20 |     make
21 |     make install
22 | 
23 | [Dune]: https://github.com/ocaml/dune
24 | 
25 | 
26 | Documentation
27 | -------------
28 | 
29 | See the [interface of `Benchmark`](src/benchmark.mli).  It can also be
30 | read in [HTML](https://chris00.github.io/ocaml-benchmark/doc/).
31 | 
32 | 
33 | 
34 | 
35 | Copyright 2004-present, Christophe Troestler  
36 | Copyright 2002-2003, Doug Bagley
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/examples/numbers.out:
--------------------------------------------------------------------------------
 1 | f_int   666 = 666
 2 | f_int32 666 = 666
 3 | f_int64 666 = 666
 4 | 
 5 |  int-1-lat:  3 WALL ( 2.82 usr +  0.06 sys =  2.88 CPU) @ 347.22/s (n=1000)
 6 | int-1-thru:  5 WALL ( 5.27 usr +  0.07 sys =  5.34 CPU) @ 344.94/s (n=1842)
 7 | 
 8 | Throughputs for int, int32, int64 ...
 9 |        int: 11 WALL (10.42 usr +  0.09 sys = 10.51 CPU) @ 345.67/s (n=3633)
10 |      int32: 11 WALL (10.18 usr +  0.17 sys = 10.35 CPU) @ 201.93/s (n=2090)
11 |      int64: 11 WALL (10.53 usr +  0.10 sys = 10.63 CPU) @ 187.30/s (n=1991)
12 | 
13 |        Rate int64 int32   int
14 | int64 187/s    --   -7%  -46%
15 | int32 202/s    8%    --  -42%
16 |   int 346/s   85%   71%    --
17 | 
18 | Latencies for 2000 iterations of int, int32, int64 ...
19 |        int:  6 WALL ( 5.73 usr +  0.05 sys =  5.78 CPU) @ 346.02/s (n=2000)
20 |      int32: 10 WALL ( 9.75 usr +  0.11 sys =  9.86 CPU) @ 202.84/s (n=2000)
21 |      int64: 11 WALL (10.49 usr +  0.17 sys = 10.66 CPU) @ 187.62/s (n=2000)
22 | 
23 |        Rate int64 int32   int
24 | int64 188/s    --   -8%  -46%
25 | int32 203/s    8%    --  -41%
26 |   int 346/s   84%   71%    --
27 | 


--------------------------------------------------------------------------------
/examples/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for benchmark examples
 2 | #
 3 | # Just type "make"!
 4 | 
 5 | # Do not require that the package be installed to try the examples
 6 | REQUIRES 	= str unix bigarray
 7 | BENCHMARK	= -I .. benchmark.cma
 8 | 
 9 | EXAMPLES        = $(wildcard *.ml)
10 | PACKAGE_OPTS    = $(if $(REQUIRES), -package "$(REQUIRES)", )
11 | PREDICATE_OPTS  = $(if $(PREDICATES), -predicates $(PREDICATES), )
12 | ALL_OPTS	= -annot $(PACKAGE_OPTS) $(PREDICATE_OPTS)
13 | 
14 | OCAMLC		= ocamlfind ocamlc
15 | OCAMLOPT	= ocamlfind ocamlopt
16 | 
17 | .PHONY: all
18 | 
19 | all: byte opt native
20 | byte: $(EXAMPLES:.ml=.exe)
21 | opt native:  $(EXAMPLES:.ml=.com)
22 | 
23 | regexps.exe regexps.com: REQUIRES += re
24 | 
25 | %.exe: %.ml ../benchmark.cma
26 | 	$(OCAMLC) -o $@ $(ALL_OPTS) -linkpkg $(BENCHMARK) $<
27 | 
28 | %.com: %.ml ../benchmark.cmxa
29 | 	$(OCAMLOPT) -o $@ $(ALL_OPTS) -linkpkg $(BENCHMARK:.cma=.cmxa) $<
30 | 
31 | ######################################################################
32 | .PHONY: clean
33 | 
34 | clean:
35 | 	$(RM) -f $(EXAMPLES:.ml=.exe) $(EXAMPLES:.ml=.com)
36 | 	$(RM) $(wildcard *.o *.cm[iox] *~ *.annot *.dat)
37 | 
38 | 


--------------------------------------------------------------------------------
/benchmark.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | version: "1.7"
 4 | synopsis: "Benchmark running times of code"
 5 | description: """
 6 | This module provides a set of tools to measure the running times of
 7 | your functions and to easily compare the results.  A statistical test
 8 | is used to determine whether the results truly differ."""
 9 | maintainer: ["Christophe Troestler <Christophe.Troestler@umons.ac.be>"]
10 | authors: [
11 |   "Christophe Troestler <Christophe.Troestler@umons.ac.be>"
12 |   "Doug Bagley"
13 |   "c-cube"
14 | ]
15 | license: "LGPL-3.0 with OCaml linking exception"
16 | homepage: "https://github.com/Chris00/ocaml-benchmark"
17 | bug-reports: "https://github.com/Chris00/ocaml-benchmark/issues"
18 | depends: [
19 |   "ocaml" {>= "4.03"}
20 |   "base-unix"
21 |   "dune" {>= "2.0"}
22 | ]
23 | build: [
24 |   ["dune" "subst"] {pinned}
25 |   [
26 |     "dune"
27 |     "build"
28 |     "-p"
29 |     name
30 |     "-j"
31 |     jobs
32 |     "@install"
33 |     "@runtest" {with-test}
34 |     "@doc" {with-doc}
35 |   ]
36 | ]
37 | dev-repo: "git+https://github.com/Chris00/ocaml-benchmark.git"
38 | 


--------------------------------------------------------------------------------
/examples/iter.ml:
--------------------------------------------------------------------------------
 1 | open Bigarray
 2 | 
 3 | let n = 1_000
 4 | 
 5 | (* Bigarrays *)
 6 | type vec = (float, float64_elt, c_layout) Array1.t
 7 | let a = Array1.create float64 c_layout n
 8 | let () = Array1.fill a 1.
 9 | 
10 | let ba f (x: vec) =
11 |   for i = 0 to n - 1 do f x.{i} done
12 | 
13 | let ba_unsafe f (x: vec) =
14 |   for i = 0 to n - 1 do f (Array1.unsafe_get x i) done
15 | 
16 | (* Arrays *)
17 | let b = Array.make n 1.
18 | 
19 | let arr f (x: float array) =
20 |   for i = 0 to n-1 do f x.(i) done
21 | 
22 | let arr_unsafe f (x: float array) =
23 |   for i = 0 to n-1 do f (Array.unsafe_get x i) done
24 | 
25 | (* Lists *)
26 | let c = Array.to_list b
27 | 
28 | 
29 | open Benchmark
30 | 
31 | 
32 | let () =
33 |   (* Simulate a simple side effect *)
34 |   let z = ref 0. in
35 |   let f x = z := x in
36 | 
37 |   let res = throughputN ~repeat:3 3
38 |     [("ba", (fun () -> ba f a), ());
39 |      ("ba_unsafe", (fun () -> ba_unsafe f a), ());
40 |      ("arr", (fun () -> arr f b), ());
41 |      ("arr_unsafe", (fun () -> arr_unsafe f b), ());
42 |      ("list", (fun () -> List.iter f c), ())
43 |     ] in
44 |   print_endline "Iterating a function with a simple side effect:";
45 |   tabulate res
46 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 1.7 2025-01-29
 3 | --------------
 4 | 
 5 | - move to dune 2.0
 6 | - collect and display GC statistics
 7 | - use Re.Pcre instead of pcre in example
 8 | 
 9 | 1.6 2018-09-07
10 | --------------
11 | 
12 | - Port to Dune (not the former Jbuilder) and dune-release.
13 | - Fix some typos in the documentation.
14 | 
15 | 1.5 2018-05-17
16 | --------------
17 | 
18 | - Port to Dune/Jbuilder and Topkg.
19 | - Add option `--all` to the `Tree.arg`.
20 | - Fix uncaught exception in `Tree.run_global`.
21 | 
22 | 
23 | 
24 | Very old changes
25 | ----------------
26 | 
27 | 2004-08-22  Troestler Christophe  <chris_77@users.sourceforge.net>
28 | 
29 | * benchmark.ml: Code mostly rewritten to improve clarity (and to
30 |   correct some bugs).  Allows to return multiple times for a given
31 |   test.  Student's statistical test to determine whether two rates
32 |   are significantly different (see `log_gamma`, `betai`,
33 |   `cpl_student_t`, `comp_rates` and `different_rates`).
34 | 
35 | * benchmark.mli: The documentation is greatly improved.  Functions
36 |   `make`, `add`, `sub` instead of `create`, `sum`, `diff` for
37 |   uniformity with the OCaml standard library.
38 | 
39 | 2004-08-18  Troestler Christophe  <chris_77@users.sourceforge.net>
40 | 
41 | * benchmark: Checked Doug Bagley module in CVS.
42 | 


--------------------------------------------------------------------------------
/examples/numbers.ml:
--------------------------------------------------------------------------------
 1 | open Printf
 2 | open Benchmark
 3 | 
 4 | (* Test the speed of addition for native ints (unboxed), and
 5 |    Int32/Int64 (which are both boxed).
 6 | 
 7 |    The output looks something like numbers.out
 8 | *)
 9 | 
10 | let f_int n =
11 |   let rec loop i sum =
12 |     if i < n then loop (i + 1) (sum + 1) else sum in
13 |   loop 0 0
14 | 
15 | let f_int32 n =
16 |   let rec loop i sum =
17 |     if i < n then loop (i + 1) (Int32.add sum Int32.one) else sum in
18 |   Int32.to_int (loop 0 Int32.zero)
19 | 
20 | let f_int64 n =
21 |   let rec loop i sum =
22 |     if i < n then loop (i + 1) (Int64.add sum Int64.one) else sum in
23 |   Int64.to_int (loop 0 Int64.zero)
24 | 
25 | let () =
26 |   (* print out the results of the f_* functions to doublecheck that
27 |      they work as we intend. *)
28 |   printf "f_int   666 = %d\n" (f_int   666);
29 |   printf "f_int32 666 = %d\n" (f_int32 666);
30 |   printf "f_int64 666 = %d\n" (f_int64 666);
31 |   print_newline ();
32 | 
33 |   (* let's exercise the *1 functions: *)
34 |   let _ = latency1 ~name:"int-1-lat" 1000L f_int 10000 in
35 |   let s = throughput1 ~name:"int-1-thru" 5 f_int 10000 in
36 |   print_gc s;
37 |   print_newline ();
38 | 
39 |   (* now let's exercise the *N functions: *)
40 |   let res = throughputN ~repeat:5 10
41 |               [("int",   f_int,   10000);
42 |                ("int32", f_int32, 10000);
43 |                ("int64", f_int64, 10000); ] in
44 |   print_newline ();
45 |   tabulate res;
46 |   print_gc res;
47 | 
48 |   print_newline ();
49 |   let res = latencyN 2000L [("int",   f_int,   10000);
50 |                             ("int32", f_int32, 10000);
51 |                             ("int64", f_int64, 10000); ] in
52 |   print_newline ();
53 |   tabulate res
54 | 


--------------------------------------------------------------------------------
/examples/regexps.ml:
--------------------------------------------------------------------------------
 1 | module Pcre = Re.Pcre
 2 | open Printf
 3 | open Benchmark
 4 | 
 5 | (* Test the speed of standard regular expressions vs. Pcre using a
 6 |    simple regexp with captures.
 7 | 
 8 |    The output looks something like regexps.out
 9 | *)
10 | 
11 | (* Create a chunk of data to search.
12 |    It's full of "near hits", strings of "012345678"
13 |    with a string on the end we are searching for: "0123456789" *)
14 | let bigdata =
15 |   let size = 500000 in
16 |   let buf = Buffer.create size in
17 |   for _ = 1 to size/10 - 1 do Buffer.add_string buf "012345678 " done;
18 |   Buffer.add_string buf "0123456789";
19 |   Buffer.contents buf
20 | 
21 | let pcre_re = Pcre.regexp "(012345678) (0123456789)"
22 | let str_re  = Str.regexp  "\\(012345678\\) \\(0123456789\\)"
23 | 
24 | let pcre_match dat =
25 |   let group = Pcre.extract ~rex:pcre_re dat in
26 |   (group.(1), group.(2))
27 | 
28 | let str_match dat =
29 |   let _pos = Str.search_forward str_re dat 0 in
30 |   (Str.matched_group 1 dat, Str.matched_group 2 dat)
31 | 
32 | let () =
33 |   (* Print out the results of the functions to doublecheck that they
34 |      work as we intend. *)
35 |   let (a, b) = pcre_match bigdata in printf "Pcre matches: %s %s\n" a b;
36 |   let (a, b) = str_match  bigdata in printf "Str  matches: %s %s\n" a b;
37 |   print_newline ();
38 | 
39 |   let res = throughputN ~repeat:5 5
40 |     [("pcre match", pcre_match, bigdata);
41 |      ("str match",  str_match,  bigdata)] in
42 |   print_newline();
43 |   tabulate res
44 | 
45 | (*   print_newline(); *)
46 | (*   let res = latencyN ~repeat:5 100 *)
47 | (*               [("pcre match", pcre_match, bigdata); *)
48 | (*                ("str match",  str_match,  bigdata)] in *)
49 | (*   print_newline(); *)
50 | (*   tabulate res *)
51 | 


--------------------------------------------------------------------------------
/examples/composition.ml:
--------------------------------------------------------------------------------
 1 | (* Tries to show the profile cost of composing small functions. *)
 2 | 
 3 | (* Small functions: permutations of [0 .. n-1] *)
 4 | 
 5 | let n = 100000
 6 | let rotate r = fun k -> (k + r) mod n
 7 | let reverse i j = fun k -> if i <= k && k <= j then j + i - k else k
 8 | let splice l i j = fun k ->
 9 |   if k < j then if k < i then k else k + l + 1
10 |   else if k <= j + l then k - j + i
11 |   else let k' = k - l - 1 in if k' < i then k' else k
12 | 
13 | 
14 | open Benchmark
15 | 
16 | let ncomp = 400 (* Number of compositions *)
17 | 
18 | let make_perms =
19 |   (* Create a random list of transformations *)
20 |   Random.self_init();
21 |   let rec random_perm ((p_f, p_v) as acc) i =
22 |     if i <= 0 then acc else
23 |       let c = Random.int 3 in
24 |       (* New function *)
25 |       let p =
26 |         if c = 0 then rotate (Random.int n)
27 |         else if c = 1 then reverse (Random.int n) (Random.int n)
28 |         else (* c = 2 *) splice (Random.int n) (Random.int n) (Random.int n) in
29 |       (* Corresponding array transformer *)
30 |       let p_vec w v =
31 |         for i = 0 to Array.length v - 1 do w.(i) <- p v.(i) done in
32 |       random_perm (p :: p_f, p_vec :: p_v) (i - 1) in
33 |   random_perm ([], [])
34 | 
35 | let () =
36 |   let ncomp = 300 in
37 |   let p_f, p_v = make_perms ncomp in
38 |   let v = Array.init n (fun k -> k) in
39 |   let do_f () =
40 |     let f = List.fold_left (fun f f0 -> (fun k -> f0(f k))) (fun k -> k) p_f in
41 |     Array.map f v
42 |   and do_v () =
43 |     snd(List.fold_left (fun (w,v) f -> f w v; (v,w)) (Array.make n 0, v) p_v)
44 |   in
45 | 
46 |   let res = throughputN ~repeat:3 5 [("fun", do_f, ());
47 |                                      ("vec", do_v, ()) ] in
48 |   tabulate res;
49 |   print_gc res
50 | 


--------------------------------------------------------------------------------
/examples/ar_ba.ml:
--------------------------------------------------------------------------------
  1 | (* Compare bigarray and standard float array access times.  *)
  2 | 
  3 | open Bigarray
  4 | 
  5 | let n = 10_000
  6 | let m = 1000
  7 | 
  8 | (* Bigarrays
  9 |  ***********************************************************************)
 10 | type vec = (float, float64_elt, c_layout) Array1.t
 11 | 
 12 | let a = Array1.create float64 c_layout n
 13 | let () = Array1.fill a 1.
 14 | 
 15 | let ba (a: vec) =
 16 |   let s = ref 0. in
 17 |   for i = 0 to n-1 do s := !s +. a.{i} done
 18 | 
 19 | let ba_u (a: vec) =
 20 |   let s = ref 0. in
 21 |   for i = 0 to n-1 do s := !s +. Array1.unsafe_get a i done
 22 | 
 23 | let ba_cl () =
 24 |   let s = ref 0. in
 25 |   for i = 0 to n-1 do s := !s +. a.{i} done
 26 | 
 27 | let ba_gen a =
 28 |   let s = ref 0. in
 29 |   for i = 0 to n-1 do s := !s +. a.{i} done
 30 | 
 31 | let ba_alloc () =
 32 |   let a = Array1.create float64 c_layout n in
 33 |   Array1.fill a 1.;
 34 |   let s = ref 0. in
 35 |   for _ = 1 to m do
 36 |     for i = 0 to n-1 do s := !s +. a.{i} done
 37 |   done
 38 | 
 39 | let set_ba (a: vec) = for i = 0 to n-1 do a.{i} <- 1. done
 40 | 
 41 | let set_ba_alloc () =
 42 |   let a = Array1.create float64 c_layout n in
 43 |   for _ = 1 to m do
 44 |     for i = 0 to n-1 do a.{i} <- 3. done
 45 |   done
 46 | 
 47 | (* Arrays
 48 |  ***********************************************************************)
 49 | let b = Array.make n 1.
 50 | 
 51 | let arr (b: float array) =
 52 |   let s = ref 0. in
 53 |   for i = 0 to n-1 do s := !s +. b.(i) done
 54 | 
 55 | let arr_u (b: float array) =
 56 |   let s = ref 0. in
 57 |   for i = 0 to n - 1 do s := !s +. Array.unsafe_get b (i) done
 58 | 
 59 | let arr_cl () =
 60 |   let s = ref 0. in
 61 |   for i = 0 to n-1 do s := !s +. b.(i) done
 62 | 
 63 | let arr_alloc () =
 64 |   let b = Array.make n 1. in
 65 |   let s = ref 0. in
 66 |   for _ = 1 to m do
 67 |     for i = 0 to n-1 do s := !s +. b.(i) done
 68 |   done
 69 | 
 70 | let set_arr (b: float array) = for i = 0 to n-1 do b.(i) <- 1. done
 71 | 
 72 | let set_arr_alloc () =
 73 |   let a = Array.make n 0. in
 74 |   for _ = 1 to m do
 75 |     for i = 0 to n-1 do a.(i) <- 3. done
 76 |   done
 77 | 
 78 | (* Lists
 79 |  ***********************************************************************)
 80 | let c = Array.to_list b
 81 | 
 82 | let list c = ignore(List.fold_left ( +. ) 0. c)
 83 | 
 84 | 
 85 | open Benchmark
 86 | 
 87 | let () =
 88 |   let res = throughputN ~repeat:3 3
 89 |     [("ba", (fun () -> ba a), ());
 90 |      ("ba_u", (fun () -> ba_u a), ());
 91 |      ("ba_cl", ba_cl, ());
 92 |      ("ba_gen", (fun () -> ba_gen a), ());
 93 |      ("set_ba", (fun () -> set_ba a), ());
 94 |      ("arr", (fun () -> arr b), ());
 95 |      ("arr_u", (fun () -> arr_u b), ());
 96 |      ("arr_cl", arr_cl, ());
 97 |      ("list", (fun () -> list c), ());
 98 |      ("set_arr", (fun () -> set_arr b), ());
 99 |     ] in
100 |   print_endline "Sum of elements or set all elements to 3.:";
101 |   tabulate res;
102 |   print_gc res;
103 | 
104 |   let res = throughputN ~repeat:3 3
105 |     [("ba", ba_alloc, ());
106 |      ("set_ba", set_ba_alloc, ());
107 |      ("arr", arr_alloc, ());
108 |      ("set_arr", set_arr_alloc, ());
109 |     ] in
110 |   print_endline "With allocation:";
111 |   tabulate res;
112 |   print_gc res
113 | 


--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 2 | <html><head>
 3 | 
 4 | 
 5 |   <meta content="text/html; charset=ISO-8859-1" http-equiv="content-type"><title>Ocaml Benchmark module</title>
 6 |   
 7 |   <meta content="Christophe Troestler" name="author">
 8 |   <meta content="Module to benchmark running times of code" name="description"></head><body style="color: rgb(0, 0, 0); background-color: rgb(255, 255, 255);" alink="#ff6600" link="#e0991d" vlink="#999999">
 9 | <br>
10 | <table style="text-align: left; width: 100%;" border="0" cellpadding="2" cellspacing="2">
11 |   <tbody>
12 |     <tr>
13 |       <td colspan="2" rowspan="1" style="vertical-align: top; text-align: center; white-space: nowrap;"><img style="width: 759px; height: 154px;" alt="OCaml Benchmark" src="OCamlBenchmark.jpg"><br>
14 |       </td>
15 |     </tr>
16 |     <tr>
17 |       <!--      <td style="vertical-align: top; width: 20px;"><a
18 | 		href="http://ocaml-benchmark.sourceforge.net/"><img
19 | 		style="border: 0px solid ; width: 84px; height: 46px;"
20 | 		alt="Home" src="Home.jpg"></a><br> -->
21 |       <td style="vertical-align: top; width: 20px;"><a href="https://forge.ocamlcore.org/projects/ocaml-benchmark/"><img style="border: 0px solid ; width: 128px; height: 46px;" alt="Summary" src="Summary.jpg"></a><br>
22 | 	<a href="doc/index.html"><img style="border: 0px solid ; width: 190px; height: 46px;" alt="Documentation" src="Documentation.jpg"></a><br>
23 | 	<span style="font-family: helvetica,arial,sans-serif;"><a href="https://forge.ocamlcore.org/frs/?group_id=197"><img style="border: 0px solid ; width: 129px; height: 46px;" alt="Download" src="Download.jpg"></a><br>
24 | 	  <a href="LICENSE"><img style="border: 0px solid ; width: 101px; height: 46px;" alt="License" src="License.jpg"></a></span>
25 |       </td>
26 |       <td style="vertical-align: top;">
27 | 	<p>
28 | 	  OCaml Benchmark is a small module to benchmark running times
29 | 	  of code.  It supersedes an earlier version
30 | 	  by Doug Bagley.
31 | 	  A typical use looks like this:<br><br>
32 | 	  <code class="code">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
33 | 	    <span style="color: rgb(255, 0, 0); font-weight: bold;" class="keyword">open</span>
34 | 	    <span style="color: rgb(51, 102, 255);" class="constructor">Benchmark</span><br>
35 | 	    &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
36 | 	    <span style="color: rgb(255, 0, 0); font-weight: bold;" class="keyword">let</span>
37 | 	    res&nbsp;=&nbsp;throughputN&nbsp;8&nbsp;[(<span style="color: rgb(204, 102, 0);" class="string">"foo"</span>,&nbsp;f,&nbsp;1000000);<br>
38 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(<span style="color: rgb(204, 102, 0);" class="string">"bar"</span>, g,
39 | 1000000);<br>
40 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;(<span style="color: rgb(204, 102, 0);" class="string">"baz"</span>, h,
41 | 1000000) ]&nbsp;<span style="color: rgb(255, 0, 0); font-weight: bold;" class="keyword">in</span><br>
42 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;print_newline();<br>
43 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;tabulate&nbsp;res<br>
44 | 	    <br>
45 | 	  </code>
46 | 	  For a complete description of the functions, see the
47 | 	  interface <a href="doc/Benchmark.html"><span style="font-family: courier new,courier,monospace;">Benchmark.mli</span></a>.
48 | 	  Some examples are also included in
49 | 	  the <a href="https://forge.ocamlcore.org/frs/?group_id=197">tarball</a>.
50 | 	</p>
51 | 	<p>
52 | 	  If you have questions, suggestions, bugs,... you can contact
53 | 	  me by
54 | 	  email: <a href="mailto:Christophe.Troestler@umons.ac.be?SUBJECT=OCaml%20Benchmark">Christophe.Troestler@umons.ac.be</a>.
55 | 	  The source code is maintained in a <a href="http://git-scm.com/" target="_blank">Git</a> repository stored on <a href="https://github.com/Chris00/ocaml-benchmark">Github</a>.
56 | 	</p>
57 | 	<p>The code is released under the GNU Lesser General Public
58 | 	  License (LGPL) with the same special exception as for the
59 | 	  OCaml standard library (see the
60 | 	  file <a href="LICENSE">LICENSE</a> for more
61 | 	  details).</p></td>
62 |     </tr>
63 |   </tbody>
64 | </table>
65 | 
66 | 
67 | </body></html>


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | GNU LESSER GENERAL PUBLIC LICENSE, Version 3, 29 June 2007
  2 | ==========================================================
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 | ## 0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 | ## 1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 | ## 2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 | 1. under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 | 2. under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 | ## 3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 | 1. Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 | 2. Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 | ## 4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 | 1. Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 | 2. Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 | 3. For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 | 4. Do one of the following:
100 | 
101 |    1. Convey the Minimal Corresponding Source under the terms of this
102 |       License, and the Corresponding Application Code in a form
103 |       suitable for, and under terms that permit, the user to
104 |       recombine or relink the Application with a modified version of
105 |       the Linked Version to produce a modified Combined Work, in the
106 |       manner specified by section 6 of the GNU GPL for conveying
107 |       Corresponding Source.
108 | 
109 |    2. Use a suitable shared library mechanism for linking with the
110 |       Library.  A suitable mechanism is one that (a) uses at run time
111 |       a copy of the Library already present on the user's computer
112 |       system, and (b) will operate properly with a modified version
113 |       of the Library that is interface-compatible with the Linked
114 |       Version.
115 | 
116 | 5. Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 | ## 5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 | 1. Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 | 2. Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 | ## 6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/src/benchmark.mli:
--------------------------------------------------------------------------------
  1 | (* File: benchmark.mli
  2 | 
  3 |    Copyright Aug. 2004-present by Troestler Christophe
  4 |    Christophe.Troestler(at)umons.ac.be
  5 | 
  6 |    Copyright 2002-2003, Doug Bagley
  7 |    http://www.bagley.org/~doug/ocaml/
  8 |    Based on the Perl module Benchmark.pm by Jarkko Hietaniemi and Tim Bunce
  9 | 
 10 |    This library is free software; you can redistribute it and/or
 11 |    modify it under the terms of the GNU Lesser General Public License
 12 |    version 3 as published by the Free Software Foundation, with the
 13 |    special exception on linking described in file LICENSE.txt.
 14 | 
 15 |    This library is distributed in the hope that it will be useful, but
 16 |    WITHOUT ANY WARRANTY; without even the implied warranty of
 17 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 18 |    LICENSE.txt for more details.
 19 | *)
 20 | 
 21 | (** Benchmark running times of code.
 22 | 
 23 |     This module implements benchmarking functions for measuring the
 24 |     run-time of one or many functions using latency (multiple
 25 |     repetitions) or throughput (repeat until some time period has
 26 |     passed) tests.
 27 | 
 28 |     {b Examples:}
 29 |     Run the function [f] with input [5000] for [10] iterations and
 30 |     print the CPU times:
 31 |     {[
 32 |     Benchmark.latency1 10 f 5000                        ]}
 33 | 
 34 |     Run the tests [foo], [bar] and [baz] three times for at least [8]
 35 |     seconds each, printing the results of each test, and then print a
 36 |     cross tabulation of the results:
 37 |     {[
 38 |     open Benchmark
 39 |     let res = throughputN ~repeat:3 8 [("foo", foo, 1000000);
 40 |                                        ("bar", bar, 2000000);
 41 |                                        ("baz", baz, 3000000); ] in
 42 |     print_newline();
 43 |     tabulate res                                                    ]}
 44 | 
 45 |     Time how long it takes to some piece of code:
 46 |     {[
 47 |     let t0 = Benchmark.make 0L in
 48 |     (* do something here *)
 49 |     let b = Benchmark.sub (Benchmark.make 0L) t0 in
 50 |     print_endline "Benchmark results:";
 51 |     print_endline (Benchmark.to_string b)                       ]}
 52 |  *)
 53 | 
 54 | 
 55 | (** {2 Timing and samples structures} *)
 56 | 
 57 | (** The information returned by timing tests. *)
 58 | type t = {
 59 |   wall : float;   (** Wallclock time (in seconds) *)
 60 |   utime : float;  (** This process User CPU time (in seconds) *)
 61 |   stime : float;  (** This process System CPU time (in seconds) *)
 62 |   cutime : float; (** Child process User CPU time (in seconds) *)
 63 |   cstime : float; (** Child process System CPU time (in seconds) *)
 64 |   iters : Int64.t;  (** Number of iterations. *)
 65 |   minor_words: float; (** Bytes allocated on minor heap *)
 66 |   major_words: float; (** Bytes allocated on major heap, incl. promoted *)
 67 |   promoted_words: float; (** Bytes moved from minor heap to major heap *)
 68 | }
 69 | 
 70 | (** Style of the output. *)
 71 | type style =
 72 |   | No_child    (** Do not print child CPU times *)
 73 |   | No_parent   (** Do not print parent CPU times *)
 74 |   | All         (** Print parent and child CPU times *)
 75 |   | Auto        (** Same as [No_child] unless there is child CPU used *)
 76 |   | Nil         (** Print nothing *)
 77 | 
 78 | val make : Int64.t -> t
 79 |   (** [Benchmark.make n] create a new {!Benchmark.t} structure with
 80 |       current time values and [n] iterations.  Only the integer part of
 81 |       [n] is used, the fractional part is ignored. *)
 82 | 
 83 | val add : t -> t -> t
 84 |   (** [Benchmark.add b1 b2] add {!Benchmark.t} structure [b1] to [b2]. *)
 85 | 
 86 | val sub : t -> t -> t
 87 |   (** [Benchmark.sub b1 b2] subtract {!Benchmark.t} structure [b2]
 88 |     from [b1]. *)
 89 | 
 90 | val to_string : ?style:style -> ?fwidth:int -> ?fdigits:int -> t -> string
 91 |   (** [Benchmark.to_string ?style ?fwidth ?fdigits b] converts the
 92 |     {!Benchmark.t} structure to a formatted string.
 93 | 
 94 |     @param style printing style (default: [Auto])
 95 |     @param fwidth number of chars reserved for the numbers (default: [5])
 96 |     @param fdigits number of fractional digits of the numbers (default: [2])
 97 |   *)
 98 | 
 99 | type samples = (string * t list) list
100 |   (** Association list that links the names of the tests to the list
101 |     of their timings. *)
102 | 
103 | val merge : samples -> samples -> samples
104 |   (** [merge l1 l2] merges the two association lists of timings [l1]
105 |     and [l2] into a single one, concatenating the timings for the same
106 |     names of [l1] and [l2]. *)
107 | 
108 | 
109 | (** {2 Timing functions} *)
110 | 
111 | val throughputN :
112 |   ?min_count:Int64.t ->
113 |   ?style:style ->
114 |   ?fwidth:int ->
115 |   ?fdigits:int ->
116 |   ?repeat:int ->
117 |   int -> (string * ('a -> 'b) * 'a) list -> samples
118 |   (** [Benchmark.throughputN ?min_count ?style ?fwidth ?fdigits t
119 |       funs] runs each function in list [funs] for at least [t > 0]
120 |       seconds.  The list [funs] has the structure: [[(name1, f1, x1);
121 |       (name2, f2, x2); ...]], where [name1] is the name to label the
122 |       first test, [f1] is the function to run, and [x1] is its
123 |       input,...  If [~style] is not [Nil], then the results are
124 |       printed.  Returns the resulting list which can be passed to
125 |       {!Benchmark.tabulate} if you want a comparison table.
126 | 
127 |       REMARK that [t] is the running time of the functions, not of the
128 |       repetition loop.  Thus a very fast running function will need
129 |       lots of repetitions to make a difference of [t] seconds to the
130 |       empty loop.  In this case, the running time of the loop will
131 |       dominate the whole process which can therefore take much longer
132 |       than [t] seconds.  If you are only interested in the {i
133 |       relative} times of fast functions and not in their real running
134 |       times, we recommend you wrap each of them in a loop.
135 | 
136 |       @param min_count a warning will be printed if the number of runs is
137 |                        less than [min_count].  This is a first defense against
138 |                        meaningless results. (default: [4L])
139 |       @param style   printing style (default: [Auto])
140 |       @param fwidth  number of chars reserved for the numbers (default: [5])
141 |       @param fdigits number of fractional digits of the numbers (default: [2])
142 | 
143 |       @param repeat  number of times each function running time is measured.
144 |                      The default is [1] to be compatible with the former
145 |                      version of this library but it is highly recommended
146 |                      to set it to a higher number to enable confidence
147 |                      statistics to be performed by {!Benchmark.tabulate}.
148 |   *)
149 | 
150 | val throughput1 :
151 |   ?min_count:Int64.t ->
152 |   ?style:style ->
153 |   ?fwidth:int ->
154 |   ?fdigits:int ->
155 |   ?repeat:int ->
156 |   int -> ?name:string -> ('a -> 'b) -> 'a -> samples
157 |   (** [Benchmark.throughput1 ?min_count ?style ?fwidth ?fdigits t ?name f x]
158 |     runs one function [f] with input [x] for at least [t] seconds, and
159 |     returns the result, which is also printed unless [~style] is
160 |     [Nil].  See {!Benchmark.throughputN} for more information.  *)
161 | 
162 | val latencyN :
163 |   ?min_cpu:float ->
164 |   ?style:style ->
165 |   ?fwidth:int ->
166 |   ?fdigits:int ->
167 |   ?repeat:int ->
168 |   Int64.t -> (string * ('a -> 'b) * 'a) list -> samples
169 |   (** [Benchmark.latencyN ?min_cpu ?style ?fwidth ?fdigits n funs]
170 |       runs each function in list [funs] for [n] iterations.  [n] must be
171 |       at least 4.  The list [funs] has the structure: [[(name1, f1, x1);
172 |       (name2, f2, x2); ...]], where [name1] is the name to label the
173 |       first test, [f1] is the function to run, and [x1] is its input,...
174 |       If style is not [Nil], then the results are printed.  Returns the
175 |       results list, which can be passed to {!Benchmark.tabulate} if you
176 |       want to print a comparison table.
177 | 
178 |       @raise Invalid_argument if [n < 4L].
179 |       @param min_cpu a warning will be printed if the total CPU time is
180 |                      less than [min_cpu].  This is a first defense against
181 |                      meaningless results  (default: [0.4]).
182 |       @param style   printing style (default: [Auto]).
183 |       @param fwidth  number of chars reserved for the numbers (default: [5]).
184 |       @param fdigits number of fractional digits of the numbers (default: [2]).
185 | 
186 |       @param repeat  number of times each function running time is measured.
187 |                      The default is [1] to be compatible with the former
188 |                      version of this library but it is highly recommended
189 |                      to set it to a higher number to enable confidence
190 |                      statistics to be performed by {!Benchmark.tabulate}.
191 |       *)
192 | 
193 | val latency1 :
194 |   ?min_cpu:float ->
195 |   ?style:style ->
196 |   ?fwidth:int ->
197 |   ?fdigits:int ->
198 |   ?repeat:int ->
199 |   Int64.t -> ?name:string -> ('a -> 'b) -> 'a -> samples
200 |   (** [Benchmark.latency1 ?min_cpu ?style ?fwidth ?fdigits n ?name f x]
201 |       runs the function [f] with input [x] for [n] iterations, and
202 |       returns the results, which are also printed unless [~style] is
203 |       [Nil].  See {!Benchmark.latencyN} for more information. *)
204 | 
205 | 
206 | val tabulate : ?no_parent:bool -> ?confidence:float -> samples -> unit
207 |   (** [Benchmark.tablulate results] prints a comparison table for a
208 |     list of [results] obtained by {!Benchmark.latencyN} or
209 |     {!Benchmark.throughputN} with each function compared to all the
210 |     others.  The table is of the type
211 | 
212 | {[            Rate name1 name2 ...   OR          s/iter name1 name2 ...
213 |         name1  #/s    --   r12             name1   #       --   r12
214 |         name2  #/s   r21    --             name2   #      r21    --
215 |         ...                                ...                            ]}
216 | 
217 |     where name1, name2,... are the labels of the tests sorted from
218 |     slowest to fastest and rij says how much namei is faster (or
219 |     slower if < 0) than namej (technically it is equal to (ri - rj)
220 |     expressed in percents of rj where ri and rj are the rates of namei
221 |     and namej respectively).
222 | 
223 |     If several results are associated to a given name, they are used
224 |     to compute a Student's statistic to check whether the rates are
225 |     significantly different.  If ri and rj are not believed to be
226 |     different, rij will be printed between brackets.
227 | 
228 |     @param no_parent if [true], only take in account the times of the
229 |     children (default: [false]).
230 | 
231 |     @param confidence is used to determine the confidence interval for
232 |     the Student's test.  (default: [0.95]).  *)
233 | 
234 | val print_gc : samples -> unit
235 | (** Print GC statistics. *)
236 | 
237 | (** {2 Benchmark Tree}
238 | 
239 |     Naming benchmarks within a hierarchy that allows to run them all,
240 |     or filter them so that only a subset is run.  *)
241 | 
242 | module Tree : sig
243 |   type t
244 |   (** A (possibly empty) tree of benchmarks.  Individual benchmarks
245 |       (i.e., calls to {!throughputN}, {!latencyN}, etc. wrapped with
246 |       {!(>:)}) can appear at any node of the tree.  The edges are
247 |       annotated with strings, and paths (see {!path}) are used to
248 |       select subtrees.  *)
249 | 
250 |   val ( @> ) : string -> samples Lazy.t -> t
251 |   (** [name @> bench] returns a (named) node of the benchmark tree.
252 |       If evaluated, it simply returns samples (for instance using
253 |       {!throughputN}).  If the name contains dots, it is interpreted
254 |       as a path.  For examle ["a.b" @> bench] is equivalent to ["a" @>>
255 |       "b" @> bench].
256 | 
257 |       Example (the lazy thunk is used to hide initialization code):
258 | 
259 |       {[
260 |         Benchmark.Tree.(
261 |           "sort" >: lazy
262 |             (let a = Array.init 1_000_000 (fun i -> i) in
263 |              Benchmark.throughput1 18 (Array.sort compare) a
264 |             )
265 |         ) ;;
266 |       ]} *)
267 | 
268 |   val ( @>> ) : string -> t -> t
269 |   (** [name @>> tree] makes [tree] accessible through the given
270 |       [name], i.e., prefix all paths in the tree by [name].  It has no
271 |       effect if [name = ""].  If the name contains dots, it is
272 |       interpreted as a path.  For instance ["n1.n2" @>> tree] is
273 |       equivalent to ["n1" @>> "n2" @>> tree] and adds the path
274 |       [[n1;n2]] as a prefix to the tree.
275 | 
276 |       @raise Invalid_argument is the name is invalid.  At least names
277 |       corresponding to OCaml identifiers are valid.  *)
278 | 
279 |   val concat : t list -> t
280 |   (** Merge the given trees (recursively). Merging proceeds by taking the union
281 |       of all path heads in the list, and, for each such string [x],
282 |       merging recursively all subtrees reachable under [x].
283 | 
284 |       For instance merging the trees [a.{b, c}], [a.b.d] and [{a.d, foo}]
285 |       will give the tree [{a.(b, b.d, c, d}, d}].  *)
286 | 
287 |   val ( @>>> ) : string -> t list -> t
288 |   (** [name @>>> l] is equivalent to [name @>> concat l]. It names a list of
289 |       trees, and is useful to build lists of benchmarks related to some
290 |       common topic.  If the name contains dots, it is interpreted
291 |       as a path.
292 | 
293 |       @raise Invalid_argument is the name is invalid.  At least names
294 |       corresponding to OCaml identifiers are valid.  *)
295 | 
296 |   val with_int : (int -> t) -> int list -> t
297 |   (** [with_int f l] parametrize trees with several integer values
298 |       (e.g. a size).  The tree [f i] is prefixed with the label [i].  *)
299 | 
300 |   val print : Format.formatter -> t -> unit
301 |   (** Print the tree of benchmarks (its structure) on the given formatter.
302 |       Useful in combination with the [path] argument of {!run} *)
303 | 
304 |   (** {2 Path} *)
305 | 
306 |   type path = string list
307 |   (** A path in a tree, pointing at a subtree. *)
308 | 
309 |   val print_path : Format.formatter -> path -> unit
310 | 
311 |   val parse_path : string -> path
312 |   (** Split a string into a path at the "." separators.
313 |     Example: [parse_path "a.b.c"] returns [["a"; "b"; "c"]]. *)
314 | 
315 |   val prefix : path -> t -> t
316 |   (** Add the path as a prefix to the tree, similar to repeated
317 |       calls to [@>>]. *)
318 | 
319 |   val filter : path -> t -> t
320 |   (** [filter p t] return the tree obtained by keeping all the paths
321 |       in [t] that match the path [p].
322 |       Empty components [""] in the middle of the path are ignored.
323 |       Empty components [""] at the end of the path return only the
324 |       benchmarks at that level (i.e., one discards the benchmarks
325 |       pointed by paths of which [p] is a strict prefix).
326 |       The special path component ["*"] selects all subtrees at that
327 |       level (it acts as a wildcard).  *)
328 | 
329 | 
330 |   (** {2 Running} *)
331 | 
332 |   type arg_state
333 | 
334 |   val arg : unit -> arg_state * (Arg.key * Arg.spec * Arg.doc) list
335 |   (** [arg ()] returns [(arg, specs)] where [arg] is a state coming
336 |       from parsing the command line using [specs].  The options are:
337 |       - "--path" or "-p" to add a sub-tree of benchmarks;
338 |       - "--tree" to print the tree of benchmarks;
339 |       - "--all" or "-a" to run all paths in the tree.
340 |       Note that the default state runs all benchmarks.  You need to
341 |       use something like [Arg.parse (specs @ more_specs) ...] to make
342 |       the above arguments available to the program user.  *)
343 | 
344 |   val run : ?with_gc:bool ->
345 |             ?arg: arg_state -> ?paths: path list ->  ?out: Format.formatter ->
346 |             t -> unit
347 |   (** [run t] runs all benchmarks of [t] and print the results to [fmt].
348 |       @param with_gc if true, will print GC statistics as well (Default: true)
349 |       @param paths if provided, only the sub-trees corresponding to
350 |         these path is executed.  Default: execute everything.
351 |       @param out The formatter on which to print the output.
352 |         Default: [Format.std_formatter].
353 |       @param arg use the result of the command line parsing to direct
354 |         the run.  Default: run all paths in [path] *)
355 | 
356 | 
357 |   (** {2 Global Registration} *)
358 | 
359 |   val global : unit -> t
360 |   (** Global tree, built from calls to {!register}.  It is useful
361 |       to centralize all benchmarks at one place to, then, run them all *)
362 | 
363 |   val register : t -> unit
364 |   (** Register a benchmark to the global registry of benchmarks. *)
365 | 
366 |   val run_global :
367 |     ?argv:string array ->
368 |     ?out:Format.formatter ->
369 |     unit -> unit
370 |   (** Same as {!run} on the global tree of benchmarks and parsing the
371 |       command line arguments from [argv] (which is [Sys.argv] by
372 |       default). *)
373 | end
374 | 


--------------------------------------------------------------------------------
/src/benchmark.ml:
--------------------------------------------------------------------------------
  1 | (* File: benchmark.ml
  2 |    For comparing runtime of functions
  3 |    *********************************************************************
  4 | 
  5 |    Copyright 2004-present, Troestler Christophe
  6 |    Christophe.Troestler(at)umh.ac.be
  7 | 
  8 |    Copyright 2002-2003, Doug Bagley
  9 |    http://www.bagley.org/~doug/ocaml/
 10 |    Initially based on the Perl module Benchmark.pm by Jarkko Hietaniemi
 11 |    and Tim Bunce
 12 | 
 13 |    This library is free software; you can redistribute it and/or
 14 |    modify it under the terms of the GNU Lesser General Public License
 15 |    version 3 as published by the Free Software Foundation, with the
 16 |    special exception on linking described in file LICENSE.
 17 | 
 18 |    This library is distributed in the hope that it will be useful, but
 19 |    WITHOUT ANY WARRANTY; without even the implied warranty of
 20 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 21 |    LICENSE.txt for more details.
 22 | *)
 23 | 
 24 | open Printf
 25 | 
 26 | type t = {
 27 |   wall   : float;
 28 |   utime  : float;
 29 |   stime  : float;
 30 |   cutime : float;
 31 |   cstime : float;
 32 |   iters  : Int64.t;
 33 |   (* As of version 0.8, one had to change [iter] from [int] because,
 34 |      as machines run faster, a number of iterations ~ 2^29 is no
 35 |      longer enough (2^29 is the largest > 0 power of 2 that [int] can
 36 |      hold on a 32 bits platform. *)
 37 |   minor_words: float;
 38 |   major_words: float;
 39 |   promoted_words: float;
 40 | }
 41 | 
 42 | type style = No_child | No_parent | All | Auto | Nil
 43 | 
 44 | let null_t =
 45 |   { wall = 0.; utime = 0.; stime = 0.; cutime = 0.; cstime = 0.; iters = 0L;
 46 |     minor_words = 0.; major_words = 0.; promoted_words = 0.; }
 47 | 
 48 | let bytes_per_word = float (Sys.word_size / 8)
 49 | 
 50 | let make n =
 51 |   let minor_words, promoted_words, major_words = Gc.counters() in
 52 |   let minor_words = minor_words *. bytes_per_word in
 53 |   let major_words = major_words *. bytes_per_word in
 54 |   let promoted_words = promoted_words *. bytes_per_word in
 55 |   let tms = Unix.times() in
 56 |   { wall = Unix.gettimeofday();
 57 |     utime = tms.Unix.tms_utime;   stime = tms.Unix.tms_stime;
 58 |     cutime = tms.Unix.tms_cutime; cstime = tms.Unix.tms_cstime;
 59 |     iters = n; minor_words; major_words; promoted_words; }
 60 | 
 61 | let add a b =
 62 |   { wall = a.wall +. b.wall;       utime = a.utime +. b.utime;
 63 |     stime = a.stime +. b.stime;    cutime = a.cutime +. b.cutime;
 64 |     cstime = a.cstime +. b.cstime; iters = Int64.add a.iters b.iters;
 65 |     minor_words = a.minor_words +. b.minor_words;
 66 |     major_words = a.major_words +. b.major_words;
 67 |     promoted_words = a.promoted_words +. b.promoted_words;
 68 |   }
 69 | 
 70 | let sub a b =
 71 |   { wall = a.wall -. b.wall;       utime = a.utime -. b.utime;
 72 |     stime = a.stime -. b.stime;    cutime = a.cutime -. b.cutime;
 73 |     cstime = a.cstime -. b.cstime; iters = Int64.sub a.iters b.iters;
 74 |     minor_words = a.minor_words -. b.minor_words;
 75 |     major_words = a.major_words -. b.major_words;
 76 |     promoted_words = a.promoted_words -. b.promoted_words;
 77 |   }
 78 | 
 79 | (* It may happen that, because of slight variations, the running time
 80 |    of a fast running test is less than the running time of the null
 81 |    loop.  Returning a negative result is obviously ridiculous, thus
 82 |    one returns 0. *)
 83 | let ( -- ) a b = if (a:float) > b then a -. b else 0.
 84 | let pos_sub a b =
 85 |   { wall = a.wall -- b.wall;       utime = a.utime -- b.utime;
 86 |     stime = a.stime -- b.stime;    cutime = a.cutime -- b.cutime;
 87 |     cstime = a.cstime -- b.cstime; iters = Int64.sub a.iters b.iters;
 88 |     minor_words = a.minor_words -- b.minor_words;
 89 |     major_words = a.major_words -- b.major_words;
 90 |     promoted_words = a.promoted_words -- b.promoted_words;
 91 |   }
 92 | 
 93 | 
 94 | let cpu_process b = b.utime +. b.stime
 95 | let cpu_childs b = b.cutime +. b.cstime
 96 | let cpu_all b = b.utime +. b.stime +. b.cutime +. b.cstime
 97 | 
 98 | let string_of_bytes b =
 99 |   if b >= 1e6 then sprintf "%.2f MB" (b /. 1e6)
100 |   else if b >= 1e3 then sprintf "%.2f kB" (b /. 1e3)
101 |   else sprintf "%.0f B" b
102 | 
103 | (* Return a formatted representation of benchmark structure according
104 |    to [style].  Default values for presentation parameters are set
105 |    here. *)
106 | let to_string ?(style=Auto) ?(fwidth=5) ?(fdigits=2) (b:t) : string =
107 |   let pt = cpu_process b
108 |   and ct = cpu_childs b in
109 |   let style =
110 |     if style = Auto then if ct > 1e-10 then All else No_child else style in
111 |   let iter_info t =
112 |     if b.iters > 0L && t > 0.0 then
113 |       sprintf " @ %*.*f/s (n=%Ld)" fwidth fdigits
114 |         (Int64.to_float b.iters /. t) b.iters
115 |     else ""  in
116 |   let f x = sprintf "%*.*f" fwidth fdigits x in
117 |   match style with
118 |   | All ->
119 |       sprintf "%s WALL (%s usr %s sys + %s cusr %s csys = %s CPU, minor = %s, major = %s)%s"
120 |         (f b.wall) (f b.utime) (f b.stime) (f b.cutime) (f b.cstime)
121 |         (f(pt +. ct))
122 |         (string_of_bytes b.minor_words)
123 |         (string_of_bytes (b.major_words -. b.promoted_words))
124 |         (iter_info pt)
125 |   | No_child ->
126 |       sprintf "%s WALL (%s usr + %s sys = %s CPU, minor = %s, major = %s)%s"
127 |         (f b.wall) (f b.utime) (f b.stime) (f pt)
128 |         (string_of_bytes b.minor_words)
129 |         (string_of_bytes (b.major_words -. b.promoted_words)) (iter_info pt)
130 |   | No_parent ->
131 |       sprintf "%s WALL (%s cusr + %s csys = %s CPU, minor = %s, major = %s)%s"
132 |         (f b.wall) (f b.cutime) (f b.cstime) (f ct)
133 |         (string_of_bytes b.minor_words)
134 |         (string_of_bytes (b.major_words -. b.promoted_words))
135 |         (iter_info ct)
136 |   | Nil -> ""
137 |   | Auto -> assert false
138 | 
139 | 
140 | (* Returns a string in minutes-seconds of a time [t >= 0] given in
141 |    seconds. *)
142 | let rec string_of_time t =
143 |   if t = 0 || t = 1 then string_of_int t ^ "s"
144 |   else if t < 60 then string_of_int t ^ "s"
145 |   else if t < 120 then "1m " ^ string_of_time(t - 60)
146 |   else string_of_int(t / 60) ^ "m " ^ string_of_time(t mod 60)
147 | 
148 | (* The time [t >= 0] is rounded to the nearest integer: *)
149 | let string_of_time t = string_of_time(truncate(t +. 0.5))
150 | 
151 | type samples = (string * t list) list
152 | 
153 | let by_name (s1, _) (s2, _) = compare (s1:string) s2
154 | 
155 | let merge (l1:samples) (l2:samples) =
156 |   (* [do_merge] assumes [l1] and [l2] are sorted. *)
157 |   let rec do_merge acc l1 l2 =
158 |     match l1, l2 with
159 |     | _, [] -> acc @ l1
160 |     | [], _ -> acc @ l2
161 |     | ((n1, t1) as d1) :: tl1, ((n2, t2) as d2) :: tl2 ->
162 |         let sgn = compare n1 n2 in
163 |         if sgn = 0 then do_merge ((n1, t1 @ t2) :: acc) tl1 tl2
164 |         else if sgn < 0 then do_merge (d1 :: acc) tl1 l2
165 |         else do_merge (d2 :: acc) l1 tl2 in
166 |   do_merge [] (List.sort by_name l1) (List.sort by_name l2)
167 | 
168 | 
169 | let max_iter = Int64.add (Int64.of_int max_int) 1L
170 |   (* even if [int] is 63 bits, [(max_iter:Int64.t) > 0] *)
171 | 
172 | (* [runloop n_iters n f x] returns the elapsed time of running [n >=
173 |    0L] times [f] with the argument [x].  The structure returned
174 |    declare [n_iter] iterations. *)
175 | let runloop n_iters n f x =
176 |   let n' = Int64.div n max_iter in
177 |   if n' >= max_iter then
178 |     invalid_arg "Benchmark.runloop: number of iterations too large";
179 |   let n1 = Int64.to_int n'
180 |   and n0 = Int64.to_int(Int64.rem n max_iter) in
181 |   let t0 = ref (make 0L) in
182 |   let tbase = !t0.utime in
183 |   (* Wait for user timer to tick.  This makes the error range more
184 |      like -0.01, +0.  If we don't wait, then it's more like -0.01,
185 |      +0.01. *)
186 |   while tbase = (!t0).utime do t0 := make 0L done;
187 |   (* Loop over function we are timing [n] times (looping on int64
188 |      quantities takes too long, this is why we use composite loops). *)
189 |   for _ = 1 to n1 do
190 |     for _ = 0 to max_int do ignore(f x) done; (* [max_iter] runs *)
191 |   done;
192 |   for _ = 1 to n0 do ignore(f x) done;
193 |   let t1 = make n_iters in
194 |   pos_sub t1 !t0
195 | 
196 | (* time a null-loop; no iter count *)
197 | let null_loop n = runloop 0L n ignore ()
198 | 
199 | (* Run function [f] count times, return time taken (all times
200 |    garanteed to be [>= 0.]) *)
201 | let timeit n f x =
202 |   let bn = null_loop n in
203 |   let bm = runloop n n f x in
204 |   pos_sub bm bn (* time of function minus null-loop *),
205 |   bn.wall +. bm.wall (* how much the used had to wait *)
206 | 
207 | 
208 | type printer = {
209 |   print_indent : string -> unit; (* prefix, flushes *)
210 |   print : string -> unit; (* No prefix but flushes *)
211 | }
212 | 
213 | (* [print_run ff bm] prints the list of timings [bm] according to the
214 |    style defined by the optional parameters. *)
215 | let print_run out ?(min_count=4L) ?(min_cpu=0.4) ~style ?fwidth ?fdigits b =
216 |   out.print_indent(to_string ~style ?fwidth ?fdigits b ^ "\n");
217 |   if b.iters < min_count || cpu_all b < min_cpu
218 |      || (b.wall < 1. && b.iters < 1000L)
219 |   then out.print_indent "(warning: too few iterations for a reliable count)\n"
220 | 
221 | 
222 | let latency n out ?min_count ?min_cpu ~style ?fwidth ?fdigits
223 |     ~repeat _name f x =
224 |   let rec loop nrep acc =
225 |     if nrep < 1 then acc
226 |     else (
227 |       Gc.full_major();
228 |       Gc.compact(); (* Reclaim memory to avoid undue GC during the test. *)
229 |       let bm, _ = timeit n f x in
230 |       print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm;
231 |       loop (nrep - 1) (bm :: acc)
232 |     ) in
233 |   loop repeat []
234 | 
235 | 
236 | (* Read the code from bottom to top: [min_iter] determines the minimal
237 |    number of iterations to have a significant timing, then
238 |    [estimate_niter] estimate by linear interpolation the number of
239 |    iter to run [> tmin] and then the test is performed. *)
240 | let throughput tmin out ?min_count ?min_cpu ~style ?fwidth ?fdigits
241 |     ~repeat _name f x : t list =
242 |   (* Run [f] for [niter] times and complete with >= [nmin] iterations
243 |      (estimated by linear interpolation) to run >= [tmin]. *)
244 |   let rec run_test nmin niter bm_init total_wall =
245 |     let bm, wall = timeit niter f x in
246 |     let bm = add bm_init bm in
247 |     let tn = cpu_process bm in
248 |     let total_wall = total_wall +. wall in
249 |     if tn >= tmin then (
250 |       print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm;
251 |       bm, total_wall
252 |     )
253 |     else
254 |       (* FIXME *)
255 |       let n = Int64.of_float((tmin /. tn -. 1.) *. Int64.to_float bm.iters) in
256 |       run_test nmin (max nmin n) bm total_wall in
257 |   (* Repeat the test [nrep] times and return the list of results. *)
258 |   let rec repeat_test nrep acc nmin niter wall_estim =
259 |     if nrep < 1 then acc else (
260 |       Gc.full_major();
261 |       Gc.compact(); (* Reclaim memory to avoid undue GC during the test. *)
262 |       let bm, wall = run_test nmin niter null_t 0. in
263 |       let wall_estim =
264 |         if wall > wall_estim +. 60. then (
265 |           out.print_indent("(Estimated time for subsequent runs: "
266 |                             ^ (string_of_time wall) ^ ")\n");
267 |           wall
268 |         )
269 |         else wall_estim in
270 |       repeat_test (nrep - 1) (bm :: acc) nmin niter wall_estim
271 |     ) in
272 |   (* Estimate number of iter > [nmin] to have a running time >=
273 |      [tmin].  The initial estimate is [n] running [tn] secs.  Linear
274 |      estimates bear a 5% fudge to improve the overall responsiveness. *)
275 |   let tpra = 0.1 *. tmin (* Target/time practice *) in
276 |   let rec estimate_niter nmin n tn wall =
277 |     if tn >= tpra then
278 |       (* FIXME: *)
279 |       (* lin estim *)
280 |       let niter = Int64.of_float(Int64.to_float n *. (1.05 *. tmin /. tn)) in
281 |       let wall_estim = wall *. (1.05 *. tmin /. tn) in
282 |       if wall_estim >= 60. then
283 |         out.print_indent("(Estimated time for each run: "
284 |                          ^ (string_of_time wall_estim) ^ ")\n");
285 |       repeat_test repeat [] nmin (max nmin niter) wall_estim
286 |     else
287 |       (* lin estim *)
288 |       let new_n = Int64.of_float(Int64.to_float n *. 1.05 *. tpra /. tn) in
289 |       let new_bn, new_wall = timeit new_n f x in
290 |       let new_tn = cpu_process new_bn in
291 |       let n = (* make sure we make progress *)
292 |         if new_tn > 1.2 *. tn then new_n
293 |         else Int64.of_float(1.1 *. Int64.to_float n +. 1.) (* FIXME *) in
294 |       estimate_niter nmin n new_tn new_wall in
295 |   (* Determine the minimum number of iterations to run >= 0.1 sec
296 |      (whatever [tmin]).  Inform the user if it takes too long. *)
297 |   let rec min_iter n ~takes_long:previous_took_long total_wall =
298 |     if n <= 0L then
299 |       failwith "throughput: number of iterations too large for Int64.t storage";
300 |     let bm, wall = timeit n f x in
301 |     let tn = cpu_process bm in
302 |     let total_wall = total_wall +. wall in
303 |     if tn < 0.1 then (
304 |       let takes_long = total_wall >= 30. in
305 |       if takes_long then (
306 |         if total_wall >= 120. then (
307 |           out.print " canceled)\n";
308 |           failwith(sprintf "Benchmark.throughputN: wall time is %g while \
309 |             CPU time is %g.  Do you use \"sleep\"?" total_wall tn)
310 |         )
311 |         else if previous_took_long then out.print "." else
312 |           out.print_indent("(Determining how many runs to perform, \
313 |             please be patient...");
314 |         );
315 |       let twice_n = Int64.shift_left n 1 in
316 |       min_iter twice_n ~takes_long total_wall
317 |     )
318 |     else (
319 |       if previous_took_long then out.print ")\n";
320 |       if tn < tmin then estimate_niter n n tn wall (* tn > 0.1 *)
321 |       else ( (* minimal [n] good for [tmin], use the above measurement
322 |                 for the first run. *)
323 |         print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm;
324 |         repeat_test (repeat - 1) [bm] n n wall
325 |       )
326 |     ) in
327 |   min_iter 1L ~takes_long:false 0.
328 | 
329 | 
330 | (* Make a print function that prefixes each output except the first
331 |    one by [nspace] spaces. *)
332 | let make_printer nspace =
333 |   let first = ref true in
334 |   let prefix = String.make nspace ' ' in
335 |   let print s = print_string s; flush stdout in
336 |   let print_indent s =
337 |     if !first then first := false else print_string prefix;
338 |     print s in
339 |   { print_indent = print_indent; print = print; }
340 | 
341 | let null_printer = { print_indent = (fun _ -> ());  print = (fun _ -> ()) }
342 | 
343 | 
344 | (* Generic interface for performing measurements on a list of functions *)
345 | let testN ~test default_f_name  ?min_count ?min_cpu ~style
346 |     ?fwidth ?fdigits ~repeat funs =
347 |   let length_name =
348 |     List.fold_left (fun m (n,_,_) -> max m (String.length n)) 0 funs in
349 |   let result_of (name, f, x) =
350 |     if style <> Nil then
351 |       printf "%*s: %!" length_name (if name = "" then default_f_name else name);
352 |     let out = if style = Nil then null_printer
353 |               else make_printer (length_name + 2) in
354 |     let bm = test out ?min_count ?min_cpu ~style ?fwidth ?fdigits
355 |                   ~repeat name f x in
356 |     (name, bm) in
357 |   List.map result_of funs
358 | 
359 | let string_of_names funs =
360 |   String.concat ", " (List.map (fun (a,_,_) -> sprintf "%S" a) funs)
361 | 
362 | 
363 | let latencyN ?min_cpu ?(style=Auto) ?fwidth ?fdigits ?(repeat=1) n funs =
364 |   if n < 4L then invalid_arg "Benchmark.latencyN: n < 4";
365 |   if style <> Nil then (
366 |     printf "Latencies for %Ld iterations of %s%s:\n%!" n
367 |       (string_of_names funs)
368 |       (if repeat > 1 then sprintf " (%i runs)" repeat else "");
369 |   );
370 |   testN ~test:(latency n) (sprintf "[run %Ld times]" n)
371 |     ?min_cpu ~style ?fwidth ?fdigits ~repeat funs
372 | 
373 | let latency1 ?min_cpu ?style ?fwidth ?fdigits ?repeat  n ?(name="") f x =
374 |   if n < 4L then invalid_arg "Benchmark.latency1";
375 |   latencyN ?min_cpu ?style ?fwidth ?fdigits ?repeat n [(name, f, x)]
376 | 
377 | 
378 | let throughputN ?min_count ?(style=Auto) ?fwidth ?fdigits ?(repeat=1) n funs =
379 |   if n <= 0 then invalid_arg "Benchmark.throughputN: n <= 0";
380 |   let tmin = float n in
381 |   if style <> Nil then (
382 |     printf "Throughputs for %s%s running%s for at least %g CPU second%s:\n%!"
383 |       (string_of_names funs)
384 |       (if List.length funs > 1 then " each" else "")
385 |       (if repeat > 1 then sprintf " %i times" repeat else "")
386 |       tmin (if n > 1 then "s" else "");
387 |   );
388 |   testN ~test:(throughput tmin) (sprintf "[run > %3.1g secs]" tmin)
389 |     ?min_count ~style ?fwidth ?fdigits ~repeat funs
390 | 
391 | let throughput1 ?min_count ?style ?fwidth ?fdigits ?repeat n ?(name="") f x =
392 |   if n <= 0 then invalid_arg "Benchmark.throughput1: n <= 0";
393 |   throughputN ?min_count ?style ?fwidth ?fdigits ?repeat n [(name, f, x)]
394 | 
395 | 
396 | (* Statistical tests and comparison table
397 |  ***********************************************************************)
398 | 
399 | (* Utility functions *)
400 | let list_mapi f =
401 |   let rec loop i = function
402 |     | [] -> []
403 |     | a::l -> let r = f i a in r :: loop (i + 1) l in
404 |   loop 0
405 | 
406 | let list_iteri f =
407 |   let rec loop i = function
408 |     | [] -> ()
409 |     | a::l -> let () = f i a in loop (i + 1) l in
410 |   loop 0
411 | 
412 | let is_nan x = (classify_float x = FP_nan)
413 | 
414 | 
415 | (* [log_gamma x] computes the logarithm of the Gamma function at [x]
416 |    using Lanczos method.  It is assumed [x > 0.].
417 | 
418 |    See e.g. http://home.att.net/~numericana/answer/info/godfrey.htm *)
419 | let log_gamma =
420 |   let c = [|    1.000000000000000174663;
421 |              5716.400188274341379136;
422 |            -14815.30426768413909044;
423 |             14291.49277657478554025;
424 |             -6348.160217641458813289;
425 |              1301.608286058321874105;
426 |              -108.1767053514369634679;
427 |                 2.605696505611755827729;
428 |                -0.7423452510201416151527e-2;
429 |                 0.5384136432509564062961e-7;
430 |                -0.4023533141268236372067e-8 |] in
431 |   let c_last = Array.length c - 1 in
432 |   let g = float(c_last - 1) in
433 |   let sqrt2pi = sqrt(8. *. atan 1.) in
434 |   let rec sum i den s =
435 |     if i > 0 then sum (i - 1) (den -. 1.) (s +. c.(i) /. den)
436 |     else c.(0) +. s in
437 |   fun x ->
438 |     assert(x > 0.);
439 |     let xg = x +. g in
440 |     let xg_5 = xg -. 0.5 in
441 |     log(sqrt2pi *. sum c_last xg 0.) +. (x -. 0.5) *. log xg_5 -. xg_5
442 | 
443 | (* Beta function.  It is assumed [a > 0. && b > 0.]. *)
444 | let _beta a b =
445 |   assert(a > 0. && b > 0.);
446 |   exp(log_gamma a +. log_gamma b -. log_gamma(a +. b))
447 | 
448 | (* [betai x a b] returns the value of the incomplete Beta function
449 |    I_x(a,b).  It is evaluated through the continued fraction expansion
450 |    (see e.g. Numerical Recipies, 6.4):
451 | 
452 |               x^a (1-x)^b [  1  d1  d2     ]
453 |    I_x(a,b) = ----------- [ --  --  -- ... ]
454 |                a  B(a,b)  [ 1+  1+  1+     ]
455 | 
456 |    where B(a,b) is the beta function and
457 | 
458 |                m (b-m) x                       - (a + m)(a + b + m) x
459 |    d_2m = --------------------      d_(2m+1) = ----------------------
460 |           (a + 2m - 1)(a + 2m)                  (a + 2m)(a + 2m + 1)
461 | 
462 |    The modified Lentz's method is used for the continued fraction (see
463 |    NR, section 5.2) in routine [betai_cf].
464 | *)
465 | let max_tiny x = max 1e-30 x (* to avoid null divisors *)
466 | 
467 | let betai_cf_eps = epsilon_float
468 | 
469 | let betai_cf x a b =
470 |   let apb = a +. b
471 |   and ap1 = a +. 1.
472 |   and am1 = a -. 1. in
473 |   let rec lentz m c d f =
474 |     let m2 = 2. *. m in
475 |     (* Even rec step d_2m *)
476 |     let cf_d2m = m *. (b -. m) *. x /. ((am1 +. m2) *. (a +. m2)) in
477 |     let d = 1. /. max_tiny(1. +. cf_d2m *. d)
478 |     and c = max_tiny(1. +. cf_d2m /. c) in
479 |     let f = f *. d *. c in
480 |     (* Odd rec step d_2m+1 *)
481 |     let cf_d2m1 = -. (a +. m) *. (apb +. m) *. x
482 |                   /. ((a +. m2) *. (ap1 +. m2)) in
483 |     let d = 1. /. max_tiny(1. +. cf_d2m1 *. d)
484 |     and c = max_tiny(1. +. cf_d2m1 /. c) in
485 |     let delta = c *. d in
486 |     let f = f *. delta in
487 |     if abs_float(delta -. 1.) < betai_cf_eps then f
488 |     else lentz (m +. 1.) c d f in
489 |   (* Initialize Lentz's method with C2=1, D2 (step 2) *)
490 |   let d2 = 1. /. max_tiny(1. -. apb *. x /. ap1) in
491 |   lentz 1. 1. d2 d2
492 | 
493 | let betai x a b =
494 |   assert(a > 0. && b > 0.);
495 |   if x < 0. || x > 1. then invalid_arg "betai";
496 |   if x = 0. then 0.
497 |   else if x = 1. then 1.
498 |   else
499 |     let m = exp(log_gamma(a +. b) -. log_gamma a -. log_gamma b
500 |                 +. a *. log x +. b *. log(1. -. x)) in
501 |     if x < (a +. 1.) /. (a +. b +. 2.)
502 |     then m *. betai_cf x a b /. a
503 |     else 1. -. m *. betai_cf (1. -. x) b a /. b
504 | 
505 | (* [cpl_student_t t nu] compute the "complement" of the Student's
506 |    distribution: 1 - A(t|nu).  It is used to compute the significance
507 |    of probabilistic tests. *)
508 | let cpl_student_t t nu =
509 |   betai (nu /. (nu +. t *. t)) (0.5 *. nu) 0.5
510 | 
511 | 
512 | (* [comp_rates (name, bm)] computes the number, average and standard
513 |    deviation of rates from the list of timings [bm].  If bm = [x(1);
514 |    x(2);...; x(n)], the algorithm is
515 | 
516 |    m(1) = x(1)		m(k) = m(k-1) + (x(k) - m(k-1))/k
517 |    s(1) = 0		s(k) = s(k-1) + (x(k) - m(k-1))(x(k) - m(k))
518 | 
519 |    One proves by recurrence that
520 | 
521 |    m(k) = sum(x(i) : 1 <= i <= k) / k
522 |    s(k) = sum(x(i)**2 : 1 <= i <= k) - k m(k)**2
523 |         = sum( (x(i) - m(k))**2 : 1 <= i <= k)
524 | 
525 |    Cf. Knuth, Seminumerical algorithms. *)
526 | let comp_rates cpu (name, bm) =
527 |   let rec loop n m s = function
528 |     | [] -> (name, n, m, s)
529 |     | b :: tl ->
530 |         let rate = Int64.to_float b.iters /. cpu b in
531 |         let n' = n + 1 in
532 |         let m' = m +. (rate -. m) /. (float n') in
533 |         let s' = s +. (rate -. m) *. (rate -. m') in
534 |         loop n' m' s' tl in
535 |   match bm with
536 |   | [] -> (name, 0, nan, 0.) (* NaN used for no-data *)
537 |   | b :: tl -> loop 1 (Int64.to_float b.iters /. (cpu b +. 1e-15)) 0. tl
538 | 
539 | (* Compare rates *)
540 | let by_rates (_,_,r1,_) (_,_,r2,_) = compare (r1:float) r2
541 | 
542 | (* Check whether two rates are significantly different.  With a small
543 |    [significance], a [true] returned value means that the rates are
544 |    significantly different.  [n1] is the number of repetitions of the
545 |    test1, [r1] is its mean rate and [s1] its standard deviation.
546 |    [n2], [r2] and [s2] are similar for the test2. *)
547 | let different_rates significance  n1 r1 s1  n2 r2 s2 =
548 |   assert(n1 > 0 && n2 > 0);
549 |   if n1 = 1 && n2 = 1 then true (* no info about distribution, assume
550 |                                    they really are. *)
551 |   else
552 |     let df = float(n1 + n2 - 2) (* >= 1. *)
553 |     and n1 = float n1
554 |     and n2 = float n2 in
555 |     let sD = sqrt((s1 +. s2) /. df *. (1. /. n1 +. 1. /. n2)) in
556 |     let t = (r1 -. r2) /. sD in
557 |     cpl_student_t t df <= significance
558 | 
559 | 
560 | (* [string_of_rate display_as_rate confidence n r s] *)
561 | let string_of_rate display_as_rate =
562 |   let per_sec = if display_as_rate then "/s" else "" in
563 |   fun confidence n r s : (string * string) ->
564 |     (* Assume Gaussian distribution *)
565 |     let sigma = sqrt(s/. float n) in
566 |     let err = confidence *. sigma (* FIXME *) in
567 |     let a, err =
568 |       if display_as_rate then r, err else
569 |         let n = 1. /. r in (n, n *. n *. err) (* Taylor of order 1 of 1/r *) in
570 |     let p prec =
571 |       if sigma < 1e-15 then (sprintf " %.*f%s" prec a per_sec, "")
572 |       else (sprintf " %.*f+-" prec a, sprintf "%.*f%s" prec err per_sec) in
573 |     if a >= 100. then p 0
574 |     else if a >= 10. then p 1
575 |     else if a >= 1.  then p 2
576 |     else if a >= 0.1 then p 3
577 |     else if sigma < 1e-15 then (sprintf " %g%s" a per_sec, "")
578 |     else (sprintf " %g+-" a, sprintf "%g%s" err per_sec)
579 | 
580 | (* print results of a bench_many run *)
581 | (* results = [(name, bm); (name, bm); (name, bm); ...] *)
582 | let tabulate ?(no_parent=false) ?(confidence=0.95) 
583 |     (results:( _ * t list) list) : unit =
584 |   if confidence < 0. || confidence > 1. then
585 |     invalid_arg "Benchmark.tabulate: confidence < 0. or > 1.";
586 |   let len = List.length results in
587 |   if len = 0 then invalid_arg "Benchmark.tabulate: empty list of results";
588 |   (* Compute (name, rate, sigma) for all results and sort them by rates *)
589 |   let cpu = if no_parent then cpu_childs else cpu_process in
590 |   let rates = List.sort by_rates (List.map (comp_rates cpu) results) in
591 |   (* Decide whether to display by rates or seconds *)
592 |   let display_as_rate =
593 |     let (_,_,r,_) = List.nth rates (len / 2) in r > 1. in
594 |   (*
595 |    * Compute rows
596 |    *)
597 |   let top_row = "" :: (if display_as_rate then " Rate" else " s/iter")
598 |                 :: "" :: (List.map (fun (s,_,_,_) -> " " ^ s) rates) in
599 |   (* Initialize the widths of the columns from the top row *)
600 |   let col_width = Array.of_list (List.map String.length top_row) in
601 |   (* Build all the data [rows], each starting with separation space *)
602 |   let string_of_rate = string_of_rate display_as_rate in
603 |   let make_row i (row_name, row_n, row_rate, row_s) =
604 |     (* Column 0: test name *)
605 |     col_width.(0) <- max (String.length row_name) col_width.(0);
606 |     (* Column 1 & 2: performance *)
607 |     let ra, ra_err = string_of_rate confidence row_n row_rate row_s in
608 |     col_width.(1) <- max (String.length ra) col_width.(1);
609 |     col_width.(2) <- max (String.length ra_err) col_width.(2);
610 |     (* Columns 3..(len + 2): performance ratios *)
611 |     let make_col j (_col_name, col_n, col_rate, col_s) =
612 |       let ratio =
613 |         if i = j || is_nan row_rate || is_nan col_rate then "--" else
614 |           let p = 100. *. row_rate /. col_rate -. 100. in
615 |           if p = 0. || different_rates (1. -. confidence)
616 |             row_n row_rate row_s  col_n col_rate col_s
617 |           then sprintf " %.0f%%" p
618 |           else sprintf " [%.0f%%]" p in
619 |       col_width.(j + 3) <- max (String.length ratio) col_width.(j + 3);
620 |       ratio in
621 |     row_name :: ra :: ra_err :: (list_mapi make_col rates) in
622 |   let rows = list_mapi make_row rates in
623 |   (*
624 |    * Equalize column widths in the chart as much as possible without
625 |    * exceeding 80 characters.  This does not use or affect cols 0, 1 and 2.
626 |    *)
627 |   (* Build an array of indexes [nth.(0..(len-1))] to access
628 |      [col_width.(3..(len+2))] in nondecreasing order. *)
629 |   let nth = Array.init len (fun i -> i + 3) in
630 |   let by_width i1 i2 = compare col_width.(i1) col_width.(i2) in
631 |   Array.sort by_width nth;
632 |   let max_width = col_width.(nth.(len - 1)) in
633 |   let rec stretcher min_width total =
634 |     if min_width < max_width then stretch_min 0 min_width total
635 |   and stretch_min i min_width total = (* try to stretch col [i] *)
636 |     if total < 80 then begin
637 |       if i < len && col_width.(nth.(i)) = min_width then begin
638 |         col_width.(nth.(i)) <- col_width.(nth.(i)) + 1;
639 |         stretch_min (i + 1) min_width (total + 1) (* stretch next col? *)
640 |       end
641 |       else stretcher (min_width + 1) total (* try again to stretch *)
642 |     end in
643 |   stretcher col_width.(nth.(0)) (Array.fold_left ( + ) 0 col_width);
644 |   (*
645 |    * Display the table
646 |    *)
647 |   let row_formatter row =
648 |     list_iteri (fun i d -> printf "%*s" col_width.(i) d) row;
649 |     print_string "\n" in
650 |   row_formatter top_row;
651 |   List.iter row_formatter rows;
652 |   flush stdout
653 | 
654 | let get_minor b = b.minor_words
655 | let get_major b = b.major_words -. b.promoted_words
656 | let get_promoted b = b.promoted_words
657 | 
658 | let print_gc  (results:( _ * t list) list) : unit =
659 |   let len = List.length results in
660 |   if len = 0 then invalid_arg "Benchmark.print_gc: empty list of results";
661 | 
662 |   let compute_per_iter get (name, samples) : string * float =
663 |     let rec loop n sum = function
664 |       | [] -> n, sum
665 |       | b :: tl ->
666 |         let n = Int64.(add n b.iters) in
667 |         let sum = sum +. get b in
668 |         loop n sum tl in
669 |     let n, sum = loop 0L 0. samples in
670 |     if n=0L then name, 0.
671 |     else name, sum /. (Int64.to_float n)
672 |   in
673 | 
674 |   (* Compute (name, rate, sigma) for all results *)
675 |   let minor_rates = List.map (compute_per_iter get_minor) results in
676 |   let major_rates = List.map (compute_per_iter get_major) results in
677 |   let promoted_rates = List.map (compute_per_iter get_promoted) results in
678 | 
679 |   (* Compute rows *)
680 |   let top_row = ["" ; " minor_allocs/iter" ; " major_allocs/iter"; " promoted/iter" ] in
681 |   let rows =
682 |     List.map2
683 |       (fun (name, minor) ((_, major), (_, promoted)) ->
684 |         [name; string_of_bytes minor; string_of_bytes major; string_of_bytes promoted])
685 |       minor_rates (List.combine major_rates promoted_rates)
686 |   in
687 | 
688 |   (* Initialize the widths of the columns from the top row *)
689 |   let col_width = Array.of_list (List.map String.length top_row) in
690 |   List.iter (List.iteri (fun i col ->
691 |     col_width.(i) <- max col_width.(i) (String.length col))) rows;
692 | 
693 |   let row_formatter row =
694 |     list_iteri (fun i d -> printf "%*s" col_width.(i) d) row;
695 |     print_string "\n" in
696 |   row_formatter top_row;
697 |   List.iter row_formatter rows;
698 |   flush stdout
699 | 
700 | (** {2 Bench Tree} *)
701 | 
702 | module Tree = struct
703 |   (** {2 Path} *)
704 | 
705 |   type path = string list
706 | 
707 |   let print_path_element fmt p =
708 |     Format.pp_print_char fmt '.';
709 |     Format.pp_print_cut fmt ();
710 |     Format.pp_print_string fmt p
711 | 
712 |   let print_path fmt path =
713 |     Format.fprintf fmt "@[<2>";
714 |     (match path with
715 |      | [] -> ()
716 |      | [p] -> Format.pp_print_string fmt p
717 |      | p :: tl -> Format.pp_print_string fmt p;
718 |                  List.iter (print_path_element fmt) tl);
719 |     Format.fprintf fmt "@]"
720 | 
721 |   (* Split the string along "." characters. Specification:
722 |      assert (parse_path "foo.bar" = ["foo";"bar"]);
723 |      assert (parse_path "foo" = ["foo"]);
724 |      assert (parse_path "" = [""])
725 |   *)
726 |   let rev_parse_path s =
727 |     let l = ref [] in
728 |     let i0 = ref 0 in
729 |     for i = 0 to String.length s - 1 do
730 |       if String.unsafe_get s i = '.' then (
731 |         let name = String.sub s !i0 (i - !i0) in
732 |         l := name :: !l;
733 |         i0 := i + 1;
734 |       )
735 |     done;
736 |     let name = if !i0 = 0 then s
737 |                else String.sub s !i0 (String.length s - !i0) in
738 |     name :: !l
739 | 
740 |   let check_reserved name =
741 |     if name = "*" then invalid_arg "Name \"*\" is reserved for wildcard"
742 | 
743 |   let parse_path s = List.rev(rev_parse_path s)
744 | 
745 | 
746 |   (** {2 Bench Tree} *)
747 | 
748 |   module SMap = Map.Make(String)
749 | 
750 |   (* A collection of benchmarks with fast concatenation. *)
751 |   type benches = Single of samples Lazy.t
752 |                | Pair of benches * benches
753 | 
754 |   let merge_benches_opt b1 b2 = match b1, b2 with
755 |     | None, b | b, None -> b
756 |     | Some b1, Some b2 -> Some(Pair(b1, b2))
757 | 
758 |   let rec number_of_benches = function
759 |     | Single _ -> 1
760 |     | Pair(b1, b2) -> number_of_benches b1 + number_of_benches b2
761 | 
762 |   let rec benches_iter benches ~f = match benches with
763 |     | Single b -> f b
764 |     | Pair(b1, b2) -> benches_iter b1 ~f;  benches_iter b2 ~f
765 | 
766 |   type t = Tree of benches option * t SMap.t
767 |   (* benches at that level + named sublevels.  The name "" is
768 |      understood as "at this level" and so will not be present in the
769 |      map. *)
770 | 
771 |   let empty = Tree(None, SMap.empty)
772 | 
773 |   let is_empty (Tree(b, m)) =
774 |     b = None && SMap.is_empty m
775 | 
776 |   let rec merge (Tree(b1, m1)) (Tree(b2, m2)) : t =
777 |     let b = merge_benches_opt b1 b2 in
778 |     let m = SMap.merge merge_opt m1 m2 in
779 |     Tree(b, m)
780 |   and merge_opt _ o1 o2 = match o1, o2 with
781 |     | None, None -> None
782 |     | Some o, None
783 |     | None, Some o -> Some o
784 |     | Some o1, Some o2 -> Some (merge o1 o2)
785 | 
786 |   let concat l = List.fold_left merge empty l
787 | 
788 |   let check_allowed_name n =
789 |     check_reserved n;
790 |     for i = 0 to String.length n - 1 do
791 |       if String.unsafe_get n i = '.' then
792 |         invalid_arg "Names cannot contain dots"
793 |     done
794 | 
795 |   let of_bench bench = Tree(Some(Single bench), SMap.empty)
796 | 
797 |   let name_nonempty t n = Tree(None, SMap.singleton n t)
798 | 
799 |   let name t n =
800 |     check_allowed_name n;
801 |     if n = "" then t else name_nonempty t n
802 | 
803 |   (* prefix a tree with a path. Now the whole tree is only reachable
804 |      from this given path *)
805 |   let prefix path t =
806 |     List.fold_right (fun n t -> name t n) path t
807 | 
808 |   let ( @>> ) n t =
809 |     List.fold_left name t (rev_parse_path n)
810 | 
811 |   let ( @> ) name bench = name @>> (of_bench bench)
812 | 
813 |   let (@>>>) n l = n @>> (concat l)
814 | 
815 |   let with_int f = function
816 |     | [] -> empty
817 |     | l ->
818 |        let g i = Tree(None, SMap.singleton (string_of_int i) (f i)) in
819 |        concat (List.map g l)
820 | 
821 |   (* print the structure of the tree, to show the user possible paths *)
822 |   let rec print_tree_map fmt m =
823 |     SMap.iter (print_tree_path fmt) m
824 |   and print_tree_path fmt name (Tree(b, m)) =
825 |     (match b with
826 |      | None -> Format.fprintf fmt "@\n@[<2>- %s" name
827 |      | Some b ->
828 |         let n = number_of_benches b in
829 |         Format.fprintf fmt "@\n@[<2>- %s: %i benchmark%s"
830 |                        name n (if n > 1 then "s" else ""));
831 |     print_tree_map fmt m;
832 |     Format.fprintf fmt "@]"
833 | 
834 |   let print fmt (Tree(b, m)) =
835 |     (match b with
836 |      | None -> Format.fprintf fmt "No benchmark at root"
837 |      | Some b ->
838 |         let n = number_of_benches b in
839 |         Format.fprintf fmt "%i benchmark%s at root"
840 |                        n (if n > 1 then "s" else ""));
841 |     print_tree_map fmt m
842 | 
843 |   (** {2 Selecting a subtree} *)
844 | 
845 |   let rec filter path (Tree(b,m) as t) = match path with
846 |     | [] -> t
847 |     | [""] -> (* Only return the benches at this level (no sub-levels) *)
848 |        Tree(b, SMap.empty)
849 |     | "" :: tl -> (* skip empty component NOT at the end, skip *)
850 |        filter tl t
851 |     | "*" :: tl ->
852 |        (* wildcard pattern, select all subtrees *)
853 |        let map_filter name t m =
854 |          let t = filter tl t in
855 |          (* Keep it only if not empty. *)
856 |          if is_empty t then m else SMap.add name t m in
857 |        Tree(b, SMap.fold map_filter m SMap.empty)
858 |     | p0 :: tl ->
859 |        match (try Some(SMap.find p0 m) with Not_found -> None) with
860 |        | None -> empty
861 |        | Some t -> let t = filter tl t in
862 |                   (* propagate up the emptiness *)
863 |                   if is_empty t then empty else name_nonempty t p0
864 | 
865 |   (** {2 Run} *)
866 | 
867 |   let print_sep fmt =
868 |     Format.pp_print_string fmt "***********************************\
869 |                                 ***********************************";
870 |     Format.pp_print_newline fmt ()
871 | 
872 |   let run_bench_path ~with_gc fmt is_previous_output rev_path = function
873 |     | None -> is_previous_output
874 |     | Some b ->
875 |        if is_previous_output then print_sep fmt;
876 |        Format.fprintf fmt "*** Run benchmarks for path \"%a\"@\n@."
877 |                       print_path (List.rev rev_path);
878 |        benches_iter b ~f:(fun b ->
879 |         let b = Lazy.force b in
880 |         tabulate b;
881 |         if with_gc then print_gc b);
882 |        true
883 | 
884 |   let rec run_all ~with_gc fmt is_previous_output rev_path (Tree(b, m)) =
885 |     let is_previous_output =
886 |       run_bench_path ~with_gc fmt is_previous_output rev_path b in
887 |     SMap.fold
888 |       (fun name t is_out ->
889 |         run_all ~with_gc fmt is_out (name :: rev_path) t)
890 |       m is_previous_output
891 | 
892 |   let run_1path fmt t is_previous_output path =
893 |     (* Filtering the tree keep its full paths so we initialize
894 |        [rev_path] to [[]]. *)
895 |     run_all fmt is_previous_output [] (filter path t)
896 | 
897 |   let run_paths ~with_gc fmt ~paths t =
898 |     let is_out = List.fold_left (run_1path ~with_gc fmt t) false paths in
899 |     if not is_out then
900 |       match paths with
901 |       | [] -> Format.fprintf fmt "No benchmark to run.@\n@."
902 |       | p0 :: tl ->
903 |          Format.fprintf fmt "No benchmark to run for paths ";
904 |          print_path fmt p0;
905 |          List.iter (fun p -> print_path fmt p;
906 |                           Format.pp_print_string fmt ", ") tl;
907 |          Format.fprintf fmt ".@\n@."
908 | 
909 |   type arg_state = { mutable paths : path list;
910 |                      mutable print_tree : bool;
911 |                      mutable with_gc: bool;
912 |                    }
913 |   let arg () =
914 |     let st = { paths = [];  print_tree = false; with_gc = true } in
915 |     let add_path s = st.paths <- parse_path s :: st.paths in
916 |     let options = Arg.align
917 |       [ "--path", Arg.String add_path, " only apply to subpath"
918 |       ; "-p", Arg.String add_path, " short option for --path"
919 |       ; "--all", Arg.Unit (fun () -> add_path "*"), " run all paths"
920 |       ; "-a", Arg.Unit (fun () -> add_path "*"), " short option for --all"
921 |       ; "--tree", Arg.Unit (fun () -> st.print_tree <- true), " print the tree"
922 |       ; "--gc", Arg.Unit (fun () -> st.with_gc <- true), " print GC stats"
923 |       ; "--no-gc", Arg.Unit (fun () -> st.with_gc <- false), " do not print GC stats"
924 |       ] in
925 |     st, options
926 | 
927 |   let run ?(with_gc=true) ?arg ?(paths=[]) ?(out=Format.std_formatter) t =
928 |     match arg with
929 |     | None -> run_paths ~with_gc out ~paths t
930 |     | Some st ->
931 |       if st.print_tree then
932 |         Format.fprintf out "@[%a@]@." print t
933 |       else
934 |         run_paths ~with_gc:st.with_gc out ~paths:(paths @ List.rev st.paths) t
935 | 
936 |   (** {2 Global Registration} *)
937 | 
938 |   (* the global tree of benchmarks *)
939 |   let tree = ref empty
940 | 
941 |   let global () = !tree
942 | 
943 |   let register new_t =
944 |     tree := merge !tree new_t
945 | 
946 |   let run_global ?(argv=Sys.argv) ?(out=Format.std_formatter) () =
947 |     let st, specs = arg () in
948 |     let no_anon _ = raise(Arg.Bad "No anonymous arguments allowed") in
949 |     let pgm = try Filename.basename Sys.argv.(0)
950 |               with _ -> "run benchmark" in
951 |     let usage = pgm ^ " [options]" in
952 |     try
953 |       Arg.parse_argv argv specs no_anon usage;
954 |       run ~arg:st ~out !tree
955 |     with Arg.Bad msg | Arg.Help msg ->
956 |       Format.fprintf out "%s@." msg
957 | end
958 | 
959 | (* Local Variables: *)
960 | (* compile-command: "make -k" *)
961 | (* End: *)
962 | 


--------------------------------------------------------------------------------