├── .gitignore ├── web ├── Home.jpg ├── Download.jpg ├── License.jpg ├── Summary.jpg ├── Documentation.jpg ├── OCamlBenchmark.jpg └── index.html ├── src ├── dune ├── benchmark.mli └── benchmark.ml ├── tests ├── dune ├── long_run.ml └── tree.ml ├── examples ├── dune ├── test.pl ├── regexps.out ├── let_try.ml ├── try_if.ml ├── loops.ml ├── func_record.ml ├── match_array.ml ├── numbers.out ├── Makefile ├── iter.ml ├── numbers.ml ├── regexps.ml ├── composition.ml └── ar_ba.ml ├── Makefile ├── dune-project ├── .github └── workflows │ ├── gh-pages.yml │ └── main.yml ├── README.md ├── benchmark.opam ├── CHANGES.md └── LICENSE.md /.gitignore: -------------------------------------------------------------------------------- 1 | # -*-conf-unix-*- 2 | _build 3 | .merlin 4 | *.install 5 | IDEAS* -------------------------------------------------------------------------------- /web/Home.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Home.jpg -------------------------------------------------------------------------------- /web/Download.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Download.jpg -------------------------------------------------------------------------------- /web/License.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/License.jpg -------------------------------------------------------------------------------- /web/Summary.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Summary.jpg -------------------------------------------------------------------------------- /web/Documentation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/Documentation.jpg -------------------------------------------------------------------------------- /web/OCamlBenchmark.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Chris00/ocaml-benchmark/HEAD/web/OCamlBenchmark.jpg -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | 2 | (library 3 | (name benchmark) 4 | (public_name benchmark) 5 | (libraries unix) 6 | (synopsis "Benchmark running times of code")) 7 | -------------------------------------------------------------------------------- /tests/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (names long_run tree) 3 | (libraries benchmark unix)) 4 | 5 | (alias 6 | (name tests) 7 | (deps long_run.exe tree.exe)) 8 | 9 | (rule 10 | (alias runtest) 11 | (deps long_run.exe tree.exe) 12 | (action (progn 13 | (run %{dep:long_run.exe}) 14 | (run %{dep:tree.exe})))) 15 | -------------------------------------------------------------------------------- /examples/dune: -------------------------------------------------------------------------------- 1 | 2 | (executables 3 | (names ar_ba composition iter let_try loops 4 | match_array numbers regexps try_if func_record) 5 | (libraries benchmark bigarray str re)) 6 | 7 | (alias 8 | (name examples) 9 | (deps ar_ba.exe composition.exe iter.exe let_try.exe loops.exe 10 | match_array.exe numbers.exe regexps.exe try_if.exe func_record.exe)) 11 | -------------------------------------------------------------------------------- /tests/long_run.ml: -------------------------------------------------------------------------------- 1 | (* Test that the test is performed correctly even when the test 2 | execution time exceeds the max throughput time. *) 3 | 4 | open Benchmark 5 | 6 | let long () = 7 | let s = ref 0. in 8 | for _ = 1 to 100_000 do 9 | for _ = 1 to 2_000 do 10 | s := !s +. 1. 11 | done 12 | done 13 | 14 | let () = 15 | let t = throughputN 1 ["long", long, ()] in 16 | tabulate t 17 | -------------------------------------------------------------------------------- /examples/test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use lib qw(.); 4 | use Benchmark qw(timethese cmpthese); 5 | 6 | sub f { 7 | my $g=0; 8 | foreach (0..$_[0]) { $g = 2.0 * 2.0 * 2.0 * 2.0 }; 9 | $g; 10 | }; 11 | 12 | $res = timethese(10, { foo => sub {f(1000000)}, 13 | bar => sub {f(2000000)}, 14 | baz => sub {f(3000000)}, }); 15 | print "\n"; 16 | cmpthese $res; 17 | 18 | -------------------------------------------------------------------------------- /tests/tree.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | let open Benchmark.Tree in 3 | "" @> lazy (let create() = Array.init 1_000_000 (fun i -> i) in 4 | Benchmark.latency1 18L create () ) 5 | |> register; 6 | 7 | "map" @> lazy (let a = Array.init 1_000_000 (fun i -> i) in 8 | let f x = x + 1 in 9 | Benchmark.latency1 18L (Array.map f) a ) 10 | |> register; 11 | 12 | "sort" 13 | @> lazy (let a = Array.init 1_000_000 (fun i -> -i) in 14 | Benchmark.latency1 18L (Array.sort compare) a ) 15 | |> register; 16 | 17 | "sort.add" 18 | @> lazy (Benchmark.latency1 18L (fun x -> x + 1) 1) 19 | |> register 20 | 21 | let () = 22 | Benchmark.Tree.run_global () 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PKGVERSION = $(shell git describe --always) 2 | PKGTARBALL = benchmark-$(PKGVERSION).tbz 3 | 4 | DUNE_OPTS?= 5 | 6 | all build byte native: 7 | dune build $(DUNE_OPTS) @install @tests @examples 8 | 9 | install uninstall: 10 | dune $@ 11 | 12 | doc: 13 | sed -e 's/%%VERSION%%/$(PKGVERSION)/' benchmark.mli \ 14 | > _build/default/benchmark.mli 15 | dune build $(DUNE_OPTS) @doc 16 | @echo '.def { background: #f0f0f0; }' \ 17 | >> _build/default/_doc/_html/odoc.css 18 | 19 | lint: 20 | opam lint benchmark.opam 21 | 22 | clean: 23 | dune clean 24 | 25 | WATCH?=@install 26 | watch: 27 | dune build $(DUNE_OPTS) $(WATCH) -w 28 | 29 | 30 | .PHONY: all build byte native install uninstall doc lint clean watch 31 | -------------------------------------------------------------------------------- /examples/regexps.out: -------------------------------------------------------------------------------- 1 | Pcre matches: 012345678 0123456789 2 | Str matches: 012345678 0123456789 3 | 4 | Throughputs for pcre match, str match ... 5 | pcre match: 6 WALL ( 5.40 usr + 0.06 sys = 5.46 CPU) @ 11.54/s (n=63) 6 | str match: 5 WALL ( 4.99 usr + 0.06 sys = 5.05 CPU) @ 8.71/s (n=44) 7 | 8 | Rate str match pcre match 9 | str match 8.71/s -- -24% 10 | pcre match 11.5/s 32% -- 11 | 12 | Latencies for 100 iterations of pcre match, str match ... 13 | pcre match: 9 WALL ( 8.47 usr + 0.17 sys = 8.64 CPU) @ 11.57/s (n=100) 14 | str match: 12 WALL (11.30 usr + 0.17 sys = 11.47 CPU) @ 8.72/s (n=100) 15 | 16 | Rate str match pcre match 17 | str match 8.72/s -- -25% 18 | pcre match 11.6/s 33% -- 19 | -------------------------------------------------------------------------------- /examples/let_try.ml: -------------------------------------------------------------------------------- 1 | (* Compare two possible implementations of let try x = ... with ... *) 2 | 3 | let k x = if x >= 0 then x else failwith "x < 0" 4 | 5 | let f a = 6 | let sgn s x = 7 | let y = (try Some(k x) with _ -> None) in 8 | match y with 9 | | None -> s 10 | | Some y -> s + y in 11 | ignore(Array.fold_left sgn 0 a) 12 | 13 | let g a = 14 | let sgn s x = 15 | (try 16 | let y = k x in 17 | (fun () -> s + y) 18 | with _ -> 19 | (fun () -> s) 20 | )() in 21 | ignore(Array.fold_left sgn 0 a) 22 | 23 | 24 | open Benchmark 25 | 26 | let () = 27 | let a = Array.init 1000 (fun _ -> Random.int 2 - 1) in 28 | let res = throughputN ~repeat:5 1 [("Some", f, a); 29 | ("()->", g, a); ] in 30 | tabulate res 31 | -------------------------------------------------------------------------------- /examples/try_if.ml: -------------------------------------------------------------------------------- 1 | open Bigarray 2 | 3 | let n = 100 4 | 5 | let a = Array1.create float64 fortran_layout n 6 | 7 | (* Base case: no test *) 8 | let f0 () = 9 | for i = 1 to n do 10 | let x = a.{i} in 11 | ignore(x) 12 | done; 13 | ignore(0.) 14 | 15 | let f1 () = 16 | for i = 1 to n+1 do 17 | let x = try a.{i} with _ -> 0. in 18 | ignore(x) 19 | done 20 | 21 | let f2 () = 22 | for i = 1 to n+1 do 23 | let x = if i <= n then a.{i} else 0. in 24 | ignore(x) 25 | done 26 | 27 | open Benchmark 28 | 29 | let () = 30 | let res = throughputN ~repeat:5 3 [("no test", f0, ()); 31 | ("try", f1, ()); 32 | ("if", f2, ()) ] in 33 | print_endline "Bigarray bound checking:"; 34 | tabulate res; 35 | print_gc res 36 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.0) 2 | (generate_opam_files true) 3 | 4 | (name benchmark) 5 | (version 1.7) 6 | (homepage "https://github.com/Chris00/ocaml-benchmark") 7 | (source (github Chris00/ocaml-benchmark)) 8 | (license "LGPL-3.0 with OCaml linking exception") 9 | (maintainers "Christophe Troestler ") 10 | (authors "Christophe Troestler " 11 | "Doug Bagley" "c-cube") 12 | 13 | (package 14 | (name benchmark) 15 | (synopsis "Benchmark running times of code") 16 | (description "This module provides a set of tools to measure the running times of 17 | your functions and to easily compare the results. A statistical test 18 | is used to determine whether the results truly differ.") 19 | (depends 20 | (ocaml (>= 4.03)) 21 | base-unix 22 | (dune (>= 2.0)))) 23 | -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: github pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - master # Set a branch name to trigger deployment 7 | 8 | jobs: 9 | deploy: 10 | name: Deploy doc 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@main 14 | 15 | - name: Use OCaml 16 | uses: ocaml/setup-ocaml@v3 17 | with: 18 | ocaml-compiler: '5.2' 19 | dune-cache: true 20 | allow-prerelease-opam: true 21 | 22 | - run: opam install odig benchmark 23 | 24 | - run: opam exec -- odig odoc --cache-dir=_doc/ benchmark 25 | 26 | - name: Deploy 27 | uses: peaceiris/actions-gh-pages@v3 28 | with: 29 | github_token: ${{ secrets.GITHUB_TOKEN }} 30 | publish_dir: ./_doc/html 31 | destination_dir: . 32 | enable_jekyll: false 33 | -------------------------------------------------------------------------------- /examples/loops.ml: -------------------------------------------------------------------------------- 1 | open Benchmark 2 | 3 | (* Test for the speed of recusion w.r.t. imperative loops to access 4 | arrays fo floats. *) 5 | 6 | let rec_loop (a : float array) = 7 | let rec loop i = 8 | if i < Array.length a then begin 9 | a.(i) <- a.(i) +. 1.; 10 | loop (i + 1) 11 | end in 12 | loop 0 13 | 14 | let rec_loop2 (a : float array) = 15 | let len = Array.length a in 16 | let rec loop i = 17 | if i < len then begin 18 | a.(i) <- a.(i) +. 1.; 19 | loop (i + 1) 20 | end in 21 | loop 0 22 | 23 | let for_loop (a : float array) = 24 | for i = 0 to Array.length a - 1 do 25 | a.(i) <- a.(i) +. 1. 26 | done 27 | 28 | let () = 29 | let a = Array.make 100 1. in 30 | let res = throughputN ~repeat:5 1 31 | [("rec", rec_loop, a); 32 | ("rec2", rec_loop2, a); 33 | ("for", for_loop, a); ] in 34 | tabulate res; 35 | print_gc res 36 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | run: 11 | name: build # build+test on various versions of OCaml, on linux 12 | timeout-minutes: 15 13 | strategy: 14 | fail-fast: true 15 | matrix: 16 | os: 17 | - ubuntu-latest 18 | #- windows-latest 19 | ocaml-compiler: 20 | - '4.03' 21 | - '4.14' 22 | - '5.2' 23 | 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@main 27 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 28 | uses: ocaml/setup-ocaml@v3 29 | with: 30 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 31 | dune-cache: true 32 | allow-prerelease-opam: true 33 | 34 | - run: opam install -t --deps-only benchmark 35 | - run: opam exec -- dune build --profile=release --force @install @runtest 36 | 37 | -------------------------------------------------------------------------------- /examples/func_record.ml: -------------------------------------------------------------------------------- 1 | module F(E : sig 2 | val f : float -> float 3 | val g : float -> float 4 | end) = 5 | struct 6 | let h x = 1. +. E.f x +. E.g x 7 | end 8 | 9 | module A = F(struct let f x = x +. 1. let g x = 2. *. x end) 10 | 11 | 12 | type env = { f : float -> float; g : float -> float } 13 | 14 | let h_rec e x = 1. +. e.f x +. e.g x 15 | 16 | 17 | let h_fun f g x = 1. +. f x +. g x 18 | 19 | let f x = x +. 1. 20 | let g x = 2. *. x 21 | 22 | let h x = 1. +. f x +. g x 23 | 24 | 25 | open Benchmark 26 | 27 | let () = 28 | let res = throughputN ~repeat:3 3 29 | [("functor", (fun () -> A.h 1.), ()); 30 | ("record", (fun () -> h_rec { f = f; g = g } 1.), ()); 31 | ("fun arg", (fun () -> h_fun f g 1.), ()); 32 | ("no arg", (fun () -> h 1.), ()); 33 | ] in 34 | print_endline "Functor vesus records vesus passing as arg:"; 35 | tabulate res; 36 | print_gc res 37 | 38 | (* Local Variables: *) 39 | (* compile-command: "make -k -C .." *) 40 | (* End: *) 41 | -------------------------------------------------------------------------------- /examples/match_array.ml: -------------------------------------------------------------------------------- 1 | (* This is a typical problem where the functions are so fast (on a 2 | 2Ghz machine) that it takes way too long to get results. Thus a 3 | wrapping in a loop is done. *) 4 | 5 | let n = 100 6 | 7 | let string_of_month1 = 8 | let month = [| "Jan"; "Feb"; "Mar"; "Apr"; "May"; "Jun"; "Jul"; "Aug"; 9 | "Sep"; "Oct"; "Nov"; "Dec" |] in 10 | fun i -> Array.unsafe_get month i 11 | 12 | let f1 () = 13 | for _ = 1 to n do ignore(string_of_month1 7) done 14 | 15 | let string_of_month2 = function 16 | | 0 -> "Jan" 17 | | 1 -> "Feb" 18 | | 2 -> "Mar" 19 | | 3 -> "Apr" 20 | | 4 -> "May" 21 | | 5 -> "Jun" 22 | | 6 -> "Jul" 23 | | 7 -> "Aug" 24 | | 8 -> "Sep" 25 | | 9 -> "Oct" 26 | | 10 -> "Nov" 27 | | 11 -> "Dec" 28 | | _ -> failwith "h" 29 | 30 | let f2 () = 31 | for _ = 1 to n do ignore(string_of_month2 7) done 32 | 33 | 34 | open Benchmark 35 | 36 | let () = 37 | let res = throughputN 3 ~repeat:5 [ ("arr", f1, ()); 38 | ("pat", f2, ()); ] in 39 | tabulate res; 40 | print_gc res 41 | 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build and Test](https://github.com/Chris00/ocaml-benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Chris00/ocaml-benchmark/actions/workflows/main.yml) 2 | 3 | Benchmark — measure/compare run-time of OCaml functions 4 | ======================================================= 5 | 6 | Benchmark provides functions to measure and compare the run-time of 7 | functions. It is inspired by the Perl module of the same name. 8 | 9 | 10 | Installation 11 | ------------ 12 | 13 | The easier way to install it is by using opam: 14 | 15 | opam install benchmark 16 | 17 | If you use the development version of this project, install [Dune][] 18 | and issue 19 | 20 | make 21 | make install 22 | 23 | [Dune]: https://github.com/ocaml/dune 24 | 25 | 26 | Documentation 27 | ------------- 28 | 29 | See the [interface of `Benchmark`](src/benchmark.mli). It can also be 30 | read in [HTML](https://chris00.github.io/ocaml-benchmark/doc/). 31 | 32 | 33 | 34 | 35 | Copyright 2004-present, Christophe Troestler 36 | Copyright 2002-2003, Doug Bagley 37 | 38 | 39 | -------------------------------------------------------------------------------- /examples/numbers.out: -------------------------------------------------------------------------------- 1 | f_int 666 = 666 2 | f_int32 666 = 666 3 | f_int64 666 = 666 4 | 5 | int-1-lat: 3 WALL ( 2.82 usr + 0.06 sys = 2.88 CPU) @ 347.22/s (n=1000) 6 | int-1-thru: 5 WALL ( 5.27 usr + 0.07 sys = 5.34 CPU) @ 344.94/s (n=1842) 7 | 8 | Throughputs for int, int32, int64 ... 9 | int: 11 WALL (10.42 usr + 0.09 sys = 10.51 CPU) @ 345.67/s (n=3633) 10 | int32: 11 WALL (10.18 usr + 0.17 sys = 10.35 CPU) @ 201.93/s (n=2090) 11 | int64: 11 WALL (10.53 usr + 0.10 sys = 10.63 CPU) @ 187.30/s (n=1991) 12 | 13 | Rate int64 int32 int 14 | int64 187/s -- -7% -46% 15 | int32 202/s 8% -- -42% 16 | int 346/s 85% 71% -- 17 | 18 | Latencies for 2000 iterations of int, int32, int64 ... 19 | int: 6 WALL ( 5.73 usr + 0.05 sys = 5.78 CPU) @ 346.02/s (n=2000) 20 | int32: 10 WALL ( 9.75 usr + 0.11 sys = 9.86 CPU) @ 202.84/s (n=2000) 21 | int64: 11 WALL (10.49 usr + 0.17 sys = 10.66 CPU) @ 187.62/s (n=2000) 22 | 23 | Rate int64 int32 int 24 | int64 188/s -- -8% -46% 25 | int32 203/s 8% -- -41% 26 | int 346/s 84% 71% -- 27 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for benchmark examples 2 | # 3 | # Just type "make"! 4 | 5 | # Do not require that the package be installed to try the examples 6 | REQUIRES = str unix bigarray 7 | BENCHMARK = -I .. benchmark.cma 8 | 9 | EXAMPLES = $(wildcard *.ml) 10 | PACKAGE_OPTS = $(if $(REQUIRES), -package "$(REQUIRES)", ) 11 | PREDICATE_OPTS = $(if $(PREDICATES), -predicates $(PREDICATES), ) 12 | ALL_OPTS = -annot $(PACKAGE_OPTS) $(PREDICATE_OPTS) 13 | 14 | OCAMLC = ocamlfind ocamlc 15 | OCAMLOPT = ocamlfind ocamlopt 16 | 17 | .PHONY: all 18 | 19 | all: byte opt native 20 | byte: $(EXAMPLES:.ml=.exe) 21 | opt native: $(EXAMPLES:.ml=.com) 22 | 23 | regexps.exe regexps.com: REQUIRES += re 24 | 25 | %.exe: %.ml ../benchmark.cma 26 | $(OCAMLC) -o $@ $(ALL_OPTS) -linkpkg $(BENCHMARK) $< 27 | 28 | %.com: %.ml ../benchmark.cmxa 29 | $(OCAMLOPT) -o $@ $(ALL_OPTS) -linkpkg $(BENCHMARK:.cma=.cmxa) $< 30 | 31 | ###################################################################### 32 | .PHONY: clean 33 | 34 | clean: 35 | $(RM) -f $(EXAMPLES:.ml=.exe) $(EXAMPLES:.ml=.com) 36 | $(RM) $(wildcard *.o *.cm[iox] *~ *.annot *.dat) 37 | 38 | -------------------------------------------------------------------------------- /benchmark.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "1.7" 4 | synopsis: "Benchmark running times of code" 5 | description: """ 6 | This module provides a set of tools to measure the running times of 7 | your functions and to easily compare the results. A statistical test 8 | is used to determine whether the results truly differ.""" 9 | maintainer: ["Christophe Troestler "] 10 | authors: [ 11 | "Christophe Troestler " 12 | "Doug Bagley" 13 | "c-cube" 14 | ] 15 | license: "LGPL-3.0 with OCaml linking exception" 16 | homepage: "https://github.com/Chris00/ocaml-benchmark" 17 | bug-reports: "https://github.com/Chris00/ocaml-benchmark/issues" 18 | depends: [ 19 | "ocaml" {>= "4.03"} 20 | "base-unix" 21 | "dune" {>= "2.0"} 22 | ] 23 | build: [ 24 | ["dune" "subst"] {pinned} 25 | [ 26 | "dune" 27 | "build" 28 | "-p" 29 | name 30 | "-j" 31 | jobs 32 | "@install" 33 | "@runtest" {with-test} 34 | "@doc" {with-doc} 35 | ] 36 | ] 37 | dev-repo: "git+https://github.com/Chris00/ocaml-benchmark.git" 38 | -------------------------------------------------------------------------------- /examples/iter.ml: -------------------------------------------------------------------------------- 1 | open Bigarray 2 | 3 | let n = 1_000 4 | 5 | (* Bigarrays *) 6 | type vec = (float, float64_elt, c_layout) Array1.t 7 | let a = Array1.create float64 c_layout n 8 | let () = Array1.fill a 1. 9 | 10 | let ba f (x: vec) = 11 | for i = 0 to n - 1 do f x.{i} done 12 | 13 | let ba_unsafe f (x: vec) = 14 | for i = 0 to n - 1 do f (Array1.unsafe_get x i) done 15 | 16 | (* Arrays *) 17 | let b = Array.make n 1. 18 | 19 | let arr f (x: float array) = 20 | for i = 0 to n-1 do f x.(i) done 21 | 22 | let arr_unsafe f (x: float array) = 23 | for i = 0 to n-1 do f (Array.unsafe_get x i) done 24 | 25 | (* Lists *) 26 | let c = Array.to_list b 27 | 28 | 29 | open Benchmark 30 | 31 | 32 | let () = 33 | (* Simulate a simple side effect *) 34 | let z = ref 0. in 35 | let f x = z := x in 36 | 37 | let res = throughputN ~repeat:3 3 38 | [("ba", (fun () -> ba f a), ()); 39 | ("ba_unsafe", (fun () -> ba_unsafe f a), ()); 40 | ("arr", (fun () -> arr f b), ()); 41 | ("arr_unsafe", (fun () -> arr_unsafe f b), ()); 42 | ("list", (fun () -> List.iter f c), ()) 43 | ] in 44 | print_endline "Iterating a function with a simple side effect:"; 45 | tabulate res 46 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2 | 1.7 2025-01-29 3 | -------------- 4 | 5 | - move to dune 2.0 6 | - collect and display GC statistics 7 | - use Re.Pcre instead of pcre in example 8 | 9 | 1.6 2018-09-07 10 | -------------- 11 | 12 | - Port to Dune (not the former Jbuilder) and dune-release. 13 | - Fix some typos in the documentation. 14 | 15 | 1.5 2018-05-17 16 | -------------- 17 | 18 | - Port to Dune/Jbuilder and Topkg. 19 | - Add option `--all` to the `Tree.arg`. 20 | - Fix uncaught exception in `Tree.run_global`. 21 | 22 | 23 | 24 | Very old changes 25 | ---------------- 26 | 27 | 2004-08-22 Troestler Christophe 28 | 29 | * benchmark.ml: Code mostly rewritten to improve clarity (and to 30 | correct some bugs). Allows to return multiple times for a given 31 | test. Student's statistical test to determine whether two rates 32 | are significantly different (see `log_gamma`, `betai`, 33 | `cpl_student_t`, `comp_rates` and `different_rates`). 34 | 35 | * benchmark.mli: The documentation is greatly improved. Functions 36 | `make`, `add`, `sub` instead of `create`, `sum`, `diff` for 37 | uniformity with the OCaml standard library. 38 | 39 | 2004-08-18 Troestler Christophe 40 | 41 | * benchmark: Checked Doug Bagley module in CVS. 42 | -------------------------------------------------------------------------------- /examples/numbers.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | open Benchmark 3 | 4 | (* Test the speed of addition for native ints (unboxed), and 5 | Int32/Int64 (which are both boxed). 6 | 7 | The output looks something like numbers.out 8 | *) 9 | 10 | let f_int n = 11 | let rec loop i sum = 12 | if i < n then loop (i + 1) (sum + 1) else sum in 13 | loop 0 0 14 | 15 | let f_int32 n = 16 | let rec loop i sum = 17 | if i < n then loop (i + 1) (Int32.add sum Int32.one) else sum in 18 | Int32.to_int (loop 0 Int32.zero) 19 | 20 | let f_int64 n = 21 | let rec loop i sum = 22 | if i < n then loop (i + 1) (Int64.add sum Int64.one) else sum in 23 | Int64.to_int (loop 0 Int64.zero) 24 | 25 | let () = 26 | (* print out the results of the f_* functions to doublecheck that 27 | they work as we intend. *) 28 | printf "f_int 666 = %d\n" (f_int 666); 29 | printf "f_int32 666 = %d\n" (f_int32 666); 30 | printf "f_int64 666 = %d\n" (f_int64 666); 31 | print_newline (); 32 | 33 | (* let's exercise the *1 functions: *) 34 | let _ = latency1 ~name:"int-1-lat" 1000L f_int 10000 in 35 | let s = throughput1 ~name:"int-1-thru" 5 f_int 10000 in 36 | print_gc s; 37 | print_newline (); 38 | 39 | (* now let's exercise the *N functions: *) 40 | let res = throughputN ~repeat:5 10 41 | [("int", f_int, 10000); 42 | ("int32", f_int32, 10000); 43 | ("int64", f_int64, 10000); ] in 44 | print_newline (); 45 | tabulate res; 46 | print_gc res; 47 | 48 | print_newline (); 49 | let res = latencyN 2000L [("int", f_int, 10000); 50 | ("int32", f_int32, 10000); 51 | ("int64", f_int64, 10000); ] in 52 | print_newline (); 53 | tabulate res 54 | -------------------------------------------------------------------------------- /examples/regexps.ml: -------------------------------------------------------------------------------- 1 | module Pcre = Re.Pcre 2 | open Printf 3 | open Benchmark 4 | 5 | (* Test the speed of standard regular expressions vs. Pcre using a 6 | simple regexp with captures. 7 | 8 | The output looks something like regexps.out 9 | *) 10 | 11 | (* Create a chunk of data to search. 12 | It's full of "near hits", strings of "012345678" 13 | with a string on the end we are searching for: "0123456789" *) 14 | let bigdata = 15 | let size = 500000 in 16 | let buf = Buffer.create size in 17 | for _ = 1 to size/10 - 1 do Buffer.add_string buf "012345678 " done; 18 | Buffer.add_string buf "0123456789"; 19 | Buffer.contents buf 20 | 21 | let pcre_re = Pcre.regexp "(012345678) (0123456789)" 22 | let str_re = Str.regexp "\\(012345678\\) \\(0123456789\\)" 23 | 24 | let pcre_match dat = 25 | let group = Pcre.extract ~rex:pcre_re dat in 26 | (group.(1), group.(2)) 27 | 28 | let str_match dat = 29 | let _pos = Str.search_forward str_re dat 0 in 30 | (Str.matched_group 1 dat, Str.matched_group 2 dat) 31 | 32 | let () = 33 | (* Print out the results of the functions to doublecheck that they 34 | work as we intend. *) 35 | let (a, b) = pcre_match bigdata in printf "Pcre matches: %s %s\n" a b; 36 | let (a, b) = str_match bigdata in printf "Str matches: %s %s\n" a b; 37 | print_newline (); 38 | 39 | let res = throughputN ~repeat:5 5 40 | [("pcre match", pcre_match, bigdata); 41 | ("str match", str_match, bigdata)] in 42 | print_newline(); 43 | tabulate res 44 | 45 | (* print_newline(); *) 46 | (* let res = latencyN ~repeat:5 100 *) 47 | (* [("pcre match", pcre_match, bigdata); *) 48 | (* ("str match", str_match, bigdata)] in *) 49 | (* print_newline(); *) 50 | (* tabulate res *) 51 | -------------------------------------------------------------------------------- /examples/composition.ml: -------------------------------------------------------------------------------- 1 | (* Tries to show the profile cost of composing small functions. *) 2 | 3 | (* Small functions: permutations of [0 .. n-1] *) 4 | 5 | let n = 100000 6 | let rotate r = fun k -> (k + r) mod n 7 | let reverse i j = fun k -> if i <= k && k <= j then j + i - k else k 8 | let splice l i j = fun k -> 9 | if k < j then if k < i then k else k + l + 1 10 | else if k <= j + l then k - j + i 11 | else let k' = k - l - 1 in if k' < i then k' else k 12 | 13 | 14 | open Benchmark 15 | 16 | let ncomp = 400 (* Number of compositions *) 17 | 18 | let make_perms = 19 | (* Create a random list of transformations *) 20 | Random.self_init(); 21 | let rec random_perm ((p_f, p_v) as acc) i = 22 | if i <= 0 then acc else 23 | let c = Random.int 3 in 24 | (* New function *) 25 | let p = 26 | if c = 0 then rotate (Random.int n) 27 | else if c = 1 then reverse (Random.int n) (Random.int n) 28 | else (* c = 2 *) splice (Random.int n) (Random.int n) (Random.int n) in 29 | (* Corresponding array transformer *) 30 | let p_vec w v = 31 | for i = 0 to Array.length v - 1 do w.(i) <- p v.(i) done in 32 | random_perm (p :: p_f, p_vec :: p_v) (i - 1) in 33 | random_perm ([], []) 34 | 35 | let () = 36 | let ncomp = 300 in 37 | let p_f, p_v = make_perms ncomp in 38 | let v = Array.init n (fun k -> k) in 39 | let do_f () = 40 | let f = List.fold_left (fun f f0 -> (fun k -> f0(f k))) (fun k -> k) p_f in 41 | Array.map f v 42 | and do_v () = 43 | snd(List.fold_left (fun (w,v) f -> f w v; (v,w)) (Array.make n 0, v) p_v) 44 | in 45 | 46 | let res = throughputN ~repeat:3 5 [("fun", do_f, ()); 47 | ("vec", do_v, ()) ] in 48 | tabulate res; 49 | print_gc res 50 | -------------------------------------------------------------------------------- /examples/ar_ba.ml: -------------------------------------------------------------------------------- 1 | (* Compare bigarray and standard float array access times. *) 2 | 3 | open Bigarray 4 | 5 | let n = 10_000 6 | let m = 1000 7 | 8 | (* Bigarrays 9 | ***********************************************************************) 10 | type vec = (float, float64_elt, c_layout) Array1.t 11 | 12 | let a = Array1.create float64 c_layout n 13 | let () = Array1.fill a 1. 14 | 15 | let ba (a: vec) = 16 | let s = ref 0. in 17 | for i = 0 to n-1 do s := !s +. a.{i} done 18 | 19 | let ba_u (a: vec) = 20 | let s = ref 0. in 21 | for i = 0 to n-1 do s := !s +. Array1.unsafe_get a i done 22 | 23 | let ba_cl () = 24 | let s = ref 0. in 25 | for i = 0 to n-1 do s := !s +. a.{i} done 26 | 27 | let ba_gen a = 28 | let s = ref 0. in 29 | for i = 0 to n-1 do s := !s +. a.{i} done 30 | 31 | let ba_alloc () = 32 | let a = Array1.create float64 c_layout n in 33 | Array1.fill a 1.; 34 | let s = ref 0. in 35 | for _ = 1 to m do 36 | for i = 0 to n-1 do s := !s +. a.{i} done 37 | done 38 | 39 | let set_ba (a: vec) = for i = 0 to n-1 do a.{i} <- 1. done 40 | 41 | let set_ba_alloc () = 42 | let a = Array1.create float64 c_layout n in 43 | for _ = 1 to m do 44 | for i = 0 to n-1 do a.{i} <- 3. done 45 | done 46 | 47 | (* Arrays 48 | ***********************************************************************) 49 | let b = Array.make n 1. 50 | 51 | let arr (b: float array) = 52 | let s = ref 0. in 53 | for i = 0 to n-1 do s := !s +. b.(i) done 54 | 55 | let arr_u (b: float array) = 56 | let s = ref 0. in 57 | for i = 0 to n - 1 do s := !s +. Array.unsafe_get b (i) done 58 | 59 | let arr_cl () = 60 | let s = ref 0. in 61 | for i = 0 to n-1 do s := !s +. b.(i) done 62 | 63 | let arr_alloc () = 64 | let b = Array.make n 1. in 65 | let s = ref 0. in 66 | for _ = 1 to m do 67 | for i = 0 to n-1 do s := !s +. b.(i) done 68 | done 69 | 70 | let set_arr (b: float array) = for i = 0 to n-1 do b.(i) <- 1. done 71 | 72 | let set_arr_alloc () = 73 | let a = Array.make n 0. in 74 | for _ = 1 to m do 75 | for i = 0 to n-1 do a.(i) <- 3. done 76 | done 77 | 78 | (* Lists 79 | ***********************************************************************) 80 | let c = Array.to_list b 81 | 82 | let list c = ignore(List.fold_left ( +. ) 0. c) 83 | 84 | 85 | open Benchmark 86 | 87 | let () = 88 | let res = throughputN ~repeat:3 3 89 | [("ba", (fun () -> ba a), ()); 90 | ("ba_u", (fun () -> ba_u a), ()); 91 | ("ba_cl", ba_cl, ()); 92 | ("ba_gen", (fun () -> ba_gen a), ()); 93 | ("set_ba", (fun () -> set_ba a), ()); 94 | ("arr", (fun () -> arr b), ()); 95 | ("arr_u", (fun () -> arr_u b), ()); 96 | ("arr_cl", arr_cl, ()); 97 | ("list", (fun () -> list c), ()); 98 | ("set_arr", (fun () -> set_arr b), ()); 99 | ] in 100 | print_endline "Sum of elements or set all elements to 3.:"; 101 | tabulate res; 102 | print_gc res; 103 | 104 | let res = throughputN ~repeat:3 3 105 | [("ba", ba_alloc, ()); 106 | ("set_ba", set_ba_alloc, ()); 107 | ("arr", arr_alloc, ()); 108 | ("set_arr", set_arr_alloc, ()); 109 | ] in 110 | print_endline "With allocation:"; 111 | tabulate res; 112 | print_gc res 113 | -------------------------------------------------------------------------------- /web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Ocaml Benchmark module 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 15 | 16 | 17 | 21 | 26 | 62 | 63 | 64 |
OCaml Benchmark
14 |
Summary
22 | Documentation
23 | Download
24 | License
25 |
27 |

28 | OCaml Benchmark is a small module to benchmark running times 29 | of code. It supersedes an earlier version 30 | by Doug Bagley. 31 | A typical use looks like this:

32 |         33 | open 34 | Benchmark
35 |         36 | let 37 | res = throughputN 8 [("foo", f, 1000000);
38 |                                  ("bar", g, 39 | 1000000);
40 |                                  ("baz", h, 41 | 1000000) ] in
42 |         print_newline();
43 |         tabulate res
44 |
45 |
46 | For a complete description of the functions, see the 47 | interface Benchmark.mli. 48 | Some examples are also included in 49 | the tarball. 50 |

51 |

52 | If you have questions, suggestions, bugs,... you can contact 53 | me by 54 | email: Christophe.Troestler@umons.ac.be. 55 | The source code is maintained in a Git repository stored on Github. 56 |

57 |

The code is released under the GNU Lesser General Public 58 | License (LGPL) with the same special exception as for the 59 | OCaml standard library (see the 60 | file LICENSE for more 61 | details).

65 | 66 | 67 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE, Version 3, 29 June 2007 2 | ========================================================== 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | ## 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | ## 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | ## 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | 1. under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | 2. under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | ## 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | 1. Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | 2. Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | ## 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | 1. Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | 2. Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | 3. For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | 4. Do one of the following: 100 | 101 | 1. Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 2. Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | 5. Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | ## 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | 1. Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | 2. Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | ## 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /src/benchmark.mli: -------------------------------------------------------------------------------- 1 | (* File: benchmark.mli 2 | 3 | Copyright Aug. 2004-present by Troestler Christophe 4 | Christophe.Troestler(at)umons.ac.be 5 | 6 | Copyright 2002-2003, Doug Bagley 7 | http://www.bagley.org/~doug/ocaml/ 8 | Based on the Perl module Benchmark.pm by Jarkko Hietaniemi and Tim Bunce 9 | 10 | This library is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU Lesser General Public License 12 | version 3 as published by the Free Software Foundation, with the 13 | special exception on linking described in file LICENSE.txt. 14 | 15 | This library is distributed in the hope that it will be useful, but 16 | WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file 18 | LICENSE.txt for more details. 19 | *) 20 | 21 | (** Benchmark running times of code. 22 | 23 | This module implements benchmarking functions for measuring the 24 | run-time of one or many functions using latency (multiple 25 | repetitions) or throughput (repeat until some time period has 26 | passed) tests. 27 | 28 | {b Examples:} 29 | Run the function [f] with input [5000] for [10] iterations and 30 | print the CPU times: 31 | {[ 32 | Benchmark.latency1 10 f 5000 ]} 33 | 34 | Run the tests [foo], [bar] and [baz] three times for at least [8] 35 | seconds each, printing the results of each test, and then print a 36 | cross tabulation of the results: 37 | {[ 38 | open Benchmark 39 | let res = throughputN ~repeat:3 8 [("foo", foo, 1000000); 40 | ("bar", bar, 2000000); 41 | ("baz", baz, 3000000); ] in 42 | print_newline(); 43 | tabulate res ]} 44 | 45 | Time how long it takes to some piece of code: 46 | {[ 47 | let t0 = Benchmark.make 0L in 48 | (* do something here *) 49 | let b = Benchmark.sub (Benchmark.make 0L) t0 in 50 | print_endline "Benchmark results:"; 51 | print_endline (Benchmark.to_string b) ]} 52 | *) 53 | 54 | 55 | (** {2 Timing and samples structures} *) 56 | 57 | (** The information returned by timing tests. *) 58 | type t = { 59 | wall : float; (** Wallclock time (in seconds) *) 60 | utime : float; (** This process User CPU time (in seconds) *) 61 | stime : float; (** This process System CPU time (in seconds) *) 62 | cutime : float; (** Child process User CPU time (in seconds) *) 63 | cstime : float; (** Child process System CPU time (in seconds) *) 64 | iters : Int64.t; (** Number of iterations. *) 65 | minor_words: float; (** Bytes allocated on minor heap *) 66 | major_words: float; (** Bytes allocated on major heap, incl. promoted *) 67 | promoted_words: float; (** Bytes moved from minor heap to major heap *) 68 | } 69 | 70 | (** Style of the output. *) 71 | type style = 72 | | No_child (** Do not print child CPU times *) 73 | | No_parent (** Do not print parent CPU times *) 74 | | All (** Print parent and child CPU times *) 75 | | Auto (** Same as [No_child] unless there is child CPU used *) 76 | | Nil (** Print nothing *) 77 | 78 | val make : Int64.t -> t 79 | (** [Benchmark.make n] create a new {!Benchmark.t} structure with 80 | current time values and [n] iterations. Only the integer part of 81 | [n] is used, the fractional part is ignored. *) 82 | 83 | val add : t -> t -> t 84 | (** [Benchmark.add b1 b2] add {!Benchmark.t} structure [b1] to [b2]. *) 85 | 86 | val sub : t -> t -> t 87 | (** [Benchmark.sub b1 b2] subtract {!Benchmark.t} structure [b2] 88 | from [b1]. *) 89 | 90 | val to_string : ?style:style -> ?fwidth:int -> ?fdigits:int -> t -> string 91 | (** [Benchmark.to_string ?style ?fwidth ?fdigits b] converts the 92 | {!Benchmark.t} structure to a formatted string. 93 | 94 | @param style printing style (default: [Auto]) 95 | @param fwidth number of chars reserved for the numbers (default: [5]) 96 | @param fdigits number of fractional digits of the numbers (default: [2]) 97 | *) 98 | 99 | type samples = (string * t list) list 100 | (** Association list that links the names of the tests to the list 101 | of their timings. *) 102 | 103 | val merge : samples -> samples -> samples 104 | (** [merge l1 l2] merges the two association lists of timings [l1] 105 | and [l2] into a single one, concatenating the timings for the same 106 | names of [l1] and [l2]. *) 107 | 108 | 109 | (** {2 Timing functions} *) 110 | 111 | val throughputN : 112 | ?min_count:Int64.t -> 113 | ?style:style -> 114 | ?fwidth:int -> 115 | ?fdigits:int -> 116 | ?repeat:int -> 117 | int -> (string * ('a -> 'b) * 'a) list -> samples 118 | (** [Benchmark.throughputN ?min_count ?style ?fwidth ?fdigits t 119 | funs] runs each function in list [funs] for at least [t > 0] 120 | seconds. The list [funs] has the structure: [[(name1, f1, x1); 121 | (name2, f2, x2); ...]], where [name1] is the name to label the 122 | first test, [f1] is the function to run, and [x1] is its 123 | input,... If [~style] is not [Nil], then the results are 124 | printed. Returns the resulting list which can be passed to 125 | {!Benchmark.tabulate} if you want a comparison table. 126 | 127 | REMARK that [t] is the running time of the functions, not of the 128 | repetition loop. Thus a very fast running function will need 129 | lots of repetitions to make a difference of [t] seconds to the 130 | empty loop. In this case, the running time of the loop will 131 | dominate the whole process which can therefore take much longer 132 | than [t] seconds. If you are only interested in the {i 133 | relative} times of fast functions and not in their real running 134 | times, we recommend you wrap each of them in a loop. 135 | 136 | @param min_count a warning will be printed if the number of runs is 137 | less than [min_count]. This is a first defense against 138 | meaningless results. (default: [4L]) 139 | @param style printing style (default: [Auto]) 140 | @param fwidth number of chars reserved for the numbers (default: [5]) 141 | @param fdigits number of fractional digits of the numbers (default: [2]) 142 | 143 | @param repeat number of times each function running time is measured. 144 | The default is [1] to be compatible with the former 145 | version of this library but it is highly recommended 146 | to set it to a higher number to enable confidence 147 | statistics to be performed by {!Benchmark.tabulate}. 148 | *) 149 | 150 | val throughput1 : 151 | ?min_count:Int64.t -> 152 | ?style:style -> 153 | ?fwidth:int -> 154 | ?fdigits:int -> 155 | ?repeat:int -> 156 | int -> ?name:string -> ('a -> 'b) -> 'a -> samples 157 | (** [Benchmark.throughput1 ?min_count ?style ?fwidth ?fdigits t ?name f x] 158 | runs one function [f] with input [x] for at least [t] seconds, and 159 | returns the result, which is also printed unless [~style] is 160 | [Nil]. See {!Benchmark.throughputN} for more information. *) 161 | 162 | val latencyN : 163 | ?min_cpu:float -> 164 | ?style:style -> 165 | ?fwidth:int -> 166 | ?fdigits:int -> 167 | ?repeat:int -> 168 | Int64.t -> (string * ('a -> 'b) * 'a) list -> samples 169 | (** [Benchmark.latencyN ?min_cpu ?style ?fwidth ?fdigits n funs] 170 | runs each function in list [funs] for [n] iterations. [n] must be 171 | at least 4. The list [funs] has the structure: [[(name1, f1, x1); 172 | (name2, f2, x2); ...]], where [name1] is the name to label the 173 | first test, [f1] is the function to run, and [x1] is its input,... 174 | If style is not [Nil], then the results are printed. Returns the 175 | results list, which can be passed to {!Benchmark.tabulate} if you 176 | want to print a comparison table. 177 | 178 | @raise Invalid_argument if [n < 4L]. 179 | @param min_cpu a warning will be printed if the total CPU time is 180 | less than [min_cpu]. This is a first defense against 181 | meaningless results (default: [0.4]). 182 | @param style printing style (default: [Auto]). 183 | @param fwidth number of chars reserved for the numbers (default: [5]). 184 | @param fdigits number of fractional digits of the numbers (default: [2]). 185 | 186 | @param repeat number of times each function running time is measured. 187 | The default is [1] to be compatible with the former 188 | version of this library but it is highly recommended 189 | to set it to a higher number to enable confidence 190 | statistics to be performed by {!Benchmark.tabulate}. 191 | *) 192 | 193 | val latency1 : 194 | ?min_cpu:float -> 195 | ?style:style -> 196 | ?fwidth:int -> 197 | ?fdigits:int -> 198 | ?repeat:int -> 199 | Int64.t -> ?name:string -> ('a -> 'b) -> 'a -> samples 200 | (** [Benchmark.latency1 ?min_cpu ?style ?fwidth ?fdigits n ?name f x] 201 | runs the function [f] with input [x] for [n] iterations, and 202 | returns the results, which are also printed unless [~style] is 203 | [Nil]. See {!Benchmark.latencyN} for more information. *) 204 | 205 | 206 | val tabulate : ?no_parent:bool -> ?confidence:float -> samples -> unit 207 | (** [Benchmark.tablulate results] prints a comparison table for a 208 | list of [results] obtained by {!Benchmark.latencyN} or 209 | {!Benchmark.throughputN} with each function compared to all the 210 | others. The table is of the type 211 | 212 | {[ Rate name1 name2 ... OR s/iter name1 name2 ... 213 | name1 #/s -- r12 name1 # -- r12 214 | name2 #/s r21 -- name2 # r21 -- 215 | ... ... ]} 216 | 217 | where name1, name2,... are the labels of the tests sorted from 218 | slowest to fastest and rij says how much namei is faster (or 219 | slower if < 0) than namej (technically it is equal to (ri - rj) 220 | expressed in percents of rj where ri and rj are the rates of namei 221 | and namej respectively). 222 | 223 | If several results are associated to a given name, they are used 224 | to compute a Student's statistic to check whether the rates are 225 | significantly different. If ri and rj are not believed to be 226 | different, rij will be printed between brackets. 227 | 228 | @param no_parent if [true], only take in account the times of the 229 | children (default: [false]). 230 | 231 | @param confidence is used to determine the confidence interval for 232 | the Student's test. (default: [0.95]). *) 233 | 234 | val print_gc : samples -> unit 235 | (** Print GC statistics. *) 236 | 237 | (** {2 Benchmark Tree} 238 | 239 | Naming benchmarks within a hierarchy that allows to run them all, 240 | or filter them so that only a subset is run. *) 241 | 242 | module Tree : sig 243 | type t 244 | (** A (possibly empty) tree of benchmarks. Individual benchmarks 245 | (i.e., calls to {!throughputN}, {!latencyN}, etc. wrapped with 246 | {!(>:)}) can appear at any node of the tree. The edges are 247 | annotated with strings, and paths (see {!path}) are used to 248 | select subtrees. *) 249 | 250 | val ( @> ) : string -> samples Lazy.t -> t 251 | (** [name @> bench] returns a (named) node of the benchmark tree. 252 | If evaluated, it simply returns samples (for instance using 253 | {!throughputN}). If the name contains dots, it is interpreted 254 | as a path. For examle ["a.b" @> bench] is equivalent to ["a" @>> 255 | "b" @> bench]. 256 | 257 | Example (the lazy thunk is used to hide initialization code): 258 | 259 | {[ 260 | Benchmark.Tree.( 261 | "sort" >: lazy 262 | (let a = Array.init 1_000_000 (fun i -> i) in 263 | Benchmark.throughput1 18 (Array.sort compare) a 264 | ) 265 | ) ;; 266 | ]} *) 267 | 268 | val ( @>> ) : string -> t -> t 269 | (** [name @>> tree] makes [tree] accessible through the given 270 | [name], i.e., prefix all paths in the tree by [name]. It has no 271 | effect if [name = ""]. If the name contains dots, it is 272 | interpreted as a path. For instance ["n1.n2" @>> tree] is 273 | equivalent to ["n1" @>> "n2" @>> tree] and adds the path 274 | [[n1;n2]] as a prefix to the tree. 275 | 276 | @raise Invalid_argument is the name is invalid. At least names 277 | corresponding to OCaml identifiers are valid. *) 278 | 279 | val concat : t list -> t 280 | (** Merge the given trees (recursively). Merging proceeds by taking the union 281 | of all path heads in the list, and, for each such string [x], 282 | merging recursively all subtrees reachable under [x]. 283 | 284 | For instance merging the trees [a.{b, c}], [a.b.d] and [{a.d, foo}] 285 | will give the tree [{a.(b, b.d, c, d}, d}]. *) 286 | 287 | val ( @>>> ) : string -> t list -> t 288 | (** [name @>>> l] is equivalent to [name @>> concat l]. It names a list of 289 | trees, and is useful to build lists of benchmarks related to some 290 | common topic. If the name contains dots, it is interpreted 291 | as a path. 292 | 293 | @raise Invalid_argument is the name is invalid. At least names 294 | corresponding to OCaml identifiers are valid. *) 295 | 296 | val with_int : (int -> t) -> int list -> t 297 | (** [with_int f l] parametrize trees with several integer values 298 | (e.g. a size). The tree [f i] is prefixed with the label [i]. *) 299 | 300 | val print : Format.formatter -> t -> unit 301 | (** Print the tree of benchmarks (its structure) on the given formatter. 302 | Useful in combination with the [path] argument of {!run} *) 303 | 304 | (** {2 Path} *) 305 | 306 | type path = string list 307 | (** A path in a tree, pointing at a subtree. *) 308 | 309 | val print_path : Format.formatter -> path -> unit 310 | 311 | val parse_path : string -> path 312 | (** Split a string into a path at the "." separators. 313 | Example: [parse_path "a.b.c"] returns [["a"; "b"; "c"]]. *) 314 | 315 | val prefix : path -> t -> t 316 | (** Add the path as a prefix to the tree, similar to repeated 317 | calls to [@>>]. *) 318 | 319 | val filter : path -> t -> t 320 | (** [filter p t] return the tree obtained by keeping all the paths 321 | in [t] that match the path [p]. 322 | Empty components [""] in the middle of the path are ignored. 323 | Empty components [""] at the end of the path return only the 324 | benchmarks at that level (i.e., one discards the benchmarks 325 | pointed by paths of which [p] is a strict prefix). 326 | The special path component ["*"] selects all subtrees at that 327 | level (it acts as a wildcard). *) 328 | 329 | 330 | (** {2 Running} *) 331 | 332 | type arg_state 333 | 334 | val arg : unit -> arg_state * (Arg.key * Arg.spec * Arg.doc) list 335 | (** [arg ()] returns [(arg, specs)] where [arg] is a state coming 336 | from parsing the command line using [specs]. The options are: 337 | - "--path" or "-p" to add a sub-tree of benchmarks; 338 | - "--tree" to print the tree of benchmarks; 339 | - "--all" or "-a" to run all paths in the tree. 340 | Note that the default state runs all benchmarks. You need to 341 | use something like [Arg.parse (specs @ more_specs) ...] to make 342 | the above arguments available to the program user. *) 343 | 344 | val run : ?with_gc:bool -> 345 | ?arg: arg_state -> ?paths: path list -> ?out: Format.formatter -> 346 | t -> unit 347 | (** [run t] runs all benchmarks of [t] and print the results to [fmt]. 348 | @param with_gc if true, will print GC statistics as well (Default: true) 349 | @param paths if provided, only the sub-trees corresponding to 350 | these path is executed. Default: execute everything. 351 | @param out The formatter on which to print the output. 352 | Default: [Format.std_formatter]. 353 | @param arg use the result of the command line parsing to direct 354 | the run. Default: run all paths in [path] *) 355 | 356 | 357 | (** {2 Global Registration} *) 358 | 359 | val global : unit -> t 360 | (** Global tree, built from calls to {!register}. It is useful 361 | to centralize all benchmarks at one place to, then, run them all *) 362 | 363 | val register : t -> unit 364 | (** Register a benchmark to the global registry of benchmarks. *) 365 | 366 | val run_global : 367 | ?argv:string array -> 368 | ?out:Format.formatter -> 369 | unit -> unit 370 | (** Same as {!run} on the global tree of benchmarks and parsing the 371 | command line arguments from [argv] (which is [Sys.argv] by 372 | default). *) 373 | end 374 | -------------------------------------------------------------------------------- /src/benchmark.ml: -------------------------------------------------------------------------------- 1 | (* File: benchmark.ml 2 | For comparing runtime of functions 3 | ********************************************************************* 4 | 5 | Copyright 2004-present, Troestler Christophe 6 | Christophe.Troestler(at)umh.ac.be 7 | 8 | Copyright 2002-2003, Doug Bagley 9 | http://www.bagley.org/~doug/ocaml/ 10 | Initially based on the Perl module Benchmark.pm by Jarkko Hietaniemi 11 | and Tim Bunce 12 | 13 | This library is free software; you can redistribute it and/or 14 | modify it under the terms of the GNU Lesser General Public License 15 | version 3 as published by the Free Software Foundation, with the 16 | special exception on linking described in file LICENSE. 17 | 18 | This library is distributed in the hope that it will be useful, but 19 | WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file 21 | LICENSE.txt for more details. 22 | *) 23 | 24 | open Printf 25 | 26 | type t = { 27 | wall : float; 28 | utime : float; 29 | stime : float; 30 | cutime : float; 31 | cstime : float; 32 | iters : Int64.t; 33 | (* As of version 0.8, one had to change [iter] from [int] because, 34 | as machines run faster, a number of iterations ~ 2^29 is no 35 | longer enough (2^29 is the largest > 0 power of 2 that [int] can 36 | hold on a 32 bits platform. *) 37 | minor_words: float; 38 | major_words: float; 39 | promoted_words: float; 40 | } 41 | 42 | type style = No_child | No_parent | All | Auto | Nil 43 | 44 | let null_t = 45 | { wall = 0.; utime = 0.; stime = 0.; cutime = 0.; cstime = 0.; iters = 0L; 46 | minor_words = 0.; major_words = 0.; promoted_words = 0.; } 47 | 48 | let bytes_per_word = float (Sys.word_size / 8) 49 | 50 | let make n = 51 | let minor_words, promoted_words, major_words = Gc.counters() in 52 | let minor_words = minor_words *. bytes_per_word in 53 | let major_words = major_words *. bytes_per_word in 54 | let promoted_words = promoted_words *. bytes_per_word in 55 | let tms = Unix.times() in 56 | { wall = Unix.gettimeofday(); 57 | utime = tms.Unix.tms_utime; stime = tms.Unix.tms_stime; 58 | cutime = tms.Unix.tms_cutime; cstime = tms.Unix.tms_cstime; 59 | iters = n; minor_words; major_words; promoted_words; } 60 | 61 | let add a b = 62 | { wall = a.wall +. b.wall; utime = a.utime +. b.utime; 63 | stime = a.stime +. b.stime; cutime = a.cutime +. b.cutime; 64 | cstime = a.cstime +. b.cstime; iters = Int64.add a.iters b.iters; 65 | minor_words = a.minor_words +. b.minor_words; 66 | major_words = a.major_words +. b.major_words; 67 | promoted_words = a.promoted_words +. b.promoted_words; 68 | } 69 | 70 | let sub a b = 71 | { wall = a.wall -. b.wall; utime = a.utime -. b.utime; 72 | stime = a.stime -. b.stime; cutime = a.cutime -. b.cutime; 73 | cstime = a.cstime -. b.cstime; iters = Int64.sub a.iters b.iters; 74 | minor_words = a.minor_words -. b.minor_words; 75 | major_words = a.major_words -. b.major_words; 76 | promoted_words = a.promoted_words -. b.promoted_words; 77 | } 78 | 79 | (* It may happen that, because of slight variations, the running time 80 | of a fast running test is less than the running time of the null 81 | loop. Returning a negative result is obviously ridiculous, thus 82 | one returns 0. *) 83 | let ( -- ) a b = if (a:float) > b then a -. b else 0. 84 | let pos_sub a b = 85 | { wall = a.wall -- b.wall; utime = a.utime -- b.utime; 86 | stime = a.stime -- b.stime; cutime = a.cutime -- b.cutime; 87 | cstime = a.cstime -- b.cstime; iters = Int64.sub a.iters b.iters; 88 | minor_words = a.minor_words -- b.minor_words; 89 | major_words = a.major_words -- b.major_words; 90 | promoted_words = a.promoted_words -- b.promoted_words; 91 | } 92 | 93 | 94 | let cpu_process b = b.utime +. b.stime 95 | let cpu_childs b = b.cutime +. b.cstime 96 | let cpu_all b = b.utime +. b.stime +. b.cutime +. b.cstime 97 | 98 | let string_of_bytes b = 99 | if b >= 1e6 then sprintf "%.2f MB" (b /. 1e6) 100 | else if b >= 1e3 then sprintf "%.2f kB" (b /. 1e3) 101 | else sprintf "%.0f B" b 102 | 103 | (* Return a formatted representation of benchmark structure according 104 | to [style]. Default values for presentation parameters are set 105 | here. *) 106 | let to_string ?(style=Auto) ?(fwidth=5) ?(fdigits=2) (b:t) : string = 107 | let pt = cpu_process b 108 | and ct = cpu_childs b in 109 | let style = 110 | if style = Auto then if ct > 1e-10 then All else No_child else style in 111 | let iter_info t = 112 | if b.iters > 0L && t > 0.0 then 113 | sprintf " @ %*.*f/s (n=%Ld)" fwidth fdigits 114 | (Int64.to_float b.iters /. t) b.iters 115 | else "" in 116 | let f x = sprintf "%*.*f" fwidth fdigits x in 117 | match style with 118 | | All -> 119 | sprintf "%s WALL (%s usr %s sys + %s cusr %s csys = %s CPU, minor = %s, major = %s)%s" 120 | (f b.wall) (f b.utime) (f b.stime) (f b.cutime) (f b.cstime) 121 | (f(pt +. ct)) 122 | (string_of_bytes b.minor_words) 123 | (string_of_bytes (b.major_words -. b.promoted_words)) 124 | (iter_info pt) 125 | | No_child -> 126 | sprintf "%s WALL (%s usr + %s sys = %s CPU, minor = %s, major = %s)%s" 127 | (f b.wall) (f b.utime) (f b.stime) (f pt) 128 | (string_of_bytes b.minor_words) 129 | (string_of_bytes (b.major_words -. b.promoted_words)) (iter_info pt) 130 | | No_parent -> 131 | sprintf "%s WALL (%s cusr + %s csys = %s CPU, minor = %s, major = %s)%s" 132 | (f b.wall) (f b.cutime) (f b.cstime) (f ct) 133 | (string_of_bytes b.minor_words) 134 | (string_of_bytes (b.major_words -. b.promoted_words)) 135 | (iter_info ct) 136 | | Nil -> "" 137 | | Auto -> assert false 138 | 139 | 140 | (* Returns a string in minutes-seconds of a time [t >= 0] given in 141 | seconds. *) 142 | let rec string_of_time t = 143 | if t = 0 || t = 1 then string_of_int t ^ "s" 144 | else if t < 60 then string_of_int t ^ "s" 145 | else if t < 120 then "1m " ^ string_of_time(t - 60) 146 | else string_of_int(t / 60) ^ "m " ^ string_of_time(t mod 60) 147 | 148 | (* The time [t >= 0] is rounded to the nearest integer: *) 149 | let string_of_time t = string_of_time(truncate(t +. 0.5)) 150 | 151 | type samples = (string * t list) list 152 | 153 | let by_name (s1, _) (s2, _) = compare (s1:string) s2 154 | 155 | let merge (l1:samples) (l2:samples) = 156 | (* [do_merge] assumes [l1] and [l2] are sorted. *) 157 | let rec do_merge acc l1 l2 = 158 | match l1, l2 with 159 | | _, [] -> acc @ l1 160 | | [], _ -> acc @ l2 161 | | ((n1, t1) as d1) :: tl1, ((n2, t2) as d2) :: tl2 -> 162 | let sgn = compare n1 n2 in 163 | if sgn = 0 then do_merge ((n1, t1 @ t2) :: acc) tl1 tl2 164 | else if sgn < 0 then do_merge (d1 :: acc) tl1 l2 165 | else do_merge (d2 :: acc) l1 tl2 in 166 | do_merge [] (List.sort by_name l1) (List.sort by_name l2) 167 | 168 | 169 | let max_iter = Int64.add (Int64.of_int max_int) 1L 170 | (* even if [int] is 63 bits, [(max_iter:Int64.t) > 0] *) 171 | 172 | (* [runloop n_iters n f x] returns the elapsed time of running [n >= 173 | 0L] times [f] with the argument [x]. The structure returned 174 | declare [n_iter] iterations. *) 175 | let runloop n_iters n f x = 176 | let n' = Int64.div n max_iter in 177 | if n' >= max_iter then 178 | invalid_arg "Benchmark.runloop: number of iterations too large"; 179 | let n1 = Int64.to_int n' 180 | and n0 = Int64.to_int(Int64.rem n max_iter) in 181 | let t0 = ref (make 0L) in 182 | let tbase = !t0.utime in 183 | (* Wait for user timer to tick. This makes the error range more 184 | like -0.01, +0. If we don't wait, then it's more like -0.01, 185 | +0.01. *) 186 | while tbase = (!t0).utime do t0 := make 0L done; 187 | (* Loop over function we are timing [n] times (looping on int64 188 | quantities takes too long, this is why we use composite loops). *) 189 | for _ = 1 to n1 do 190 | for _ = 0 to max_int do ignore(f x) done; (* [max_iter] runs *) 191 | done; 192 | for _ = 1 to n0 do ignore(f x) done; 193 | let t1 = make n_iters in 194 | pos_sub t1 !t0 195 | 196 | (* time a null-loop; no iter count *) 197 | let null_loop n = runloop 0L n ignore () 198 | 199 | (* Run function [f] count times, return time taken (all times 200 | garanteed to be [>= 0.]) *) 201 | let timeit n f x = 202 | let bn = null_loop n in 203 | let bm = runloop n n f x in 204 | pos_sub bm bn (* time of function minus null-loop *), 205 | bn.wall +. bm.wall (* how much the used had to wait *) 206 | 207 | 208 | type printer = { 209 | print_indent : string -> unit; (* prefix, flushes *) 210 | print : string -> unit; (* No prefix but flushes *) 211 | } 212 | 213 | (* [print_run ff bm] prints the list of timings [bm] according to the 214 | style defined by the optional parameters. *) 215 | let print_run out ?(min_count=4L) ?(min_cpu=0.4) ~style ?fwidth ?fdigits b = 216 | out.print_indent(to_string ~style ?fwidth ?fdigits b ^ "\n"); 217 | if b.iters < min_count || cpu_all b < min_cpu 218 | || (b.wall < 1. && b.iters < 1000L) 219 | then out.print_indent "(warning: too few iterations for a reliable count)\n" 220 | 221 | 222 | let latency n out ?min_count ?min_cpu ~style ?fwidth ?fdigits 223 | ~repeat _name f x = 224 | let rec loop nrep acc = 225 | if nrep < 1 then acc 226 | else ( 227 | Gc.full_major(); 228 | Gc.compact(); (* Reclaim memory to avoid undue GC during the test. *) 229 | let bm, _ = timeit n f x in 230 | print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm; 231 | loop (nrep - 1) (bm :: acc) 232 | ) in 233 | loop repeat [] 234 | 235 | 236 | (* Read the code from bottom to top: [min_iter] determines the minimal 237 | number of iterations to have a significant timing, then 238 | [estimate_niter] estimate by linear interpolation the number of 239 | iter to run [> tmin] and then the test is performed. *) 240 | let throughput tmin out ?min_count ?min_cpu ~style ?fwidth ?fdigits 241 | ~repeat _name f x : t list = 242 | (* Run [f] for [niter] times and complete with >= [nmin] iterations 243 | (estimated by linear interpolation) to run >= [tmin]. *) 244 | let rec run_test nmin niter bm_init total_wall = 245 | let bm, wall = timeit niter f x in 246 | let bm = add bm_init bm in 247 | let tn = cpu_process bm in 248 | let total_wall = total_wall +. wall in 249 | if tn >= tmin then ( 250 | print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm; 251 | bm, total_wall 252 | ) 253 | else 254 | (* FIXME *) 255 | let n = Int64.of_float((tmin /. tn -. 1.) *. Int64.to_float bm.iters) in 256 | run_test nmin (max nmin n) bm total_wall in 257 | (* Repeat the test [nrep] times and return the list of results. *) 258 | let rec repeat_test nrep acc nmin niter wall_estim = 259 | if nrep < 1 then acc else ( 260 | Gc.full_major(); 261 | Gc.compact(); (* Reclaim memory to avoid undue GC during the test. *) 262 | let bm, wall = run_test nmin niter null_t 0. in 263 | let wall_estim = 264 | if wall > wall_estim +. 60. then ( 265 | out.print_indent("(Estimated time for subsequent runs: " 266 | ^ (string_of_time wall) ^ ")\n"); 267 | wall 268 | ) 269 | else wall_estim in 270 | repeat_test (nrep - 1) (bm :: acc) nmin niter wall_estim 271 | ) in 272 | (* Estimate number of iter > [nmin] to have a running time >= 273 | [tmin]. The initial estimate is [n] running [tn] secs. Linear 274 | estimates bear a 5% fudge to improve the overall responsiveness. *) 275 | let tpra = 0.1 *. tmin (* Target/time practice *) in 276 | let rec estimate_niter nmin n tn wall = 277 | if tn >= tpra then 278 | (* FIXME: *) 279 | (* lin estim *) 280 | let niter = Int64.of_float(Int64.to_float n *. (1.05 *. tmin /. tn)) in 281 | let wall_estim = wall *. (1.05 *. tmin /. tn) in 282 | if wall_estim >= 60. then 283 | out.print_indent("(Estimated time for each run: " 284 | ^ (string_of_time wall_estim) ^ ")\n"); 285 | repeat_test repeat [] nmin (max nmin niter) wall_estim 286 | else 287 | (* lin estim *) 288 | let new_n = Int64.of_float(Int64.to_float n *. 1.05 *. tpra /. tn) in 289 | let new_bn, new_wall = timeit new_n f x in 290 | let new_tn = cpu_process new_bn in 291 | let n = (* make sure we make progress *) 292 | if new_tn > 1.2 *. tn then new_n 293 | else Int64.of_float(1.1 *. Int64.to_float n +. 1.) (* FIXME *) in 294 | estimate_niter nmin n new_tn new_wall in 295 | (* Determine the minimum number of iterations to run >= 0.1 sec 296 | (whatever [tmin]). Inform the user if it takes too long. *) 297 | let rec min_iter n ~takes_long:previous_took_long total_wall = 298 | if n <= 0L then 299 | failwith "throughput: number of iterations too large for Int64.t storage"; 300 | let bm, wall = timeit n f x in 301 | let tn = cpu_process bm in 302 | let total_wall = total_wall +. wall in 303 | if tn < 0.1 then ( 304 | let takes_long = total_wall >= 30. in 305 | if takes_long then ( 306 | if total_wall >= 120. then ( 307 | out.print " canceled)\n"; 308 | failwith(sprintf "Benchmark.throughputN: wall time is %g while \ 309 | CPU time is %g. Do you use \"sleep\"?" total_wall tn) 310 | ) 311 | else if previous_took_long then out.print "." else 312 | out.print_indent("(Determining how many runs to perform, \ 313 | please be patient..."); 314 | ); 315 | let twice_n = Int64.shift_left n 1 in 316 | min_iter twice_n ~takes_long total_wall 317 | ) 318 | else ( 319 | if previous_took_long then out.print ")\n"; 320 | if tn < tmin then estimate_niter n n tn wall (* tn > 0.1 *) 321 | else ( (* minimal [n] good for [tmin], use the above measurement 322 | for the first run. *) 323 | print_run out ?min_count ?min_cpu ~style ?fwidth ?fdigits bm; 324 | repeat_test (repeat - 1) [bm] n n wall 325 | ) 326 | ) in 327 | min_iter 1L ~takes_long:false 0. 328 | 329 | 330 | (* Make a print function that prefixes each output except the first 331 | one by [nspace] spaces. *) 332 | let make_printer nspace = 333 | let first = ref true in 334 | let prefix = String.make nspace ' ' in 335 | let print s = print_string s; flush stdout in 336 | let print_indent s = 337 | if !first then first := false else print_string prefix; 338 | print s in 339 | { print_indent = print_indent; print = print; } 340 | 341 | let null_printer = { print_indent = (fun _ -> ()); print = (fun _ -> ()) } 342 | 343 | 344 | (* Generic interface for performing measurements on a list of functions *) 345 | let testN ~test default_f_name ?min_count ?min_cpu ~style 346 | ?fwidth ?fdigits ~repeat funs = 347 | let length_name = 348 | List.fold_left (fun m (n,_,_) -> max m (String.length n)) 0 funs in 349 | let result_of (name, f, x) = 350 | if style <> Nil then 351 | printf "%*s: %!" length_name (if name = "" then default_f_name else name); 352 | let out = if style = Nil then null_printer 353 | else make_printer (length_name + 2) in 354 | let bm = test out ?min_count ?min_cpu ~style ?fwidth ?fdigits 355 | ~repeat name f x in 356 | (name, bm) in 357 | List.map result_of funs 358 | 359 | let string_of_names funs = 360 | String.concat ", " (List.map (fun (a,_,_) -> sprintf "%S" a) funs) 361 | 362 | 363 | let latencyN ?min_cpu ?(style=Auto) ?fwidth ?fdigits ?(repeat=1) n funs = 364 | if n < 4L then invalid_arg "Benchmark.latencyN: n < 4"; 365 | if style <> Nil then ( 366 | printf "Latencies for %Ld iterations of %s%s:\n%!" n 367 | (string_of_names funs) 368 | (if repeat > 1 then sprintf " (%i runs)" repeat else ""); 369 | ); 370 | testN ~test:(latency n) (sprintf "[run %Ld times]" n) 371 | ?min_cpu ~style ?fwidth ?fdigits ~repeat funs 372 | 373 | let latency1 ?min_cpu ?style ?fwidth ?fdigits ?repeat n ?(name="") f x = 374 | if n < 4L then invalid_arg "Benchmark.latency1"; 375 | latencyN ?min_cpu ?style ?fwidth ?fdigits ?repeat n [(name, f, x)] 376 | 377 | 378 | let throughputN ?min_count ?(style=Auto) ?fwidth ?fdigits ?(repeat=1) n funs = 379 | if n <= 0 then invalid_arg "Benchmark.throughputN: n <= 0"; 380 | let tmin = float n in 381 | if style <> Nil then ( 382 | printf "Throughputs for %s%s running%s for at least %g CPU second%s:\n%!" 383 | (string_of_names funs) 384 | (if List.length funs > 1 then " each" else "") 385 | (if repeat > 1 then sprintf " %i times" repeat else "") 386 | tmin (if n > 1 then "s" else ""); 387 | ); 388 | testN ~test:(throughput tmin) (sprintf "[run > %3.1g secs]" tmin) 389 | ?min_count ~style ?fwidth ?fdigits ~repeat funs 390 | 391 | let throughput1 ?min_count ?style ?fwidth ?fdigits ?repeat n ?(name="") f x = 392 | if n <= 0 then invalid_arg "Benchmark.throughput1: n <= 0"; 393 | throughputN ?min_count ?style ?fwidth ?fdigits ?repeat n [(name, f, x)] 394 | 395 | 396 | (* Statistical tests and comparison table 397 | ***********************************************************************) 398 | 399 | (* Utility functions *) 400 | let list_mapi f = 401 | let rec loop i = function 402 | | [] -> [] 403 | | a::l -> let r = f i a in r :: loop (i + 1) l in 404 | loop 0 405 | 406 | let list_iteri f = 407 | let rec loop i = function 408 | | [] -> () 409 | | a::l -> let () = f i a in loop (i + 1) l in 410 | loop 0 411 | 412 | let is_nan x = (classify_float x = FP_nan) 413 | 414 | 415 | (* [log_gamma x] computes the logarithm of the Gamma function at [x] 416 | using Lanczos method. It is assumed [x > 0.]. 417 | 418 | See e.g. http://home.att.net/~numericana/answer/info/godfrey.htm *) 419 | let log_gamma = 420 | let c = [| 1.000000000000000174663; 421 | 5716.400188274341379136; 422 | -14815.30426768413909044; 423 | 14291.49277657478554025; 424 | -6348.160217641458813289; 425 | 1301.608286058321874105; 426 | -108.1767053514369634679; 427 | 2.605696505611755827729; 428 | -0.7423452510201416151527e-2; 429 | 0.5384136432509564062961e-7; 430 | -0.4023533141268236372067e-8 |] in 431 | let c_last = Array.length c - 1 in 432 | let g = float(c_last - 1) in 433 | let sqrt2pi = sqrt(8. *. atan 1.) in 434 | let rec sum i den s = 435 | if i > 0 then sum (i - 1) (den -. 1.) (s +. c.(i) /. den) 436 | else c.(0) +. s in 437 | fun x -> 438 | assert(x > 0.); 439 | let xg = x +. g in 440 | let xg_5 = xg -. 0.5 in 441 | log(sqrt2pi *. sum c_last xg 0.) +. (x -. 0.5) *. log xg_5 -. xg_5 442 | 443 | (* Beta function. It is assumed [a > 0. && b > 0.]. *) 444 | let _beta a b = 445 | assert(a > 0. && b > 0.); 446 | exp(log_gamma a +. log_gamma b -. log_gamma(a +. b)) 447 | 448 | (* [betai x a b] returns the value of the incomplete Beta function 449 | I_x(a,b). It is evaluated through the continued fraction expansion 450 | (see e.g. Numerical Recipies, 6.4): 451 | 452 | x^a (1-x)^b [ 1 d1 d2 ] 453 | I_x(a,b) = ----------- [ -- -- -- ... ] 454 | a B(a,b) [ 1+ 1+ 1+ ] 455 | 456 | where B(a,b) is the beta function and 457 | 458 | m (b-m) x - (a + m)(a + b + m) x 459 | d_2m = -------------------- d_(2m+1) = ---------------------- 460 | (a + 2m - 1)(a + 2m) (a + 2m)(a + 2m + 1) 461 | 462 | The modified Lentz's method is used for the continued fraction (see 463 | NR, section 5.2) in routine [betai_cf]. 464 | *) 465 | let max_tiny x = max 1e-30 x (* to avoid null divisors *) 466 | 467 | let betai_cf_eps = epsilon_float 468 | 469 | let betai_cf x a b = 470 | let apb = a +. b 471 | and ap1 = a +. 1. 472 | and am1 = a -. 1. in 473 | let rec lentz m c d f = 474 | let m2 = 2. *. m in 475 | (* Even rec step d_2m *) 476 | let cf_d2m = m *. (b -. m) *. x /. ((am1 +. m2) *. (a +. m2)) in 477 | let d = 1. /. max_tiny(1. +. cf_d2m *. d) 478 | and c = max_tiny(1. +. cf_d2m /. c) in 479 | let f = f *. d *. c in 480 | (* Odd rec step d_2m+1 *) 481 | let cf_d2m1 = -. (a +. m) *. (apb +. m) *. x 482 | /. ((a +. m2) *. (ap1 +. m2)) in 483 | let d = 1. /. max_tiny(1. +. cf_d2m1 *. d) 484 | and c = max_tiny(1. +. cf_d2m1 /. c) in 485 | let delta = c *. d in 486 | let f = f *. delta in 487 | if abs_float(delta -. 1.) < betai_cf_eps then f 488 | else lentz (m +. 1.) c d f in 489 | (* Initialize Lentz's method with C2=1, D2 (step 2) *) 490 | let d2 = 1. /. max_tiny(1. -. apb *. x /. ap1) in 491 | lentz 1. 1. d2 d2 492 | 493 | let betai x a b = 494 | assert(a > 0. && b > 0.); 495 | if x < 0. || x > 1. then invalid_arg "betai"; 496 | if x = 0. then 0. 497 | else if x = 1. then 1. 498 | else 499 | let m = exp(log_gamma(a +. b) -. log_gamma a -. log_gamma b 500 | +. a *. log x +. b *. log(1. -. x)) in 501 | if x < (a +. 1.) /. (a +. b +. 2.) 502 | then m *. betai_cf x a b /. a 503 | else 1. -. m *. betai_cf (1. -. x) b a /. b 504 | 505 | (* [cpl_student_t t nu] compute the "complement" of the Student's 506 | distribution: 1 - A(t|nu). It is used to compute the significance 507 | of probabilistic tests. *) 508 | let cpl_student_t t nu = 509 | betai (nu /. (nu +. t *. t)) (0.5 *. nu) 0.5 510 | 511 | 512 | (* [comp_rates (name, bm)] computes the number, average and standard 513 | deviation of rates from the list of timings [bm]. If bm = [x(1); 514 | x(2);...; x(n)], the algorithm is 515 | 516 | m(1) = x(1) m(k) = m(k-1) + (x(k) - m(k-1))/k 517 | s(1) = 0 s(k) = s(k-1) + (x(k) - m(k-1))(x(k) - m(k)) 518 | 519 | One proves by recurrence that 520 | 521 | m(k) = sum(x(i) : 1 <= i <= k) / k 522 | s(k) = sum(x(i)**2 : 1 <= i <= k) - k m(k)**2 523 | = sum( (x(i) - m(k))**2 : 1 <= i <= k) 524 | 525 | Cf. Knuth, Seminumerical algorithms. *) 526 | let comp_rates cpu (name, bm) = 527 | let rec loop n m s = function 528 | | [] -> (name, n, m, s) 529 | | b :: tl -> 530 | let rate = Int64.to_float b.iters /. cpu b in 531 | let n' = n + 1 in 532 | let m' = m +. (rate -. m) /. (float n') in 533 | let s' = s +. (rate -. m) *. (rate -. m') in 534 | loop n' m' s' tl in 535 | match bm with 536 | | [] -> (name, 0, nan, 0.) (* NaN used for no-data *) 537 | | b :: tl -> loop 1 (Int64.to_float b.iters /. (cpu b +. 1e-15)) 0. tl 538 | 539 | (* Compare rates *) 540 | let by_rates (_,_,r1,_) (_,_,r2,_) = compare (r1:float) r2 541 | 542 | (* Check whether two rates are significantly different. With a small 543 | [significance], a [true] returned value means that the rates are 544 | significantly different. [n1] is the number of repetitions of the 545 | test1, [r1] is its mean rate and [s1] its standard deviation. 546 | [n2], [r2] and [s2] are similar for the test2. *) 547 | let different_rates significance n1 r1 s1 n2 r2 s2 = 548 | assert(n1 > 0 && n2 > 0); 549 | if n1 = 1 && n2 = 1 then true (* no info about distribution, assume 550 | they really are. *) 551 | else 552 | let df = float(n1 + n2 - 2) (* >= 1. *) 553 | and n1 = float n1 554 | and n2 = float n2 in 555 | let sD = sqrt((s1 +. s2) /. df *. (1. /. n1 +. 1. /. n2)) in 556 | let t = (r1 -. r2) /. sD in 557 | cpl_student_t t df <= significance 558 | 559 | 560 | (* [string_of_rate display_as_rate confidence n r s] *) 561 | let string_of_rate display_as_rate = 562 | let per_sec = if display_as_rate then "/s" else "" in 563 | fun confidence n r s : (string * string) -> 564 | (* Assume Gaussian distribution *) 565 | let sigma = sqrt(s/. float n) in 566 | let err = confidence *. sigma (* FIXME *) in 567 | let a, err = 568 | if display_as_rate then r, err else 569 | let n = 1. /. r in (n, n *. n *. err) (* Taylor of order 1 of 1/r *) in 570 | let p prec = 571 | if sigma < 1e-15 then (sprintf " %.*f%s" prec a per_sec, "") 572 | else (sprintf " %.*f+-" prec a, sprintf "%.*f%s" prec err per_sec) in 573 | if a >= 100. then p 0 574 | else if a >= 10. then p 1 575 | else if a >= 1. then p 2 576 | else if a >= 0.1 then p 3 577 | else if sigma < 1e-15 then (sprintf " %g%s" a per_sec, "") 578 | else (sprintf " %g+-" a, sprintf "%g%s" err per_sec) 579 | 580 | (* print results of a bench_many run *) 581 | (* results = [(name, bm); (name, bm); (name, bm); ...] *) 582 | let tabulate ?(no_parent=false) ?(confidence=0.95) 583 | (results:( _ * t list) list) : unit = 584 | if confidence < 0. || confidence > 1. then 585 | invalid_arg "Benchmark.tabulate: confidence < 0. or > 1."; 586 | let len = List.length results in 587 | if len = 0 then invalid_arg "Benchmark.tabulate: empty list of results"; 588 | (* Compute (name, rate, sigma) for all results and sort them by rates *) 589 | let cpu = if no_parent then cpu_childs else cpu_process in 590 | let rates = List.sort by_rates (List.map (comp_rates cpu) results) in 591 | (* Decide whether to display by rates or seconds *) 592 | let display_as_rate = 593 | let (_,_,r,_) = List.nth rates (len / 2) in r > 1. in 594 | (* 595 | * Compute rows 596 | *) 597 | let top_row = "" :: (if display_as_rate then " Rate" else " s/iter") 598 | :: "" :: (List.map (fun (s,_,_,_) -> " " ^ s) rates) in 599 | (* Initialize the widths of the columns from the top row *) 600 | let col_width = Array.of_list (List.map String.length top_row) in 601 | (* Build all the data [rows], each starting with separation space *) 602 | let string_of_rate = string_of_rate display_as_rate in 603 | let make_row i (row_name, row_n, row_rate, row_s) = 604 | (* Column 0: test name *) 605 | col_width.(0) <- max (String.length row_name) col_width.(0); 606 | (* Column 1 & 2: performance *) 607 | let ra, ra_err = string_of_rate confidence row_n row_rate row_s in 608 | col_width.(1) <- max (String.length ra) col_width.(1); 609 | col_width.(2) <- max (String.length ra_err) col_width.(2); 610 | (* Columns 3..(len + 2): performance ratios *) 611 | let make_col j (_col_name, col_n, col_rate, col_s) = 612 | let ratio = 613 | if i = j || is_nan row_rate || is_nan col_rate then "--" else 614 | let p = 100. *. row_rate /. col_rate -. 100. in 615 | if p = 0. || different_rates (1. -. confidence) 616 | row_n row_rate row_s col_n col_rate col_s 617 | then sprintf " %.0f%%" p 618 | else sprintf " [%.0f%%]" p in 619 | col_width.(j + 3) <- max (String.length ratio) col_width.(j + 3); 620 | ratio in 621 | row_name :: ra :: ra_err :: (list_mapi make_col rates) in 622 | let rows = list_mapi make_row rates in 623 | (* 624 | * Equalize column widths in the chart as much as possible without 625 | * exceeding 80 characters. This does not use or affect cols 0, 1 and 2. 626 | *) 627 | (* Build an array of indexes [nth.(0..(len-1))] to access 628 | [col_width.(3..(len+2))] in nondecreasing order. *) 629 | let nth = Array.init len (fun i -> i + 3) in 630 | let by_width i1 i2 = compare col_width.(i1) col_width.(i2) in 631 | Array.sort by_width nth; 632 | let max_width = col_width.(nth.(len - 1)) in 633 | let rec stretcher min_width total = 634 | if min_width < max_width then stretch_min 0 min_width total 635 | and stretch_min i min_width total = (* try to stretch col [i] *) 636 | if total < 80 then begin 637 | if i < len && col_width.(nth.(i)) = min_width then begin 638 | col_width.(nth.(i)) <- col_width.(nth.(i)) + 1; 639 | stretch_min (i + 1) min_width (total + 1) (* stretch next col? *) 640 | end 641 | else stretcher (min_width + 1) total (* try again to stretch *) 642 | end in 643 | stretcher col_width.(nth.(0)) (Array.fold_left ( + ) 0 col_width); 644 | (* 645 | * Display the table 646 | *) 647 | let row_formatter row = 648 | list_iteri (fun i d -> printf "%*s" col_width.(i) d) row; 649 | print_string "\n" in 650 | row_formatter top_row; 651 | List.iter row_formatter rows; 652 | flush stdout 653 | 654 | let get_minor b = b.minor_words 655 | let get_major b = b.major_words -. b.promoted_words 656 | let get_promoted b = b.promoted_words 657 | 658 | let print_gc (results:( _ * t list) list) : unit = 659 | let len = List.length results in 660 | if len = 0 then invalid_arg "Benchmark.print_gc: empty list of results"; 661 | 662 | let compute_per_iter get (name, samples) : string * float = 663 | let rec loop n sum = function 664 | | [] -> n, sum 665 | | b :: tl -> 666 | let n = Int64.(add n b.iters) in 667 | let sum = sum +. get b in 668 | loop n sum tl in 669 | let n, sum = loop 0L 0. samples in 670 | if n=0L then name, 0. 671 | else name, sum /. (Int64.to_float n) 672 | in 673 | 674 | (* Compute (name, rate, sigma) for all results *) 675 | let minor_rates = List.map (compute_per_iter get_minor) results in 676 | let major_rates = List.map (compute_per_iter get_major) results in 677 | let promoted_rates = List.map (compute_per_iter get_promoted) results in 678 | 679 | (* Compute rows *) 680 | let top_row = ["" ; " minor_allocs/iter" ; " major_allocs/iter"; " promoted/iter" ] in 681 | let rows = 682 | List.map2 683 | (fun (name, minor) ((_, major), (_, promoted)) -> 684 | [name; string_of_bytes minor; string_of_bytes major; string_of_bytes promoted]) 685 | minor_rates (List.combine major_rates promoted_rates) 686 | in 687 | 688 | (* Initialize the widths of the columns from the top row *) 689 | let col_width = Array.of_list (List.map String.length top_row) in 690 | List.iter (List.iteri (fun i col -> 691 | col_width.(i) <- max col_width.(i) (String.length col))) rows; 692 | 693 | let row_formatter row = 694 | list_iteri (fun i d -> printf "%*s" col_width.(i) d) row; 695 | print_string "\n" in 696 | row_formatter top_row; 697 | List.iter row_formatter rows; 698 | flush stdout 699 | 700 | (** {2 Bench Tree} *) 701 | 702 | module Tree = struct 703 | (** {2 Path} *) 704 | 705 | type path = string list 706 | 707 | let print_path_element fmt p = 708 | Format.pp_print_char fmt '.'; 709 | Format.pp_print_cut fmt (); 710 | Format.pp_print_string fmt p 711 | 712 | let print_path fmt path = 713 | Format.fprintf fmt "@[<2>"; 714 | (match path with 715 | | [] -> () 716 | | [p] -> Format.pp_print_string fmt p 717 | | p :: tl -> Format.pp_print_string fmt p; 718 | List.iter (print_path_element fmt) tl); 719 | Format.fprintf fmt "@]" 720 | 721 | (* Split the string along "." characters. Specification: 722 | assert (parse_path "foo.bar" = ["foo";"bar"]); 723 | assert (parse_path "foo" = ["foo"]); 724 | assert (parse_path "" = [""]) 725 | *) 726 | let rev_parse_path s = 727 | let l = ref [] in 728 | let i0 = ref 0 in 729 | for i = 0 to String.length s - 1 do 730 | if String.unsafe_get s i = '.' then ( 731 | let name = String.sub s !i0 (i - !i0) in 732 | l := name :: !l; 733 | i0 := i + 1; 734 | ) 735 | done; 736 | let name = if !i0 = 0 then s 737 | else String.sub s !i0 (String.length s - !i0) in 738 | name :: !l 739 | 740 | let check_reserved name = 741 | if name = "*" then invalid_arg "Name \"*\" is reserved for wildcard" 742 | 743 | let parse_path s = List.rev(rev_parse_path s) 744 | 745 | 746 | (** {2 Bench Tree} *) 747 | 748 | module SMap = Map.Make(String) 749 | 750 | (* A collection of benchmarks with fast concatenation. *) 751 | type benches = Single of samples Lazy.t 752 | | Pair of benches * benches 753 | 754 | let merge_benches_opt b1 b2 = match b1, b2 with 755 | | None, b | b, None -> b 756 | | Some b1, Some b2 -> Some(Pair(b1, b2)) 757 | 758 | let rec number_of_benches = function 759 | | Single _ -> 1 760 | | Pair(b1, b2) -> number_of_benches b1 + number_of_benches b2 761 | 762 | let rec benches_iter benches ~f = match benches with 763 | | Single b -> f b 764 | | Pair(b1, b2) -> benches_iter b1 ~f; benches_iter b2 ~f 765 | 766 | type t = Tree of benches option * t SMap.t 767 | (* benches at that level + named sublevels. The name "" is 768 | understood as "at this level" and so will not be present in the 769 | map. *) 770 | 771 | let empty = Tree(None, SMap.empty) 772 | 773 | let is_empty (Tree(b, m)) = 774 | b = None && SMap.is_empty m 775 | 776 | let rec merge (Tree(b1, m1)) (Tree(b2, m2)) : t = 777 | let b = merge_benches_opt b1 b2 in 778 | let m = SMap.merge merge_opt m1 m2 in 779 | Tree(b, m) 780 | and merge_opt _ o1 o2 = match o1, o2 with 781 | | None, None -> None 782 | | Some o, None 783 | | None, Some o -> Some o 784 | | Some o1, Some o2 -> Some (merge o1 o2) 785 | 786 | let concat l = List.fold_left merge empty l 787 | 788 | let check_allowed_name n = 789 | check_reserved n; 790 | for i = 0 to String.length n - 1 do 791 | if String.unsafe_get n i = '.' then 792 | invalid_arg "Names cannot contain dots" 793 | done 794 | 795 | let of_bench bench = Tree(Some(Single bench), SMap.empty) 796 | 797 | let name_nonempty t n = Tree(None, SMap.singleton n t) 798 | 799 | let name t n = 800 | check_allowed_name n; 801 | if n = "" then t else name_nonempty t n 802 | 803 | (* prefix a tree with a path. Now the whole tree is only reachable 804 | from this given path *) 805 | let prefix path t = 806 | List.fold_right (fun n t -> name t n) path t 807 | 808 | let ( @>> ) n t = 809 | List.fold_left name t (rev_parse_path n) 810 | 811 | let ( @> ) name bench = name @>> (of_bench bench) 812 | 813 | let (@>>>) n l = n @>> (concat l) 814 | 815 | let with_int f = function 816 | | [] -> empty 817 | | l -> 818 | let g i = Tree(None, SMap.singleton (string_of_int i) (f i)) in 819 | concat (List.map g l) 820 | 821 | (* print the structure of the tree, to show the user possible paths *) 822 | let rec print_tree_map fmt m = 823 | SMap.iter (print_tree_path fmt) m 824 | and print_tree_path fmt name (Tree(b, m)) = 825 | (match b with 826 | | None -> Format.fprintf fmt "@\n@[<2>- %s" name 827 | | Some b -> 828 | let n = number_of_benches b in 829 | Format.fprintf fmt "@\n@[<2>- %s: %i benchmark%s" 830 | name n (if n > 1 then "s" else "")); 831 | print_tree_map fmt m; 832 | Format.fprintf fmt "@]" 833 | 834 | let print fmt (Tree(b, m)) = 835 | (match b with 836 | | None -> Format.fprintf fmt "No benchmark at root" 837 | | Some b -> 838 | let n = number_of_benches b in 839 | Format.fprintf fmt "%i benchmark%s at root" 840 | n (if n > 1 then "s" else "")); 841 | print_tree_map fmt m 842 | 843 | (** {2 Selecting a subtree} *) 844 | 845 | let rec filter path (Tree(b,m) as t) = match path with 846 | | [] -> t 847 | | [""] -> (* Only return the benches at this level (no sub-levels) *) 848 | Tree(b, SMap.empty) 849 | | "" :: tl -> (* skip empty component NOT at the end, skip *) 850 | filter tl t 851 | | "*" :: tl -> 852 | (* wildcard pattern, select all subtrees *) 853 | let map_filter name t m = 854 | let t = filter tl t in 855 | (* Keep it only if not empty. *) 856 | if is_empty t then m else SMap.add name t m in 857 | Tree(b, SMap.fold map_filter m SMap.empty) 858 | | p0 :: tl -> 859 | match (try Some(SMap.find p0 m) with Not_found -> None) with 860 | | None -> empty 861 | | Some t -> let t = filter tl t in 862 | (* propagate up the emptiness *) 863 | if is_empty t then empty else name_nonempty t p0 864 | 865 | (** {2 Run} *) 866 | 867 | let print_sep fmt = 868 | Format.pp_print_string fmt "***********************************\ 869 | ***********************************"; 870 | Format.pp_print_newline fmt () 871 | 872 | let run_bench_path ~with_gc fmt is_previous_output rev_path = function 873 | | None -> is_previous_output 874 | | Some b -> 875 | if is_previous_output then print_sep fmt; 876 | Format.fprintf fmt "*** Run benchmarks for path \"%a\"@\n@." 877 | print_path (List.rev rev_path); 878 | benches_iter b ~f:(fun b -> 879 | let b = Lazy.force b in 880 | tabulate b; 881 | if with_gc then print_gc b); 882 | true 883 | 884 | let rec run_all ~with_gc fmt is_previous_output rev_path (Tree(b, m)) = 885 | let is_previous_output = 886 | run_bench_path ~with_gc fmt is_previous_output rev_path b in 887 | SMap.fold 888 | (fun name t is_out -> 889 | run_all ~with_gc fmt is_out (name :: rev_path) t) 890 | m is_previous_output 891 | 892 | let run_1path fmt t is_previous_output path = 893 | (* Filtering the tree keep its full paths so we initialize 894 | [rev_path] to [[]]. *) 895 | run_all fmt is_previous_output [] (filter path t) 896 | 897 | let run_paths ~with_gc fmt ~paths t = 898 | let is_out = List.fold_left (run_1path ~with_gc fmt t) false paths in 899 | if not is_out then 900 | match paths with 901 | | [] -> Format.fprintf fmt "No benchmark to run.@\n@." 902 | | p0 :: tl -> 903 | Format.fprintf fmt "No benchmark to run for paths "; 904 | print_path fmt p0; 905 | List.iter (fun p -> print_path fmt p; 906 | Format.pp_print_string fmt ", ") tl; 907 | Format.fprintf fmt ".@\n@." 908 | 909 | type arg_state = { mutable paths : path list; 910 | mutable print_tree : bool; 911 | mutable with_gc: bool; 912 | } 913 | let arg () = 914 | let st = { paths = []; print_tree = false; with_gc = true } in 915 | let add_path s = st.paths <- parse_path s :: st.paths in 916 | let options = Arg.align 917 | [ "--path", Arg.String add_path, " only apply to subpath" 918 | ; "-p", Arg.String add_path, " short option for --path" 919 | ; "--all", Arg.Unit (fun () -> add_path "*"), " run all paths" 920 | ; "-a", Arg.Unit (fun () -> add_path "*"), " short option for --all" 921 | ; "--tree", Arg.Unit (fun () -> st.print_tree <- true), " print the tree" 922 | ; "--gc", Arg.Unit (fun () -> st.with_gc <- true), " print GC stats" 923 | ; "--no-gc", Arg.Unit (fun () -> st.with_gc <- false), " do not print GC stats" 924 | ] in 925 | st, options 926 | 927 | let run ?(with_gc=true) ?arg ?(paths=[]) ?(out=Format.std_formatter) t = 928 | match arg with 929 | | None -> run_paths ~with_gc out ~paths t 930 | | Some st -> 931 | if st.print_tree then 932 | Format.fprintf out "@[%a@]@." print t 933 | else 934 | run_paths ~with_gc:st.with_gc out ~paths:(paths @ List.rev st.paths) t 935 | 936 | (** {2 Global Registration} *) 937 | 938 | (* the global tree of benchmarks *) 939 | let tree = ref empty 940 | 941 | let global () = !tree 942 | 943 | let register new_t = 944 | tree := merge !tree new_t 945 | 946 | let run_global ?(argv=Sys.argv) ?(out=Format.std_formatter) () = 947 | let st, specs = arg () in 948 | let no_anon _ = raise(Arg.Bad "No anonymous arguments allowed") in 949 | let pgm = try Filename.basename Sys.argv.(0) 950 | with _ -> "run benchmark" in 951 | let usage = pgm ^ " [options]" in 952 | try 953 | Arg.parse_argv argv specs no_anon usage; 954 | run ~arg:st ~out !tree 955 | with Arg.Bad msg | Arg.Help msg -> 956 | Format.fprintf out "%s@." msg 957 | end 958 | 959 | (* Local Variables: *) 960 | (* compile-command: "make -k" *) 961 | (* End: *) 962 | --------------------------------------------------------------------------------