├── .gitignore ├── .hlint.yaml ├── cabal.haskell-ci ├── bench └── bench-fibo.hs ├── LICENSE ├── .cirrus.yml ├── compare_benches.sh ├── tasty-bench.cabal ├── convert_readme.sh ├── example.svg ├── changelog.md ├── .github └── workflows │ ├── other.yml │ └── haskell-ci.yml ├── README.md └── src └── Test └── Tasty └── Bench.hs /.gitignore: -------------------------------------------------------------------------------- 1 | /dist-newstyle/ 2 | /dist-mcabal/ 3 | -------------------------------------------------------------------------------- /.hlint.yaml: -------------------------------------------------------------------------------- 1 | - ignore: {name: "Use lambda-case"} 2 | - ignore: {name: "Redundant if"} 3 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | constraint-set no-tasty 2 | constraints: tasty-bench -tasty 3 | 4 | constraint-set tasty-1.4 5 | constraints: tasty ^>= 1.4 6 | 7 | constraint-set tasty-1.5 8 | constraints: tasty ^>= 1.5 9 | ghc: >= 8.0 10 | -------------------------------------------------------------------------------- /bench/bench-fibo.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Test.Tasty.Bench 4 | 5 | fibo :: Int -> Integer 6 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 7 | 8 | main :: IO () 9 | main = defaultMain 10 | [ bgroup "Fibonacci numbers" 11 | [ bench "fifth" $ nf fibo 5 12 | , bench "tenth" $ nf fibo 10 13 | , bench "twentieth" $ nf fibo 20 14 | ] 15 | ] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Andrew Lelechenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | name: FreeBSD 3 | freebsd_instance: 4 | image_family: freebsd-14-2 5 | install_script: 6 | - pkg install -y git gmake 7 | - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 8 | script: 9 | - export PATH="$HOME/.ghcup/bin:$PATH" 10 | - cabal build 11 | 12 | task: 13 | name: OpenBSD 14 | compute_engine_instance: 15 | image_project: pg-ci-images 16 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/openbsd.pkrvars.hcl 17 | image: family/pg-ci-openbsd-vanilla 18 | platform: openbsd 19 | install_script: pkg_add ghc cabal-install git 20 | script: 21 | - export CABAL_DIR=/tmp/.cabal 22 | - cabal update 23 | - cabal build 24 | 25 | task: 26 | name: NetBSD 27 | compute_engine_instance: 28 | image_project: pg-ci-images 29 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/netbsd.pkrvars.hcl 30 | image: family/pg-ci-netbsd-vanilla 31 | platform: netbsd 32 | install_script: 33 | - export PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/" 34 | - pkg_add ghc cabal-install git 35 | script: 36 | - export CABAL_DIR=/tmp/.cabal 37 | - cabal update 38 | - cabal build 39 | -------------------------------------------------------------------------------- /compare_benches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | compare_benches () { 3 | if [ "$#" -lt 2 ]; then 4 | printf "Usage:\n compare_benches oldCommit newCommit ...\nwhere ... 
is passed to benchmarks directly.\n" 5 | return 0 6 | fi 7 | 8 | OLD="$1" 9 | shift 10 | NEW="$1" 11 | shift 12 | 13 | OLDREF=$(git rev-parse --verify "$OLD") 14 | NEWREF=$(git rev-parse --verify "$NEW") 15 | 16 | OLDCSV=$(echo "$OLD".csv | sed -e s#/##g) 17 | NEWCSV=$(echo "$NEW".csv | sed -e s#/##g) 18 | OLDVSNEWCSV=$(echo "$OLD"-vs-"$NEW".csv | sed -e s#/##g) 19 | 20 | git checkout -q "$OLDREF" && \ 21 | trap 'git checkout -q "@{-1}" && trap - INT' INT && \ 22 | cabal run -v0 benchmarks -- --csv "$OLDCSV" "$@" && \ 23 | git checkout -q "$NEWREF" && \ 24 | trap 'git checkout -q "@{-2}" && trap - INT' INT && \ 25 | cabal run -v0 benchmarks -- --baseline "$OLDCSV" --csv "$NEWCSV" "$@" && \ 26 | git checkout -q "@{-2}" && \ 27 | 28 | awk 'BEGIN{FS=",";OFS=",";print "Name,'"$OLD"','"$NEW"',Ratio"}FNR==1{trueNF=NF;next}NF<trueNF{print $1;next}FNR==NR{oldTime[$1]=$2;next}{ratio=$2/oldTime[$1];gs+=log(ratio);gc++;print $1,oldTime[$1],$2,ratio}END{if(gc>0)print "Geometric mean,,",exp(gs/gc)}' "$OLDCSV" "$NEWCSV" > "$OLDVSNEWCSV" && \ 29 | 30 | trap - INT 31 | } 32 | -------------------------------------------------------------------------------- /tasty-bench.cabal: -------------------------------------------------------------------------------- 1 | name: tasty-bench 2 | version: 0.4.1 3 | cabal-version: 1.18 4 | build-type: Simple 5 | license: MIT 6 | license-file: LICENSE 7 | copyright: 2021 Andrew Lelechenko 8 | author: Andrew Lelechenko <andrew.lelechenko@gmail.com> 9 | maintainer: Andrew Lelechenko <andrew.lelechenko@gmail.com> 10 | homepage: https://github.com/Bodigrim/tasty-bench 11 | bug-reports: https://github.com/Bodigrim/tasty-bench/issues 12 | category: Development, Performance, Testing, Benchmarking 13 | synopsis: Featherlight benchmark framework 14 | description: 15 | Featherlight framework (only one file!) 16 | for performance measurement with API mimicking 17 | @criterion@ and @gauge@, featuring built-in comparison 18 | against previous runs and between benchmarks. Our benchmarks are just 19 | regular @tasty@ tests. 
20 | 21 | extra-source-files: 22 | compare_benches.sh 23 | extra-doc-files: 24 | changelog.md 25 | example.svg 26 | README.md 27 | 28 | tested-with: GHC == 9.14.1, GHC == 9.12.2, GHC == 9.10.3, GHC == 9.8.4, GHC == 9.6.7, GHC == 9.4.8, GHC == 9.2.8, GHC==9.0.2, GHC==8.10.7, GHC==8.8.4, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2 29 | 30 | source-repository head 31 | type: git 32 | location: https://github.com/Bodigrim/tasty-bench 33 | 34 | flag tasty 35 | default: True 36 | manual: True 37 | description: 38 | When disabled, reduces API to functions independent of @tasty@: combinators 39 | to construct @Benchmarkable@ and @measureCpuTime@. 40 | 41 | library 42 | exposed-modules: Test.Tasty.Bench 43 | hs-source-dirs: src 44 | default-language: Haskell2010 45 | ghc-options: -O2 -Wall -fno-warn-unused-imports -Wcompat -Widentities 46 | 47 | build-depends: 48 | base >= 4.9 && < 5, 49 | deepseq >= 1.1 && < 1.6 50 | if impl(ghc < 9.0) 51 | build-depends: 52 | ghc-prim < 0.14 53 | if flag(tasty) 54 | build-depends: 55 | containers >= 0.5 && < 0.9, 56 | tasty >= 1.4 && < 1.6 57 | if impl(ghc < 8.4) 58 | build-depends: 59 | time >= 1.2 && < 2 60 | 61 | benchmark bench-fibo 62 | default-language: Haskell2010 63 | hs-source-dirs: bench 64 | main-is: bench-fibo.hs 65 | type: exitcode-stdio-1.0 66 | build-depends: base, tasty-bench 67 | ghc-options: "-with-rtsopts=-A32m" 68 | -------------------------------------------------------------------------------- /convert_readme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | sed 's/e\. 
g\./e.g./g' README.md | pandoc -f markdown -t haddock -o README.haddock 3 | 4 | ex README.haddock <> README.haddock 15 | 16 | sed -i '' '/^= /d' README.haddock 17 | sed -i '' '/^#/d' README.haddock 18 | sed -i '' '/^- <#/d' README.haddock 19 | 20 | sed -i '' 's/^== /=== /g' README.haddock 21 | sed -i '' -E 's/<(.*) (criterion|gauge|tasty|tasty-rerun|-fproc-alignment)>/[@\2@](\1)/g' README.haddock 22 | sed -i '' -E 's/<(.*) tasty documentation>/[@tasty@ documentation](\1)/g' README.haddock 23 | sed -i '' -E 's/vs\../vs. /g' README.haddock 24 | sed -i '' -E 's/e\.g\./e. g./g' README.haddock 25 | 26 | sed -i '' 's/@localOption (NumThreads 1)@/@localOption@ (@NumThreads@ 1)/g' README.haddock 27 | sed -i '' 's/@localOption (FailIfSlower 0.10)@/@localOption@ (@FailIfSlower@ 0.10)/g' README.haddock 28 | sed -i '' 's/@localOption (RelStDev 0.02)@/@localOption@ (@RelStDev@ 0.02)/g' README.haddock 29 | sed -i '' 's/@localOption (mkTimeout 100000000)@/@localOption@ (@mkTimeout@ 100000000)/g' README.haddock 30 | sed -i '' "s/@Test.Tasty.Bench.defaultMain@/@Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain'/g" README.haddock 31 | sed -i '' "s/@Test.Tasty.defaultMain@/@Test.Tasty.@'Test.Tasty.defaultMain'/g" README.haddock 32 | sed -i '' "s/@ghc-options:@\[@-fproc-alignment@\]/@ghc-options:@ [@-fproc-alignment@]/g" README.haddock 33 | 34 | sed -i '' 's/@Test.Tasty.Bench@/"Test.Tasty.Bench"/g' README.haddock 35 | 36 | for word in getCPUTime TimeMode RTSStats allocated_bytes copied_bytes max_mem_in_use_bytes consoleBenchReporter consoleTestReporter env envWithCleanup IO bcompare bcompareWithin locateBenchmark localOption NumThreads Benchmarkable measureCpuTime mkTimeout RelStDev FailIfSlower; do 37 | sed -i '' "s/@$word@/'$word'/g" README.haddock 38 | done 39 | 40 | sed -i '' "s;<>;![Plotting](example.svg);g" README.haddock 41 | sed -i '' "s/^Plotting$//g" README.haddock 42 | 43 | sed -i '' 's/^- @\(.*\)@/[@\1@]:/g' README.haddock 44 | 45 | sed -n '/^{-#/,$p' src/Test/Tasty/Bench.hs 
> src/Test/Tasty/Bench.temp 46 | cat README.haddock src/Test/Tasty/Bench.temp > src/Test/Tasty/Bench.hs 47 | rm README.haddock src/Test/Tasty/Bench.temp 48 | -------------------------------------------------------------------------------- /example.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Inversion.Data.Mod 5 | 353 ms 6 | 7 | 8 | 353 ms ± 47 ms 9 | 10 | 11 | 12 | 13 | 14 | 15 | Inversion.Data.Mod.Word 294 ms 16 | 17 | 294 ms ± 43 ms 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Inversion.finite-field 26 | 1.03 s 27 | 28 | 29 | 1.03 s ± 147 ms 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | Inversion.modular-arithmetic 38 | 896 ms 39 | 40 | 41 | 896 ms ± 58 ms 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # 0.5 2 | 3 | * Extend `TimeMode` with `MutatorCpuTime`, `MutatorWallTime` and `CustomTime`. 4 | * Use a more robust strategy to force encoding to UTF-8. 5 | 6 | # 0.4.1 7 | 8 | * Force GC before collecting `RTSStats`, otherwise measurements are inaccurate. 9 | * Restore locale encoding after `defaultMain`. 10 | 11 | # 0.4 12 | 13 | * Switch `nf`, `nfIO` and `nfAppIO` to evaluate outputs to a normal form 14 | with `rnf` instead of `force`. It means that parts of the output, which have 15 | already been forced, can be garbage collected early, without waiting for 16 | the entire output to be allocated at the same time. This decreases 17 | benchmarking overhead in many scenarios and brings the behaviour in line 18 | with `criterion`. See [#39](https://github.com/Bodigrim/tasty-bench/issues/39) 19 | for discussion. 20 | * Drop support of `tasty < 1.4`. 21 | * Make `IO` benchmarks immune to `-fspec-constr-count` limit. 22 | * Decommission `debug` build flag. 23 | * Decommission warning when `--timeout` is absent. 
24 | * Add `instance {Eq,Ord,Num,Fractional} {RelStDev,FailIfSlower,FailIfFaster}`. 25 | * Add `instance {Eq,Ord} {CsvPath,SvgPath,BaselinePath}`. 26 | 27 | # 0.3.5 28 | 29 | * Support `tasty-1.5`. 30 | * Report benchmarking progress. 31 | 32 | # 0.3.4 33 | 34 | * Force single-threaded execution in `defaultMain`. 35 | * Expose `measureCpuTimeAndStDev` helper to analyse benchmarks manually. 36 | 37 | # 0.3.3 38 | 39 | * Drop support of `tasty < 1.2.3`. 40 | * Make benchmarks immune to `-fspec-constr-count` limit. 41 | 42 | # 0.3.2 43 | 44 | * Add `locateBenchmark` and `mapLeafBenchmarks`. 45 | * Support measuring of wall-clock time. 46 | * Make messages for baseline comparison less ambiguous. 47 | * Graceful degradation on non-Unicode terminals. 48 | 49 | # 0.3.1 50 | 51 | * Add `bcompareWithin` for portable performance tests. 52 | * Add `tasty` and `debug` build flags. 53 | 54 | # 0.3 55 | 56 | * Report mean time with 3 significant digits. 57 | * Report peak memory usage, when run with `+RTS -T`. 58 | * Run benchmarks only once, if `RelStDev` is infinite. 59 | * Make `Benchmarkable` constructor public. 60 | * Expose `measureCpuTime` helper to run benchmarks manually. 61 | * Expose `CsvPath`, `BaselinePath`, `SvgPath`. 62 | 63 | # 0.2.5 64 | 65 | * Fix comparison against baseline. 66 | 67 | # 0.2.4 68 | 69 | * Add a simplistic SVG reporter. 70 | * Add `bcompare` to compare between benchmarks. 71 | * Throw a warning, if benchmarks take too long. 72 | 73 | # 0.2.3 74 | 75 | * Prohibit duplicated benchmark names in CSV reports. 76 | 77 | # 0.2.2 78 | 79 | * Remove `NFData` constraint from `whnfIO`. 80 | 81 | # 0.2.1 82 | 83 | * Fix integer overflow in stdev computations. 84 | 85 | # 0.2 86 | 87 | * Add `env` and `envWithCleanup`. 88 | * Run console and CSV reporters in parallel. 89 | * Extend console reporter and export it as `consoleBenchReporter`. 90 | * Add comparison against baseline and relevant options. 91 | * Export `RelStDev` option. 
92 | * Export `benchIngredients`. 93 | 94 | # 0.1 95 | 96 | * Initial release. 97 | -------------------------------------------------------------------------------- /.github/workflows/other.yml: -------------------------------------------------------------------------------- 1 | name: other 2 | on: 3 | - push 4 | - pull_request 5 | 6 | defaults: 7 | run: 8 | shell: bash 9 | 10 | jobs: 11 | build: 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: true 15 | matrix: 16 | os: [windows-latest, macOS-latest, macOS-14] 17 | ghc: ['latest'] 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: haskell-actions/setup@v2 21 | id: setup-haskell-cabal 22 | with: 23 | ghc-version: ${{ matrix.ghc }} 24 | - name: Update cabal package database 25 | run: cabal update 26 | - uses: actions/cache@v4 27 | name: Cache cabal stuff 28 | with: 29 | path: | 30 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 31 | dist-newstyle 32 | key: ${{ runner.os }}-${{ matrix.ghc }} 33 | - name: Build 34 | run: | 35 | cabal sdist -z -o . 36 | cabal get tasty-bench-*.tar.gz 37 | cd tasty-bench-*/ 38 | cabal build 39 | - name: Haddock 40 | run: | 41 | cd tasty-bench-*/ 42 | cabal haddock 43 | 44 | # Emulation on s390x and ppc64le platforms is incredibly slow and memory demanding. 45 | # It seems that any executable with GHC RTS takes at least 7-8 Gb of RAM, so we can 46 | # run `cabal` or `ghc` on their own, but cannot run them both at the same time, striking 47 | # out `cabal build`. Instead we install system packages and invoke `ghc` manually. 
48 | 49 | emulated: 50 | needs: build 51 | runs-on: ubuntu-latest 52 | strategy: 53 | fail-fast: true 54 | matrix: 55 | arch: ['s390x', 'ppc64le', 'armv7', 'aarch64', 'riscv64'] 56 | steps: 57 | - uses: actions/checkout@v4 58 | - uses: uraimo/run-on-arch-action@v3 59 | with: 60 | arch: ${{ matrix.arch }} 61 | distro: ubuntu_rolling 62 | githubToken: ${{ github.token }} 63 | install: | 64 | apt-get update -y 65 | apt-get install -y ghc libghc-tasty-dev 66 | run: | 67 | ghc --version 68 | ghc src/Test/Tasty/Bench.hs 69 | 70 | i386: 71 | needs: build 72 | runs-on: ubuntu-latest 73 | container: 74 | image: i386/ubuntu:bionic 75 | steps: 76 | - name: Install 77 | run: | 78 | apt-get update -y 79 | apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev 80 | curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 81 | - uses: actions/checkout@v1 82 | - name: Test 83 | run: | 84 | source ~/.ghcup/env 85 | cabal update 86 | cabal build 87 | 88 | wasm: 89 | needs: build 90 | runs-on: ubuntu-latest 91 | env: 92 | GHC_WASM_META_REV: 895f7067e1d4c918a45559da9d2d6a403a690703 93 | FLAVOUR: '9.6' 94 | steps: 95 | - name: setup-ghc-wasm32-wasi 96 | run: | 97 | cd $(mktemp -d) 98 | curl -L https://gitlab.haskell.org/ghc/ghc-wasm-meta/-/archive/$GHC_WASM_META_REV/ghc-wasm-meta-master.tar.gz | tar xz --strip-components=1 99 | ./setup.sh 100 | ~/.ghc-wasm/add_to_github_path.sh 101 | - uses: actions/checkout@v4 102 | 103 | - uses: actions/cache@v4 104 | with: 105 | path: | 106 | ~/.ghc-wasm/.cabal/store 107 | dist-newstyle 108 | key: build-wasi-${{ runner.os }}-wasm-meta-${{ env.GHC_WASM_META_REV }}-flavour-${{ env.FLAVOUR }}-${{ github.sha }} 109 | restore-keys: | 110 | build-wasi-${{ runner.os }}-wasm-meta-${{ env.GHC_WASM_META_REV }}-flavour-${{ env.FLAVOUR }}- 111 | 112 | - name: Build 113 | run: | 114 | wasm32-wasi-cabal build 
all 115 | -------------------------------------------------------------------------------- /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'tasty-bench.cabal' '--haddock-jobs=>=8.2' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20251105 12 | # 13 | # REGENDATA ("0.19.20251105",["github","tasty-bench.cabal","--haddock-jobs=>=8.2"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | - push 18 | - pull_request 19 | - merge_group 20 | jobs: 21 | linux: 22 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 23 | runs-on: ubuntu-24.04 24 | timeout-minutes: 25 | 60 26 | container: 27 | image: buildpack-deps:jammy 28 | continue-on-error: ${{ matrix.allow-failure }} 29 | strategy: 30 | matrix: 31 | include: 32 | - compiler: ghc-9.14.0.20251028 33 | compilerKind: ghc 34 | compilerVersion: 9.14.0.20251028 35 | setup-method: ghcup-prerelease 36 | allow-failure: false 37 | - compiler: ghc-9.12.2 38 | compilerKind: ghc 39 | compilerVersion: 9.12.2 40 | setup-method: ghcup 41 | allow-failure: false 42 | - compiler: ghc-9.10.3 43 | compilerKind: ghc 44 | compilerVersion: 9.10.3 45 | setup-method: ghcup 46 | allow-failure: false 47 | - compiler: ghc-9.8.4 48 | compilerKind: ghc 49 | compilerVersion: 9.8.4 50 | setup-method: ghcup 51 | allow-failure: false 52 | - compiler: ghc-9.6.7 53 | compilerKind: ghc 54 | compilerVersion: 9.6.7 55 | setup-method: ghcup 56 | allow-failure: false 57 | - compiler: ghc-9.4.8 58 | compilerKind: ghc 59 | compilerVersion: 9.4.8 60 | setup-method: ghcup 61 | allow-failure: false 62 | - compiler: ghc-9.2.8 63 | compilerKind: ghc 64 | compilerVersion: 9.2.8 65 | setup-method: ghcup 66 | allow-failure: false 67 | - 
compiler: ghc-9.0.2 68 | compilerKind: ghc 69 | compilerVersion: 9.0.2 70 | setup-method: ghcup 71 | allow-failure: false 72 | - compiler: ghc-8.10.7 73 | compilerKind: ghc 74 | compilerVersion: 8.10.7 75 | setup-method: ghcup 76 | allow-failure: false 77 | - compiler: ghc-8.8.4 78 | compilerKind: ghc 79 | compilerVersion: 8.8.4 80 | setup-method: ghcup 81 | allow-failure: false 82 | - compiler: ghc-8.6.5 83 | compilerKind: ghc 84 | compilerVersion: 8.6.5 85 | setup-method: ghcup 86 | allow-failure: false 87 | - compiler: ghc-8.4.4 88 | compilerKind: ghc 89 | compilerVersion: 8.4.4 90 | setup-method: ghcup 91 | allow-failure: false 92 | - compiler: ghc-8.2.2 93 | compilerKind: ghc 94 | compilerVersion: 8.2.2 95 | setup-method: ghcup 96 | allow-failure: false 97 | - compiler: ghc-8.0.2 98 | compilerKind: ghc 99 | compilerVersion: 8.0.2 100 | setup-method: ghcup 101 | allow-failure: false 102 | fail-fast: false 103 | steps: 104 | - name: apt-get install 105 | run: | 106 | apt-get update 107 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 108 | - name: Install GHCup 109 | run: | 110 | mkdir -p "$HOME/.ghcup/bin" 111 | curl -sL https://downloads.haskell.org/ghcup/0.1.50.1/x86_64-linux-ghcup-0.1.50.1 > "$HOME/.ghcup/bin/ghcup" 112 | chmod a+x "$HOME/.ghcup/bin/ghcup" 113 | - name: Install cabal-install 114 | run: | 115 | "$HOME/.ghcup/bin/ghcup" install cabal 3.16.0.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 116 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.16.0.0 -vnormal+nowrap" >> "$GITHUB_ENV" 117 | - name: Install GHC (GHCup) 118 | if: matrix.setup-method == 'ghcup' 119 | run: | 120 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 121 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 122 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 123 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 124 | echo "HC=$HC" >> "$GITHUB_ENV" 125 | echo 
"HCPKG=$HCPKG" >> "$GITHUB_ENV" 126 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 127 | env: 128 | HCKIND: ${{ matrix.compilerKind }} 129 | HCNAME: ${{ matrix.compiler }} 130 | HCVER: ${{ matrix.compilerVersion }} 131 | - name: Install GHC (GHCup prerelease) 132 | if: matrix.setup-method == 'ghcup-prerelease' 133 | run: | 134 | "$HOME/.ghcup/bin/ghcup" config add-release-channel prereleases 135 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 136 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 137 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 138 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 139 | echo "HC=$HC" >> "$GITHUB_ENV" 140 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 141 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 142 | env: 143 | HCKIND: ${{ matrix.compilerKind }} 144 | HCNAME: ${{ matrix.compiler }} 145 | HCVER: ${{ matrix.compilerVersion }} 146 | - name: Set PATH and environment variables 147 | run: | 148 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 149 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 150 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 151 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 152 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? 
$5 != 1 : $3))') 153 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 154 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 155 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 156 | if [ $((HCNUMVER >= 91400)) -ne 0 ] ; then echo "HEADHACKAGE=true" >> "$GITHUB_ENV" ; else echo "HEADHACKAGE=false" >> "$GITHUB_ENV" ; fi 157 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 158 | env: 159 | HCKIND: ${{ matrix.compilerKind }} 160 | HCNAME: ${{ matrix.compiler }} 161 | HCVER: ${{ matrix.compilerVersion }} 162 | - name: env 163 | run: | 164 | env 165 | - name: write cabal config 166 | run: | 167 | mkdir -p $CABAL_DIR 168 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 213 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 214 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 215 | rm -f cabal-plan.xz 216 | chmod a+x $HOME/.cabal/bin/cabal-plan 217 | cabal-plan --version 218 | - name: checkout 219 | uses: actions/checkout@v5 220 | with: 221 | path: source 222 | - name: initial cabal.project for sdist 223 | run: | 224 | touch cabal.project 225 | echo "packages: $GITHUB_WORKSPACE/source/." 
>> cabal.project 226 | cat cabal.project 227 | - name: sdist 228 | run: | 229 | mkdir -p sdist 230 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 231 | - name: unpack 232 | run: | 233 | mkdir -p unpacked 234 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 235 | - name: generate cabal.project 236 | run: | 237 | PKGDIR_tasty_bench="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/tasty-bench-[0-9.]*')" 238 | echo "PKGDIR_tasty_bench=${PKGDIR_tasty_bench}" >> "$GITHUB_ENV" 239 | rm -f cabal.project cabal.project.local 240 | touch cabal.project 241 | touch cabal.project.local 242 | echo "packages: ${PKGDIR_tasty_bench}" >> cabal.project 243 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 244 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods -Werror=missing-fields" >> cabal.project ; fi 245 | if [ $((HCNUMVER >= 90400)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 246 | if [ $((HCNUMVER >= 90400)) -ne 0 ] ; then echo " ghc-options: -Werror=unused-packages" >> cabal.project ; fi 247 | if [ $((HCNUMVER >= 90000)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 248 | if [ $((HCNUMVER >= 90000)) -ne 0 ] ; then echo " ghc-options: -Werror=incomplete-patterns -Werror=incomplete-uni-patterns" >> cabal.project ; fi 249 | cat >> cabal.project <> cabal.project 253 | fi 254 | $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: any.$_ installed\n" unless /^(tasty-bench)$/; }' >> cabal.project.local 255 | cat cabal.project 256 | cat cabal.project.local 257 | - name: dump install plan 258 | run: | 259 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 260 | cabal-plan 261 | - name: restore cache 262 | uses: actions/cache/restore@v4 263 | with: 264 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 265 | path: 
~/.cabal/store 266 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 267 | - name: install dependencies 268 | run: | 269 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 270 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 271 | - name: build w/o tests 272 | run: | 273 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 274 | - name: build 275 | run: | 276 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 277 | - name: cabal check 278 | run: | 279 | cd ${PKGDIR_tasty_bench} || false 280 | ${CABAL} -vnormal check 281 | - name: haddock 282 | run: | 283 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all ; fi 284 | - name: unconstrained build 285 | run: | 286 | rm -f cabal.project.local 287 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 288 | - name: prepare for constraint sets 289 | run: | 290 | rm -f cabal.project.local 291 | - name: constraint set tasty-1.5 292 | run: | 293 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' all --dry-run 294 | cabal-plan topo | sort 295 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' --dependencies-only -j2 all 296 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' all 297 | - name: constraint set tasty-1.4 298 | run: | 299 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' all --dry-run 300 | cabal-plan topo | sort 301 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' --dependencies-only -j2 all 302 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' all 303 | - 
name: constraint set no-tasty 304 | run: | 305 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' all --dry-run 306 | cabal-plan topo | sort 307 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' --dependencies-only -j2 all 308 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' all 309 | - name: save cache 310 | if: always() 311 | uses: actions/cache/save@v4 312 | with: 313 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 314 | path: ~/.cabal/store 315 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tasty-bench [![Hackage](http://img.shields.io/hackage/v/tasty-bench.svg)](https://hackage.haskell.org/package/tasty-bench) [![Stackage LTS](http://stackage.org/package/tasty-bench/badge/lts)](http://stackage.org/lts/package/tasty-bench) [![Stackage Nightly](http://stackage.org/package/tasty-bench/badge/nightly)](http://stackage.org/nightly/package/tasty-bench) 2 | 3 | Featherlight benchmark framework (only one file!) for performance measurement 4 | with API mimicking [`criterion`](http://hackage.haskell.org/package/criterion) 5 | and [`gauge`](http://hackage.haskell.org/package/gauge). 6 | A prominent feature is built-in comparison against previous runs 7 | and between benchmarks. 8 | 9 | 10 | 11 | - [How lightweight is it?](#how-lightweight-is-it) 12 | - [How is it possible?](#how-is-it-possible) 13 | - [How to switch?](#how-to-switch) 14 | - [How to write a benchmark?](#how-to-write-a-benchmark) 15 | - [How to read results?](#how-to-read-results) 16 | - [Wall-clock time vs. 
CPU time](#wall-clock-time-vs-cpu-time) 17 | - [Statistical model](#statistical-model) 18 | - [Memory usage](#memory-usage) 19 | - [Combining tests and benchmarks](#combining-tests-and-benchmarks) 20 | - [Troubleshooting](#troubleshooting) 21 | - [Isolating interfering benchmarks](#isolating-interfering-benchmarks) 22 | - [Comparison against baseline](#comparison-against-baseline) 23 | - [Comparison between benchmarks](#comparison-between-benchmarks) 24 | - [Plotting results](#plotting-results) 25 | - [Build flags](#build-flags) 26 | - [Command-line options](#command-line-options) 27 | - [Custom command-line options](#custom-command-line-options) 28 | 29 | 30 | 31 | ## How lightweight is it? 32 | 33 | There is only one source file `Test.Tasty.Bench` and no non-boot dependencies 34 | except [`tasty`](http://hackage.haskell.org/package/tasty). 35 | So if you already depend on `tasty` for a test suite, there 36 | is nothing else to install. 37 | 38 | Compare this to `criterion` (10+ modules, 50+ dependencies) 39 | and `gauge` (40+ modules, depends on `basement` and `vector`). 40 | A build on a clean machine is up to 16x 41 | faster than `criterion` and up to 4x faster than `gauge`. A build without dependencies 42 | is up to 6x faster than `criterion` and up to 8x faster than `gauge`. 43 | 44 | `tasty-bench` is a native Haskell library and works everywhere, where GHC 45 | does, including WASM. We support a full range of architectures (`i386`, `amd64`, `armhf`, 46 | `arm64`, `ppc64le`, `s390x`, `riscv64`) and operating systems (Linux, Windows, macOS, 47 | FreeBSD, OpenBSD, NetBSD), plus any GHC from 8.0 to 9.14 48 | (and earlier releases stretch back to GHC 7.0). 49 | 50 | ## How is it possible? 51 | 52 | Our benchmarks are literally regular `tasty` tests, so we can leverage all existing 53 | machinery for command-line options, resource management, structuring, 54 | listing and filtering benchmarks, running them and reporting results. 
It also means 55 | that `tasty-bench` can be used in conjunction with other `tasty` ingredients. 56 | 57 | Unlike `criterion` and `gauge` we use a very simple statistical model described below. 58 | This is arguably a questionable choice, but it works pretty well in practice. 59 | A rare developer is sufficiently well-versed in probability theory 60 | to make sense and use of all numbers generously generated by `criterion`. 61 | 62 | ## How to switch? 63 | 64 | [Cabal mixins](https://cabal.readthedocs.io/en/3.4/cabal-package.html#pkg-field-mixins) 65 | allow to taste `tasty-bench` instead of `criterion` or `gauge` 66 | without changing a single line of code: 67 | 68 | ```cabal 69 | cabal-version: 2.0 70 | 71 | benchmark foo 72 | ... 73 | build-depends: 74 | tasty-bench 75 | mixins: 76 | tasty-bench (Test.Tasty.Bench as Criterion, Test.Tasty.Bench as Criterion.Main, Test.Tasty.Bench as Gauge, Test.Tasty.Bench as Gauge.Main) 77 | ``` 78 | 79 | This works vice versa as well: if you use `tasty-bench`, but at some point 80 | need a more comprehensive statistical analysis, 81 | it is easy to switch temporarily back to `criterion`. 82 | 83 | ## How to write a benchmark? 
84 | 85 | Benchmarks are declared 86 | in [a separate section of `cabal` file](https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html#benchmarks): 87 | 88 | ```cabal 89 | cabal-version: 2.0 90 | name: bench-fibo 91 | version: 0.0 92 | build-type: Simple 93 | synopsis: Example of a benchmark 94 | 95 | benchmark bench-fibo 96 | main-is: BenchFibo.hs 97 | type: exitcode-stdio-1.0 98 | build-depends: base, tasty-bench 99 | ghc-options: "-with-rtsopts=-A32m" 100 | if impl(ghc >= 8.6) 101 | ghc-options: -fproc-alignment=64 102 | ``` 103 | 104 | And here is `BenchFibo.hs`: 105 | 106 | ```haskell 107 | import Test.Tasty.Bench 108 | 109 | fibo :: Int -> Integer 110 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 111 | 112 | main :: IO () 113 | main = defaultMain 114 | [ bgroup "Fibonacci numbers" 115 | [ bench "fifth" $ nf fibo 5 116 | , bench "tenth" $ nf fibo 10 117 | , bench "twentieth" $ nf fibo 20 118 | ] 119 | ] 120 | ``` 121 | 122 | Since `tasty-bench` provides an API compatible with `criterion`, 123 | one can refer to [its documentation](https://hackage.haskell.org/package/criterion#readme) for more examples. 124 | 125 | ## How to read results? 126 | 127 | Running the example above (`cabal bench` or `stack bench`) 128 | results in the following output: 129 | 130 | ``` 131 | All 132 | Fibonacci numbers 133 | fifth: OK 134 | 63 ns ± 3.4 ns 135 | tenth: OK 136 | 809 ns ± 73 ns 137 | twentieth: OK 138 | 104 μs ± 4.9 μs 139 | 140 | All 3 tests passed (7.25s) 141 | ``` 142 | 143 | The output says that, for instance, in the first benchmark the mean CPU time is 144 | 63 nanoseconds and means of individual samples do not often diverge from it 145 | further than ±3.4 nanoseconds (double standard deviation). Take standard 146 | deviation numbers with a grain of salt; there are lies, damned lies, and 147 | statistics. 148 | 149 | ## Wall-clock time vs. CPU time 150 | 151 | What time are we talking about? 
152 | Both `criterion` and `gauge` by default report wall-clock time, which is 153 | affected by any other application which runs concurrently. 154 | Ideally benchmarks are executed on a dedicated server without any other load, 155 | but — let's face the truth — most of developers run benchmarks 156 | on a laptop with a hundred other services and a window manager, and 157 | watch videos while waiting for benchmarks to finish. That's the cause 158 | of a notorious "variance introduced by outliers: 88% (severely inflated)" warning. 159 | 160 | To alleviate this issue `tasty-bench` measures CPU time by `getCPUTime` 161 | instead of wall-clock time by default. 162 | It does not provide a perfect isolation from other processes (e. g., 163 | if CPU cache is spoiled by others, populating data back from RAM 164 | is your burden), but is a bit more stable. 165 | 166 | Caveat: this means that for multithreaded algorithms 167 | `tasty-bench` reports total elapsed CPU time across all cores, while 168 | `criterion` and `gauge` print maximum of core's wall-clock time. 169 | It also means that by default `tasty-bench` does not measure time spent out of process, 170 | e. g., calls to other executables. To work around this limitation 171 | use `--time-mode` command-line option or set it locally via `TimeMode` option. 172 | 173 | ## Statistical model 174 | 175 | Here is a procedure used by `tasty-bench` to measure execution time: 176 | 177 | 1. Set $n \leftarrow 1$. 178 | 2. Measure execution time $t_n$ of $n$ iterations 179 | and execution time $t_{2n}$ of $2n$ iterations. 180 | 3. Find $t$ which minimizes deviation of $(nt,2nt)$ from $(t_n,t_{2n})$, 181 | namely $t \leftarrow (t_n + 2t_{2n}) / 5n$. 182 | 4. If deviation is small enough (see `--stdev` below) 183 | or time is running out soon (see `--timeout` below), 184 | return $t$ as a mean execution time. 185 | 5. Otherwise set $n \leftarrow 2n$ and jump back to Step 2. 
186 | 187 | This is roughly similar to the linear regression approach which `criterion` takes, 188 | but we fit only two last points. This allows us to simplify away all heavy-weight 189 | statistical analysis. More importantly, earlier measurements, 190 | which are presumably shorter and noisier, do not affect overall result. 191 | This is in contrast to `criterion`, which fits all measurements and 192 | is biased to use more data points corresponding to shorter runs 193 | (it employs $n \leftarrow 1.05n$ progression). 194 | 195 | Mean time and its deviation does not say much about the 196 | distribution of individual timings. E. g., imagine a computation which 197 | (according to a coarse system timer) takes either 0 ms or 1 ms with equal 198 | probability. While one would be able to establish that its mean time is 0.5 ms 199 | with a very small deviation, this does not imply that individual measurements 200 | are anywhere near 0.5 ms. Even assuming an infinite precision of a system 201 | timer, the distribution of individual times is not known to be 202 | [normal](https://en.wikipedia.org/wiki/Normal_distribution). 203 | 204 | Obligatory disclaimer: statistics is a tricky matter, there is no 205 | one-size-fits-all approach. 206 | In the absence of a good theory 207 | simplistic approaches are as (un)sound as obscure ones. 208 | Those who seek statistical soundness should rather collect raw data 209 | and process it themselves using a proper statistical toolbox. 210 | Data reported by `tasty-bench` 211 | is only of indicative and comparative significance. 212 | 213 | ## Memory usage 214 | 215 | Configuring RTS to collect GC statistics 216 | (e. 
g., via `cabal bench --benchmark-options '+RTS -T'` 217 | or `stack bench --ba '+RTS -T'`) enables `tasty-bench` to estimate and report 218 | memory usage: 219 | 220 | ``` 221 | All 222 | Fibonacci numbers 223 | fifth: OK 224 | 63 ns ± 3.4 ns, 223 B allocated, 0 B copied, 2.0 MB peak memory 225 | tenth: OK 226 | 809 ns ± 73 ns, 2.3 KB allocated, 0 B copied, 4.0 MB peak memory 227 | twentieth: OK 228 | 104 μs ± 4.9 μs, 277 KB allocated, 59 B copied, 5.0 MB peak memory 229 | 230 | All 3 tests passed (7.25s) 231 | ``` 232 | 233 | This data is reported as per [`GHC.Stats.RTSStats`](https://hackage.haskell.org/package/base/docs/GHC-Stats.html#t:RTSStats) fields: 234 | 235 | * `allocated_bytes` 236 | 237 | Total size of data ever allocated since the start 238 | of the benchmark iteration. Even if data was immediately 239 | garbage collected and freed, it still counts. 240 | 241 | * `copied_bytes` 242 | 243 | Total size of data ever copied by GC (because it was alive and kicking) 244 | since the start of the benchmark iteration. Note that zero bytes often mean 245 | that the benchmark was too short to trigger GC at all. 246 | 247 | * `max_mem_in_use_bytes` 248 | 249 | Peak size of live data since the very start of the process. 250 | This is a global metric, it cumulatively grows and does not say much 251 | about individual benchmarks, but rather characterizes heap 252 | environment in which they are executed. 253 | 254 | ## Combining tests and benchmarks 255 | 256 | When optimizing an existing function, it is important to check that its 257 | observable behavior remains unchanged. One can rebuild 258 | both tests and benchmarks after each change, but it would be more convenient 259 | to run sanity checks within benchmark itself. Since our benchmarks 260 | are compatible with `tasty` tests, we can easily do so. 
261 | 262 | Imagine you come up with a faster function `myFibo` to generate Fibonacci numbers: 263 | 264 | ```haskell 265 | import Test.Tasty.Bench 266 | import Test.Tasty.QuickCheck -- from tasty-quickcheck package 267 | 268 | fibo :: Int -> Integer 269 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 270 | 271 | myFibo :: Int -> Integer 272 | myFibo n = if n < 3 then toInteger n else myFibo (n - 1) + myFibo (n - 2) 273 | 274 | main :: IO () 275 | main = Test.Tasty.Bench.defaultMain -- not Test.Tasty.defaultMain 276 | [ bench "fibo 20" $ nf fibo 20 277 | , bench "myFibo 20" $ nf myFibo 20 278 | , testProperty "myFibo = fibo" $ \n -> fibo n === myFibo n 279 | ] 280 | ``` 281 | 282 | This outputs: 283 | 284 | ``` 285 | All 286 | fibo 20: OK 287 | 104 μs ± 4.9 μs 288 | myFibo 20: OK 289 | 71 μs ± 5.3 μs 290 | myFibo = fibo: FAIL 291 | *** Failed! Falsified (after 5 tests and 1 shrink): 292 | 2 293 | 1 /= 2 294 | Use --quickcheck-replay=927711 to reproduce. 295 | 296 | 1 out of 3 tests failed (5.03s) 297 | ``` 298 | 299 | We see that `myFibo` is indeed significantly faster than `fibo`, 300 | but unfortunately does not do the same thing. One should probably 301 | look for another way to speed up Fibonacci numbers. 302 | 303 | ## Troubleshooting 304 | 305 | * If benchmarks take too long, set `--timeout` to limit execution time 306 | of individual benchmarks, and `tasty-bench` will do its best to fit 307 | into a given time frame. Without `--timeout` we rerun benchmarks until 308 | achieving a target precision set by `--stdev`, which in a noisy environment 309 | of a modern laptop with hundreds of service processes may take a lot of time. 310 | 311 | While `criterion` runs each benchmark at least for 5 seconds, 312 | `tasty-bench` is happy to conclude earlier, if it does not compromise 313 | the quality of results. 
In our experiments `tasty-bench` suites 314 | tend to finish earlier, even if some individual benchmarks 315 | take longer than with `criterion`. 316 | 317 | A common source of noisiness is garbage collection. Setting a larger 318 | allocation area (_nursery_) is often a good idea, either via 319 | `cabal bench --benchmark-options '+RTS -A32m'` or `stack bench --ba '+RTS -A32m'`. 320 | Alternatively bake it into 321 | `cabal` file as `ghc-options: "-with-rtsopts=-A32m"`. 322 | 323 | * Never compile benchmarks with `-fstatic-argument-transformation`, because it 324 | breaks a trick we use to force GHC into reevaluation of the same function application 325 | over and over again. 326 | 327 | * If benchmark results look malformed like below, make sure that you are 328 | invoking `Test.Tasty.Bench.defaultMain` and not `Test.Tasty.defaultMain` 329 | (the underlying difference is `consoleBenchReporter` vs. `consoleTestReporter`): 330 | 331 | ``` 332 | All 333 | fibo 20: OK (1.46s) 334 | WithLoHi (Estimate {estMean = Measurement {measTime = 41529118775, measAllocs = 0, measCopied = 0, measMaxMem = 0}, estStdev = 1595055320}) (-Infinity) Infinity 335 | ``` 336 | 337 | * If benchmarks fail with an error message 338 | 339 | ``` 340 | Unhandled resource. Probably a bug in the runner you're using. 341 | ``` 342 | 343 | or 344 | 345 | ``` 346 | Unexpected state of the resource (NotCreated) in getResource. Report as a tasty bug. 347 | ``` 348 | 349 | this is likely caused by `env` or `envWithCleanup` affecting the structure of benchmarks. 350 | You can use `env` to read test data from `IO`, but not to read benchmark names 351 | or affect their hierarchy in another way. This is a fundamental restriction of `tasty` 352 | so that it can list and filter benchmarks without launching missiles. 353 | 354 | Strict pattern-matching on resource is also prohibited. 
For instance, 355 | if it is a tuple, the second argument of `env` should use a lazy pattern match 356 | `\~(a, b) -> ...` 357 | 358 | * If benchmarks fail with `Test dependencies form a loop` 359 | or `Test dependencies have cycles`, this is likely 360 | because of `bcompare`, which compares a benchmark with itself. 361 | Locating a benchmark in a global environment may be tricky, please refer to 362 | [`tasty` documentation](https://github.com/UnkindPartition/tasty#patterns) for details 363 | and consider using `locateBenchmark`. 364 | 365 | * When seeing 366 | 367 | ``` 368 | This benchmark takes more than 100 seconds. Consider setting --timeout, if this is unexpected (or to silence this warning). 369 | ``` 370 | 371 | do follow the advice: abort benchmarks and pass `-t100` or similar. Unless you are 372 | benchmarking a very computationally expensive function, a single benchmark should 373 | stabilize after a couple of seconds. This warning is a sign that your environment 374 | is too noisy, in which case `tasty-bench` will continue trying with exponentially 375 | longer intervals, often unproductively. 376 | 377 | * The following error can be thrown when benchmarks are built with 378 | `ghc-options: -threaded`: 379 | 380 | ``` 381 | Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N. 382 | ``` 383 | 384 | The underlying cause is that `tasty` runs tests concurrently, which is harmful 385 | for reliable performance measurements. Make sure to use `tasty-bench >= 0.3.4` and invoke 386 | `Test.Tasty.Bench.defaultMain` and not `Test.Tasty.defaultMain`. Note that 387 | `localOption (NumThreads 1)` quashes the warning, but does not eliminate the cause. 388 | 389 | * If benchmarks using GHC 9.4.4+ segfault on Windows, check that you 390 | are not using non-moving garbage collector `--nonmoving-gc`. This is likely caused 391 | by [GHC issue #23003](https://gitlab.haskell.org/ghc/ghc/-/issues/23003). 
392 | Previous releases of `tasty-bench` recommended enabling `--nonmoving-gc` 393 | to stabilise benchmarks, but it's discouraged now. 394 | 395 | * If you see 396 | 397 | ``` 398 | : commitBuffer: invalid argument (cannot encode character '\177') 399 | ``` 400 | 401 | or 402 | 403 | ``` 404 | Uncaught exception ghc-internal:GHC.Internal.IO.Exception.IOException: 405 | : commitBuffer: invalid argument (cannot encode character '\956') 406 | ``` 407 | 408 | it means that your locale does not support UTF-8. `tasty-bench` makes an effort 409 | to force locale to UTF-8, but it's not bulletproof. 410 | In such case run `locale -a` to list available locales and set a UTF-8-capable 411 | one (e. g., `export LANG=C.UTF-8`) before starting benchmarks. 412 | 413 | ## Isolating interfering benchmarks 414 | 415 | One difficulty of benchmarking in Haskell is that it is 416 | hard to isolate benchmarks so that they do not interfere. 417 | Changing the order of benchmarks or skipping some of them 418 | has an effect on heap's layout and thus affects garbage collection. 419 | This issue is well attested in 420 | [both](https://github.com/haskell/criterion/issues/166) 421 | [`criterion`](https://github.com/haskell/criterion/issues/60) 422 | and 423 | [`gauge`](https://github.com/vincenthz/hs-gauge/issues/2). 424 | 425 | Usually (but not always) skipping some benchmarks speeds up remaining ones. 426 | That's because once a benchmark allocated heap which for some reason 427 | was not promptly released afterwards (e. g., it forced a top-level thunk 428 | in an underlying library), all further benchmarks are slowed down 429 | by garbage collector processing this additional amount of live data 430 | over and over again. 431 | 432 | There are several mitigation strategies. First of all, giving garbage collector 433 | more breathing space by `+RTS -A32m` (or more) is often good enough. 434 | 435 | Further, avoid using top-level bindings to store large test data. 
Once such thunks 436 | are forced, they remain allocated forever, which affects detrimentally subsequent 437 | unrelated benchmarks. Treat them as external data, supplied via `env`: instead of 438 | 439 | ```haskell 440 | largeData :: String 441 | largeData = replicate 1000000 'a' 442 | 443 | main :: IO () 444 | main = defaultMain 445 | [ bench "large" $ nf length largeData, ... ] 446 | ``` 447 | 448 | use 449 | 450 | ```haskell 451 | import Control.DeepSeq (force) 452 | import Control.Exception (evaluate) 453 | 454 | main :: IO () 455 | main = defaultMain 456 | [ env (evaluate (force (replicate 1000000 'a'))) $ \largeData -> 457 | bench "large" $ nf length largeData, ... ] 458 | ``` 459 | 460 | Finally, as an ultimate measure to reduce interference between benchmarks, 461 | one can run each of them in a separate process. We do not quite recommend 462 | this approach, but if you are desperate, here is how: 463 | 464 | ```sh 465 | cabal run -v0 all:benches -- -l | sed -e 's/[\"]/\\\\\\&/g' | while read -r name; do cabal run -v0 all:benches -- -p '$0 == "'"$name"'"'; done 466 | ``` 467 | 468 | This assumes that there is a single benchmark suite in the project 469 | and that benchmark names do not contain newlines. 470 | 471 | ## Comparison against baseline 472 | 473 | One can compare benchmark results against an earlier run in an automatic way. 474 | 475 | When using this feature, it's especially important to compile benchmarks with 476 | `ghc-options: `[`-fproc-alignment`](https://downloads.haskell.org/ghc/latest/docs/users_guide/debugging.html#ghc-flag--fproc-alignment)`=64`, otherwise results could be skewed by 477 | intermittent changes in cache-line alignment. 
478 | 479 | Firstly, run `tasty-bench` with `--csv FILE` key 480 | to dump results to `FILE` in CSV format 481 | (it could be a good idea to set smaller `--stdev`, if possible): 482 | 483 | ``` 484 | Name,Mean (ps),2*Stdev (ps) 485 | All.Fibonacci numbers.fifth,48453,4060 486 | All.Fibonacci numbers.tenth,637152,46744 487 | All.Fibonacci numbers.twentieth,81369531,3342646 488 | ``` 489 | 490 | Now modify implementation and rerun benchmarks 491 | with `--baseline FILE` key. This produces a report as follows: 492 | 493 | ``` 494 | All 495 | Fibonacci numbers 496 | fifth: OK 497 | 53 ns ± 2.7 ns, 8% more than baseline 498 | tenth: OK 499 | 641 ns ± 59 ns, same as baseline 500 | twentieth: OK 501 | 77 μs ± 6.4 μs, 5% less than baseline 502 | 503 | All 3 tests passed (1.50s) 504 | ``` 505 | 506 | You can also fail benchmarks, which deviate too far from baseline, using 507 | `--fail-if-slower` and `--fail-if-faster` options. For example, setting both of them 508 | to 6 will fail the first benchmark above (because it is more than 6% slower), 509 | but the last one still succeeds (even while it is measurably faster than baseline, 510 | deviation is less than 6%). Consider also using `--hide-successes` to show 511 | only problematic benchmarks, or even 512 | [`tasty-rerun`](http://hackage.haskell.org/package/tasty-rerun) package 513 | to focus on rerunning failing items only. 514 | 515 | If you wish to compare two CSV reports non-interactively, here is a handy `awk` incantation: 516 | 517 | ```sh 518 | awk 'BEGIN{FS=",";OFS=",";print "Name,Old,New,Ratio"}FNR==1{trueNF=NF;next}NF<trueNF{next}FNR==NR{a[$1]=$2;next}{print $1,a[$1],$2,$2/a[$1];gs+=log($2/a[$1]);gc++}END{if(gc>0)print "Geometric mean,,",exp(gs/gc)}' old.csv new.csv 519 | ``` 520 | 521 | A larger shell snippet to compare two `git` commits can be found in `compare_benches.sh`. 522 | 523 | Note that columns in CSV report are different from what `criterion` or `gauge` 524 | would produce.
If names do not contain commas, missing columns can be faked this way: 525 | 526 | ```sh 527 | awk 'BEGIN{FS=",";OFS=",";print "Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB"}NR==1{trueNF=NF;next}NF<trueNF{next}{print $1,$2/1e12,$2/1e12,$2/1e12,$3/2e12,$3/2e12,$3/2e12}' 528 | ``` 529 | 530 | ## Comparison between benchmarks 531 | 532 | You can also compare benchmarks to each other without any external tools, 533 | all in the comfort of your terminal. 534 | 535 | Use `bcompare`, which takes a `tasty` pattern to locate the baseline 536 | benchmark within the suite. Once results are ready, the mean time 537 | of the annotated benchmark is reported together with its ratio 538 | against the mean time of the baseline benchmark. 539 | Let us extend our running example: 540 | 541 | ```haskell 542 | import Test.Tasty.Bench 543 | 544 | fibo :: Int -> Integer 545 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 546 | 547 | main :: IO () 548 | main = defaultMain 549 | [ bgroup "Fibonacci numbers" 550 | [ bcompare "tenth" $ bench "fifth" $ nf fibo 5 551 | , bench "tenth" $ nf fibo 10 552 | , bcompare "tenth" $ bench "twentieth" $ nf fibo 20 553 | ] 554 | ] 555 | ``` 556 | 557 | This produces a report, comparing mean times of `fifth` and `twentieth` to `tenth`: 558 | 559 | ``` 560 | All 561 | Fibonacci numbers 562 | fifth: OK 563 | 121 ns ± 2.6 ns, 0.08x 564 | tenth: OK 565 | 1.6 μs ± 31 ns 566 | twentieth: OK 567 | 203 μs ± 4.1 μs, 128.36x 568 | ``` 569 | 570 | To locate a baseline benchmark in a larger suite use `locateBenchmark`. 571 | 572 | One can leverage comparisons between benchmarks to implement portable performance 573 | tests, expressing properties like "this algorithm must be at least twice faster 574 | than that one" or "this operation should not be more than thrice slower than that". 575 | This can be achieved with `bcompareWithin`, which takes an acceptable interval 576 | of performance as an argument. 577 | 578 | ## Plotting results 579 | 580 | Users can dump results into CSV with `--csv FILE` 581 | and plot them using `gnuplot` or other software. But for convenience 582 | there is also a built-in quick-and-dirty SVG plotting feature, 583 | which can be invoked by passing `--svg FILE`. Here is a sample of its output: 584 | 585 | ![Plotting](https://hackage.haskell.org/package/tasty-bench/src/example.svg) 586 | 587 | ## Build flags 588 | 589 | Build flags are a brittle subject and users do not normally need to touch them.
590 | 591 | * If you find yourself in an environment, where `tasty` is not available and you 592 | have access to boot packages only, you can still use `tasty-bench`! Just copy 593 | `Test/Tasty/Bench.hs` to your project (imagine it like a header-only C library). 594 | It will provide you with functions to build `Benchmarkable` and run them manually 595 | via `measureCpuTime`. This mode of operation can be also configured 596 | by disabling Cabal flag `tasty`. 597 | 598 | ## Command-line options 599 | 600 | Use `--help` to list all command-line options. 601 | 602 | * `-p`, `--pattern` 603 | 604 | This is a standard `tasty` option, which allows filtering benchmarks 605 | by a pattern or `awk` expression. Please refer to 606 | [`tasty` documentation](https://github.com/UnkindPartition/tasty#patterns) 607 | for details. 608 | 609 | * `-t`, `--timeout` 610 | 611 | This is a standard `tasty` option, setting timeout for individual benchmarks 612 | in seconds. Use it when benchmarks tend to take too long: `tasty-bench` will make 613 | an effort to report results (even if of subpar quality) before timeout. Setting 614 | timeout too tight (insufficient for at least three iterations) 615 | will result in a benchmark failure. One can adjust it locally for a group 616 | of benchmarks, e. g., `localOption (mkTimeout 100000000)` for 100 seconds. 617 | 618 | * `--stdev` 619 | 620 | Target relative standard deviation of measurements in percents (5% by default). 621 | Large values correspond to fast and loose benchmarks, and small ones to long and precise. 622 | It can also be adjusted locally for a group of benchmarks, 623 | e. g., `localOption (RelStDev 0.02)`. 624 | If benchmarking takes far too long, consider setting `--timeout`, 625 | which will interrupt benchmarks, potentially before reaching the target deviation. 626 | 627 | * `--csv` 628 | 629 | File to write results in CSV format. 
630 | 631 | * `--baseline` 632 | 633 | File to read baseline results in CSV format (as produced by `--csv`). 634 | 635 | * `--fail-if-slower`, `--fail-if-faster` 636 | 637 | Upper bounds of acceptable slow down / speed up in percents. If a benchmark is unacceptably slower / faster than baseline (see `--baseline`), 638 | it will be reported as failed. Can be used in conjunction with 639 | a standard `tasty` option `--hide-successes` to show only problematic benchmarks. 640 | Both options can be adjusted locally for a group of benchmarks, 641 | e. g., `localOption (FailIfSlower 0.10)`. 642 | 643 | * `--svg` 644 | 645 | File to plot results in SVG format. 646 | 647 | * `--time-mode` 648 | 649 | Whether to measure CPU time (`cpu`, default) or wall-clock time (`wall`). 650 | 651 | * `+RTS -T` 652 | 653 | Estimate and report memory usage. 654 | 655 | ## Custom command-line options 656 | 657 | As usual with `tasty`, it is easy to extend benchmarks with custom command-line options. 658 | Here is an example: 659 | 660 | ```haskell 661 | import Data.Proxy 662 | import Test.Tasty.Bench 663 | import Test.Tasty.Ingredients.Basic 664 | import Test.Tasty.Options 665 | import Test.Tasty.Runners 666 | 667 | newtype RandomSeed = RandomSeed Int 668 | 669 | instance IsOption RandomSeed where 670 | defaultValue = RandomSeed 42 671 | parseValue = fmap RandomSeed . 
safeRead 672 | optionName = pure "seed" 673 | optionHelp = pure "Random seed used in benchmarks" 674 | 675 | main :: IO () 676 | main = do 677 | let customOpts = [Option (Proxy :: Proxy RandomSeed)] 678 | ingredients = includingOptions customOpts : benchIngredients 679 | opts <- parseOptions ingredients benchmarks 680 | let RandomSeed seed = lookupOption opts 681 | defaultMainWithIngredients ingredients benchmarks 682 | 683 | benchmarks :: Benchmark 684 | benchmarks = bgroup "All" [] 685 | ``` 686 | -------------------------------------------------------------------------------- /src/Test/Tasty/Bench.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module: Test.Tasty.Bench 3 | Copyright: (c) 2021 Andrew Lelechenko 4 | License: MIT 5 | 6 | Featherlight benchmark framework (only one file!) for performance 7 | measurement with API mimicking 8 | [@criterion@](http://hackage.haskell.org/package/criterion) and 9 | [@gauge@](http://hackage.haskell.org/package/gauge). A prominent feature is 10 | built-in comparison against previous runs and between benchmarks. 11 | 12 | 13 | === How lightweight is it? 14 | 15 | There is only one source file "Test.Tasty.Bench" and no non-boot 16 | dependencies except [@tasty@](http://hackage.haskell.org/package/tasty). So 17 | if you already depend on @tasty@ for a test suite, there is nothing else 18 | to install. 19 | 20 | Compare this to @criterion@ (10+ modules, 50+ dependencies) and @gauge@ 21 | (40+ modules, depends on @basement@ and @vector@). A build on a clean 22 | machine is up to 16x faster than @criterion@ and up to 4x faster than 23 | @gauge@. A build without dependencies is up to 6x faster than 24 | @criterion@ and up to 8x faster than @gauge@. 25 | 26 | @tasty-bench@ is a native Haskell library and works everywhere, where 27 | GHC does, including WASM. 
We support a full range of architectures 28 | (@i386@, @amd64@, @armhf@, @arm64@, @ppc64le@, @s390x@, @riscv64@) and 29 | operating systems (Linux, Windows, macOS, FreeBSD, OpenBSD, NetBSD), 30 | plus any GHC from 8.0 to 9.14 (and earlier releases stretch back to GHC 31 | 7.0). 32 | 33 | === How is it possible? 34 | 35 | Our benchmarks are literally regular @tasty@ tests, so we can leverage 36 | all existing machinery for command-line options, resource management, 37 | structuring, listing and filtering benchmarks, running them and 38 | reporting results. It also means that @tasty-bench@ can be used in 39 | conjunction with other @tasty@ ingredients. 40 | 41 | Unlike @criterion@ and @gauge@ we use a very simple statistical model 42 | described below. This is arguably a questionable choice, but it works 43 | pretty well in practice. A rare developer is sufficiently well-versed in 44 | probability theory to make sense and use of all numbers generously 45 | generated by @criterion@. 46 | 47 | === How to switch? 48 | 49 | 50 | allow to taste @tasty-bench@ instead of @criterion@ or @gauge@ without 51 | changing a single line of code: 52 | 53 | > cabal-version: 2.0 54 | > 55 | > benchmark foo 56 | > ... 57 | > build-depends: 58 | > tasty-bench 59 | > mixins: 60 | > tasty-bench (Test.Tasty.Bench as Criterion, Test.Tasty.Bench as Criterion.Main, Test.Tasty.Bench as Gauge, Test.Tasty.Bench as Gauge.Main) 61 | 62 | This works vice versa as well: if you use @tasty-bench@, but at some 63 | point need a more comprehensive statistical analysis, it is easy to 64 | switch temporarily back to @criterion@. 65 | 66 | === How to write a benchmark? 
67 | 68 | Benchmarks are declared in 69 | : 70 | 71 | > cabal-version: 2.0 72 | > name: bench-fibo 73 | > version: 0.0 74 | > build-type: Simple 75 | > synopsis: Example of a benchmark 76 | > 77 | > benchmark bench-fibo 78 | > main-is: BenchFibo.hs 79 | > type: exitcode-stdio-1.0 80 | > build-depends: base, tasty-bench 81 | > ghc-options: "-with-rtsopts=-A32m" 82 | > if impl(ghc >= 8.6) 83 | > ghc-options: -fproc-alignment=64 84 | 85 | And here is @BenchFibo.hs@: 86 | 87 | > import Test.Tasty.Bench 88 | > 89 | > fibo :: Int -> Integer 90 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 91 | > 92 | > main :: IO () 93 | > main = defaultMain 94 | > [ bgroup "Fibonacci numbers" 95 | > [ bench "fifth" $ nf fibo 5 96 | > , bench "tenth" $ nf fibo 10 97 | > , bench "twentieth" $ nf fibo 20 98 | > ] 99 | > ] 100 | 101 | Since @tasty-bench@ provides an API compatible with @criterion@, one can 102 | refer to 103 | 104 | for more examples. 105 | 106 | === How to read results? 107 | 108 | Running the example above (@cabal bench@ or @stack bench@) results in 109 | the following output: 110 | 111 | > All 112 | > Fibonacci numbers 113 | > fifth: OK 114 | > 63 ns ± 3.4 ns 115 | > tenth: OK 116 | > 809 ns ± 73 ns 117 | > twentieth: OK 118 | > 104 μs ± 4.9 μs 119 | > 120 | > All 3 tests passed (7.25s) 121 | 122 | The output says that, for instance, in the first benchmark the mean CPU 123 | time is 63 nanoseconds and means of individual samples do not often 124 | diverge from it further than ±3.4 nanoseconds (double standard 125 | deviation). Take standard deviation numbers with a grain of salt; there 126 | are lies, damned lies, and statistics. 127 | 128 | === Wall-clock time vs. CPU time 129 | 130 | What time are we talking about? Both @criterion@ and @gauge@ by default 131 | report wall-clock time, which is affected by any other application which 132 | runs concurrently. 
Ideally benchmarks are executed on a dedicated server 133 | without any other load, but — let’s face the truth — most of developers 134 | run benchmarks on a laptop with a hundred other services and a window 135 | manager, and watch videos while waiting for benchmarks to finish. That’s 136 | the cause of a notorious “variance introduced by outliers: 88% (severely 137 | inflated)” warning. 138 | 139 | To alleviate this issue @tasty-bench@ measures CPU time by 'getCPUTime' 140 | instead of wall-clock time by default. It does not provide a perfect 141 | isolation from other processes (e. g., if CPU cache is spoiled by others, 142 | populating data back from RAM is your burden), but is a bit more stable. 143 | 144 | Caveat: this means that for multithreaded algorithms @tasty-bench@ 145 | reports total elapsed CPU time across all cores, while @criterion@ and 146 | @gauge@ print maximum of core’s wall-clock time. It also means that by 147 | default @tasty-bench@ does not measure time spent out of process, e. g., 148 | calls to other executables. To work around this limitation use 149 | @--time-mode@ command-line option or set it locally via 'TimeMode' 150 | option. 151 | 152 | === Statistical model 153 | 154 | Here is a procedure used by @tasty-bench@ to measure execution time: 155 | 156 | 1. Set \(n \leftarrow 1\). 157 | 2. Measure execution time \(t_n\) of \(n\) iterations and execution 158 | time \(t_{2n}\) of \(2n\) iterations. 159 | 3. Find \(t\) which minimizes deviation of \((nt,2nt)\) from 160 | \((t_n,t_{2n})\), namely \(t \leftarrow (t_n + 2t_{2n}) / 5n\). 161 | 4. If deviation is small enough (see @--stdev@ below) or time is 162 | running out soon (see @--timeout@ below), return \(t\) as a mean 163 | execution time. 164 | 5. Otherwise set \(n \leftarrow 2n\) and jump back to Step 2. 165 | 166 | This is roughly similar to the linear regression approach which 167 | @criterion@ takes, but we fit only two last points. 
This allows us to 168 | simplify away all heavy-weight statistical analysis. More importantly, 169 | earlier measurements, which are presumably shorter and noisier, do not 170 | affect overall result. This is in contrast to @criterion@, which fits 171 | all measurements and is biased to use more data points corresponding to 172 | shorter runs (it employs \(n \leftarrow 1.05n\) progression). 173 | 174 | Mean time and its deviation does not say much about the distribution of 175 | individual timings. E. g., imagine a computation which (according to a 176 | coarse system timer) takes either 0 ms or 1 ms with equal probability. 177 | While one would be able to establish that its mean time is 0.5 ms with a 178 | very small deviation, this does not imply that individual measurements 179 | are anywhere near 0.5 ms. Even assuming an infinite precision of a 180 | system timer, the distribution of individual times is not known to be 181 | <https://en.wikipedia.org/wiki/Normal_distribution normal>. 182 | 183 | Obligatory disclaimer: statistics is a tricky matter, there is no 184 | one-size-fits-all approach. In the absence of a good theory simplistic 185 | approaches are as (un)sound as obscure ones. Those who seek statistical 186 | soundness should rather collect raw data and process it themselves using 187 | a proper statistical toolbox. Data reported by @tasty-bench@ is only of 188 | indicative and comparative significance. 189 | 190 | === Memory usage 191 | 192 | Configuring RTS to collect GC statistics (e.
g., via 193 | @cabal bench --benchmark-options \'+RTS -T\'@ or 194 | @stack bench --ba \'+RTS -T\'@) enables @tasty-bench@ to estimate and 195 | report memory usage: 196 | 197 | > All 198 | > Fibonacci numbers 199 | > fifth: OK 200 | > 63 ns ± 3.4 ns, 223 B allocated, 0 B copied, 2.0 MB peak memory 201 | > tenth: OK 202 | > 809 ns ± 73 ns, 2.3 KB allocated, 0 B copied, 4.0 MB peak memory 203 | > twentieth: OK 204 | > 104 μs ± 4.9 μs, 277 KB allocated, 59 B copied, 5.0 MB peak memory 205 | > 206 | > All 3 tests passed (7.25s) 207 | 208 | This data is reported as per 209 | 210 | fields: 211 | 212 | - 'allocated_bytes' 213 | 214 | Total size of data ever allocated since the start of the benchmark 215 | iteration. Even if data was immediately garbage collected and freed, 216 | it still counts. 217 | 218 | - 'copied_bytes' 219 | 220 | Total size of data ever copied by GC (because it was alive and 221 | kicking) since the start of the benchmark iteration. Note that zero 222 | bytes often mean that the benchmark was too short to trigger GC at 223 | all. 224 | 225 | - 'max_mem_in_use_bytes' 226 | 227 | Peak size of live data since the very start of the process. This is 228 | a global metric, it cumulatively grows and does not say much about 229 | individual benchmarks, but rather characterizes heap environment in 230 | which they are executed. 231 | 232 | === Combining tests and benchmarks 233 | 234 | When optimizing an existing function, it is important to check that its 235 | observable behavior remains unchanged. One can rebuild both tests and 236 | benchmarks after each change, but it would be more convenient to run 237 | sanity checks within benchmark itself. Since our benchmarks are 238 | compatible with @tasty@ tests, we can easily do so. 
239 | 240 | Imagine you come up with a faster function @myFibo@ to generate 241 | Fibonacci numbers: 242 | 243 | > import Test.Tasty.Bench 244 | > import Test.Tasty.QuickCheck -- from tasty-quickcheck package 245 | > 246 | > fibo :: Int -> Integer 247 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 248 | > 249 | > myFibo :: Int -> Integer 250 | > myFibo n = if n < 3 then toInteger n else myFibo (n - 1) + myFibo (n - 2) 251 | > 252 | > main :: IO () 253 | > main = Test.Tasty.Bench.defaultMain -- not Test.Tasty.defaultMain 254 | > [ bench "fibo 20" $ nf fibo 20 255 | > , bench "myFibo 20" $ nf myFibo 20 256 | > , testProperty "myFibo = fibo" $ \n -> fibo n === myFibo n 257 | > ] 258 | 259 | This outputs: 260 | 261 | > All 262 | > fibo 20: OK 263 | > 104 μs ± 4.9 μs 264 | > myFibo 20: OK 265 | > 71 μs ± 5.3 μs 266 | > myFibo = fibo: FAIL 267 | > *** Failed! Falsified (after 5 tests and 1 shrink): 268 | > 2 269 | > 1 /= 2 270 | > Use --quickcheck-replay=927711 to reproduce. 271 | > 272 | > 1 out of 3 tests failed (5.03s) 273 | 274 | We see that @myFibo@ is indeed significantly faster than @fibo@, but 275 | unfortunately does not do the same thing. One should probably look for 276 | another way to speed up Fibonacci numbers. 277 | 278 | === Troubleshooting 279 | 280 | - If benchmarks take too long, set @--timeout@ to limit execution time 281 | of individual benchmarks, and @tasty-bench@ will do its best to fit 282 | into a given time frame. Without @--timeout@ we rerun benchmarks 283 | until achieving a target precision set by @--stdev@, which in a 284 | noisy environment of a modern laptop with hundreds of service 285 | processes may take a lot of time. 286 | 287 | While @criterion@ runs each benchmark at least for 5 seconds, 288 | @tasty-bench@ is happy to conclude earlier, if it does not 289 | compromise the quality of results. 
In our experiments @tasty-bench@ 290 | suites tend to finish earlier, even if some individual benchmarks 291 | take longer than with @criterion@. 292 | 293 | A common source of noisiness is garbage collection. Setting a larger 294 | allocation area (/nursery/) is often a good idea, either via 295 | @cabal bench --benchmark-options \'+RTS -A32m\'@ or 296 | @stack bench --ba \'+RTS -A32m\'@. Alternatively bake it into 297 | @cabal@ file as @ghc-options: \"-with-rtsopts=-A32m\"@. 298 | 299 | - Never compile benchmarks with @-fstatic-argument-transformation@, 300 | because it breaks a trick we use to force GHC into reevaluation of 301 | the same function application over and over again. 302 | 303 | - If benchmark results look malformed like below, make sure that you 304 | are invoking @Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain' and not 305 | @Test.Tasty.@'Test.Tasty.defaultMain' (the underlying difference is 306 | 'consoleBenchReporter' vs. 'consoleTestReporter'): 307 | 308 | > All 309 | > fibo 20: OK (1.46s) 310 | > WithLoHi (Estimate {estMean = Measurement {measTime = 41529118775, measAllocs = 0, measCopied = 0, measMaxMem = 0}, estStdev = 1595055320}) (-Infinity) Infinity 311 | 312 | - If benchmarks fail with an error message 313 | 314 | > Unhandled resource. Probably a bug in the runner you're using. 315 | 316 | or 317 | 318 | > Unexpected state of the resource (NotCreated) in getResource. Report as a tasty bug. 319 | 320 | this is likely caused by 'env' or 'envWithCleanup' affecting the 321 | structure of benchmarks. You can use 'env' to read test data from 322 | 'IO', but not to read benchmark names or affect their hierarchy in 323 | another way. This is a fundamental restriction of @tasty@ so that it 324 | can list and filter benchmarks without launching missiles. 325 | 326 | Strict pattern-matching on resource is also prohibited. 
For 327 | instance, if it is a tuple, the second argument of 'env' should use 328 | a lazy pattern match @\\~(a, b) -> ...@ 329 | 330 | - If benchmarks fail with @Test dependencies form a loop@ or 331 | @Test dependencies have cycles@, this is likely because of 332 | 'bcompare', which compares a benchmark with itself. Locating a 333 | benchmark in a global environment may be tricky, please refer to 334 | [@tasty@ documentation](https://github.com/UnkindPartition/tasty#patterns) 335 | for details and consider using 'locateBenchmark'. 336 | 337 | - When seeing 338 | 339 | > This benchmark takes more than 100 seconds. Consider setting --timeout, if this is unexpected (or to silence this warning). 340 | 341 | do follow the advice: abort benchmarks and pass @-t100@ or similar. 342 | Unless you are benchmarking a very computationally expensive 343 | function, a single benchmark should stabilize after a couple of 344 | seconds. This warning is a sign that your environment is too noisy, 345 | in which case @tasty-bench@ will continue trying with exponentially 346 | longer intervals, often unproductively. 347 | 348 | - The following error can be thrown when benchmarks are built with 349 | @ghc-options: -threaded@: 350 | 351 | > Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N. 352 | 353 | The underlying cause is that @tasty@ runs tests concurrently, which 354 | is harmful for reliable performance measurements. Make sure to use 355 | @tasty-bench >= 0.3.4@ and invoke @Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain' and 356 | not @Test.Tasty.@'Test.Tasty.defaultMain'. Note that 'localOption' ('NumThreads' 1) 357 | quashes the warning, but does not eliminate the cause. 358 | 359 | - If benchmarks using GHC 9.4.4+ segfault on Windows, check that you 360 | are not using non-moving garbage collector @--nonmoving-gc@. This is 361 | likely caused by 362 | . 
363 | Previous releases of @tasty-bench@ recommended enabling 364 | @--nonmoving-gc@ to stabilise benchmarks, but it’s discouraged now. 365 | 366 | - If you see 367 | 368 | > : commitBuffer: invalid argument (cannot encode character '\177') 369 | 370 | or 371 | 372 | > Uncaught exception ghc-internal:GHC.Internal.IO.Exception.IOException: 373 | > : commitBuffer: invalid argument (cannot encode character '\956') 374 | 375 | it means that your locale does not support UTF-8. @tasty-bench@ 376 | makes an effort to force locale to UTF-8, but it’s not bulletproof. 377 | In such case run @locale -a@ to list available locales and set a 378 | UTF-8-capable one (e. g., @export LANG=C.UTF-8@) before starting 379 | benchmarks. 380 | 381 | === Isolating interfering benchmarks 382 | 383 | One difficulty of benchmarking in Haskell is that it is hard to isolate 384 | benchmarks so that they do not interfere. Changing the order of 385 | benchmarks or skipping some of them has an effect on heap’s layout and 386 | thus affects garbage collection. This issue is well attested in 387 | 388 | [@criterion@](https://github.com/haskell/criterion/issues/60) and 389 | [@gauge@](https://github.com/vincenthz/hs-gauge/issues/2). 390 | 391 | Usually (but not always) skipping some benchmarks speeds up remaining 392 | ones. That’s because once a benchmark allocated heap which for some 393 | reason was not promptly released afterwards (e. g., it forced a top-level 394 | thunk in an underlying library), all further benchmarks are slowed down 395 | by garbage collector processing this additional amount of live data over 396 | and over again. 397 | 398 | There are several mitigation strategies. First of all, giving garbage 399 | collector more breathing space by @+RTS -A32m@ (or more) is often good 400 | enough. 401 | 402 | Further, avoid using top-level bindings to store large test data. 
Once 403 | such thunks are forced, they remain allocated forever, which affects 404 | detrimentally subsequent unrelated benchmarks. Treat them as external 405 | data, supplied via 'env': instead of 406 | 407 | > largeData :: String 408 | > largeData = replicate 1000000 'a' 409 | > 410 | > main :: IO () 411 | > main = defaultMain 412 | > [ bench "large" $ nf length largeData, ... ] 413 | 414 | use 415 | 416 | > import Control.DeepSeq (force) 417 | > import Control.Exception (evaluate) 418 | > 419 | > main :: IO () 420 | > main = defaultMain 421 | > [ env (evaluate (force (replicate 1000000 'a'))) $ \largeData -> 422 | > bench "large" $ nf length largeData, ... ] 423 | 424 | Finally, as an ultimate measure to reduce interference between 425 | benchmarks, one can run each of them in a separate process. We do not 426 | quite recommend this approach, but if you are desperate, here is how: 427 | 428 | > cabal run -v0 all:benches -- -l | sed -e 's/[\"]/\\\\\\&/g' | while read -r name; do cabal run -v0 all:benches -- -p '$0 == "'"$name"'"'; done 429 | 430 | This assumes that there is a single benchmark suite in the project and 431 | that benchmark names do not contain newlines. 432 | 433 | === Comparison against baseline 434 | 435 | One can compare benchmark results against an earlier run in an automatic 436 | way. 437 | 438 | When using this feature, it’s especially important to compile benchmarks 439 | with 440 | @ghc-options:@ [@-fproc-alignment@](https://downloads.haskell.org/ghc/latest/docs/users_guide/debugging.html#ghc-flag--fproc-alignment)@=64@, 441 | otherwise results could be skewed by intermittent changes in cache-line 442 | alignment. 
443 | 444 | Firstly, run @tasty-bench@ with @--csv FILE@ key to dump results to 445 | @FILE@ in CSV format (it could be a good idea to set smaller @--stdev@, 446 | if possible): 447 | 448 | > Name,Mean (ps),2*Stdev (ps) 449 | > All.Fibonacci numbers.fifth,48453,4060 450 | > All.Fibonacci numbers.tenth,637152,46744 451 | > All.Fibonacci numbers.twentieth,81369531,3342646 452 | 453 | Now modify implementation and rerun benchmarks with @--baseline FILE@ 454 | key. This produces a report as follows: 455 | 456 | > All 457 | > Fibonacci numbers 458 | > fifth: OK 459 | > 53 ns ± 2.7 ns, 8% more than baseline 460 | > tenth: OK 461 | > 641 ns ± 59 ns, same as baseline 462 | > twentieth: OK 463 | > 77 μs ± 6.4 μs, 5% less than baseline 464 | > 465 | > All 3 tests passed (1.50s) 466 | 467 | You can also fail benchmarks, which deviate too far from baseline, using 468 | @--fail-if-slower@ and @--fail-if-faster@ options. For example, setting 469 | both of them to 6 will fail the first benchmark above (because it is 470 | more than 6% slower), but the last one still succeeds (even while it is 471 | measurably faster than baseline, deviation is less than 6%). Consider 472 | also using @--hide-successes@ to show only problematic benchmarks, or 473 | even [@tasty-rerun@](http://hackage.haskell.org/package/tasty-rerun) 474 | package to focus on rerunning failing items only. 475 | 476 | If you wish to compare two CSV reports non-interactively, here is a 477 | handy @awk@ incantation: 478 | 479 | > awk 'BEGIN{FS=",";OFS=",";print "Name,Old,New,Ratio"}FNR==1{trueNF=NF;next}NF0)print "Geometric mean,,",exp(gs/gc)}' old.csv new.csv 480 | 481 | A larger shell snippet to compare two @git@ commits can be found in 482 | @compare_benches.sh@. 483 | 484 | Note that columns in CSV report are different from what @criterion@ or 485 | @gauge@ would produce. 
If names do not contain commas, missing columns 486 | can be faked this way: 487 | 488 | > awk 'BEGIN{FS=",";OFS=",";print "Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB"}NR==1{trueNF=NF;next}NF awk 'BEGIN{FS=",";OFS=",";print "name,iters,time,cycles,cpuTime,utime,stime,maxrss,minflt,majflt,nvcsw,nivcsw,allocated,numGcs,bytesCopied,mutatorWallSeconds,mutatorCpuSeconds,gcWallSeconds,gcCpuSeconds"}NR==1{trueNF=NF;next}NF import Test.Tasty.Bench 500 | > 501 | > fibo :: Int -> Integer 502 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 503 | > 504 | > main :: IO () 505 | > main = defaultMain 506 | > [ bgroup "Fibonacci numbers" 507 | > [ bcompare "tenth" $ bench "fifth" $ nf fibo 5 508 | > , bench "tenth" $ nf fibo 10 509 | > , bcompare "tenth" $ bench "twentieth" $ nf fibo 20 510 | > ] 511 | > ] 512 | 513 | This produces a report, comparing mean times of @fifth@ and @twentieth@ 514 | to @tenth@: 515 | 516 | > All 517 | > Fibonacci numbers 518 | > fifth: OK 519 | > 121 ns ± 2.6 ns, 0.08x 520 | > tenth: OK 521 | > 1.6 μs ± 31 ns 522 | > twentieth: OK 523 | > 203 μs ± 4.1 μs, 128.36x 524 | 525 | To locate a baseline benchmark in a larger suite use 'locateBenchmark'. 526 | 527 | One can leverage comparisons between benchmarks to implement portable 528 | performance tests, expressing properties like “this algorithm must be at 529 | least twice faster than that one” or “this operation should not be more 530 | than thrice slower than that”. This can be achieved with 531 | 'bcompareWithin', which takes an acceptable interval of performance as 532 | an argument. 533 | 534 | === Plotting results 535 | 536 | Users can dump results into CSV with @--csv FILE@ and plot them using 537 | @gnuplot@ or other software. But for convenience there is also a 538 | built-in quick-and-dirty SVG plotting feature, which can be invoked by 539 | passing @--svg FILE@. 
Here is a sample of its output: 540 | 541 | ![Plotting](example.svg) 542 | 543 | 544 | === Build flags 545 | 546 | Build flags are a brittle subject and users do not normally need to 547 | touch them. 548 | 549 | - If you find yourself in an environment, where @tasty@ is not 550 | available and you have access to boot packages only, you can still 551 | use @tasty-bench@! Just copy @Test\/Tasty\/Bench.hs@ to your project 552 | (imagine it like a header-only C library). It will provide you with 553 | functions to build 'Benchmarkable' and run them manually via 554 | 'measureCpuTime'. This mode of operation can be also configured by 555 | disabling Cabal flag @tasty@. 556 | 557 | === Command-line options 558 | 559 | Use @--help@ to list all command-line options. 560 | 561 | [@-p@, @--pattern@]: 562 | 563 | This is a standard @tasty@ option, which allows filtering benchmarks 564 | by a pattern or @awk@ expression. Please refer to 565 | [@tasty@ documentation](https://github.com/UnkindPartition/tasty#patterns) 566 | for details. 567 | 568 | [@-t@, @--timeout@]: 569 | 570 | This is a standard @tasty@ option, setting timeout for individual 571 | benchmarks in seconds. Use it when benchmarks tend to take too long: 572 | @tasty-bench@ will make an effort to report results (even if of 573 | subpar quality) before timeout. Setting timeout too tight 574 | (insufficient for at least three iterations) will result in a 575 | benchmark failure. One can adjust it locally for a group of 576 | benchmarks, e. g., 'localOption' ('mkTimeout' 100000000) for 100 577 | seconds. 578 | 579 | [@--stdev@]: 580 | 581 | Target relative standard deviation of measurements in percents (5% 582 | by default). Large values correspond to fast and loose benchmarks, 583 | and small ones to long and precise. It can also be adjusted locally 584 | for a group of benchmarks, e. g., 'localOption' ('RelStDev' 0.02). 
If 585 | benchmarking takes far too long, consider setting @--timeout@, which 586 | will interrupt benchmarks, potentially before reaching the target 587 | deviation. 588 | 589 | [@--csv@]: 590 | 591 | File to write results in CSV format. 592 | 593 | [@--baseline@]: 594 | 595 | File to read baseline results in CSV format (as produced by 596 | @--csv@). 597 | 598 | [@--fail-if-slower@, @--fail-if-faster@]: 599 | 600 | Upper bounds of acceptable slow down \/ speed up in percents. If a 601 | benchmark is unacceptably slower \/ faster than baseline (see 602 | @--baseline@), it will be reported as failed. Can be used in 603 | conjunction with a standard @tasty@ option @--hide-successes@ to 604 | show only problematic benchmarks. Both options can be adjusted 605 | locally for a group of benchmarks, e. g., 606 | 'localOption' ('FailIfSlower' 0.10). 607 | 608 | [@--svg@]: 609 | 610 | File to plot results in SVG format. 611 | 612 | [@--time-mode@]: 613 | 614 | Whether to measure CPU time (@cpu@, default) or wall-clock time 615 | (@wall@). 616 | 617 | [@+RTS -T@]: 618 | 619 | Estimate and report memory usage. 620 | 621 | === Custom command-line options 622 | 623 | As usual with @tasty@, it is easy to extend benchmarks with custom 624 | command-line options. Here is an example: 625 | 626 | > import Data.Proxy 627 | > import Test.Tasty.Bench 628 | > import Test.Tasty.Ingredients.Basic 629 | > import Test.Tasty.Options 630 | > import Test.Tasty.Runners 631 | > 632 | > newtype RandomSeed = RandomSeed Int 633 | > 634 | > instance IsOption RandomSeed where 635 | > defaultValue = RandomSeed 42 636 | > parseValue = fmap RandomSeed . 
safeRead 637 | > optionName = pure "seed" 638 | > optionHelp = pure "Random seed used in benchmarks" 639 | > 640 | > main :: IO () 641 | > main = do 642 | > let customOpts = [Option (Proxy :: Proxy RandomSeed)] 643 | > ingredients = includingOptions customOpts : benchIngredients 644 | > opts <- parseOptions ingredients benchmarks 645 | > let RandomSeed seed = lookupOption opts 646 | > defaultMainWithIngredients ingredients benchmarks 647 | > 648 | > benchmarks :: Benchmark 649 | > benchmarks = bgroup "All" [] 650 | 651 | -} 652 | {-# LANGUAGE BangPatterns #-} 653 | {-# LANGUAGE CPP #-} 654 | {-# LANGUAGE DeriveFunctor #-} 655 | {-# LANGUAGE FlexibleContexts #-} 656 | {-# LANGUAGE FlexibleInstances #-} 657 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 658 | {-# LANGUAGE ScopedTypeVariables #-} 659 | {-# LANGUAGE TupleSections #-} 660 | 661 | {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} 662 | {-# HLINT ignore "Unused LANGUAGE pragma" #-} 663 | 664 | module Test.Tasty.Bench 665 | ( 666 | #ifdef MIN_VERSION_tasty 667 | -- * Running 'Benchmark' 668 | defaultMain 669 | , Benchmark 670 | , bench 671 | , bgroup 672 | , bcompare 673 | , bcompareWithin 674 | , env 675 | , envWithCleanup 676 | , 677 | #endif 678 | -- * Creating t'Benchmarkable' 679 | Benchmarkable(..) 680 | , nf 681 | , whnf 682 | , nfIO 683 | , whnfIO 684 | , nfAppIO 685 | , whnfAppIO 686 | , measureCpuTime 687 | , measureCpuTimeAndStDev 688 | #ifdef MIN_VERSION_tasty 689 | -- * Ingredients 690 | , benchIngredients 691 | , consoleBenchReporter 692 | , csvReporter 693 | , svgReporter 694 | , RelStDev(..) 695 | , FailIfSlower(..) 696 | , FailIfFaster(..) 697 | , CsvPath(..) 698 | , BaselinePath(..) 699 | , SvgPath(..) 700 | , TimeMode(..) 701 | -- * Utilities 702 | , locateBenchmark 703 | , mapLeafBenchmarks 704 | #else 705 | , Timeout(..) 706 | , RelStDev(..) 
707 | #endif 708 | ) where 709 | 710 | import Prelude hiding (Int, Integer) 711 | import qualified Prelude 712 | import Control.Applicative 713 | import Control.Arrow (first, second) 714 | import Control.DeepSeq (NFData, force, rnf) 715 | import Control.Exception (bracket, bracket_, evaluate) 716 | import Control.Monad (void, unless, guard, (>=>), when) 717 | import Data.Foldable (foldMap, traverse_) 718 | import Data.Int (Int64) 719 | import Data.IORef 720 | import Data.List (intercalate, stripPrefix, isPrefixOf, genericLength, genericDrop, foldl1') 721 | import Data.Maybe (fromMaybe) 722 | import Data.Monoid (All(..), Any(..)) 723 | import Data.Proxy 724 | import Data.Traversable (forM) 725 | import Data.Word (Word64) 726 | import GHC.Conc 727 | import GHC.IO.Encoding 728 | import GHC.Stats 729 | #if MIN_VERSION_base(4,15,0) 730 | import GHC.Exts (SPEC(..)) 731 | #else 732 | import GHC.Types (SPEC(..)) 733 | #endif 734 | import System.CPUTime 735 | import System.Exit 736 | import System.IO 737 | import System.IO.Unsafe 738 | import System.Mem 739 | import Text.Printf 740 | 741 | #ifdef MIN_VERSION_tasty 742 | import Data.Semigroup (Semigroup(..)) 743 | import qualified Data.IntMap.Strict as IM 744 | import Data.IntMap (IntMap) 745 | import Data.Sequence (Seq, (<|)) 746 | import qualified Data.Sequence as Seq 747 | import qualified Data.Set as S 748 | import Test.Tasty hiding (defaultMain) 749 | import qualified Test.Tasty 750 | import Test.Tasty.Ingredients 751 | import Test.Tasty.Ingredients.ConsoleReporter 752 | import Test.Tasty.Options 753 | import Test.Tasty.Patterns.Eval (eval, asB, withFields) 754 | import Test.Tasty.Patterns.Types (Expr (And, Field, IntLit, NF, StringLit, Sub)) 755 | import qualified Test.Tasty.Patterns.Types as Patterns 756 | import Test.Tasty.Providers 757 | import Test.Tasty.Runners 758 | #endif 759 | 760 | #if MIN_VERSION_base(4,11,0) 761 | import GHC.Clock (getMonotonicTime) 762 | #else 763 | import Data.Time.Clock.POSIX 
(getPOSIXTime) 764 | #endif 765 | 766 | #if defined(mingw32_HOST_OS) 767 | import Data.Word (Word32) 768 | #endif 769 | 770 | #ifndef MIN_VERSION_tasty 771 | data Timeout 772 | = Timeout 773 | Prelude.Integer -- ^ number of microseconds (e. g., 200000) 774 | String -- ^ textual representation (e. g., @"0.2s"@) 775 | | NoTimeout 776 | deriving (Show) 777 | 778 | type Progress = () 779 | #endif 780 | 781 | 782 | -- | In addition to @--stdev@ command-line option, 783 | -- one can adjust target relative standard deviation 784 | -- for individual benchmarks and groups of benchmarks 785 | -- using 'adjustOption' and 'localOption'. 786 | -- 787 | -- E. g., set target relative standard deviation to 2% as follows: 788 | -- 789 | -- > import Test.Tasty (localOption) 790 | -- > localOption (RelStDev 0.02) (bgroup [...]) 791 | -- 792 | -- If you set t'RelStDev' to infinity, 793 | -- a benchmark will be executed 794 | -- only once and its standard deviation will be recorded as zero. 795 | -- This is rather a blunt approach, but it might be a necessary evil 796 | -- for extremely long benchmarks. If you wish to run all benchmarks 797 | -- only once, use command-line option @--stdev@ @Infinity@. 798 | -- 799 | -- @since 0.2 800 | newtype RelStDev = RelStDev Double 801 | deriving 802 | ( Eq 803 | -- ^ @since 0.4 804 | , Ord 805 | -- ^ @since 0.4 806 | , Show 807 | , Read 808 | , Num 809 | -- ^ @since 0.4 810 | , Fractional 811 | -- ^ @since 0.4 812 | ) 813 | 814 | -- | Whether to measure CPU time or wall-clock time. 815 | -- Normally 'CpuTime' is a better option (and default), 816 | -- but consider switching to 'WallTime' 817 | -- to measure multithreaded algorithms or time spent in external processes. 
818 | -- 819 | -- One can switch the default measurement mode globally 820 | -- using @--time-mode@ command-line option, 821 | -- but it is usually better to adjust the mode locally: 822 | -- 823 | -- > import Test.Tasty (localOption) 824 | -- > localOption WallTime (bgroup [...]) 825 | -- 826 | -- You can measure both times and report their ratio with the following gadget: 827 | -- 828 | -- @ 829 | -- bgroup \"Foo\" 830 | -- [ localOption WallTime $ bench \"WallTime\" foo 831 | -- , bcompare \"Foo.WallTime\" 832 | -- $ localOption CpuTime $ bench \"CPUTime\" foo 833 | -- ] 834 | -- @ 835 | -- 836 | -- @since 0.3.2 837 | data TimeMode = CpuTime 838 | -- ^ Measure CPU time. 839 | | WallTime 840 | -- ^ Measure wall-clock time. 841 | | MutatorCpuTime 842 | -- ^ Measure CPU time 843 | -- excluding garbage collection, known as "mutator time". 844 | -- 845 | -- @since 0.5 846 | | MutatorWallTime 847 | -- ^ Measure wall-clock time 848 | -- excluding garbage collection, known as "mutator time". 849 | -- 850 | -- @since 0.5 851 | | CustomTime (IO Word64) 852 | -- ^ Custom measurement action, returning time in picoseconds. 853 | 854 | #ifdef MIN_VERSION_tasty 855 | instance IsOption RelStDev where 856 | defaultValue = RelStDev 0.05 857 | parseValue = fmap RelStDev . parsePositivePercents 858 | optionName = pure "stdev" 859 | optionHelp = pure "Target relative standard deviation of measurements in percents (5 by default). Large values correspond to fast and loose benchmarks, and small ones to long and precise. If it takes far too long, consider setting --timeout, which will interrupt benchmarks, potentially before reaching the target deviation." 860 | 861 | -- | In addition to @--fail-if-slower@ command-line option, 862 | -- one can adjust an upper bound of acceptable slow down 863 | -- in comparison to baseline for 864 | -- individual benchmarks and groups of benchmarks 865 | -- using 'adjustOption' and 'localOption'. 866 | -- 867 | -- E. 
g., set upper bound of acceptable slow down to 10% as follows: 868 | -- 869 | -- > import Test.Tasty (localOption) 870 | -- > localOption (FailIfSlower 0.10) (bgroup [...]) 871 | -- 872 | -- @since 0.2 873 | newtype FailIfSlower = FailIfSlower Double 874 | deriving 875 | ( Eq 876 | -- ^ @since 0.4 877 | , Ord 878 | -- ^ @since 0.4 879 | , Show 880 | , Read 881 | , Num 882 | -- ^ @since 0.4 883 | , Fractional 884 | -- ^ @since 0.4 885 | ) 886 | 887 | instance IsOption FailIfSlower where 888 | defaultValue = FailIfSlower (1.0 / 0.0) 889 | parseValue = fmap FailIfSlower . parsePositivePercents 890 | optionName = pure "fail-if-slower" 891 | optionHelp = pure "Upper bound of acceptable slow down in percents. If a benchmark is unacceptably slower than baseline (see --baseline), it will be reported as failed." 892 | 893 | -- | In addition to @--fail-if-faster@ command-line option, 894 | -- one can adjust an upper bound of acceptable speed up 895 | -- in comparison to baseline for 896 | -- individual benchmarks and groups of benchmarks 897 | -- using 'adjustOption' and 'localOption'. 898 | -- 899 | -- E. g., set upper bound of acceptable speed up to 10% as follows: 900 | -- 901 | -- > import Test.Tasty (localOption) 902 | -- > localOption (FailIfFaster 0.10) (bgroup [...]) 903 | -- 904 | -- @since 0.2 905 | newtype FailIfFaster = FailIfFaster Double 906 | deriving 907 | ( Eq 908 | -- ^ @since 0.4 909 | , Ord 910 | -- ^ @since 0.4 911 | , Show 912 | , Read 913 | , Num 914 | -- ^ @since 0.4 915 | , Fractional 916 | -- ^ @since 0.4 917 | ) 918 | 919 | instance IsOption FailIfFaster where 920 | defaultValue = FailIfFaster (1.0 / 0.0) 921 | parseValue = fmap FailIfFaster . parsePositivePercents 922 | optionName = pure "fail-if-faster" 923 | optionHelp = pure "Upper bound of acceptable speed up in percents. If a benchmark is unacceptably faster than baseline (see --baseline), it will be reported as failed." 
924 | 925 | parsePositivePercents :: String -> Maybe Double 926 | parsePositivePercents xs = do 927 | x <- safeRead xs 928 | guard (x > 0) 929 | pure (x / 100) 930 | 931 | instance IsOption TimeMode where 932 | defaultValue = CpuTime 933 | parseValue v = case v of 934 | "cpu" -> Just CpuTime 935 | "wall" -> Just WallTime 936 | "mutcpu" -> Just MutatorCpuTime 937 | "mutwall" -> Just MutatorWallTime 938 | _ -> Nothing 939 | optionName = pure "time-mode" 940 | optionHelp = pure "Whether to measure total CPU time (\"cpu\"), total wall-clock time (\"wall\"), or time spent by the mutator (CPU \"mutcpu\" or wall-clock \"mutwall\")" 941 | showDefaultValue m = case m of 942 | CpuTime -> Just "cpu" 943 | WallTime -> Just "wall" 944 | MutatorCpuTime -> Just "mutcpu" 945 | MutatorWallTime -> Just "mutwall" 946 | CustomTime _ -> Nothing 947 | #endif 948 | 949 | -- | Something that can be benchmarked, produced by 'nf', 'whnf', 'nfIO', 'whnfIO', 950 | -- 'nfAppIO', 'whnfAppIO' below. 951 | -- 952 | -- Drop-in replacement for @Criterion.Benchmarkable@ and 953 | -- @Gauge.Benchmarkable@. 954 | -- 955 | -- @since 0.1 956 | newtype Benchmarkable = 957 | -- | @since 0.3 958 | Benchmarkable 959 | { unBenchmarkable :: Word64 -> IO () -- ^ Run benchmark given number of times. 960 | } 961 | 962 | #ifdef MIN_VERSION_tasty 963 | 964 | -- | 'defaultMain' forces encoding to 'utf8', but users might 965 | -- be running benchmarks outside of it (e. g., via 'defaultMainWithIngredients'). 966 | supportsUnicode :: Bool 967 | supportsUnicode = maybe False ((== "UTF") . take 3 . 
textEncodingName) enc 968 | #if defined(mingw32_HOST_OS) 969 | && unsafePerformIO getConsoleOutputCP == 65001 970 | #endif 971 | where 972 | enc = unsafePerformIO (hGetEncoding stdout) 973 | {-# NOINLINE supportsUnicode #-} 974 | 975 | mu :: Char 976 | mu = if supportsUnicode then 'μ' else 'u' 977 | 978 | pm :: String 979 | pm = if supportsUnicode then " ± " else " +-" 980 | 981 | -- | Show picoseconds, fitting number in 3 characters. 982 | showPicos3 :: Word64 -> String 983 | showPicos3 i 984 | | t < 995 = printf "%3.0f ps" t 985 | | t < 995e1 = printf "%3.1f ns" (t / 1e3) 986 | | t < 995e3 = printf "%3.0f ns" (t / 1e3) 987 | | t < 995e4 = printf "%3.1f %cs" (t / 1e6) mu 988 | | t < 995e6 = printf "%3.0f %cs" (t / 1e6) mu 989 | | t < 995e7 = printf "%3.1f ms" (t / 1e9) 990 | | t < 995e9 = printf "%3.0f ms" (t / 1e9) 991 | | otherwise = printf "%4.2f s" (t / 1e12) 992 | where 993 | t = word64ToDouble i 994 | 995 | -- | Show picoseconds, fitting number in 4 characters. 996 | showPicos4 :: Word64 -> String 997 | showPicos4 i 998 | | t < 995 = printf "%3.0f ps" t 999 | | t < 995e1 = printf "%4.2f ns" (t / 1e3) 1000 | | t < 995e2 = printf "%4.1f ns" (t / 1e3) 1001 | | t < 995e3 = printf "%3.0f ns" (t / 1e3) 1002 | | t < 995e4 = printf "%4.2f %cs" (t / 1e6) mu 1003 | | t < 995e5 = printf "%4.1f %cs" (t / 1e6) mu 1004 | | t < 995e6 = printf "%3.0f %cs" (t / 1e6) mu 1005 | | t < 995e7 = printf "%4.2f ms" (t / 1e9) 1006 | | t < 995e8 = printf "%4.1f ms" (t / 1e9) 1007 | | t < 995e9 = printf "%3.0f ms" (t / 1e9) 1008 | | otherwise = printf "%4.3f s" (t / 1e12) 1009 | where 1010 | t = word64ToDouble i 1011 | 1012 | showBytes :: Word64 -> String 1013 | showBytes i 1014 | | t < 1000 = printf "%3.0f B " t 1015 | | t < 10189 = printf "%3.1f KB" (t / 1024) 1016 | | t < 1023488 = printf "%3.0f KB" (t / 1024) 1017 | | t < 10433332 = printf "%3.1f MB" (t / 1048576) 1018 | | t < 1048051712 = printf "%3.0f MB" (t / 1048576) 1019 | | t < 10683731149 = printf "%3.1f GB" (t / 1073741824) 
1020 | | t < 1073204953088 = printf "%3.0f GB" (t / 1073741824) 1021 | | t < 10940140696372 = printf "%3.1f TB" (t / 1099511627776) 1022 | | t < 1098961871962112 = printf "%3.0f TB" (t / 1099511627776) 1023 | | t < 11202704073084108 = printf "%3.1f PB" (t / 1125899906842624) 1024 | | t < 1125336956889202624 = printf "%3.0f PB" (t / 1125899906842624) 1025 | | t < 11471568970838126592 = printf "%3.1f EB" (t / 1152921504606846976) 1026 | | otherwise = printf "%3.0f EB" (t / 1152921504606846976) 1027 | where 1028 | t = word64ToDouble i 1029 | #endif 1030 | 1031 | data Measurement = Measurement 1032 | { measTime :: !Word64 -- ^ time in picoseconds 1033 | , measAllocs :: !Word64 -- ^ allocations in bytes 1034 | , measCopied :: !Word64 -- ^ copied bytes 1035 | , measMaxMem :: !Word64 -- ^ max memory in use 1036 | } deriving (Show, Read) 1037 | 1038 | data Estimate = Estimate 1039 | { estMean :: !Measurement 1040 | , estStdev :: !Word64 -- ^ standard deviation in picoseconds 1041 | } deriving (Show, Read) 1042 | 1043 | #ifdef MIN_VERSION_tasty 1044 | 1045 | data WithLoHi a = WithLoHi 1046 | !a -- payload 1047 | !Double -- lower bound (e. g., 0.9 for -10% speedup) 1048 | !Double -- upper bound (e. 
g., 1.2 for +20% slowdown)
  deriving (Show, Read)

-- | Render an estimate as mean time plus-minus twice the standard
-- deviation (the latter omitted when it is zero).
prettyEstimate :: Estimate -> String
prettyEstimate (Estimate m stdev) =
  showPicos4 (measTime m)
  ++ (if stdev == 0 then " " else pm ++ showPicos3 (2 * stdev))

-- | Same as 'prettyEstimate', but also renders GC statistics:
-- allocated, copied and peak memory.
prettyEstimateWithGC :: Estimate -> String
prettyEstimateWithGC (Estimate m stdev) =
  showPicos4 (measTime m)
  ++ (if stdev == 0 then ", " else pm ++ showPicos3 (2 * stdev) ++ ", ")
  ++ showBytes (measAllocs m) ++ " allocated, "
  ++ showBytes (measCopied m) ++ " copied, "
  ++ showBytes (measMaxMem m) ++ " peak memory"

-- | Mean time and twice the standard deviation as raw CSV fields.
csvEstimate :: Estimate -> String
csvEstimate (Estimate m stdev) = show (measTime m) ++ "," ++ show (2 * stdev)

-- | Same as 'csvEstimate', but with GC statistics appended.
csvEstimateWithGC :: Estimate -> String
csvEstimateWithGC (Estimate m stdev) = show (measTime m) ++ "," ++ show (2 * stdev)
  ++ "," ++ show (measAllocs m) ++ "," ++ show (measCopied m) ++ "," ++ show (measMaxMem m)
#endif

-- | Given a measurement for one batch and a measurement for a double
-- batch, estimate the cost of a single batch together with its
-- standard deviation.
--
-- The mean @fit x1 x2 = (x1 + 2 * x2) / 5@ is the least-squares fit of
-- a line through the origin to the points (1, x1) and (2, x2); @d@ is
-- the squared distance of the actual measurements from that fit.
predict
  :: Measurement -- ^ time for one run
  -> Measurement -- ^ time for two runs
  -> Estimate
predict (Measurement t1 a1 c1 m1) (Measurement t2 a2 c2 m2) = Estimate
  { estMean = Measurement t (fit a1 a2) (fit c1 c2) (max m1 m2)
  , estStdev = truncate (sqrt d :: Double)
  }
  where
    fit x1 x2 = x1 `quot` 5 + 2 * (x2 `quot` 5)
    t = fit t1 t2
    sqr x = x * x
    d = sqr (word64ToDouble t1 - word64ToDouble t)
      + sqr (word64ToDouble t2 - 2 * word64ToDouble t)

-- | Same as 'predict', but accounts for the clock precision:
-- both measurements are perturbed up and down by the precision
-- and the worst-case standard deviation of the two pairings is taken.
predictPerturbed :: Measurement -> Measurement -> Estimate
predictPerturbed t1 t2 = Estimate
  { estMean = estMean (predict t1 t2)
  , estStdev = max
    (estStdev (predict (lo t1) (hi t2)))
    (estStdev (predict (hi t1) (lo t2)))
  }
  where
    prec = max (fromInteger cpuTimePrecision) 1000000000 -- 1 ms
    hi meas = meas { measTime = measTime meas + prec }
    lo meas = meas { measTime = if measTime meas > prec then measTime meas - prec else 0 }

-- | Whether RTS statistics are enabled (@+RTS -T@), which makes
-- GC counters meaningful.
hasGCStats :: Bool
#if MIN_VERSION_base(4,10,0)
hasGCStats = unsafePerformIO getRTSStatsEnabled
#else
hasGCStats = unsafePerformIO getGCStatsEnabled
#endif
{-# NOINLINE hasGCStats #-}

-- | Read cumulative (allocated bytes, copied bytes, max memory in use)
-- from the RTS, or zeros when GC statistics are not enabled.
getAllocsAndCopied :: IO (Word64, Word64, Word64)
getAllocsAndCopied = do
  if not hasGCStats then pure (0, 0, 0) else
#if MIN_VERSION_base(4,10,0)
    (\s -> (allocated_bytes s, copied_bytes s, max_mem_in_use_bytes s)) <$> getRTSStats
#else
    (\s -> (int64ToWord64 $ bytesAllocated s, int64ToWord64 $ bytesCopied s, int64ToWord64 $ peakMegabytesAllocated s * 1024 * 1024)) <$> getGCStats
#endif

-- | Wall-clock time in seconds: monotonic clock where available,
-- POSIX time as a fallback on old versions of base.
getWallTimeSecs :: IO Double
#if MIN_VERSION_base(4,11,0)
getWallTimeSecs = getMonotonicTime
#else
getWallTimeSecs = realToFrac <$> getPOSIXTime
#endif

-- | CPU time spent in the mutator (excluding GC), in picoseconds.
getMutatorCpuTime :: IO Word64
#if MIN_VERSION_base(4,10,0)
getMutatorCpuTime = (1000 *) . fromIntegral . mutator_cpu_ns <$> getRTSStats
#else
getMutatorCpuTime = round . (1e12 *) . mutatorCpuSeconds <$> getGCStats
#endif

-- | Wall-clock time spent in the mutator (excluding GC), in picoseconds.
getMutatorWallTime :: IO Word64
#if MIN_VERSION_base(4,10,0)
getMutatorWallTime = (1000 *) . fromIntegral . mutator_elapsed_ns <$> getRTSStats
#else
getMutatorWallTime = round . (1e12 *) . mutatorWallSeconds <$> getGCStats
#endif


-- | Sample the clock selected by 'TimeMode', in picoseconds.
getTimePicoSecs :: TimeMode -> IO Word64
getTimePicoSecs timeMode = case timeMode of
  CpuTime -> fromInteger <$> getCPUTime
  WallTime -> round . 
(1e12 *) <$> getWallTimeSecs
  MutatorCpuTime -> getMutatorCpuTime
  MutatorWallTime -> getMutatorWallTime
  CustomTime getCustomTime -> getCustomTime

-- | Run a benchmark for @n@ iterations and measure elapsed time
-- and GC statistics. 'performGC' beforehand and 'performMinorGC'
-- afterwards keep the RTS counters honest; note that the end time
-- is taken /before/ the minor GC, so GC bookkeeping does not inflate
-- the timing itself.
measure :: TimeMode -> Word64 -> Benchmarkable -> IO Measurement
measure timeMode n (Benchmarkable act) = do
  let getTimePicoSecs' = getTimePicoSecs timeMode
  performGC
  startTime <- getTimePicoSecs'
  (startAllocs, startCopied, startMaxMemInUse) <- getAllocsAndCopied
  act n
  endTime <- getTimePicoSecs'
  performMinorGC -- perform GC to update RTSStats
  (endAllocs, endCopied, endMaxMemInUse) <- getAllocsAndCopied
  let meas = Measurement
        { measTime = endTime - startTime
        , measAllocs = endAllocs - startAllocs
        , measCopied = endCopied - startCopied
        , measMaxMem = max endMaxMemInUse startMaxMemInUse
        }
  pure meas

-- | Keep doubling the number of iterations until the relative standard
-- deviation of the estimate falls below the target, or the timeout is
-- about to expire. An infinite positive target relative stdev means
-- "run exactly once" (first clause).
measureUntil
  :: (Progress -> IO ())
  -> TimeMode
  -> Timeout
  -> RelStDev
  -> Benchmarkable
  -> IO Estimate
measureUntil _ timeMode _ (RelStDev targetRelStDev) b
  | isInfinite targetRelStDev, targetRelStDev > 0 = do
    t1 <- measure timeMode 1 b
    pure $ Estimate { estMean = t1, estStdev = 0 }
measureUntil yieldProgress timeMode timeout (RelStDev targetRelStDev) b = do
  t1 <- measure' 1 b
  go 1 t1 0
  where
    measure' = measure timeMode

    -- n is the iteration count of t1; sumOfTs accumulates the total
    -- time spent so far (excluding t1), compared against the timeout.
    go :: Word64 -> Measurement -> Word64 -> IO Estimate
    go n t1 sumOfTs = do
      t2 <- measure' (2 * n) b

      let Estimate (Measurement meanN allocN copiedN maxMemN) stdevN = predictPerturbed t1 t2
          isTimeoutSoon = case timeout of
            NoTimeout -> False
            -- multiplying by 12/10 helps to avoid accidental timeouts
            -- (1000000 * 10 `quot` 12 = 833333 converts ps to a 1.2x-inflated μs)
            Timeout micros _ -> (sumOfTs' + 3 * measTime t2) `quot` (1000000 * 10 `quot` 12) >= fromInteger micros
          isStDevInTargetRange = stdevN < truncate (max 0 targetRelStDev * word64ToDouble meanN)
          -- estimates above are for a batch of n iterations;
          -- scale down to a single iteration
          scale = (`quot` n)
          sumOfTs' = sumOfTs + measTime t1

      let scaledEstimate = Estimate
            { estMean = Measurement (scale meanN) (scale allocN) (scale copiedN) maxMemN
            , estStdev = scale stdevN }

#ifdef MIN_VERSION_tasty
      yieldProgress $ Progress
        { progressText = prettyEstimate scaledEstimate
        , progressPercent = 0.0
        }
#else
      yieldProgress ()
#endif

      if isStDevInTargetRange || isTimeoutSoon
        then pure scaledEstimate
        else go (2 * n) t2 sumOfTs'

-- | An internal routine to measure CPU execution time in seconds
-- for a given timeout (put 'NoTimeout', or 'mkTimeout' 100000000 for 100 seconds)
-- and a target relative standard deviation
-- (put v'RelStDev' 0.05 for 5% or v'RelStDev' (1/0) to run only one iteration).
--
-- t'Timeout' takes soft priority over t'RelStDev': this function prefers
-- to finish in time even if at cost of precision. However, timeout is guidance
-- not guarantee: 'measureCpuTime' can take longer, if there is not enough time
-- to run at least thrice or an iteration takes unusually long.
--
-- @since 0.3
measureCpuTime :: Timeout -> RelStDev -> Benchmarkable -> IO Double
measureCpuTime = ((fmap fst .) .) . measureCpuTimeAndStDev

-- | Same as 'measureCpuTime', but returns both CPU execution time
-- and its standard deviation.
--
-- @since 0.3.4
measureCpuTimeAndStDev :: Timeout -> RelStDev -> Benchmarkable -> IO (Double, Double)
measureCpuTimeAndStDev
  = ((fmap (\x ->
    ( word64ToDouble (measTime (estMean x)) / 1e12
    , word64ToDouble (estStdev x) / 1e12
    )) .) .)
  . 
measureUntil (const $ pure ()) CpuTime

#ifdef MIN_VERSION_tasty

-- | Benchmarks are executed as ordinary tasty tests; the estimate is
-- smuggled to reporters as a 'Show'n 'WithLoHi' in the result description.
instance IsTest Benchmarkable where
  testOptions = pure
    [ Option (Proxy :: Proxy RelStDev)
    -- FailIfSlower and FailIfFaster must be options of a test provider rather
    -- than options of an ingredient to allow setting them on per-test level.
    , Option (Proxy :: Proxy FailIfSlower)
    , Option (Proxy :: Proxy FailIfFaster)
    , Option (Proxy :: Proxy TimeMode)
    ]
  run opts b yieldProgress = case getNumThreads (lookupOption opts) of
    1 -> do
      let timeMode = lookupOption opts
      est <- measureUntil yieldProgress timeMode (lookupOption opts) (lookupOption opts) b
      let FailIfSlower ifSlower = lookupOption opts
          FailIfFaster ifFaster = lookupOption opts
      pure $ testPassed $ show (WithLoHi est (1 - ifFaster) (1 + ifSlower))
    _ -> pure $ testFailed "Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N."

-- | Attach a name to t'Benchmarkable'.
--
-- This is actually a synonym of 'Test.Tasty.Providers.singleTest' to
-- provide an interface compatible with @Criterion.bench@
-- and @Gauge.bench@.
--
-- @since 0.1
bench :: String -> Benchmarkable -> Benchmark
bench = singleTest

-- | Attach a name to a group of 'Benchmark'.
--
-- This is actually a synonym of 'Test.Tasty.testGroup' to provide an
-- interface compatible with @Criterion.bgroup@ and
-- @Gauge.bgroup@.
--
-- @since 0.1
bgroup :: String -> [Benchmark] -> Benchmark
bgroup = testGroup

-- | Compare benchmarks, reporting relative speed up or slow down.
--
-- This function is a vague reminiscence of @bcompare@, which existed in pre-1.0
-- versions of @criterion@, but their types are incompatible. Under the hood
-- 'bcompare' is a thin wrapper over 'after'.
--
-- Here is a basic example:
--
-- > import Test.Tasty.Bench
-- >
-- > fibo :: Int -> Integer
-- > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2)
-- >
-- > main :: IO ()
-- > main = defaultMain
-- >   [ bgroup "Fibonacci numbers"
-- >     [ bcompare "tenth" $ bench "fifth" $ nf fibo 5
-- >     , bench "tenth" $ nf fibo 10
-- >     , bcompare "tenth" $ bench "twentieth" $ nf fibo 20
-- >     ]
-- >   ]
--
-- More complex examples:
--
-- * https://hackage.haskell.org/package/chimera-0.4.1.0/src/bench/Read.hs
-- * https://hackage.haskell.org/package/fast-digits-0.3.2.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/unicode-data-0.8.0/src/bench/Unicode/Char/Bench.hs
--
-- @since 0.2.4
bcompare
  :: String
  -- ^ @tasty@ pattern, which must unambiguously
  -- match a unique baseline benchmark. Consider using 'locateBenchmark' to construct it.
  -> Benchmark
  -- ^ Benchmark (or a group of benchmarks)
  -- to be compared against the baseline benchmark by dividing measured mean times.
  -- The result is reported by 'consoleBenchReporter', e. g., 0.50x or 1.25x.
  -> Benchmark
bcompare = bcompareWithin (-1/0) (1/0)

-- | Same as 'bcompare', but takes expected lower and upper bounds of
-- comparison. If the result is not within provided bounds, benchmark fails.
-- This allows to create portable performance tests: instead of comparing
-- to an absolute timeout or to previous runs, you can state that one implementation
-- of an algorithm must be faster than another.
--
-- E. g., 'bcompareWithin' 2.0 3.0 passes only if a benchmark is at least 2x
-- and at most 3x slower than a baseline.
--
-- Examples:
--
-- * https://hackage.haskell.org/package/text-2.1.3/src/benchmarks/haskell/Benchmarks/Micro.hs
-- * https://hackage.haskell.org/package/bluefin-algae-0.1.0.2/src/bench/quadratic-counter.hs
--
-- @since 0.3.1
bcompareWithin
  :: Double -- ^ Lower bound of relative speed up.
  -> Double -- ^ Upper bound of relative speed up.
  -> String -- ^ @tasty@ pattern to locate a baseline benchmark.
  -> Benchmark -- ^ Benchmark to compare against baseline.
  -> Benchmark
bcompareWithin lo hi s = case parseExpr s of
  Nothing -> error $ "Could not parse bcompare pattern " ++ s
  -- The bounds are smuggled to the reporter inside a fake pattern
  -- literal marked by 'bcomparePrefix', conjoined with the user's pattern.
  Just e -> after_ AllSucceed (And (StringLit (bcomparePrefix ++ show (lo, hi))) e)

-- | Magic marker distinguishing dependencies introduced by
-- 'bcompareWithin' from ordinary 'after' dependencies.
bcomparePrefix :: String
bcomparePrefix = "tasty-bench"

-- | Benchmarks are actually just a regular 'Test.Tasty.TestTree' in disguise.
--
-- This is a drop-in replacement for @Criterion.Benchmark@
-- and @Gauge.Benchmark@.
--
-- @since 0.1
type Benchmark = TestTree

-- | Run benchmarks and report results, providing an interface
-- compatible with @Criterion.defaultMain@ and
-- @Gauge.defaultMain@.
--
-- An unfortunate consequence of being a drop-in replacement for @criterion@
-- and @gauge@ is that this function diverges from @tasty@,
-- where 'Test.Tasty.defaultMain' takes a single input, not a list of inputs.
--
-- @since 0.1
defaultMain :: [Benchmark] -> IO ()
defaultMain bs = do
  let act = defaultMain' bs
  bracketUtf8 act

-- | Run an action with stdout switched to UTF-8 (and, on Windows,
-- the console output code page switched to 65001), restoring the
-- previous encoding afterwards even on exceptions.
bracketUtf8 :: IO a -> IO a
bracketUtf8 act = do
  prevStdoutEnc <- hGetEncoding stdout
#if defined(mingw32_HOST_OS)
  codePage <- getConsoleOutputCP
  bracket_
    (hSetEncoding stdout utf8
      >> setConsoleOutputCP 65001)
    (maybe (hSetBinaryMode stdout True) (hSetEncoding stdout) prevStdoutEnc
      >> setConsoleOutputCP codePage)
    act
#else
  bracket_
    (hSetEncoding stdout utf8)
    (maybe (hSetBinaryMode stdout True) (hSetEncoding stdout) prevStdoutEnc)
    act
#endif

-- | 'defaultMain' sans the encoding bracket: install signal handlers,
-- parse options, force single-threaded execution and run the ingredients.
defaultMain' :: [Benchmark] -> IO ()
defaultMain' bs = do
  installSignalHandlers
  let b = testGroup "All" bs
  opts <- parseOptions benchIngredients b
  let opts' = setOption (NumThreads 1) opts
#if MIN_VERSION_tasty(1,5,0)
      -- Suppress tasty's own duration reporting: tasty-bench renders times itself.
      opts'' = setOption (MinDurationToReport 1000000000000) opts'
#else
      opts'' = opts'
#endif
  case tryIngredients benchIngredients opts'' b of
    Nothing -> exitFailure
    Just act -> act >>= \x -> if x then exitSuccess else exitFailure

-- | List of default benchmark ingredients. This is what 'defaultMain' runs.
--
-- @since 0.2
benchIngredients :: [Ingredient]
benchIngredients = [listingTests, composeReporters consoleBenchReporter (composeReporters csvReporter svgReporter)]

#endif

-- | Turn a pure function application into a t'Benchmarkable' loop,
-- forcing each result with @frc@.
funcToBench :: forall a b c. (b -> c) -> (a -> b) -> a -> Benchmarkable
funcToBench frc = (Benchmarkable .) . 
funcToBenchLoop SPEC
  where
    -- Here we rely on the fact that GHC (unless spurred by
    -- -fstatic-argument-transformation) is not smart enough:
    -- it does not notice that `f` and `x` arguments are loop invariant
    -- and could be floated, and the whole `f x` expression shared.
    -- If we create a closure with `f` and `x` bound in the environment,
    -- then GHC is smart enough to share computation of `f x`.
    --
    -- For perspective, gauge and criterion < 1.4 mark similar functions as INLINE,
    -- while criterion >= 1.4 switches to NOINLINE.
    -- If we mark `funcToBenchLoop` NOINLINE then benchmark results are slightly larger
    -- (noticeable in bench-fibo), because the loop body is slightly bigger,
    -- since GHC does not unbox numbers or inline `Eq @Word64` dictionary.
    --
    -- This function is called `funcToBenchLoop` instead of, say, `go`,
    -- so it is easier to spot in Core dumps.
    --
    -- Forcing SpecConst optimization with SPEC makes the behaviour of benchmarks
    -- independent of -fspec-constr-count.
    funcToBenchLoop :: SPEC -> (a -> b) -> a -> Word64 -> IO ()
    funcToBenchLoop !_ f x n
      | n == 0 = pure ()
      | otherwise = do
        _ <- evaluate (frc (f x))
        funcToBenchLoop SPEC f x (n - 1)
{-# INLINE funcToBench #-}

-- | 'nf' @f@ @x@ measures time to compute
-- a normal form (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force')
-- of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Here is a textbook anti-pattern: 'nf' 'sum' @[1..1000000]@.
-- Since an input list is shared by multiple invocations of 'sum',
-- it will be allocated in memory in full, putting immense pressure
-- on garbage collector. Also no list fusion will happen.
-- A better approach is 'nf' (@\\n@ @->@ 'sum' @[1..n]@) @1000000@.
--
-- It is preferable that the return type of the function under measurement
-- is inhabited enough to depend genuinely on all computations and is not simply @b ~ ()@.
-- Otherwise GHC might get aggressive and optimise the payload away.
--
-- If you are measuring an inlinable function,
-- it is prudent to ensure that its invocation is fully saturated,
-- otherwise inlining will not happen. That's why one can often
-- see 'nf' (@\\n@ @->@ @f@ @n@) @x@ instead of 'nf' @f@ @x@.
-- Same applies to rewrite rules.
--
-- If you suspect that GHC overoptimizes / overspecializes the function call @f@,
-- try defeating it with 'GHC.Exts.noinline' @f@.
--
-- While @tasty-bench@ is capable to perform micro- and even nanobenchmarks,
-- such measurements are noisy and involve an overhead. Results are more reliable
-- when @f@ @x@ takes at least several milliseconds.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios (imagine benchmarking 'tail'),
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- 'nf' @f@ is equivalent to 'whnf' ('Control.DeepSeq.rnf' '.' @f@), but not to
-- 'whnf' ('Control.DeepSeq.force' '.' @f@). The latter retains the result
-- in memory until it is fully evaluated, while the former allows
-- evaluated parts of the result to be garbage-collected immediately.
--
-- For users of @{-# LANGUAGE LinearTypes #-}@: if @f@ is a linear function,
-- then 'nf' @f@ @x@ is ill-typed, but you can use 'nf' @(\\y -> f y)@ @x@
-- instead.
--
-- Drop-in replacement for @Criterion.nf@ and
-- @Gauge.nf@.
--
-- @since 0.1
nf :: NFData b => (a -> b) -> a -> Benchmarkable
nf = funcToBench rnf
{-# INLINE nf #-}

-- | 'whnf' @f@ @x@ measures time to compute
-- a weak head normal form of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Beware that many educational materials contain examples with 'whnf':
-- this is a wrong default.
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nf' instead.
--
-- Here is a textbook anti-pattern: 'whnf' ('Data.List.replicate' @1000000@) @1@.
-- This will succeed in a matter of nanoseconds, because weak head
-- normal form forces only the first element of the list.
--
-- Drop-in replacement for @Criterion.whnf@ and @Gauge.whnf@.
--
-- @since 0.1
whnf :: (a -> b) -> a -> Benchmarkable
whnf = funcToBench id
{-# INLINE whnf #-}

-- | Build a t'Benchmarkable' which repeatedly runs an 'IO' action,
-- forcing each result with @frc@.
ioToBench :: (b -> c) -> IO b -> Benchmarkable
ioToBench frc act = Benchmarkable (ioToBenchLoop SPEC)
  where
    ioToBenchLoop :: SPEC -> Word64 -> IO ()
    ioToBenchLoop !_ n
      | n == 0 = pure ()
      | otherwise = do
        val <- act
        _ <- evaluate (frc val)
        ioToBenchLoop SPEC (n - 1)
{-# INLINE ioToBench #-}

-- | 'nfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its normal form
-- (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force').
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached. For example,
-- GHC is likely to float @x@ out of 'nfIO' ('pure' @x@) and
-- evaluate it only once, which leaves 'nfIO' to measure 'pure' only
-- with results in nanosecond range.
--
-- To avoid surprising results it is usually preferable
-- to use 'nfAppIO' instead. You can also try turning off
-- let floating by
-- [@-fno-full-laziness@](https://downloads.haskell.org/ghc/latest/docs/users_guide/using-optimisation.html#ghc-flag-ffull-laziness),
-- but this is likely to cause more problems than solve.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios,
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- A typical use case is 'nfIO' ('readFile' @"foo.txt"@).
-- However, if your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- One handy consequence of having access to `IO` is that you can
-- generate new random inputs for each run using @System.Random.randomIO@
-- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@.
--
-- Drop-in replacement for @Criterion.nfIO@ and @Gauge.nfIO@.
--
-- @since 0.1
nfIO :: NFData a => IO a -> Benchmarkable
nfIO = ioToBench rnf
{-# INLINE nfIO #-}

-- | 'whnfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its weak head normal form.
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached. For example,
-- GHC is likely to float @x@ out of 'whnfIO' ('pure' @x@) and
-- evaluate it only once, which leaves 'whnfIO' to measure 'pure' only
-- with results in nanosecond range.
--
-- To avoid surprising results it is usually preferable
-- to use 'whnfAppIO' instead. You can also try turning off
-- let floating by
-- [@-fno-full-laziness@](https://downloads.haskell.org/ghc/latest/docs/users_guide/using-optimisation.html#ghc-flag-ffull-laziness),
-- but this is likely to cause more problems than solve.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nfIO' instead.
--
-- Lazy I\/O is treacherous.
-- If your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- One handy consequence of having access to `IO` is that you can
-- generate new random inputs for each run using @System.Random.randomIO@
-- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@.
--
-- Drop-in replacement for @Criterion.whnfIO@ and @Gauge.whnfIO@.
--
-- @since 0.1
whnfIO :: IO a -> Benchmarkable
whnfIO = ioToBench id
{-# INLINE whnfIO #-}

-- | Build a t'Benchmarkable' which repeatedly applies an 'IO' function
-- to a shared argument, forcing each result with @frc@.
ioFuncToBench :: forall a b c. (b -> c) -> (a -> IO b) -> a -> Benchmarkable
ioFuncToBench frc = (Benchmarkable .) . ioFuncToBenchLoop SPEC
  where
    ioFuncToBenchLoop :: SPEC -> (a -> IO b) -> a -> Word64 -> IO ()
    ioFuncToBenchLoop !_ f x n
      | n == 0 = pure ()
      | otherwise = do
        val <- f x
        _ <- evaluate (frc val)
        ioFuncToBenchLoop SPEC f x (n - 1)
{-# INLINE ioFuncToBench #-}

-- | 'nfAppIO' @f@ @x@ measures time to evaluate side-effects of
-- an application of @f@ to @x@
-- and compute its normal form
-- (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force').
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. 
In certain scenarios, 1638 | -- especially when 'NFData' instance is badly written, 1639 | -- this traversal may take non-negligible time and affect results. 1640 | -- 1641 | -- A typical use case is 'nfAppIO' 'readFile' @"foo.txt"@. 1642 | -- However, if your goal is not to benchmark I\/O per se, 1643 | -- but just read input data from a file, it is cleaner to 1644 | -- use 'env' or 'withResource'. 1645 | -- 1646 | -- One handy consequence of having access to `IO` is that you can 1647 | -- generate new random inputs for each run using @System.Random.randomIO@ 1648 | -- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@. 1649 | -- 1650 | -- Drop-in replacement for @Criterion.nfAppIO@ and @Gauge.nfAppIO@. 1651 | -- 1652 | -- @since 0.1 1653 | nfAppIO :: NFData b => (a -> IO b) -> a -> Benchmarkable 1654 | nfAppIO = ioFuncToBench rnf 1655 | {-# INLINE nfAppIO #-} 1656 | 1657 | -- | 'whnfAppIO' @f@ @x@ measures time to evaluate side-effects of 1658 | -- an application of @f@ to @x@ 1659 | -- and compute its weak head normal form. 1660 | -- This does not include time to evaluate @f@ or @x@ themselves. 1661 | -- Ideally @x@ should be a primitive data type like 'Data.Int.Int'. 1662 | -- 1663 | -- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate 1664 | -- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may 1665 | -- be an infinite structure. Thus @x@ will be evaluated in course of the first 1666 | -- application of @f@. This noisy measurement is to be discarded soon, 1667 | -- but if @x@ is not a primitive data type, consider forcing its evaluation 1668 | -- separately, e. g., via 'env' or 'withResource'. 1669 | -- 1670 | -- Computing only a weak head normal form is 1671 | -- rarely what intuitively is meant by "evaluation". 1672 | -- Unless you understand precisely, what is measured, 1673 | -- it is recommended to use 'nfAppIO' instead. 1674 | -- 1675 | -- Lazy I\/O is treacherous. 
1676 | -- If your goal is not to benchmark I\/O per se, 1677 | -- but just read input data from a file, it is cleaner to 1678 | -- use 'env' or 'withResource'. 1679 | -- 1680 | -- One handy consequence of having access to `IO` is that you can 1681 | -- generate new random inputs for each run using @System.Random.randomIO@ 1682 | -- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@. 1683 | -- 1684 | -- Drop-in replacement for @Criterion.whnfAppIO@ and @Gauge.whnfAppIO@. 1685 | -- 1686 | -- @since 0.1 1687 | whnfAppIO :: (a -> IO b) -> a -> Benchmarkable 1688 | whnfAppIO = ioFuncToBench id 1689 | {-# INLINE whnfAppIO #-} 1690 | 1691 | #ifdef MIN_VERSION_tasty 1692 | 1693 | -- | Run benchmarks in the given environment, usually reading large input data from file. 1694 | -- 1695 | -- One might wonder why 'env' is needed, 1696 | -- when we can simply read all input data 1697 | -- before calling 'defaultMain'. The reason is that large data 1698 | -- dangling in the heap causes longer garbage collection 1699 | -- and slows down all benchmarks, even those which do not use it at all. 1700 | -- 1701 | -- It is instrumental not only for proper 'IO' actions, 1702 | -- but also for a large statically-known data as well. Instead of a top-level 1703 | -- definition, which once evaluated will slow down garbage collection 1704 | -- during all subsequent benchmarks, 1705 | -- 1706 | -- > largeData :: String 1707 | -- > largeData = replicate 1000000 'a' 1708 | -- > 1709 | -- > main :: IO () 1710 | -- > main = defaultMain 1711 | -- > [ bench "large" $ nf length largeData, ... ] 1712 | -- 1713 | -- use 1714 | -- 1715 | -- > main :: IO () 1716 | -- > main = defaultMain 1717 | -- > [ env (pure (replicate 1000000 'a')) $ \largeData -> 1718 | -- > bench "large" $ nf length largeData, ... ] 1719 | -- 1720 | -- Even with 'env', it's advisable to store input data in as few heap objects 1721 | -- as possible. 
'Data.Array.ByteArray.ByteArray' (ideally pinned) 1722 | -- or unboxed @Vector@ are good, boxed arrays are worse, lists and trees are bad. 1723 | -- 1724 | -- @Test.Tasty.Bench.@'env' is provided only for the sake of 1725 | -- compatibility with @Criterion.env@ and 1726 | -- @Gauge.env@, and involves 'unsafePerformIO'. Consider using 1727 | -- 'withResource' instead. 1728 | -- 1729 | -- When working with a mutable environment, bear in mind that it is threaded 1730 | -- through all iterations of a benchmark. @tasty-bench@ does not roll it back 1731 | -- or reset, it's user's resposibility. You might have better luck 1732 | -- with @Criterion.perBatchEnv@ or @Criterion.perRunEnv@. 1733 | -- 1734 | -- 'defaultMain' requires that the hierarchy of benchmarks and their names is 1735 | -- independent of underlying 'IO' actions. While executing 'IO' inside 'bench' 1736 | -- via 'nfIO' is fine, and reading test data from files via 'env' is also fine, 1737 | -- using 'env' to choose benchmarks or their names depending on 'IO' side effects 1738 | -- will throw a rather cryptic error message: 1739 | -- 1740 | -- > Unhandled resource. Probably a bug in the runner you're using. 1741 | -- 1742 | -- Strict pattern-matching on resource is also prohibited. For 1743 | -- instance, if it is a tuple, the second argument of 'env' should use 1744 | -- a lazy pattern match @\\~(a, b) -> ...@ 1745 | -- 1746 | -- @since 0.2 1747 | env :: NFData env => IO env -> (env -> Benchmark) -> Benchmark 1748 | env res = envWithCleanup res (const $ pure ()) 1749 | 1750 | -- | Similar to 'env', but includes an additional argument 1751 | -- to clean up created environment. 1752 | -- 1753 | -- Provided only for the sake of compatibility with 1754 | -- @Criterion.envWithCleanup@ and 1755 | -- @Gauge.envWithCleanup@, and involves 1756 | -- 'unsafePerformIO'. Consider using 'withResource' instead. 
--
-- @since 0.2
envWithCleanup :: NFData env => IO env -> (env -> IO a) -> (env -> Benchmark) -> Benchmark
envWithCleanup res fin f = withResource
  -- Force the environment to normal form up front, so that its evaluation
  -- cost is not attributed to the benchmarks themselves.
  (res >>= evaluate . force)
  (void . fin)
  -- 'withResource' hands the resource over as an 'IO' action; 'env' promises
  -- a pure value, hence 'unsafePerformIO' (this is why 'withResource' is the
  -- recommended alternative).
  (f . unsafePerformIO)

-- | A path to write results in CSV format, populated by @--csv@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'. For example,
-- here is how to set a default CSV location:
--
-- @
-- import Data.Maybe
-- import System.Exit
-- import Test.Tasty.Bench
-- import Test.Tasty.Ingredients
-- import Test.Tasty.Options
-- import Test.Tasty.Runners
--
-- main :: IO ()
-- main = do
--   let benchmarks = bgroup \"All\" ...
--   opts <- parseOptions benchIngredients benchmarks
--   let opts' = changeOption (Just . fromMaybe (CsvPath "foo.csv")) opts
--   case tryIngredients benchIngredients opts' benchmarks of
--     Nothing -> exitFailure
--     Just mb -> mb >>= \\b -> if b then exitSuccess else exitFailure
-- @
--
-- @since 0.3
newtype CsvPath = CsvPath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe CsvPath) where
  defaultValue = Nothing
  parseValue = Just . Just . CsvPath
  optionName = pure "csv"
  optionHelp = pure "File to write results in CSV format"

-- | Run benchmarks and save results in CSV format.
-- It activates when @--csv@ @FILE@ command line option is specified.
--
-- @since 0.1
csvReporter :: Ingredient
csvReporter = TestReporter [Option (Proxy :: Proxy (Maybe CsvPath))] $
  \opts tree -> do
    -- The reporter is active only when --csv was supplied.
    CsvPath path <- lookupOption opts
    let names = testsNames opts tree
        namesMap = IM.fromDistinctAscList $ zip [0..] names
    pure $ \smap -> do
      -- CSV rows are keyed by benchmark name, so duplicates would be ambiguous.
      case findNonUniqueElement names of
        Nothing -> pure ()
        Just name -> do -- 'die' is not available before base-4.8
          hPutStrLn stderr $ "CSV report cannot proceed, because name '" ++ name ++ "' corresponds to two or more benchmarks. Please disambiguate them."
          exitFailure
      let augmented = IM.intersectionWith (,) namesMap smap
      -- 'bracket' guarantees the handle is closed even if output fails mid-way.
      bracket
        (do
          h <- openFile path WriteMode
          hSetBuffering h LineBuffering
          hPutStrLn h $ "Name,Mean (ps),2*Stdev (ps)" ++
            (if hasGCStats then ",Allocated,Copied,Peak Memory" else "")
          pure h
        )
        hClose
        (`csvOutput` augmented)
      pure $ const $ isSuccessful smap

-- | Return the first element that occurs more than once in the list,
-- or 'Nothing' if all elements are distinct.
findNonUniqueElement :: Ord a => [a] -> Maybe a
findNonUniqueElement = go S.empty
  where
    go _ [] = Nothing
    go acc (x : xs)
      | x `S.member` acc = Just x
      | otherwise = go (S.insert x acc) xs

-- | Block (via STM 'retry') until each benchmark is 'Done', then append
-- its row to the CSV handle.
csvOutput :: Handle -> IntMap (TestName, TVar Status) -> IO ()
csvOutput h = traverse_ $ \(name, tv) -> do
  let csv = if hasGCStats then csvEstimateWithGC else csvEstimate
  r <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure r; _ -> retry
  case safeRead (resultDescription r) of
    Nothing -> pure ()
    Just (WithLoHi est _ _) -> do
      msg <- formatMessage $ csv est
      hPutStrLn h (encodeCsv name ++ ',' : msg)

-- | Quote a CSV field if it contains a delimiter, quote or newline;
-- embedded quotes are doubled (RFC 4180 style).
encodeCsv :: String -> String
encodeCsv xs
  | any (`elem` xs) ",\"\n\r"
  = '"' : go xs -- opening quote
  | otherwise = xs
  where
    go [] = '"' : [] -- closing quote
    go ('"' : ys) = '"' : '"' : go ys
    go (y : ys) = y : go ys

-- | A path to plot results in SVG format, populated by @--svg@.
--
-- This is an option of 'svgReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'.
--
-- @since 0.3
newtype SvgPath = SvgPath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe SvgPath) where
  defaultValue = Nothing
  parseValue = Just . Just . SvgPath
  optionName = pure "svg"
  optionHelp = pure "File to plot results in SVG format"

-- | Run benchmarks and plot results in SVG format.
-- It activates when @--svg@ @FILE@ command line option is specified.
--
-- @since 0.2.4
svgReporter :: Ingredient
svgReporter = TestReporter [Option (Proxy :: Proxy (Maybe SvgPath))] $
  \opts tree -> do
    SvgPath path <- lookupOption opts
    let names = testsNames opts tree
        namesMap = IM.fromDistinctAscList $ zip [0..] names
    pure $ \smap -> do
      -- Results are prepended as they complete, hence the 'reverse' below.
      ref <- newIORef []
      svgCollect ref (IM.intersectionWith (,) namesMap smap)
      res <- readIORef ref
      writeFile path (svgRender (reverse res))
      pure $ const $ isSuccessful smap

-- | Block until every benchmark is 'Done' and report whether all succeeded.
isSuccessful :: StatusMap -> IO Bool
isSuccessful = go . IM.elems
  where
    go [] = pure True
    go (tv : tvs) = do
      b <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure (resultSuccessful r); _ -> retry
      if b then go tvs else pure False

-- | Wait for each benchmark to finish and accumulate (name, estimate)
-- pairs into the 'IORef' (in reverse completion order).
-- NOTE(review): 'modifyIORef'' would avoid building thunks here, though the
-- list is bounded by the number of benchmarks.
svgCollect :: IORef [(TestName, Estimate)] -> IntMap (TestName, TVar Status) -> IO ()
svgCollect ref = traverse_ $ \(name, tv) -> do
  r <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure r; _ -> retry
  case safeRead (resultDescription r) of
    Nothing -> pure ()
    Just (WithLoHi est _ _) -> modifyIORef ref ((name, est) :)

-- | Render all collected estimates as one SVG document;
-- an empty input produces an empty document.
svgRender :: [(TestName, Estimate)] -> String
svgRender [] = ""
svgRender pairs = header ++ concat (zipWith
  (\i (name, est) -> svgRenderItem i l xMax (dropAllPrefix name) est)
  [0..]
  pairs) ++ footer
  where
    -- Strip the implicit "All." root group when every name carries it.
    dropAllPrefix
      | all (("All." `isPrefixOf`) . fst) pairs = drop 4
      | otherwise = id

    l = genericLength pairs
    -- The horizontal scale must fit mean + 2 * stdev of the slowest benchmark.
    findMaxX (Estimate m stdev) = measTime m + 2 * stdev
    xMax = word64ToDouble $ maximum $ minBound : map (findMaxX . snd) pairs
    -- NOTE(review): the XML markup of these templates appears to have been
    -- stripped from this copy of the source (only the printf placeholders
    -- and newlines survive); restore from upstream before relying on them.
    header = printf "\n\n" (svgItemOffset l - 15) svgCanvasWidth svgFontSize svgCanvasMargin
    footer = "\n\n"

-- Overall drawing width of the SVG canvas, in pixels.
svgCanvasWidth :: Double
svgCanvasWidth = 960

-- Margin on each side of the canvas, in pixels.
svgCanvasMargin :: Double
svgCanvasMargin = 10

-- | Vertical offset of the @i@-th item: 55 px per row plus a fixed top gap.
svgItemOffset :: Word64 -> Word64
svgItemOffset i = 22 + 55 * i

svgFontSize :: Word64
svgFontSize = 16

-- | Render a single benchmark as a labelled box-and-whiskers row.
-- @i@ is the row index, @iMax@ the total number of rows (used to pick a hue),
-- @xMax@ the full-scale time in picoseconds.
svgRenderItem :: Word64 -> Word64 -> Double -> TestName -> Estimate -> String
svgRenderItem i iMax xMax name est@(Estimate m stdev) =
  -- Put the label inside the box when it fits, otherwise above it.
  (if genericLength shortTextContent * glyphWidth < boxWidth then longText else shortText) ++ box
  where
    y = svgItemOffset i
    y' = y + (svgFontSize * 3) `quot` 8
    y1 = y' + whiskerMargin
    y2 = y' + boxHeight `quot` 2
    y3 = y' + boxHeight - whiskerMargin
    x1 = boxWidth - whiskerWidth
    x2 = boxWidth + whiskerWidth
    -- Hue (degrees) chosen by spreading rows evenly around the color wheel.
    deg = (i * 360) `quot` iMax
    glyphWidth = word64ToDouble svgFontSize / 2

    -- Convert picoseconds to canvas pixels.
    scale w = word64ToDouble w * (svgCanvasWidth - 2 * svgCanvasMargin) / xMax
    boxWidth = scale (measTime m)
    whiskerWidth = scale (2 * stdev)
    boxHeight = 22
    whiskerMargin = 5

    -- NOTE(review): as with 'svgRender', the XML of the templates below has
    -- been stripped from this copy; only placeholders/newlines remain.
    box = printf boxTemplate
      (prettyEstimate est)
      y' boxHeight boxWidth deg deg
      deg
      x1 x2 y2 y2
      x1 x1 y1 y3
      x2 x2 y1 y3
    boxTemplate
      = "\n%s\n"
      ++ "\n"
      ++ ""
      ++ "\n"
      ++ "\n"
      ++ "\n"
      ++ "\n\n"

    longText = printf longTextTemplate
      deg
      y (encodeSvg name)
      y boxWidth (showPicos4 (measTime m))
    longTextTemplate
      = "\n"
      ++ "%s\n"
      ++ "%s\n"
      ++ "\n"

    shortTextContent = encodeSvg name ++ " " ++ showPicos4 (measTime m)
    shortText = printf shortTextTemplate deg y shortTextContent
    shortTextTemplate = "%s\n"

-- | Escape text for embedding in SVG: '<' and '&' only,
-- which suffices for character data (not for attribute values).
encodeSvg :: String -> String
encodeSvg [] = []
encodeSvg ('<' : xs) = '&' : 'l' : 't' : ';' : encodeSvg xs
encodeSvg ('&' : xs) = '&' : 'a' : 'm' : 'p' : ';' : encodeSvg xs
encodeSvg (x : xs) = x : encodeSvg xs

-- | A path to read baseline results in CSV format, populated by @--baseline@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'.
--
-- @since 0.3
newtype BaselinePath = BaselinePath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe BaselinePath) where
  defaultValue = Nothing
  parseValue = Just . Just . BaselinePath
  optionName = pure "baseline"
  optionHelp = pure "File with baseline results in CSV format to compare against"

-- | Run benchmarks and report results
-- in a manner similar to 'consoleTestReporter'.
--
-- If @--baseline@ @FILE@ command line option is specified,
-- compare results against an earlier run and mark
-- too slow / too fast benchmarks as failed in accordance to
-- bounds specified by @--fail-if-slower@ @PERCENT@ and @--fail-if-faster@ @PERCENT@.
--
-- @since 0.2
consoleBenchReporter :: Ingredient
consoleBenchReporter = modifyConsoleReporter [Option (Proxy :: Proxy (Maybe BaselinePath))] $ \opts -> do
  -- Read the baseline eagerly ('force') so the file handle is not held open
  -- lazily while benchmarks run.
  baseline <- case lookupOption opts of
    Nothing -> pure S.empty
    Just (BaselinePath path) -> S.fromList . joinQuotedFields . lines <$> (readFile path >>= evaluate . force)
  let pretty = if hasGCStats then prettyEstimateWithGC else prettyEstimate
  pure $ \name mDepR r -> case safeRead (resultDescription r) of
    Nothing -> r
    Just (WithLoHi est lowerBound upperBound) ->
      (if isAcceptable then id else forceFail)
      r { resultDescription = pretty est ++ bcompareMsg ++ formatSlowDown mSlowDown }
      where
        -- Fail if the benchmark is out of bounds either vs. the baseline file
        -- or vs. a 'bcompare' dependency.
        isAcceptable = isAcceptableVsBaseline && isAcceptableVsBcompare
        mSlowDown = compareVsBaseline baseline name est
        slowDown = fromMaybe 1 mSlowDown
        isAcceptableVsBaseline = slowDown >= lowerBound && slowDown <= upperBound
        (isAcceptableVsBcompare, bcompareMsg) = case mDepR of
          Nothing -> (True, "")
          Just (WithLoHi depR depLowerBound depUpperBound) -> case safeRead (resultDescription depR) of
            Nothing -> (True, "")
            Just (WithLoHi depEst _ _) -> let ratio = estTime est / estTime depEst in
              ( ratio >= depLowerBound && ratio <= depUpperBound
              , printf ", %.2fx" ratio
              )

-- | A well-formed CSV entry contains an even number of quotes: 0, 2 or more.
-- Glue physical lines back together when a quoted field spans several of them.
joinQuotedFields :: [String] -> [String]
joinQuotedFields [] = []
joinQuotedFields (x : xs)
  | areQuotesBalanced x = x : joinQuotedFields xs
  | otherwise = case span areQuotesBalanced xs of
    (_, []) -> [] -- malformed CSV
    (ys, z : zs) -> unlines (x : ys ++ [z]) : joinQuotedFields zs
  where
    areQuotesBalanced = even . length . filter (== '"')

-- | Mean time of an estimate, in picoseconds, as a 'Double'.
estTime :: Estimate -> Double
estTime = word64ToDouble . measTime . estMean

-- | Look up @name@ in the baseline CSV and return the slowdown ratio,
-- or 'Just' 1 when the difference is within noise (2 sigma of either run).
compareVsBaseline :: S.Set String -> TestName -> Estimate -> Maybe Double
compareVsBaseline baseline name (Estimate m stdev) = case mOld of
  Nothing -> Nothing
  Just (oldTime, oldDoubleSigma)
    -- time and oldTime must be signed integers to use 'abs'
    | abs (time - oldTime) < max (2 * word64ToInt64 stdev) oldDoubleSigma -> Just 1
    | otherwise -> Just $ int64ToDouble time / int64ToDouble oldTime
  where
    time = word64ToInt64 $ measTime m

    mOld :: Maybe (Int64, Int64)
    mOld = do
      let prefix = encodeCsv name ++ ","
      -- 'S.split' positions us at the first line >= prefix in the sorted set.
      (line, furtherLines) <- S.minView $ snd $ S.split prefix baseline

      case S.minView furtherLines of
        Nothing -> pure ()
        Just (nextLine, _) -> case stripPrefix prefix nextLine of
          Nothing -> pure ()
          -- If there are several lines matching prefix, skip them all.
          -- Should not normally happen, 'csvReporter' prohibits repeating test names.
          Just{} -> Nothing

      (timeCell, ',' : rest) <- span (/= ',') <$> stripPrefix prefix line
      let doubleSigmaCell = takeWhile (/= ',') rest
      (,) <$> safeRead timeCell <*> safeRead doubleSigmaCell

-- | Human-readable "% more/less than baseline" suffix;
-- empty when no baseline entry matched.
formatSlowDown :: Maybe Double -> String
formatSlowDown Nothing = ""
formatSlowDown (Just ratio) = case percents `compare` 0 of
  LT -> printf ", %2i%% less than baseline" (-percents)
  EQ -> ", same as baseline"
  GT -> printf ", %2i%% more than baseline" percents
  where
    percents :: Int64
    percents = truncate ((ratio - 1) * 100)

-- | Rewrite a result as a failure, keeping its description.
forceFail :: Result -> Result
forceFail r = r { resultOutcome = Failure TestFailed, resultShortDescription = "FAIL" }

-- | Zero, exactly one, or more than one occurrence —
-- a monoid that remembers whether a match was unique.
data Unique a = None | Unique !a | NotUnique
  deriving (Functor)

instance Semigroup (Unique a) where
  None <> a = a
  a <> None = a
  _ <> _ = NotUnique

instance Monoid (Unique a) where
  mempty = None
  mappend = (<>)

-- | Wrap 'consoleTestReporter', post-processing each finished 'Result'
-- with a function that may consult a 'bcompare' dependency's result.
modifyConsoleReporter
  :: [OptionDescription]
  -> (OptionSet -> IO (TestName -> Maybe (WithLoHi Result) -> Result -> Result))
  -> Ingredient
modifyConsoleReporter desc' iof = TestReporter (desc ++ desc') $ \opts tree ->
  let nameSeqs = IM.fromDistinctAscList $ zip [0..] $ testNameSeqs opts tree
      namesAndDeps = IM.fromDistinctAscList $ zip [0..] $ map (second isSingle)
        $ testNamesAndDeps nameSeqs opts tree
      modifySMap = (iof opts >>=) . flip postprocessResult
        . IM.intersectionWith (\(a, b) c -> (a, b, c)) namesAndDeps
  in (modifySMap >=>) <$> cb opts tree
  where
    (desc, cb) = case consoleTestReporter of
      TestReporter d c -> (d, c)
      _ -> error "modifyConsoleReporter: consoleTestReporter must be TestReporter"

    -- A 'bcompare' target is usable only when it matched exactly one benchmark.
    isSingle (Unique a) = Just a
    isSingle _ = Nothing

-- | Convert a test tree to a list of test names.
testNameSeqs :: OptionSet -> TestTree -> [Seq TestName]
testNameSeqs = foldTestTree trivialFold
  { foldSingle = const $ const . (:[]) . Seq.singleton
#if MIN_VERSION_tasty(1,5,0)
  , foldGroup = const $ (. concat) . map . (<|)
#else
  , foldGroup = const $ map . (<|)
#endif
  }

-- | Full stop-separated test names paired with their 'bcompare' dependency
-- (if any), resolved against the index of all test names.
testNamesAndDeps :: IntMap (Seq TestName) -> OptionSet -> TestTree -> [(TestName, Unique (WithLoHi IM.Key))]
testNamesAndDeps im = foldTestTree trivialFold
  { foldSingle = const $ const . (: []) . (, mempty)
#if MIN_VERSION_tasty(1,5,0)
  , foldGroup = const $ (. concat) . map . first . (++) . (++ ".")
#else
  , foldGroup = const $ map . first . (++) . (++ ".")
#endif
  , foldAfter = const foldDeps
  }
  where
    -- Recognise the pattern planted by 'bcompare' (a string literal carrying
    -- 'bcomparePrefix' and encoded lo/hi bounds) and attach the matched keys.
    foldDeps :: DependencyType -> Expr -> [(a, Unique (WithLoHi IM.Key))] -> [(a, Unique (WithLoHi IM.Key))]
    foldDeps AllSucceed (And (StringLit xs) p)
      | bcomparePrefix `isPrefixOf` xs
      , Just (lo :: Double, hi :: Double) <- safeRead $ drop (length bcomparePrefix) xs
      = map $ second $ mappend $ (\x -> WithLoHi x lo hi) <$> findMatchingKeys im p
    foldDeps _ _ = id

-- | Evaluate a tasty pattern against every test name;
-- 'Unique' only when exactly one name matches.
findMatchingKeys :: IntMap (Seq TestName) -> Expr -> Unique IM.Key
findMatchingKeys im pattern =
  foldMap (\(k, v) -> if withFields v pat == Right True then Unique k else mempty) $ IM.assocs im
  where
    pat = eval pattern >>= asB

-- | Mirror the incoming 'StatusMap' into fresh 'TVar's, applying @f@ to each
-- 'Done' result (with its dependency's result, when available) on the way.
postprocessResult
  :: (TestName -> Maybe (WithLoHi Result) -> Result -> Result)
  -> IntMap (TestName, Maybe (WithLoHi IM.Key), TVar Status)
  -> IO StatusMap
postprocessResult f src = do
  paired <- forM src $ \(name, mDepId, tv) -> (name, mDepId, tv,) <$> newTVarIO NotStarted
  let doUpdate = atomically $ do
        (Any anyUpdated, All allDone) <-
          getApp $ flip foldMap paired $ \(name, mDepId, newTV, oldTV) -> Ap $ do
            old <- readTVar oldTV
            case old of
              Done{} -> pure (Any False, All True)
              _ -> do
                new <- readTVar newTV
                case new of
                  Done res -> do
                    -- Fetch the dependency's finished result, if this
                    -- benchmark was registered via 'bcompare'.
                    depRes <- case mDepId of
                      Nothing -> pure Nothing
                      Just (WithLoHi depId lo hi) -> case IM.lookup depId src of
                        Nothing -> pure Nothing
                        Just (_, _, depTV) -> do
                          depStatus <- readTVar depTV
                          case depStatus of
                            Done dep -> pure $ Just (WithLoHi dep lo hi)
                            _ -> pure Nothing
                    writeTVar oldTV (Done (f name depRes res))
                    pure (Any True, All True)
#if MIN_VERSION_tasty(1,5,0)
                  Executing newProgr -> do
                    let updated = case old of
                          Executing oldProgr -> oldProgr /= newProgr
                          _ -> True
                    when updated $
                      writeTVar oldTV (Executing newProgr)
                    pure (Any updated, All False)
#else
                  Executing{} -> pure (Any False, All False)
#endif
                  NotStarted -> pure (Any False, All False)
        -- Retry until something changed; finish once every benchmark is Done.
        if anyUpdated || allDone then pure allDone else retry
      adNauseam = doUpdate >>= (`unless` adNauseam)
  _ <- forkIO adNauseam
  pure $ fmap (\(_, _, _, a) -> a) paired

int64ToDouble :: Int64 -> Double
int64ToDouble = fromIntegral

word64ToInt64 :: Word64 -> Int64
word64ToInt64 = fromIntegral

#endif

word64ToDouble :: Word64 -> Double
word64ToDouble = fromIntegral

#if !MIN_VERSION_base(4,10,0)
int64ToWord64 :: Int64 -> Word64
int64ToWord64 = fromIntegral
#endif


#if defined(mingw32_HOST_OS)

-- 32-bit Windows uses the stdcall convention for WinAPI, 64-bit uses ccall.
#if defined(i386_HOST_ARCH)
#define CCONV stdcall
#else
#define CCONV ccall
#endif

foreign import CCONV unsafe "windows.h GetConsoleOutputCP" getConsoleOutputCP :: IO Word32
foreign import CCONV unsafe "windows.h SetConsoleOutputCP" setConsoleOutputCP :: Word32 -> IO ()

#endif

#ifdef MIN_VERSION_tasty

-- | Map leaf benchmarks ('bench', not 'bgroup') with a provided function,
-- which has an access to leaf's reversed path.
--
-- This helper is useful for bulk application of 'bcompare'.
-- See also 'locateBenchmark'.
--
-- Real world examples:
--
-- * https://hackage.haskell.org/package/chimera-0.3.3.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/text-builder-linear-0.1.1/src/bench/Main.hs
--
-- @since 0.3.2
mapLeafBenchmarks :: ([String] -> Benchmark -> Benchmark) -> Benchmark -> Benchmark
mapLeafBenchmarks processLeaf = go mempty
  where
    -- @path@ accumulates group names innermost-first (reversed path).
    go :: [String] -> Benchmark -> Benchmark
    go path x = case x of
      SingleTest name t -> processLeaf (name : path) (SingleTest name t)
      TestGroup name tts -> TestGroup name (map (go (name : path)) tts)
      PlusTestOptions g tt -> PlusTestOptions g (go path tt)
      WithResource res f -> WithResource res (go path . f)
      AskOptions f -> AskOptions (go path . f)
      After dep expr tt -> After dep expr (go path tt)

-- | Construct an AWK expression to locate an individual element or elements in 'Benchmark'
-- by the suffix of the path. Names are listed in reverse order:
-- from 'bench'\'s own name to a name of the outermost 'bgroup'.
--
-- This function is meant to be used in conjunction with 'bcompare', e. g.,
-- 'bcompare' ('Test.Tasty.Patterns.Printer.printAwkExpr' ('locateBenchmark' @path@)).
-- See also 'mapLeafBenchmarks'.
--
-- Real world examples:
--
-- * https://hackage.haskell.org/package/chimera-0.3.3.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/text-builder-linear-0.1.1/src/bench/Main.hs
--
-- @since 0.3.2
locateBenchmark :: [String] -> Expr
-- An empty path matches everything (the constant-true AWK expression).
locateBenchmark [] = IntLit 1
locateBenchmark path
  -- Match the i-th name from the end of the slash-separated test path:
  -- $(NF - i) == name, AND-ed over all supplied names.
  = foldl1' And
  $ zipWith (\i name -> Patterns.EQ (Field (Sub NF (IntLit i))) (StringLit name)) [0..] path

#endif