├── .gitignore ├── .hlint.yaml ├── cabal.haskell-ci ├── bench └── bench-fibo.hs ├── LICENSE ├── .cirrus.yml ├── compare_benches.sh ├── tasty-bench.cabal ├── convert_readme.sh ├── example.svg ├── changelog.md ├── .github └── workflows │ ├── other.yml │ └── haskell-ci.yml ├── README.md └── src └── Test └── Tasty └── Bench.hs /.gitignore: -------------------------------------------------------------------------------- 1 | /dist-newstyle/ 2 | /dist-mcabal/ 3 | -------------------------------------------------------------------------------- /.hlint.yaml: -------------------------------------------------------------------------------- 1 | - ignore: {name: "Use lambda-case"} 2 | - ignore: {name: "Redundant if"} 3 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | constraint-set no-tasty 2 | constraints: tasty-bench -tasty 3 | 4 | constraint-set tasty-1.4 5 | constraints: tasty ^>= 1.4 6 | 7 | constraint-set tasty-1.5 8 | constraints: tasty ^>= 1.5 9 | ghc: >= 8.0 10 | -------------------------------------------------------------------------------- /bench/bench-fibo.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Test.Tasty.Bench 4 | 5 | fibo :: Int -> Integer 6 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 7 | 8 | main :: IO () 9 | main = defaultMain 10 | [ bgroup "Fibonacci numbers" 11 | [ bench "fifth" $ nf fibo 5 12 | , bench "tenth" $ nf fibo 10 13 | , bench "twentieth" $ nf fibo 20 14 | ] 15 | ] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Andrew Lelechenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | name: FreeBSD 3 | freebsd_instance: 4 | image_family: freebsd-14-2 5 | install_script: 6 | - pkg install -y git gmake 7 | - curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 8 | script: 9 | - export PATH="$HOME/.ghcup/bin:$PATH" 10 | - cabal build 11 | 12 | task: 13 | name: OpenBSD 14 | compute_engine_instance: 15 | image_project: pg-ci-images 16 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/openbsd.pkrvars.hcl 17 | image: family/pg-ci-openbsd-vanilla 18 | platform: openbsd 19 | install_script: pkg_add ghc cabal-install git 20 | script: 21 | - export CABAL_DIR=/tmp/.cabal 22 | - cabal update 23 | - cabal build 24 | 25 | task: 26 | name: NetBSD 27 | compute_engine_instance: 28 | image_project: pg-ci-images 29 | # See https://github.com/anarazel/pg-vm-images/blob/main/packer/netbsd.pkrvars.hcl 30 | image: family/pg-ci-netbsd-vanilla 31 | platform: netbsd 32 | install_script: 33 | - export PKG_PATH="http://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r|cut -f '1 2' -d.)/All/" 34 | - pkg_add ghc cabal-install git 35 | script: 36 | - export CABAL_DIR=/tmp/.cabal 37 | - cabal update 38 | - cabal build 39 | -------------------------------------------------------------------------------- /compare_benches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | compare_benches () { 3 | if [ "$#" -lt 2 ]; then 4 | printf "Usage:\n compare_benches oldCommit newCommit ...\nwhere ... 
is passed to benchmarks directly.\n" 5 | return 0 6 | fi 7 | 8 | OLD="$1" 9 | shift 10 | NEW="$1" 11 | shift 12 | 13 | OLDREF=$(git rev-parse --verify "$OLD") 14 | NEWREF=$(git rev-parse --verify "$NEW") 15 | 16 | OLDCSV=$(echo "$OLD".csv | sed -e s#/##g) 17 | NEWCSV=$(echo "$NEW".csv | sed -e s#/##g) 18 | OLDVSNEWCSV=$(echo "$OLD"-vs-"$NEW".csv | sed -e s#/##g) 19 | 20 | git checkout -q "$OLDREF" && \ 21 | trap 'git checkout -q "@{-1}" && trap - INT' INT && \ 22 | cabal run -v0 benchmarks -- --csv "$OLDCSV" "$@" && \ 23 | git checkout -q "$NEWREF" && \ 24 | trap 'git checkout -q "@{-2}" && trap - INT' INT && \ 25 | cabal run -v0 benchmarks -- --baseline "$OLDCSV" --csv "$NEWCSV" "$@" && \ 26 | git checkout -q "@{-2}" && \ 27 | 28 | awk 'BEGIN{FS=",";OFS=",";print "Name,'"$OLD"','"$NEW"',Ratio"}FNR==1{trueNF=NF;next}NF<trueNF{print $1;next}FNR==NR{oldTime[$1]=$2;next}{ratio=$2/oldTime[$1];gs+=log(ratio);gc++;print $1,oldTime[$1],$2,ratio}END{if(gc>0)print "Geometric mean,,",exp(gs/gc)}' "$OLDCSV" "$NEWCSV" > "$OLDVSNEWCSV" && \ 29 | 30 | trap - INT 31 | } 32 | -------------------------------------------------------------------------------- /tasty-bench.cabal: -------------------------------------------------------------------------------- 1 | name: tasty-bench 2 | version: 0.4.1 3 | cabal-version: 1.18 4 | build-type: Simple 5 | license: MIT 6 | license-file: LICENSE 7 | copyright: 2021 Andrew Lelechenko 8 | author: Andrew Lelechenko <andrew.lelechenko@gmail.com> 9 | maintainer: Andrew Lelechenko <andrew.lelechenko@gmail.com> 10 | homepage: https://github.com/Bodigrim/tasty-bench 11 | bug-reports: https://github.com/Bodigrim/tasty-bench/issues 12 | category: Development, Performance, Testing, Benchmarking 13 | synopsis: Featherlight benchmark framework 14 | description: 15 | Featherlight framework (only one file!) 16 | for performance measurement with API mimicking 17 | @criterion@ and @gauge@, featuring built-in comparison 18 | against previous runs and between benchmarks. Our benchmarks are just 19 | regular @tasty@ tests. 
20 | 21 | extra-source-files: 22 | compare_benches.sh 23 | extra-doc-files: 24 | changelog.md 25 | example.svg 26 | README.md 27 | 28 | tested-with: GHC == 9.14.1, GHC == 9.12.2, GHC == 9.10.3, GHC == 9.8.4, GHC == 9.6.7, GHC == 9.4.8, GHC == 9.2.8, GHC==9.0.2, GHC==8.10.7, GHC==8.8.4, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2 29 | 30 | source-repository head 31 | type: git 32 | location: https://github.com/Bodigrim/tasty-bench 33 | 34 | flag tasty 35 | default: True 36 | manual: True 37 | description: 38 | When disabled, reduces API to functions independent of @tasty@: combinators 39 | to construct @Benchmarkable@ and @measureCpuTime@. 40 | 41 | library 42 | exposed-modules: Test.Tasty.Bench 43 | hs-source-dirs: src 44 | default-language: Haskell2010 45 | ghc-options: -O2 -Wall -fno-warn-unused-imports -Wcompat -Widentities 46 | 47 | build-depends: 48 | base >= 4.9 && < 5, 49 | deepseq >= 1.1 && < 1.6 50 | if impl(ghc < 9.0) 51 | build-depends: 52 | ghc-prim < 0.14 53 | if flag(tasty) 54 | build-depends: 55 | containers >= 0.5 && < 0.9, 56 | tasty >= 1.4 && < 1.6 57 | if impl(ghc < 8.4) 58 | build-depends: 59 | time >= 1.2 && < 2 60 | 61 | benchmark bench-fibo 62 | default-language: Haskell2010 63 | hs-source-dirs: bench 64 | main-is: bench-fibo.hs 65 | type: exitcode-stdio-1.0 66 | build-depends: base, tasty-bench 67 | ghc-options: "-with-rtsopts=-A32m" 68 | -------------------------------------------------------------------------------- /convert_readme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | sed 's/e\. 
g\./e.g./g' README.md | pandoc -f markdown -t haddock -o README.haddock 3 | 4 | ex README.haddock <> README.haddock 15 | 16 | sed -i '' '/^= /d' README.haddock 17 | sed -i '' '/^#/d' README.haddock 18 | sed -i '' '/^- <#/d' README.haddock 19 | 20 | sed -i '' 's/^== /=== /g' README.haddock 21 | sed -i '' -E 's/<(.*) (criterion|gauge|tasty|tasty-rerun|-fproc-alignment)>/[@\2@](\1)/g' README.haddock 22 | sed -i '' -E 's/<(.*) tasty documentation>/[@tasty@ documentation](\1)/g' README.haddock 23 | sed -i '' -E 's/vs\../vs. /g' README.haddock 24 | sed -i '' -E 's/e\.g\./e. g./g' README.haddock 25 | 26 | sed -i '' 's/@localOption (NumThreads 1)@/@localOption@ (@NumThreads@ 1)/g' README.haddock 27 | sed -i '' 's/@localOption (FailIfSlower 0.10)@/@localOption@ (@FailIfSlower@ 0.10)/g' README.haddock 28 | sed -i '' 's/@localOption (RelStDev 0.02)@/@localOption@ (@RelStDev@ 0.02)/g' README.haddock 29 | sed -i '' 's/@localOption (mkTimeout 100000000)@/@localOption@ (@mkTimeout@ 100000000)/g' README.haddock 30 | sed -i '' "s/@Test.Tasty.Bench.defaultMain@/@Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain'/g" README.haddock 31 | sed -i '' "s/@Test.Tasty.defaultMain@/@Test.Tasty.@'Test.Tasty.defaultMain'/g" README.haddock 32 | sed -i '' "s/@ghc-options:@\[@-fproc-alignment@\]/@ghc-options:@ [@-fproc-alignment@]/g" README.haddock 33 | 34 | sed -i '' 's/@Test.Tasty.Bench@/"Test.Tasty.Bench"/g' README.haddock 35 | 36 | for word in getCPUTime TimeMode RTSStats allocated_bytes copied_bytes max_mem_in_use_bytes consoleBenchReporter consoleTestReporter env envWithCleanup IO bcompare bcompareWithin locateBenchmark localOption NumThreads Benchmarkable measureCpuTime mkTimeout RelStDev FailIfSlower; do 37 | sed -i '' "s/@$word@/'$word'/g" README.haddock 38 | done 39 | 40 | sed -i '' "s;<>;![Plotting](example.svg);g" README.haddock 41 | sed -i '' "s/^Plotting$//g" README.haddock 42 | 43 | sed -i '' 's/^- @\(.*\)@/[@\1@]:/g' README.haddock 44 | 45 | sed -n '/^{-#/,$p' src/Test/Tasty/Bench.hs 
> src/Test/Tasty/Bench.temp 46 | cat README.haddock src/Test/Tasty/Bench.temp > src/Test/Tasty/Bench.hs 47 | rm README.haddock src/Test/Tasty/Bench.temp 48 | -------------------------------------------------------------------------------- /example.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Inversion.Data.Mod 5 | 353 ms 6 | 7 | 8 | 353 ms ± 47 ms 9 | 10 | 11 | 12 | 13 | 14 | 15 | Inversion.Data.Mod.Word 294 ms 16 | 17 | 294 ms ± 43 ms 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Inversion.finite-field 26 | 1.03 s 27 | 28 | 29 | 1.03 s ± 147 ms 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | Inversion.modular-arithmetic 38 | 896 ms 39 | 40 | 41 | 896 ms ± 58 ms 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # 0.5 2 | 3 | * Extend `TimeMode` with `MutatorCpuTime`, `MutatorWallTime` and `CustomTime`. 4 | * Use a more robust strategy to force encoding to UTF-8. 5 | 6 | # 0.4.1 7 | 8 | * Force GC before collecting `RTSStats`, otherwise measurements are inaccurate. 9 | * Restore locale encoding after `defaultMain`. 10 | 11 | # 0.4 12 | 13 | * Switch `nf`, `nfIO` and `nfAppIO` to evaluate outputs to a normal form 14 | with `rnf` instead of `force`. It means that parts of the output, which have 15 | already been forced, can be garbage collected early, without waiting for 16 | the entire output to be allocated at the same time. This decreases 17 | benchmarking overhead in many scenarios and brings the behaviour in line 18 | with `criterion`. See [#39](https://github.com/Bodigrim/tasty-bench/issues/39) 19 | for discussion. 20 | * Drop support of `tasty < 1.4`. 21 | * Make `IO` benchmarks immune to `-fspec-constr-count` limit. 22 | * Decommission `debug` build flag. 23 | * Decommission warning when `--timeout` is absent. 
24 | * Add `instance {Eq,Ord,Num,Fractional} {RelStDev,FailIfSlower,FailIfFaster}`. 25 | * Add `instance {Eq,Ord} {CsvPath,SvgPath,BaselinePath}`. 26 | 27 | # 0.3.5 28 | 29 | * Support `tasty-1.5`. 30 | * Report benchmarking progress. 31 | 32 | # 0.3.4 33 | 34 | * Force single-threaded execution in `defaultMain`. 35 | * Expose `measureCpuTimeAndStDev` helper to analyse benchmarks manually. 36 | 37 | # 0.3.3 38 | 39 | * Drop support of `tasty < 1.2.3`. 40 | * Make benchmarks immune to `-fspec-constr-count` limit. 41 | 42 | # 0.3.2 43 | 44 | * Add `locateBenchmark` and `mapLeafBenchmarks`. 45 | * Support measuring of wall-clock time. 46 | * Make messages for baseline comparison less ambiguous. 47 | * Graceful degradation on non-Unicode terminals. 48 | 49 | # 0.3.1 50 | 51 | * Add `bcompareWithin` for portable performance tests. 52 | * Add `tasty` and `debug` build flags. 53 | 54 | # 0.3 55 | 56 | * Report mean time with 3 significant digits. 57 | * Report peak memory usage, when run with `+RTS -T`. 58 | * Run benchmarks only once, if `RelStDev` is infinite. 59 | * Make `Benchmarkable` constructor public. 60 | * Expose `measureCpuTime` helper to run benchmarks manually. 61 | * Expose `CsvPath`, `BaselinePath`, `SvgPath`. 62 | 63 | # 0.2.5 64 | 65 | * Fix comparison against baseline. 66 | 67 | # 0.2.4 68 | 69 | * Add a simplistic SVG reporter. 70 | * Add `bcompare` to compare between benchmarks. 71 | * Throw a warning, if benchmarks take too long. 72 | 73 | # 0.2.3 74 | 75 | * Prohibit duplicated benchmark names in CSV reports. 76 | 77 | # 0.2.2 78 | 79 | * Remove `NFData` constraint from `whnfIO`. 80 | 81 | # 0.2.1 82 | 83 | * Fix integer overflow in stdev computations. 84 | 85 | # 0.2 86 | 87 | * Add `env` and `envWithCleanup`. 88 | * Run console and CSV reporters in parallel. 89 | * Extend console reporter and export it as `consoleBenchReporter`. 90 | * Add comparison against baseline and relevant options. 91 | * Export `RelStDev` option. 
92 | * Export `benchIngredients`. 93 | 94 | # 0.1 95 | 96 | * Initial release. 97 | -------------------------------------------------------------------------------- /.github/workflows/other.yml: -------------------------------------------------------------------------------- 1 | name: other 2 | on: 3 | - push 4 | - pull_request 5 | 6 | defaults: 7 | run: 8 | shell: bash 9 | 10 | jobs: 11 | build: 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: true 15 | matrix: 16 | os: [windows-latest, macOS-latest, macOS-14] 17 | ghc: ['latest'] 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: haskell-actions/setup@v2 21 | id: setup-haskell-cabal 22 | with: 23 | ghc-version: ${{ matrix.ghc }} 24 | - name: Update cabal package database 25 | run: cabal update 26 | - uses: actions/cache@v4 27 | name: Cache cabal stuff 28 | with: 29 | path: | 30 | ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 31 | dist-newstyle 32 | key: ${{ runner.os }}-${{ matrix.ghc }} 33 | - name: Build 34 | run: | 35 | cabal sdist -z -o . 36 | cabal get tasty-bench-*.tar.gz 37 | cd tasty-bench-*/ 38 | cabal build 39 | - name: Haddock 40 | run: | 41 | cd tasty-bench-*/ 42 | cabal haddock 43 | 44 | # Emulation on s390x and ppc64le platforms is incredibly slow and memory demanding. 45 | # It seems that any executable with GHC RTS takes at least 7-8 Gb of RAM, so we can 46 | # run `cabal` or `ghc` on their own, but cannot run them both at the same time, striking 47 | # out `cabal build`. Instead we install system packages and invoke `ghc` manually. 
48 | 49 | emulated: 50 | needs: build 51 | runs-on: ubuntu-latest 52 | strategy: 53 | fail-fast: true 54 | matrix: 55 | arch: ['s390x', 'ppc64le', 'armv7', 'aarch64', 'riscv64'] 56 | steps: 57 | - uses: actions/checkout@v4 58 | - uses: uraimo/run-on-arch-action@v3 59 | with: 60 | arch: ${{ matrix.arch }} 61 | distro: ubuntu_rolling 62 | githubToken: ${{ github.token }} 63 | install: | 64 | apt-get update -y 65 | apt-get install -y ghc libghc-tasty-dev 66 | run: | 67 | ghc --version 68 | ghc src/Test/Tasty/Bench.hs 69 | 70 | i386: 71 | needs: build 72 | runs-on: ubuntu-latest 73 | container: 74 | image: i386/ubuntu:bionic 75 | steps: 76 | - name: Install 77 | run: | 78 | apt-get update -y 79 | apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev 80 | curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh 81 | - uses: actions/checkout@v1 82 | - name: Test 83 | run: | 84 | source ~/.ghcup/env 85 | cabal update 86 | cabal build 87 | 88 | wasm: 89 | needs: build 90 | runs-on: ubuntu-latest 91 | env: 92 | GHC_WASM_META_REV: 895f7067e1d4c918a45559da9d2d6a403a690703 93 | FLAVOUR: '9.6' 94 | steps: 95 | - name: setup-ghc-wasm32-wasi 96 | run: | 97 | cd $(mktemp -d) 98 | curl -L https://gitlab.haskell.org/ghc/ghc-wasm-meta/-/archive/$GHC_WASM_META_REV/ghc-wasm-meta-master.tar.gz | tar xz --strip-components=1 99 | ./setup.sh 100 | ~/.ghc-wasm/add_to_github_path.sh 101 | - uses: actions/checkout@v4 102 | 103 | - uses: actions/cache@v4 104 | with: 105 | path: | 106 | ~/.ghc-wasm/.cabal/store 107 | dist-newstyle 108 | key: build-wasi-${{ runner.os }}-wasm-meta-${{ env.GHC_WASM_META_REV }}-flavour-${{ env.FLAVOUR }}-${{ github.sha }} 109 | restore-keys: | 110 | build-wasi-${{ runner.os }}-wasm-meta-${{ env.GHC_WASM_META_REV }}-flavour-${{ env.FLAVOUR }}- 111 | 112 | - name: Build 113 | run: | 114 | wasm32-wasi-cabal build 
all 115 | -------------------------------------------------------------------------------- /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'tasty-bench.cabal' '--haddock-jobs=>=8.2' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20251105 12 | # 13 | # REGENDATA ("0.19.20251105",["github","tasty-bench.cabal","--haddock-jobs=>=8.2"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | - push 18 | - pull_request 19 | - merge_group 20 | jobs: 21 | linux: 22 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 23 | runs-on: ubuntu-24.04 24 | timeout-minutes: 25 | 60 26 | container: 27 | image: buildpack-deps:jammy 28 | continue-on-error: ${{ matrix.allow-failure }} 29 | strategy: 30 | matrix: 31 | include: 32 | - compiler: ghc-9.14.0.20251028 33 | compilerKind: ghc 34 | compilerVersion: 9.14.0.20251028 35 | setup-method: ghcup-prerelease 36 | allow-failure: false 37 | - compiler: ghc-9.12.2 38 | compilerKind: ghc 39 | compilerVersion: 9.12.2 40 | setup-method: ghcup 41 | allow-failure: false 42 | - compiler: ghc-9.10.3 43 | compilerKind: ghc 44 | compilerVersion: 9.10.3 45 | setup-method: ghcup 46 | allow-failure: false 47 | - compiler: ghc-9.8.4 48 | compilerKind: ghc 49 | compilerVersion: 9.8.4 50 | setup-method: ghcup 51 | allow-failure: false 52 | - compiler: ghc-9.6.7 53 | compilerKind: ghc 54 | compilerVersion: 9.6.7 55 | setup-method: ghcup 56 | allow-failure: false 57 | - compiler: ghc-9.4.8 58 | compilerKind: ghc 59 | compilerVersion: 9.4.8 60 | setup-method: ghcup 61 | allow-failure: false 62 | - compiler: ghc-9.2.8 63 | compilerKind: ghc 64 | compilerVersion: 9.2.8 65 | setup-method: ghcup 66 | allow-failure: false 67 | - 
compiler: ghc-9.0.2 68 | compilerKind: ghc 69 | compilerVersion: 9.0.2 70 | setup-method: ghcup 71 | allow-failure: false 72 | - compiler: ghc-8.10.7 73 | compilerKind: ghc 74 | compilerVersion: 8.10.7 75 | setup-method: ghcup 76 | allow-failure: false 77 | - compiler: ghc-8.8.4 78 | compilerKind: ghc 79 | compilerVersion: 8.8.4 80 | setup-method: ghcup 81 | allow-failure: false 82 | - compiler: ghc-8.6.5 83 | compilerKind: ghc 84 | compilerVersion: 8.6.5 85 | setup-method: ghcup 86 | allow-failure: false 87 | - compiler: ghc-8.4.4 88 | compilerKind: ghc 89 | compilerVersion: 8.4.4 90 | setup-method: ghcup 91 | allow-failure: false 92 | - compiler: ghc-8.2.2 93 | compilerKind: ghc 94 | compilerVersion: 8.2.2 95 | setup-method: ghcup 96 | allow-failure: false 97 | - compiler: ghc-8.0.2 98 | compilerKind: ghc 99 | compilerVersion: 8.0.2 100 | setup-method: ghcup 101 | allow-failure: false 102 | fail-fast: false 103 | steps: 104 | - name: apt-get install 105 | run: | 106 | apt-get update 107 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 108 | - name: Install GHCup 109 | run: | 110 | mkdir -p "$HOME/.ghcup/bin" 111 | curl -sL https://downloads.haskell.org/ghcup/0.1.50.1/x86_64-linux-ghcup-0.1.50.1 > "$HOME/.ghcup/bin/ghcup" 112 | chmod a+x "$HOME/.ghcup/bin/ghcup" 113 | - name: Install cabal-install 114 | run: | 115 | "$HOME/.ghcup/bin/ghcup" install cabal 3.16.0.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 116 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.16.0.0 -vnormal+nowrap" >> "$GITHUB_ENV" 117 | - name: Install GHC (GHCup) 118 | if: matrix.setup-method == 'ghcup' 119 | run: | 120 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 121 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 122 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 123 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 124 | echo "HC=$HC" >> "$GITHUB_ENV" 125 | echo 
"HCPKG=$HCPKG" >> "$GITHUB_ENV" 126 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 127 | env: 128 | HCKIND: ${{ matrix.compilerKind }} 129 | HCNAME: ${{ matrix.compiler }} 130 | HCVER: ${{ matrix.compilerVersion }} 131 | - name: Install GHC (GHCup prerelease) 132 | if: matrix.setup-method == 'ghcup-prerelease' 133 | run: | 134 | "$HOME/.ghcup/bin/ghcup" config add-release-channel prereleases 135 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 136 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 137 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 138 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 139 | echo "HC=$HC" >> "$GITHUB_ENV" 140 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 141 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 142 | env: 143 | HCKIND: ${{ matrix.compilerKind }} 144 | HCNAME: ${{ matrix.compiler }} 145 | HCVER: ${{ matrix.compilerVersion }} 146 | - name: Set PATH and environment variables 147 | run: | 148 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 149 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 150 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 151 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 152 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? 
$5 != 1 : $3))') 153 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 154 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 155 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 156 | if [ $((HCNUMVER >= 91400)) -ne 0 ] ; then echo "HEADHACKAGE=true" >> "$GITHUB_ENV" ; else echo "HEADHACKAGE=false" >> "$GITHUB_ENV" ; fi 157 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 158 | env: 159 | HCKIND: ${{ matrix.compilerKind }} 160 | HCNAME: ${{ matrix.compiler }} 161 | HCVER: ${{ matrix.compilerVersion }} 162 | - name: env 163 | run: | 164 | env 165 | - name: write cabal config 166 | run: | 167 | mkdir -p $CABAL_DIR 168 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 213 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 214 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 215 | rm -f cabal-plan.xz 216 | chmod a+x $HOME/.cabal/bin/cabal-plan 217 | cabal-plan --version 218 | - name: checkout 219 | uses: actions/checkout@v5 220 | with: 221 | path: source 222 | - name: initial cabal.project for sdist 223 | run: | 224 | touch cabal.project 225 | echo "packages: $GITHUB_WORKSPACE/source/." 
>> cabal.project 226 | cat cabal.project 227 | - name: sdist 228 | run: | 229 | mkdir -p sdist 230 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 231 | - name: unpack 232 | run: | 233 | mkdir -p unpacked 234 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 235 | - name: generate cabal.project 236 | run: | 237 | PKGDIR_tasty_bench="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/tasty-bench-[0-9.]*')" 238 | echo "PKGDIR_tasty_bench=${PKGDIR_tasty_bench}" >> "$GITHUB_ENV" 239 | rm -f cabal.project cabal.project.local 240 | touch cabal.project 241 | touch cabal.project.local 242 | echo "packages: ${PKGDIR_tasty_bench}" >> cabal.project 243 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 244 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then echo " ghc-options: -Werror=missing-methods -Werror=missing-fields" >> cabal.project ; fi 245 | if [ $((HCNUMVER >= 90400)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 246 | if [ $((HCNUMVER >= 90400)) -ne 0 ] ; then echo " ghc-options: -Werror=unused-packages" >> cabal.project ; fi 247 | if [ $((HCNUMVER >= 90000)) -ne 0 ] ; then echo "package tasty-bench" >> cabal.project ; fi 248 | if [ $((HCNUMVER >= 90000)) -ne 0 ] ; then echo " ghc-options: -Werror=incomplete-patterns -Werror=incomplete-uni-patterns" >> cabal.project ; fi 249 | cat >> cabal.project <> cabal.project 253 | fi 254 | $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: any.$_ installed\n" unless /^(tasty-bench)$/; }' >> cabal.project.local 255 | cat cabal.project 256 | cat cabal.project.local 257 | - name: dump install plan 258 | run: | 259 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 260 | cabal-plan 261 | - name: restore cache 262 | uses: actions/cache/restore@v4 263 | with: 264 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 265 | path: 
~/.cabal/store 266 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 267 | - name: install dependencies 268 | run: | 269 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 270 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 271 | - name: build w/o tests 272 | run: | 273 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 274 | - name: build 275 | run: | 276 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 277 | - name: cabal check 278 | run: | 279 | cd ${PKGDIR_tasty_bench} || false 280 | ${CABAL} -vnormal check 281 | - name: haddock 282 | run: | 283 | if [ $((HCNUMVER >= 80200)) -ne 0 ] ; then $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all ; fi 284 | - name: unconstrained build 285 | run: | 286 | rm -f cabal.project.local 287 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 288 | - name: prepare for constraint sets 289 | run: | 290 | rm -f cabal.project.local 291 | - name: constraint set tasty-1.5 292 | run: | 293 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' all --dry-run 294 | cabal-plan topo | sort 295 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' --dependencies-only -j2 all 296 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.5' all 297 | - name: constraint set tasty-1.4 298 | run: | 299 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' all --dry-run 300 | cabal-plan topo | sort 301 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' --dependencies-only -j2 all 302 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty ^>= 1.4' all 303 | - 
name: constraint set no-tasty 304 | run: | 305 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' all --dry-run 306 | cabal-plan topo | sort 307 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' --dependencies-only -j2 all 308 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --constraint='tasty-bench -tasty' all 309 | - name: save cache 310 | if: always() 311 | uses: actions/cache/save@v4 312 | with: 313 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 314 | path: ~/.cabal/store 315 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tasty-bench [![Hackage](http://img.shields.io/hackage/v/tasty-bench.svg)](https://hackage.haskell.org/package/tasty-bench) [![Stackage LTS](http://stackage.org/package/tasty-bench/badge/lts)](http://stackage.org/lts/package/tasty-bench) [![Stackage Nightly](http://stackage.org/package/tasty-bench/badge/nightly)](http://stackage.org/nightly/package/tasty-bench) 2 | 3 | Featherlight benchmark framework (only one file!) for performance measurement 4 | with API mimicking [`criterion`](http://hackage.haskell.org/package/criterion) 5 | and [`gauge`](http://hackage.haskell.org/package/gauge). 6 | A prominent feature is built-in comparison against previous runs 7 | and between benchmarks. 8 | 9 | 10 | 11 | - [How lightweight is it?](#how-lightweight-is-it) 12 | - [How is it possible?](#how-is-it-possible) 13 | - [How to switch?](#how-to-switch) 14 | - [How to write a benchmark?](#how-to-write-a-benchmark) 15 | - [How to read results?](#how-to-read-results) 16 | - [Wall-clock time vs. 
CPU time](#wall-clock-time-vs-cpu-time) 17 | - [Statistical model](#statistical-model) 18 | - [Memory usage](#memory-usage) 19 | - [Combining tests and benchmarks](#combining-tests-and-benchmarks) 20 | - [Troubleshooting](#troubleshooting) 21 | - [Isolating interfering benchmarks](#isolating-interfering-benchmarks) 22 | - [Comparison against baseline](#comparison-against-baseline) 23 | - [Comparison between benchmarks](#comparison-between-benchmarks) 24 | - [Plotting results](#plotting-results) 25 | - [Build flags](#build-flags) 26 | - [Command-line options](#command-line-options) 27 | - [Custom command-line options](#custom-command-line-options) 28 | 29 | 30 | 31 | ## How lightweight is it? 32 | 33 | There is only one source file `Test.Tasty.Bench` and no non-boot dependencies 34 | except [`tasty`](http://hackage.haskell.org/package/tasty). 35 | So if you already depend on `tasty` for a test suite, there 36 | is nothing else to install. 37 | 38 | Compare this to `criterion` (10+ modules, 50+ dependencies) 39 | and `gauge` (40+ modules, depends on `basement` and `vector`). 40 | A build on a clean machine is up to 16x 41 | faster than `criterion` and up to 4x faster than `gauge`. A build without dependencies 42 | is up to 6x faster than `criterion` and up to 8x faster than `gauge`. 43 | 44 | `tasty-bench` is a native Haskell library and works everywhere, where GHC 45 | does, including WASM. We support a full range of architectures (`i386`, `amd64`, `armhf`, 46 | `arm64`, `ppc64le`, `s390x`, `riscv64`) and operating systems (Linux, Windows, macOS, 47 | FreeBSD, OpenBSD, NetBSD), plus any GHC from 8.0 to 9.14 48 | (and earlier releases stretch back to GHC 7.0). 49 | 50 | ## How is it possible? 51 | 52 | Our benchmarks are literally regular `tasty` tests, so we can leverage all existing 53 | machinery for command-line options, resource management, structuring, 54 | listing and filtering benchmarks, running them and reporting results. 
It also means 55 | that `tasty-bench` can be used in conjunction with other `tasty` ingredients. 56 | 57 | Unlike `criterion` and `gauge` we use a very simple statistical model described below. 58 | This is arguably a questionable choice, but it works pretty well in practice. 59 | A rare developer is sufficiently well-versed in probability theory 60 | to make sense and use of all numbers generously generated by `criterion`. 61 | 62 | ## How to switch? 63 | 64 | [Cabal mixins](https://cabal.readthedocs.io/en/3.4/cabal-package.html#pkg-field-mixins) 65 | allow to taste `tasty-bench` instead of `criterion` or `gauge` 66 | without changing a single line of code: 67 | 68 | ```cabal 69 | cabal-version: 2.0 70 | 71 | benchmark foo 72 | ... 73 | build-depends: 74 | tasty-bench 75 | mixins: 76 | tasty-bench (Test.Tasty.Bench as Criterion, Test.Tasty.Bench as Criterion.Main, Test.Tasty.Bench as Gauge, Test.Tasty.Bench as Gauge.Main) 77 | ``` 78 | 79 | This works vice versa as well: if you use `tasty-bench`, but at some point 80 | need a more comprehensive statistical analysis, 81 | it is easy to switch temporarily back to `criterion`. 82 | 83 | ## How to write a benchmark? 
84 | 85 | Benchmarks are declared 86 | in [a separate section of `cabal` file](https://cabal.readthedocs.io/en/stable/cabal-package-description-file.html#benchmarks): 87 | 88 | ```cabal 89 | cabal-version: 2.0 90 | name: bench-fibo 91 | version: 0.0 92 | build-type: Simple 93 | synopsis: Example of a benchmark 94 | 95 | benchmark bench-fibo 96 | main-is: BenchFibo.hs 97 | type: exitcode-stdio-1.0 98 | build-depends: base, tasty-bench 99 | ghc-options: "-with-rtsopts=-A32m" 100 | if impl(ghc >= 8.6) 101 | ghc-options: -fproc-alignment=64 102 | ``` 103 | 104 | And here is `BenchFibo.hs`: 105 | 106 | ```haskell 107 | import Test.Tasty.Bench 108 | 109 | fibo :: Int -> Integer 110 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 111 | 112 | main :: IO () 113 | main = defaultMain 114 | [ bgroup "Fibonacci numbers" 115 | [ bench "fifth" $ nf fibo 5 116 | , bench "tenth" $ nf fibo 10 117 | , bench "twentieth" $ nf fibo 20 118 | ] 119 | ] 120 | ``` 121 | 122 | Since `tasty-bench` provides an API compatible with `criterion`, 123 | one can refer to [its documentation](https://hackage.haskell.org/package/criterion#readme) for more examples. 124 | 125 | ## How to read results? 126 | 127 | Running the example above (`cabal bench` or `stack bench`) 128 | results in the following output: 129 | 130 | ``` 131 | All 132 | Fibonacci numbers 133 | fifth: OK 134 | 63 ns ± 3.4 ns 135 | tenth: OK 136 | 809 ns ± 73 ns 137 | twentieth: OK 138 | 104 μs ± 4.9 μs 139 | 140 | All 3 tests passed (7.25s) 141 | ``` 142 | 143 | The output says that, for instance, in the first benchmark the mean CPU time is 144 | 63 nanoseconds and means of individual samples do not often diverge from it 145 | further than ±3.4 nanoseconds (double standard deviation). Take standard 146 | deviation numbers with a grain of salt; there are lies, damned lies, and 147 | statistics. 148 | 149 | ## Wall-clock time vs. CPU time 150 | 151 | What time are we talking about? 
152 | Both `criterion` and `gauge` by default report wall-clock time, which is 153 | affected by any other application which runs concurrently. 154 | Ideally benchmarks are executed on a dedicated server without any other load, 155 | but — let's face the truth — most of developers run benchmarks 156 | on a laptop with a hundred other services and a window manager, and 157 | watch videos while waiting for benchmarks to finish. That's the cause 158 | of a notorious "variance introduced by outliers: 88% (severely inflated)" warning. 159 | 160 | To alleviate this issue `tasty-bench` measures CPU time by `getCPUTime` 161 | instead of wall-clock time by default. 162 | It does not provide a perfect isolation from other processes (e. g., 163 | if CPU cache is spoiled by others, populating data back from RAM 164 | is your burden), but is a bit more stable. 165 | 166 | Caveat: this means that for multithreaded algorithms 167 | `tasty-bench` reports total elapsed CPU time across all cores, while 168 | `criterion` and `gauge` print maximum of core's wall-clock time. 169 | It also means that by default `tasty-bench` does not measure time spent out of process, 170 | e. g., calls to other executables. To work around this limitation 171 | use `--time-mode` command-line option or set it locally via `TimeMode` option. 172 | 173 | ## Statistical model 174 | 175 | Here is a procedure used by `tasty-bench` to measure execution time: 176 | 177 | 1. Set $n \leftarrow 1$. 178 | 2. Measure execution time $t_n$ of $n$ iterations 179 | and execution time $t_{2n}$ of $2n$ iterations. 180 | 3. Find $t$ which minimizes deviation of $(nt,2nt)$ from $(t_n,t_{2n})$, 181 | namely $t \leftarrow (t_n + 2t_{2n}) / 5n$. 182 | 4. If deviation is small enough (see `--stdev` below) 183 | or time is running out soon (see `--timeout` below), 184 | return $t$ as a mean execution time. 185 | 5. Otherwise set $n \leftarrow 2n$ and jump back to Step 2. 
186 | 187 | This is roughly similar to the linear regression approach which `criterion` takes, 188 | but we fit only two last points. This allows us to simplify away all heavy-weight 189 | statistical analysis. More importantly, earlier measurements, 190 | which are presumably shorter and noisier, do not affect overall result. 191 | This is in contrast to `criterion`, which fits all measurements and 192 | is biased to use more data points corresponding to shorter runs 193 | (it employs $n \leftarrow 1.05n$ progression). 194 | 195 | Mean time and its deviation does not say much about the 196 | distribution of individual timings. E. g., imagine a computation which 197 | (according to a coarse system timer) takes either 0 ms or 1 ms with equal 198 | probability. While one would be able to establish that its mean time is 0.5 ms 199 | with a very small deviation, this does not imply that individual measurements 200 | are anywhere near 0.5 ms. Even assuming an infinite precision of a system 201 | timer, the distribution of individual times is not known to be 202 | [normal](https://en.wikipedia.org/wiki/Normal_distribution). 203 | 204 | Obligatory disclaimer: statistics is a tricky matter, there is no 205 | one-size-fits-all approach. 206 | In the absence of a good theory 207 | simplistic approaches are as (un)sound as obscure ones. 208 | Those who seek statistical soundness should rather collect raw data 209 | and process it themselves using a proper statistical toolbox. 210 | Data reported by `tasty-bench` 211 | is only of indicative and comparative significance. 212 | 213 | ## Memory usage 214 | 215 | Configuring RTS to collect GC statistics 216 | (e. 
g., via `cabal bench --benchmark-options '+RTS -T'` 217 | or `stack bench --ba '+RTS -T'`) enables `tasty-bench` to estimate and report 218 | memory usage: 219 | 220 | ``` 221 | All 222 | Fibonacci numbers 223 | fifth: OK 224 | 63 ns ± 3.4 ns, 223 B allocated, 0 B copied, 2.0 MB peak memory 225 | tenth: OK 226 | 809 ns ± 73 ns, 2.3 KB allocated, 0 B copied, 4.0 MB peak memory 227 | twentieth: OK 228 | 104 μs ± 4.9 μs, 277 KB allocated, 59 B copied, 5.0 MB peak memory 229 | 230 | All 3 tests passed (7.25s) 231 | ``` 232 | 233 | This data is reported as per [`GHC.Stats.RTSStats`](https://hackage.haskell.org/package/base/docs/GHC-Stats.html#t:RTSStats) fields: 234 | 235 | * `allocated_bytes` 236 | 237 | Total size of data ever allocated since the start 238 | of the benchmark iteration. Even if data was immediately 239 | garbage collected and freed, it still counts. 240 | 241 | * `copied_bytes` 242 | 243 | Total size of data ever copied by GC (because it was alive and kicking) 244 | since the start of the benchmark iteration. Note that zero bytes often mean 245 | that the benchmark was too short to trigger GC at all. 246 | 247 | * `max_mem_in_use_bytes` 248 | 249 | Peak size of live data since the very start of the process. 250 | This is a global metric, it cumulatively grows and does not say much 251 | about individual benchmarks, but rather characterizes heap 252 | environment in which they are executed. 253 | 254 | ## Combining tests and benchmarks 255 | 256 | When optimizing an existing function, it is important to check that its 257 | observable behavior remains unchanged. One can rebuild 258 | both tests and benchmarks after each change, but it would be more convenient 259 | to run sanity checks within benchmark itself. Since our benchmarks 260 | are compatible with `tasty` tests, we can easily do so. 
261 | 262 | Imagine you come up with a faster function `myFibo` to generate Fibonacci numbers: 263 | 264 | ```haskell 265 | import Test.Tasty.Bench 266 | import Test.Tasty.QuickCheck -- from tasty-quickcheck package 267 | 268 | fibo :: Int -> Integer 269 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 270 | 271 | myFibo :: Int -> Integer 272 | myFibo n = if n < 3 then toInteger n else myFibo (n - 1) + myFibo (n - 2) 273 | 274 | main :: IO () 275 | main = Test.Tasty.Bench.defaultMain -- not Test.Tasty.defaultMain 276 | [ bench "fibo 20" $ nf fibo 20 277 | , bench "myFibo 20" $ nf myFibo 20 278 | , testProperty "myFibo = fibo" $ \n -> fibo n === myFibo n 279 | ] 280 | ``` 281 | 282 | This outputs: 283 | 284 | ``` 285 | All 286 | fibo 20: OK 287 | 104 μs ± 4.9 μs 288 | myFibo 20: OK 289 | 71 μs ± 5.3 μs 290 | myFibo = fibo: FAIL 291 | *** Failed! Falsified (after 5 tests and 1 shrink): 292 | 2 293 | 1 /= 2 294 | Use --quickcheck-replay=927711 to reproduce. 295 | 296 | 1 out of 3 tests failed (5.03s) 297 | ``` 298 | 299 | We see that `myFibo` is indeed significantly faster than `fibo`, 300 | but unfortunately does not do the same thing. One should probably 301 | look for another way to speed up Fibonacci numbers. 302 | 303 | ## Troubleshooting 304 | 305 | * If benchmarks take too long, set `--timeout` to limit execution time 306 | of individual benchmarks, and `tasty-bench` will do its best to fit 307 | into a given time frame. Without `--timeout` we rerun benchmarks until 308 | achieving a target precision set by `--stdev`, which in a noisy environment 309 | of a modern laptop with hundreds of service processes may take a lot of time. 310 | 311 | While `criterion` runs each benchmark at least for 5 seconds, 312 | `tasty-bench` is happy to conclude earlier, if it does not compromise 313 | the quality of results. 
In our experiments `tasty-bench` suites 314 | tend to finish earlier, even if some individual benchmarks 315 | take longer than with `criterion`. 316 | 317 | A common source of noisiness is garbage collection. Setting a larger 318 | allocation area (_nursery_) is often a good idea, either via 319 | `cabal bench --benchmark-options '+RTS -A32m'` or `stack bench --ba '+RTS -A32m'`. 320 | Alternatively bake it into 321 | `cabal` file as `ghc-options: "-with-rtsopts=-A32m"`. 322 | 323 | * Never compile benchmarks with `-fstatic-argument-transformation`, because it 324 | breaks a trick we use to force GHC into reevaluation of the same function application 325 | over and over again. 326 | 327 | * If benchmark results look malformed like below, make sure that you are 328 | invoking `Test.Tasty.Bench.defaultMain` and not `Test.Tasty.defaultMain` 329 | (the underlying difference is `consoleBenchReporter` vs. `consoleTestReporter`): 330 | 331 | ``` 332 | All 333 | fibo 20: OK (1.46s) 334 | WithLoHi (Estimate {estMean = Measurement {measTime = 41529118775, measAllocs = 0, measCopied = 0, measMaxMem = 0}, estStdev = 1595055320}) (-Infinity) Infinity 335 | ``` 336 | 337 | * If benchmarks fail with an error message 338 | 339 | ``` 340 | Unhandled resource. Probably a bug in the runner you're using. 341 | ``` 342 | 343 | or 344 | 345 | ``` 346 | Unexpected state of the resource (NotCreated) in getResource. Report as a tasty bug. 347 | ``` 348 | 349 | this is likely caused by `env` or `envWithCleanup` affecting the structure of benchmarks. 350 | You can use `env` to read test data from `IO`, but not to read benchmark names 351 | or affect their hierarchy in another way. This is a fundamental restriction of `tasty` 352 | so that it can list and filter benchmarks without launching missiles. 353 | 354 | Strict pattern-matching on resource is also prohibited. 
For instance, 355 | if it is a tuple, the second argument of `env` should use a lazy pattern match 356 | `\~(a, b) -> ...` 357 | 358 | * If benchmarks fail with `Test dependencies form a loop` 359 | or `Test dependencies have cycles`, this is likely 360 | because of `bcompare`, which compares a benchmark with itself. 361 | Locating a benchmark in a global environment may be tricky, please refer to 362 | [`tasty` documentation](https://github.com/UnkindPartition/tasty#patterns) for details 363 | and consider using `locateBenchmark`. 364 | 365 | * When seeing 366 | 367 | ``` 368 | This benchmark takes more than 100 seconds. Consider setting --timeout, if this is unexpected (or to silence this warning). 369 | ``` 370 | 371 | do follow the advice: abort benchmarks and pass `-t100` or similar. Unless you are 372 | benchmarking a very computationally expensive function, a single benchmark should 373 | stabilize after a couple of seconds. This warning is a sign that your environment 374 | is too noisy, in which case `tasty-bench` will continue trying with exponentially 375 | longer intervals, often unproductively. 376 | 377 | * The following error can be thrown when benchmarks are built with 378 | `ghc-options: -threaded`: 379 | 380 | ``` 381 | Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N. 382 | ``` 383 | 384 | The underlying cause is that `tasty` runs tests concurrently, which is harmful 385 | for reliable performance measurements. Make sure to use `tasty-bench >= 0.3.4` and invoke 386 | `Test.Tasty.Bench.defaultMain` and not `Test.Tasty.defaultMain`. Note that 387 | `localOption (NumThreads 1)` quashes the warning, but does not eliminate the cause. 388 | 389 | * If benchmarks using GHC 9.4.4+ segfault on Windows, check that you 390 | are not using non-moving garbage collector `--nonmoving-gc`. This is likely caused 391 | by [GHC issue #23003](https://gitlab.haskell.org/ghc/ghc/-/issues/23003). 
392 | Previous releases of `tasty-bench` recommended enabling `--nonmoving-gc` 393 | to stabilise benchmarks, but it's discouraged now. 394 | 395 | * If you see 396 | 397 | ``` 398 | : commitBuffer: invalid argument (cannot encode character '\177') 399 | ``` 400 | 401 | or 402 | 403 | ``` 404 | Uncaught exception ghc-internal:GHC.Internal.IO.Exception.IOException: 405 | : commitBuffer: invalid argument (cannot encode character '\956') 406 | ``` 407 | 408 | it means that your locale does not support UTF-8. `tasty-bench` makes an effort 409 | to force locale to UTF-8, but it's not bulletproof. 410 | In such case run `locale -a` to list available locales and set a UTF-8-capable 411 | one (e. g., `export LANG=C.UTF-8`) before starting benchmarks. 412 | 413 | ## Isolating interfering benchmarks 414 | 415 | One difficulty of benchmarking in Haskell is that it is 416 | hard to isolate benchmarks so that they do not interfere. 417 | Changing the order of benchmarks or skipping some of them 418 | has an effect on heap's layout and thus affects garbage collection. 419 | This issue is well attested in 420 | [both](https://github.com/haskell/criterion/issues/166) 421 | [`criterion`](https://github.com/haskell/criterion/issues/60) 422 | and 423 | [`gauge`](https://github.com/vincenthz/hs-gauge/issues/2). 424 | 425 | Usually (but not always) skipping some benchmarks speeds up remaining ones. 426 | That's because once a benchmark allocated heap which for some reason 427 | was not promptly released afterwards (e. g., it forced a top-level thunk 428 | in an underlying library), all further benchmarks are slowed down 429 | by garbage collector processing this additional amount of live data 430 | over and over again. 431 | 432 | There are several mitigation strategies. First of all, giving garbage collector 433 | more breathing space by `+RTS -A32m` (or more) is often good enough. 434 | 435 | Further, avoid using top-level bindings to store large test data. 
Once such thunks 436 | are forced, they remain allocated forever, which affects detrimentally subsequent 437 | unrelated benchmarks. Treat them as external data, supplied via `env`: instead of 438 | 439 | ```haskell 440 | largeData :: String 441 | largeData = replicate 1000000 'a' 442 | 443 | main :: IO () 444 | main = defaultMain 445 | [ bench "large" $ nf length largeData, ... ] 446 | ``` 447 | 448 | use 449 | 450 | ```haskell 451 | import Control.DeepSeq (force) 452 | import Control.Exception (evaluate) 453 | 454 | main :: IO () 455 | main = defaultMain 456 | [ env (evaluate (force (replicate 1000000 'a'))) $ \largeData -> 457 | bench "large" $ nf length largeData, ... ] 458 | ``` 459 | 460 | Finally, as an ultimate measure to reduce interference between benchmarks, 461 | one can run each of them in a separate process. We do not quite recommend 462 | this approach, but if you are desperate, here is how: 463 | 464 | ```sh 465 | cabal run -v0 all:benches -- -l | sed -e 's/[\"]/\\\\\\&/g' | while read -r name; do cabal run -v0 all:benches -- -p '$0 == "'"$name"'"'; done 466 | ``` 467 | 468 | This assumes that there is a single benchmark suite in the project 469 | and that benchmark names do not contain newlines. 470 | 471 | ## Comparison against baseline 472 | 473 | One can compare benchmark results against an earlier run in an automatic way. 474 | 475 | When using this feature, it's especially important to compile benchmarks with 476 | `ghc-options: `[`-fproc-alignment`](https://downloads.haskell.org/ghc/latest/docs/users_guide/debugging.html#ghc-flag--fproc-alignment)`=64`, otherwise results could be skewed by 477 | intermittent changes in cache-line alignment. 
478 | 479 | Firstly, run `tasty-bench` with `--csv FILE` key 480 | to dump results to `FILE` in CSV format 481 | (it could be a good idea to set smaller `--stdev`, if possible): 482 | 483 | ``` 484 | Name,Mean (ps),2*Stdev (ps) 485 | All.Fibonacci numbers.fifth,48453,4060 486 | All.Fibonacci numbers.tenth,637152,46744 487 | All.Fibonacci numbers.twentieth,81369531,3342646 488 | ``` 489 | 490 | Now modify implementation and rerun benchmarks 491 | with `--baseline FILE` key. This produces a report as follows: 492 | 493 | ``` 494 | All 495 | Fibonacci numbers 496 | fifth: OK 497 | 53 ns ± 2.7 ns, 8% more than baseline 498 | tenth: OK 499 | 641 ns ± 59 ns, same as baseline 500 | twentieth: OK 501 | 77 μs ± 6.4 μs, 5% less than baseline 502 | 503 | All 3 tests passed (1.50s) 504 | ``` 505 | 506 | You can also fail benchmarks, which deviate too far from baseline, using 507 | `--fail-if-slower` and `--fail-if-faster` options. For example, setting both of them 508 | to 6 will fail the first benchmark above (because it is more than 6% slower), 509 | but the last one still succeeds (even while it is measurably faster than baseline, 510 | deviation is less than 6%). Consider also using `--hide-successes` to show 511 | only problematic benchmarks, or even 512 | [`tasty-rerun`](http://hackage.haskell.org/package/tasty-rerun) package 513 | to focus on rerunning failing items only. 514 | 515 | If you wish to compare two CSV reports non-interactively, here is a handy `awk` incantation: 516 | 517 | ```sh 518 | awk 'BEGIN{FS=",";OFS=",";print "Name,Old,New,Ratio"}FNR==1{trueNF=NF;next}NF<trueNF{next}FNR==NR{a[$1]=$2;next}{print $1,a[$1],$2,$2/a[$1];gs+=log($2/a[$1]);gc++}END{if(gc>0)print "Geometric mean,,",exp(gs/gc)}' old.csv new.csv 519 | ``` 520 | 521 | A larger shell snippet to compare two `git` commits can be found in `compare_benches.sh`. 522 | 523 | Note that columns in CSV report are different from what `criterion` or `gauge` 524 | would produce.
If names do not contain commas, missing columns can be faked this way: 525 | 526 | ```sh 527 | awk 'BEGIN{FS=",";OFS=",";print "Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB"}NR==1{trueNF=NF;next}NF<trueNF{next}{print $1,$2/1e12,$2/1e12,$2/1e12,$3/2e12,$3/2e12,$3/2e12}' 528 | ``` 529 | 530 | ## Comparison between benchmarks 531 | 532 | You can also compare benchmarks to each other without any external tools, 533 | all in the comfort of your terminal. 534 | 535 | Use `bcompare`, which takes a `tasty` pattern to locate the baseline 536 | benchmark within the suite. Once results are ready, the mean time 537 | of the annotated benchmark is reported together with its ratio 538 | against the mean time of the baseline benchmark. 539 | Let us extend our running example: 540 | 541 | ```haskell 542 | import Test.Tasty.Bench 543 | 544 | fibo :: Int -> Integer 545 | fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 546 | 547 | main :: IO () 548 | main = defaultMain 549 | [ bgroup "Fibonacci numbers" 550 | [ bcompare "tenth" $ bench "fifth" $ nf fibo 5 551 | , bench "tenth" $ nf fibo 10 552 | , bcompare "tenth" $ bench "twentieth" $ nf fibo 20 553 | ] 554 | ] 555 | ``` 556 | 557 | This produces a report, comparing mean times of `fifth` and `twentieth` to `tenth`: 558 | 559 | ``` 560 | All 561 | Fibonacci numbers 562 | fifth: OK 563 | 121 ns ± 2.6 ns, 0.08x 564 | tenth: OK 565 | 1.6 μs ± 31 ns 566 | twentieth: OK 567 | 203 μs ± 4.1 μs, 128.36x 568 | ``` 569 | 570 | To locate a baseline benchmark in a larger suite use `locateBenchmark`. 571 | 572 | One can leverage comparisons between benchmarks to implement portable performance 573 | tests, expressing properties like "this algorithm must be at least twice faster 574 | than that one" or "this operation should not be more than thrice slower than that". 575 | This can be achieved with `bcompareWithin`, which takes an acceptable interval 576 | of performance as an argument. 577 | 578 | ## Plotting results 579 | 580 | Users can dump results into CSV with `--csv FILE` 581 | and plot them using `gnuplot` or other software. But for convenience 582 | there is also a built-in quick-and-dirty SVG plotting feature, 583 | which can be invoked by passing `--svg FILE`. Here is a sample of its output: 584 | 585 | ![Plotting](https://hackage.haskell.org/package/tasty-bench/src/example.svg) 586 | 587 | ## Build flags 588 | 589 | Build flags are a brittle subject and users do not normally need to touch them.
590 | 591 | * If you find yourself in an environment, where `tasty` is not available and you 592 | have access to boot packages only, you can still use `tasty-bench`! Just copy 593 | `Test/Tasty/Bench.hs` to your project (imagine it like a header-only C library). 594 | It will provide you with functions to build `Benchmarkable` and run them manually 595 | via `measureCpuTime`. This mode of operation can be also configured 596 | by disabling Cabal flag `tasty`. 597 | 598 | ## Command-line options 599 | 600 | Use `--help` to list all command-line options. 601 | 602 | * `-p`, `--pattern` 603 | 604 | This is a standard `tasty` option, which allows filtering benchmarks 605 | by a pattern or `awk` expression. Please refer to 606 | [`tasty` documentation](https://github.com/UnkindPartition/tasty#patterns) 607 | for details. 608 | 609 | * `-t`, `--timeout` 610 | 611 | This is a standard `tasty` option, setting timeout for individual benchmarks 612 | in seconds. Use it when benchmarks tend to take too long: `tasty-bench` will make 613 | an effort to report results (even if of subpar quality) before timeout. Setting 614 | timeout too tight (insufficient for at least three iterations) 615 | will result in a benchmark failure. One can adjust it locally for a group 616 | of benchmarks, e. g., `localOption (mkTimeout 100000000)` for 100 seconds. 617 | 618 | * `--stdev` 619 | 620 | Target relative standard deviation of measurements in percents (5% by default). 621 | Large values correspond to fast and loose benchmarks, and small ones to long and precise. 622 | It can also be adjusted locally for a group of benchmarks, 623 | e. g., `localOption (RelStDev 0.02)`. 624 | If benchmarking takes far too long, consider setting `--timeout`, 625 | which will interrupt benchmarks, potentially before reaching the target deviation. 626 | 627 | * `--csv` 628 | 629 | File to write results in CSV format. 
630 | 631 | * `--baseline` 632 | 633 | File to read baseline results in CSV format (as produced by `--csv`). 634 | 635 | * `--fail-if-slower`, `--fail-if-faster` 636 | 637 | Upper bounds of acceptable slow down / speed up in percents. If a benchmark is unacceptably slower / faster than baseline (see `--baseline`), 638 | it will be reported as failed. Can be used in conjunction with 639 | a standard `tasty` option `--hide-successes` to show only problematic benchmarks. 640 | Both options can be adjusted locally for a group of benchmarks, 641 | e. g., `localOption (FailIfSlower 0.10)`. 642 | 643 | * `--svg` 644 | 645 | File to plot results in SVG format. 646 | 647 | * `--time-mode` 648 | 649 | Whether to measure CPU time (`cpu`, default) or wall-clock time (`wall`). 650 | 651 | * `+RTS -T` 652 | 653 | Estimate and report memory usage. 654 | 655 | ## Custom command-line options 656 | 657 | As usual with `tasty`, it is easy to extend benchmarks with custom command-line options. 658 | Here is an example: 659 | 660 | ```haskell 661 | import Data.Proxy 662 | import Test.Tasty.Bench 663 | import Test.Tasty.Ingredients.Basic 664 | import Test.Tasty.Options 665 | import Test.Tasty.Runners 666 | 667 | newtype RandomSeed = RandomSeed Int 668 | 669 | instance IsOption RandomSeed where 670 | defaultValue = RandomSeed 42 671 | parseValue = fmap RandomSeed . 
safeRead 672 | optionName = pure "seed" 673 | optionHelp = pure "Random seed used in benchmarks" 674 | 675 | main :: IO () 676 | main = do 677 | let customOpts = [Option (Proxy :: Proxy RandomSeed)] 678 | ingredients = includingOptions customOpts : benchIngredients 679 | opts <- parseOptions ingredients benchmarks 680 | let RandomSeed seed = lookupOption opts 681 | defaultMainWithIngredients ingredients benchmarks 682 | 683 | benchmarks :: Benchmark 684 | benchmarks = bgroup "All" [] 685 | ``` 686 | -------------------------------------------------------------------------------- /src/Test/Tasty/Bench.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module: Test.Tasty.Bench 3 | Copyright: (c) 2021 Andrew Lelechenko 4 | License: MIT 5 | 6 | Featherlight benchmark framework (only one file!) for performance 7 | measurement with API mimicking 8 | [@criterion@](http://hackage.haskell.org/package/criterion) and 9 | [@gauge@](http://hackage.haskell.org/package/gauge). A prominent feature is 10 | built-in comparison against previous runs and between benchmarks. 11 | 12 | 13 | === How lightweight is it? 14 | 15 | There is only one source file "Test.Tasty.Bench" and no non-boot 16 | dependencies except [@tasty@](http://hackage.haskell.org/package/tasty). So 17 | if you already depend on @tasty@ for a test suite, there is nothing else 18 | to install. 19 | 20 | Compare this to @criterion@ (10+ modules, 50+ dependencies) and @gauge@ 21 | (40+ modules, depends on @basement@ and @vector@). A build on a clean 22 | machine is up to 16x faster than @criterion@ and up to 4x faster than 23 | @gauge@. A build without dependencies is up to 6x faster than 24 | @criterion@ and up to 8x faster than @gauge@. 25 | 26 | @tasty-bench@ is a native Haskell library and works everywhere, where 27 | GHC does, including WASM. 
We support a full range of architectures 28 | (@i386@, @amd64@, @armhf@, @arm64@, @ppc64le@, @s390x@, @riscv64@) and 29 | operating systems (Linux, Windows, macOS, FreeBSD, OpenBSD, NetBSD), 30 | plus any GHC from 8.0 to 9.14 (and earlier releases stretch back to GHC 31 | 7.0). 32 | 33 | === How is it possible? 34 | 35 | Our benchmarks are literally regular @tasty@ tests, so we can leverage 36 | all existing machinery for command-line options, resource management, 37 | structuring, listing and filtering benchmarks, running them and 38 | reporting results. It also means that @tasty-bench@ can be used in 39 | conjunction with other @tasty@ ingredients. 40 | 41 | Unlike @criterion@ and @gauge@ we use a very simple statistical model 42 | described below. This is arguably a questionable choice, but it works 43 | pretty well in practice. A rare developer is sufficiently well-versed in 44 | probability theory to make sense and use of all numbers generously 45 | generated by @criterion@. 46 | 47 | === How to switch? 48 | 49 | 50 | allow to taste @tasty-bench@ instead of @criterion@ or @gauge@ without 51 | changing a single line of code: 52 | 53 | > cabal-version: 2.0 54 | > 55 | > benchmark foo 56 | > ... 57 | > build-depends: 58 | > tasty-bench 59 | > mixins: 60 | > tasty-bench (Test.Tasty.Bench as Criterion, Test.Tasty.Bench as Criterion.Main, Test.Tasty.Bench as Gauge, Test.Tasty.Bench as Gauge.Main) 61 | 62 | This works vice versa as well: if you use @tasty-bench@, but at some 63 | point need a more comprehensive statistical analysis, it is easy to 64 | switch temporarily back to @criterion@. 65 | 66 | === How to write a benchmark? 
67 | 68 | Benchmarks are declared in 69 | : 70 | 71 | > cabal-version: 2.0 72 | > name: bench-fibo 73 | > version: 0.0 74 | > build-type: Simple 75 | > synopsis: Example of a benchmark 76 | > 77 | > benchmark bench-fibo 78 | > main-is: BenchFibo.hs 79 | > type: exitcode-stdio-1.0 80 | > build-depends: base, tasty-bench 81 | > ghc-options: "-with-rtsopts=-A32m" 82 | > if impl(ghc >= 8.6) 83 | > ghc-options: -fproc-alignment=64 84 | 85 | And here is @BenchFibo.hs@: 86 | 87 | > import Test.Tasty.Bench 88 | > 89 | > fibo :: Int -> Integer 90 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 91 | > 92 | > main :: IO () 93 | > main = defaultMain 94 | > [ bgroup "Fibonacci numbers" 95 | > [ bench "fifth" $ nf fibo 5 96 | > , bench "tenth" $ nf fibo 10 97 | > , bench "twentieth" $ nf fibo 20 98 | > ] 99 | > ] 100 | 101 | Since @tasty-bench@ provides an API compatible with @criterion@, one can 102 | refer to 103 | 104 | for more examples. 105 | 106 | === How to read results? 107 | 108 | Running the example above (@cabal bench@ or @stack bench@) results in 109 | the following output: 110 | 111 | > All 112 | > Fibonacci numbers 113 | > fifth: OK 114 | > 63 ns ± 3.4 ns 115 | > tenth: OK 116 | > 809 ns ± 73 ns 117 | > twentieth: OK 118 | > 104 μs ± 4.9 μs 119 | > 120 | > All 3 tests passed (7.25s) 121 | 122 | The output says that, for instance, in the first benchmark the mean CPU 123 | time is 63 nanoseconds and means of individual samples do not often 124 | diverge from it further than ±3.4 nanoseconds (double standard 125 | deviation). Take standard deviation numbers with a grain of salt; there 126 | are lies, damned lies, and statistics. 127 | 128 | === Wall-clock time vs. CPU time 129 | 130 | What time are we talking about? Both @criterion@ and @gauge@ by default 131 | report wall-clock time, which is affected by any other application which 132 | runs concurrently. 
Ideally benchmarks are executed on a dedicated server 133 | without any other load, but — let’s face the truth — most of developers 134 | run benchmarks on a laptop with a hundred other services and a window 135 | manager, and watch videos while waiting for benchmarks to finish. That’s 136 | the cause of a notorious “variance introduced by outliers: 88% (severely 137 | inflated)” warning. 138 | 139 | To alleviate this issue @tasty-bench@ measures CPU time by 'getCPUTime' 140 | instead of wall-clock time by default. It does not provide a perfect 141 | isolation from other processes (e. g., if CPU cache is spoiled by others, 142 | populating data back from RAM is your burden), but is a bit more stable. 143 | 144 | Caveat: this means that for multithreaded algorithms @tasty-bench@ 145 | reports total elapsed CPU time across all cores, while @criterion@ and 146 | @gauge@ print maximum of core’s wall-clock time. It also means that by 147 | default @tasty-bench@ does not measure time spent out of process, e. g., 148 | calls to other executables. To work around this limitation use 149 | @--time-mode@ command-line option or set it locally via 'TimeMode' 150 | option. 151 | 152 | === Statistical model 153 | 154 | Here is a procedure used by @tasty-bench@ to measure execution time: 155 | 156 | 1. Set \(n \leftarrow 1\). 157 | 2. Measure execution time \(t_n\) of \(n\) iterations and execution 158 | time \(t_{2n}\) of \(2n\) iterations. 159 | 3. Find \(t\) which minimizes deviation of \((nt,2nt)\) from 160 | \((t_n,t_{2n})\), namely \(t \leftarrow (t_n + 2t_{2n}) / 5n\). 161 | 4. If deviation is small enough (see @--stdev@ below) or time is 162 | running out soon (see @--timeout@ below), return \(t\) as a mean 163 | execution time. 164 | 5. Otherwise set \(n \leftarrow 2n\) and jump back to Step 2. 165 | 166 | This is roughly similar to the linear regression approach which 167 | @criterion@ takes, but we fit only two last points. 
This allows us to 168 | simplify away all heavy-weight statistical analysis. More importantly, 169 | earlier measurements, which are presumably shorter and noisier, do not 170 | affect overall result. This is in contrast to @criterion@, which fits 171 | all measurements and is biased to use more data points corresponding to 172 | shorter runs (it employs \(n \leftarrow 1.05n\) progression). 173 | 174 | Mean time and its deviation does not say much about the distribution of 175 | individual timings. E. g., imagine a computation which (according to a 176 | coarse system timer) takes either 0 ms or 1 ms with equal probability. 177 | While one would be able to establish that its mean time is 0.5 ms with a 178 | very small deviation, this does not imply that individual measurements 179 | are anywhere near 0.5 ms. Even assuming an infinite precision of a 180 | system timer, the distribution of individual times is not known to be 181 | <https://en.wikipedia.org/wiki/Normal_distribution normal>. 182 | 183 | Obligatory disclaimer: statistics is a tricky matter, there is no 184 | one-size-fits-all approach. In the absence of a good theory simplistic 185 | approaches are as (un)sound as obscure ones. Those who seek statistical 186 | soundness should rather collect raw data and process it themselves using 187 | a proper statistical toolbox. Data reported by @tasty-bench@ is only of 188 | indicative and comparative significance. 189 | 190 | === Memory usage 191 | 192 | Configuring RTS to collect GC statistics (e.
g., via 193 | @cabal bench --benchmark-options \'+RTS -T\'@ or 194 | @stack bench --ba \'+RTS -T\'@) enables @tasty-bench@ to estimate and 195 | report memory usage: 196 | 197 | > All 198 | > Fibonacci numbers 199 | > fifth: OK 200 | > 63 ns ± 3.4 ns, 223 B allocated, 0 B copied, 2.0 MB peak memory 201 | > tenth: OK 202 | > 809 ns ± 73 ns, 2.3 KB allocated, 0 B copied, 4.0 MB peak memory 203 | > twentieth: OK 204 | > 104 μs ± 4.9 μs, 277 KB allocated, 59 B copied, 5.0 MB peak memory 205 | > 206 | > All 3 tests passed (7.25s) 207 | 208 | This data is reported as per 209 | 210 | fields: 211 | 212 | - 'allocated_bytes' 213 | 214 | Total size of data ever allocated since the start of the benchmark 215 | iteration. Even if data was immediately garbage collected and freed, 216 | it still counts. 217 | 218 | - 'copied_bytes' 219 | 220 | Total size of data ever copied by GC (because it was alive and 221 | kicking) since the start of the benchmark iteration. Note that zero 222 | bytes often mean that the benchmark was too short to trigger GC at 223 | all. 224 | 225 | - 'max_mem_in_use_bytes' 226 | 227 | Peak size of live data since the very start of the process. This is 228 | a global metric, it cumulatively grows and does not say much about 229 | individual benchmarks, but rather characterizes heap environment in 230 | which they are executed. 231 | 232 | === Combining tests and benchmarks 233 | 234 | When optimizing an existing function, it is important to check that its 235 | observable behavior remains unchanged. One can rebuild both tests and 236 | benchmarks after each change, but it would be more convenient to run 237 | sanity checks within benchmark itself. Since our benchmarks are 238 | compatible with @tasty@ tests, we can easily do so. 
239 | 240 | Imagine you come up with a faster function @myFibo@ to generate 241 | Fibonacci numbers: 242 | 243 | > import Test.Tasty.Bench 244 | > import Test.Tasty.QuickCheck -- from tasty-quickcheck package 245 | > 246 | > fibo :: Int -> Integer 247 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 248 | > 249 | > myFibo :: Int -> Integer 250 | > myFibo n = if n < 3 then toInteger n else myFibo (n - 1) + myFibo (n - 2) 251 | > 252 | > main :: IO () 253 | > main = Test.Tasty.Bench.defaultMain -- not Test.Tasty.defaultMain 254 | > [ bench "fibo 20" $ nf fibo 20 255 | > , bench "myFibo 20" $ nf myFibo 20 256 | > , testProperty "myFibo = fibo" $ \n -> fibo n === myFibo n 257 | > ] 258 | 259 | This outputs: 260 | 261 | > All 262 | > fibo 20: OK 263 | > 104 μs ± 4.9 μs 264 | > myFibo 20: OK 265 | > 71 μs ± 5.3 μs 266 | > myFibo = fibo: FAIL 267 | > *** Failed! Falsified (after 5 tests and 1 shrink): 268 | > 2 269 | > 1 /= 2 270 | > Use --quickcheck-replay=927711 to reproduce. 271 | > 272 | > 1 out of 3 tests failed (5.03s) 273 | 274 | We see that @myFibo@ is indeed significantly faster than @fibo@, but 275 | unfortunately does not do the same thing. One should probably look for 276 | another way to speed up Fibonacci numbers. 277 | 278 | === Troubleshooting 279 | 280 | - If benchmarks take too long, set @--timeout@ to limit execution time 281 | of individual benchmarks, and @tasty-bench@ will do its best to fit 282 | into a given time frame. Without @--timeout@ we rerun benchmarks 283 | until achieving a target precision set by @--stdev@, which in a 284 | noisy environment of a modern laptop with hundreds of service 285 | processes may take a lot of time. 286 | 287 | While @criterion@ runs each benchmark at least for 5 seconds, 288 | @tasty-bench@ is happy to conclude earlier, if it does not 289 | compromise the quality of results. 
In our experiments @tasty-bench@ 290 | suites tend to finish earlier, even if some individual benchmarks 291 | take longer than with @criterion@. 292 | 293 | A common source of noisiness is garbage collection. Setting a larger 294 | allocation area (/nursery/) is often a good idea, either via 295 | @cabal bench --benchmark-options \'+RTS -A32m\'@ or 296 | @stack bench --ba \'+RTS -A32m\'@. Alternatively bake it into 297 | @cabal@ file as @ghc-options: \"-with-rtsopts=-A32m\"@. 298 | 299 | - Never compile benchmarks with @-fstatic-argument-transformation@, 300 | because it breaks a trick we use to force GHC into reevaluation of 301 | the same function application over and over again. 302 | 303 | - If benchmark results look malformed like below, make sure that you 304 | are invoking @Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain' and not 305 | @Test.Tasty.@'Test.Tasty.defaultMain' (the underlying difference is 306 | 'consoleBenchReporter' vs. 'consoleTestReporter'): 307 | 308 | > All 309 | > fibo 20: OK (1.46s) 310 | > WithLoHi (Estimate {estMean = Measurement {measTime = 41529118775, measAllocs = 0, measCopied = 0, measMaxMem = 0}, estStdev = 1595055320}) (-Infinity) Infinity 311 | 312 | - If benchmarks fail with an error message 313 | 314 | > Unhandled resource. Probably a bug in the runner you're using. 315 | 316 | or 317 | 318 | > Unexpected state of the resource (NotCreated) in getResource. Report as a tasty bug. 319 | 320 | this is likely caused by 'env' or 'envWithCleanup' affecting the 321 | structure of benchmarks. You can use 'env' to read test data from 322 | 'IO', but not to read benchmark names or affect their hierarchy in 323 | another way. This is a fundamental restriction of @tasty@ so that it 324 | can list and filter benchmarks without launching missiles. 325 | 326 | Strict pattern-matching on resource is also prohibited. 
For 327 | instance, if it is a tuple, the second argument of 'env' should use 328 | a lazy pattern match @\\~(a, b) -> ...@ 329 | 330 | - If benchmarks fail with @Test dependencies form a loop@ or 331 | @Test dependencies have cycles@, this is likely because of 332 | 'bcompare', which compares a benchmark with itself. Locating a 333 | benchmark in a global environment may be tricky, please refer to 334 | [@tasty@ documentation](https://github.com/UnkindPartition/tasty#patterns) 335 | for details and consider using 'locateBenchmark'. 336 | 337 | - When seeing 338 | 339 | > This benchmark takes more than 100 seconds. Consider setting --timeout, if this is unexpected (or to silence this warning). 340 | 341 | do follow the advice: abort benchmarks and pass @-t100@ or similar. 342 | Unless you are benchmarking a very computationally expensive 343 | function, a single benchmark should stabilize after a couple of 344 | seconds. This warning is a sign that your environment is too noisy, 345 | in which case @tasty-bench@ will continue trying with exponentially 346 | longer intervals, often unproductively. 347 | 348 | - The following error can be thrown when benchmarks are built with 349 | @ghc-options: -threaded@: 350 | 351 | > Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N. 352 | 353 | The underlying cause is that @tasty@ runs tests concurrently, which 354 | is harmful for reliable performance measurements. Make sure to use 355 | @tasty-bench >= 0.3.4@ and invoke @Test.Tasty.Bench.@'Test.Tasty.Bench.defaultMain' and 356 | not @Test.Tasty.@'Test.Tasty.defaultMain'. Note that 'localOption' ('NumThreads' 1) 357 | quashes the warning, but does not eliminate the cause. 358 | 359 | - If benchmarks using GHC 9.4.4+ segfault on Windows, check that you 360 | are not using non-moving garbage collector @--nonmoving-gc@. This is 361 | likely caused by 362 | . 
363 | Previous releases of @tasty-bench@ recommended enabling 364 | @--nonmoving-gc@ to stabilise benchmarks, but it’s discouraged now. 365 | 366 | - If you see 367 | 368 | > : commitBuffer: invalid argument (cannot encode character '\177') 369 | 370 | or 371 | 372 | > Uncaught exception ghc-internal:GHC.Internal.IO.Exception.IOException: 373 | > : commitBuffer: invalid argument (cannot encode character '\956') 374 | 375 | it means that your locale does not support UTF-8. @tasty-bench@ 376 | makes an effort to force locale to UTF-8, but it’s not bulletproof. 377 | In such case run @locale -a@ to list available locales and set a 378 | UTF-8-capable one (e. g., @export LANG=C.UTF-8@) before starting 379 | benchmarks. 380 | 381 | === Isolating interfering benchmarks 382 | 383 | One difficulty of benchmarking in Haskell is that it is hard to isolate 384 | benchmarks so that they do not interfere. Changing the order of 385 | benchmarks or skipping some of them has an effect on heap’s layout and 386 | thus affects garbage collection. This issue is well attested in 387 | 388 | [@criterion@](https://github.com/haskell/criterion/issues/60) and 389 | [@gauge@](https://github.com/vincenthz/hs-gauge/issues/2). 390 | 391 | Usually (but not always) skipping some benchmarks speeds up remaining 392 | ones. That’s because once a benchmark allocated heap which for some 393 | reason was not promptly released afterwards (e. g., it forced a top-level 394 | thunk in an underlying library), all further benchmarks are slowed down 395 | by garbage collector processing this additional amount of live data over 396 | and over again. 397 | 398 | There are several mitigation strategies. First of all, giving garbage 399 | collector more breathing space by @+RTS -A32m@ (or more) is often good 400 | enough. 401 | 402 | Further, avoid using top-level bindings to store large test data. 
Once 403 | such thunks are forced, they remain allocated forever, which affects 404 | detrimentally subsequent unrelated benchmarks. Treat them as external 405 | data, supplied via 'env': instead of 406 | 407 | > largeData :: String 408 | > largeData = replicate 1000000 'a' 409 | > 410 | > main :: IO () 411 | > main = defaultMain 412 | > [ bench "large" $ nf length largeData, ... ] 413 | 414 | use 415 | 416 | > import Control.DeepSeq (force) 417 | > import Control.Exception (evaluate) 418 | > 419 | > main :: IO () 420 | > main = defaultMain 421 | > [ env (evaluate (force (replicate 1000000 'a'))) $ \largeData -> 422 | > bench "large" $ nf length largeData, ... ] 423 | 424 | Finally, as an ultimate measure to reduce interference between 425 | benchmarks, one can run each of them in a separate process. We do not 426 | quite recommend this approach, but if you are desperate, here is how: 427 | 428 | > cabal run -v0 all:benches -- -l | sed -e 's/[\"]/\\\\\\&/g' | while read -r name; do cabal run -v0 all:benches -- -p '$0 == "'"$name"'"'; done 429 | 430 | This assumes that there is a single benchmark suite in the project and 431 | that benchmark names do not contain newlines. 432 | 433 | === Comparison against baseline 434 | 435 | One can compare benchmark results against an earlier run in an automatic 436 | way. 437 | 438 | When using this feature, it’s especially important to compile benchmarks 439 | with 440 | @ghc-options:@ [@-fproc-alignment@](https://downloads.haskell.org/ghc/latest/docs/users_guide/debugging.html#ghc-flag--fproc-alignment)@=64@, 441 | otherwise results could be skewed by intermittent changes in cache-line 442 | alignment. 
443 | 444 | Firstly, run @tasty-bench@ with @--csv FILE@ key to dump results to 445 | @FILE@ in CSV format (it could be a good idea to set smaller @--stdev@, 446 | if possible): 447 | 448 | > Name,Mean (ps),2*Stdev (ps) 449 | > All.Fibonacci numbers.fifth,48453,4060 450 | > All.Fibonacci numbers.tenth,637152,46744 451 | > All.Fibonacci numbers.twentieth,81369531,3342646 452 | 453 | Now modify implementation and rerun benchmarks with @--baseline FILE@ 454 | key. This produces a report as follows: 455 | 456 | > All 457 | > Fibonacci numbers 458 | > fifth: OK 459 | > 53 ns ± 2.7 ns, 8% more than baseline 460 | > tenth: OK 461 | > 641 ns ± 59 ns, same as baseline 462 | > twentieth: OK 463 | > 77 μs ± 6.4 μs, 5% less than baseline 464 | > 465 | > All 3 tests passed (1.50s) 466 | 467 | You can also fail benchmarks, which deviate too far from baseline, using 468 | @--fail-if-slower@ and @--fail-if-faster@ options. For example, setting 469 | both of them to 6 will fail the first benchmark above (because it is 470 | more than 6% slower), but the last one still succeeds (even while it is 471 | measurably faster than baseline, deviation is less than 6%). Consider 472 | also using @--hide-successes@ to show only problematic benchmarks, or 473 | even [@tasty-rerun@](http://hackage.haskell.org/package/tasty-rerun) 474 | package to focus on rerunning failing items only. 475 | 476 | If you wish to compare two CSV reports non-interactively, here is a 477 | handy @awk@ incantation: 478 | 479 | > awk 'BEGIN{FS=",";OFS=",";print "Name,Old,New,Ratio"}FNR==1{trueNF=NF;next}NF0)print "Geometric mean,,",exp(gs/gc)}' old.csv new.csv 480 | 481 | A larger shell snippet to compare two @git@ commits can be found in 482 | @compare_benches.sh@. 483 | 484 | Note that columns in CSV report are different from what @criterion@ or 485 | @gauge@ would produce. 
If names do not contain commas, missing columns 486 | can be faked this way: 487 | 488 | > awk 'BEGIN{FS=",";OFS=",";print "Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB"}NR==1{trueNF=NF;next}NF awk 'BEGIN{FS=",";OFS=",";print "name,iters,time,cycles,cpuTime,utime,stime,maxrss,minflt,majflt,nvcsw,nivcsw,allocated,numGcs,bytesCopied,mutatorWallSeconds,mutatorCpuSeconds,gcWallSeconds,gcCpuSeconds"}NR==1{trueNF=NF;next}NF import Test.Tasty.Bench 500 | > 501 | > fibo :: Int -> Integer 502 | > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2) 503 | > 504 | > main :: IO () 505 | > main = defaultMain 506 | > [ bgroup "Fibonacci numbers" 507 | > [ bcompare "tenth" $ bench "fifth" $ nf fibo 5 508 | > , bench "tenth" $ nf fibo 10 509 | > , bcompare "tenth" $ bench "twentieth" $ nf fibo 20 510 | > ] 511 | > ] 512 | 513 | This produces a report, comparing mean times of @fifth@ and @twentieth@ 514 | to @tenth@: 515 | 516 | > All 517 | > Fibonacci numbers 518 | > fifth: OK 519 | > 121 ns ± 2.6 ns, 0.08x 520 | > tenth: OK 521 | > 1.6 μs ± 31 ns 522 | > twentieth: OK 523 | > 203 μs ± 4.1 μs, 128.36x 524 | 525 | To locate a baseline benchmark in a larger suite use 'locateBenchmark'. 526 | 527 | One can leverage comparisons between benchmarks to implement portable 528 | performance tests, expressing properties like “this algorithm must be at 529 | least twice faster than that one” or “this operation should not be more 530 | than thrice slower than that”. This can be achieved with 531 | 'bcompareWithin', which takes an acceptable interval of performance as 532 | an argument. 533 | 534 | === Plotting results 535 | 536 | Users can dump results into CSV with @--csv FILE@ and plot them using 537 | @gnuplot@ or other software. But for convenience there is also a 538 | built-in quick-and-dirty SVG plotting feature, which can be invoked by 539 | passing @--svg FILE@. 
Here is a sample of its output: 540 | 541 | ![Plotting](example.svg) 542 | 543 | 544 | === Build flags 545 | 546 | Build flags are a brittle subject and users do not normally need to 547 | touch them. 548 | 549 | - If you find yourself in an environment, where @tasty@ is not 550 | available and you have access to boot packages only, you can still 551 | use @tasty-bench@! Just copy @Test\/Tasty\/Bench.hs@ to your project 552 | (imagine it like a header-only C library). It will provide you with 553 | functions to build 'Benchmarkable' and run them manually via 554 | 'measureCpuTime'. This mode of operation can be also configured by 555 | disabling Cabal flag @tasty@. 556 | 557 | === Command-line options 558 | 559 | Use @--help@ to list all command-line options. 560 | 561 | [@-p@, @--pattern@]: 562 | 563 | This is a standard @tasty@ option, which allows filtering benchmarks 564 | by a pattern or @awk@ expression. Please refer to 565 | [@tasty@ documentation](https://github.com/UnkindPartition/tasty#patterns) 566 | for details. 567 | 568 | [@-t@, @--timeout@]: 569 | 570 | This is a standard @tasty@ option, setting timeout for individual 571 | benchmarks in seconds. Use it when benchmarks tend to take too long: 572 | @tasty-bench@ will make an effort to report results (even if of 573 | subpar quality) before timeout. Setting timeout too tight 574 | (insufficient for at least three iterations) will result in a 575 | benchmark failure. One can adjust it locally for a group of 576 | benchmarks, e. g., 'localOption' ('mkTimeout' 100000000) for 100 577 | seconds. 578 | 579 | [@--stdev@]: 580 | 581 | Target relative standard deviation of measurements in percents (5% 582 | by default). Large values correspond to fast and loose benchmarks, 583 | and small ones to long and precise. It can also be adjusted locally 584 | for a group of benchmarks, e. g., 'localOption' ('RelStDev' 0.02). 
If 585 | benchmarking takes far too long, consider setting @--timeout@, which 586 | will interrupt benchmarks, potentially before reaching the target 587 | deviation. 588 | 589 | [@--csv@]: 590 | 591 | File to write results in CSV format. 592 | 593 | [@--baseline@]: 594 | 595 | File to read baseline results in CSV format (as produced by 596 | @--csv@). 597 | 598 | [@--fail-if-slower@, @--fail-if-faster@]: 599 | 600 | Upper bounds of acceptable slow down \/ speed up in percents. If a 601 | benchmark is unacceptably slower \/ faster than baseline (see 602 | @--baseline@), it will be reported as failed. Can be used in 603 | conjunction with a standard @tasty@ option @--hide-successes@ to 604 | show only problematic benchmarks. Both options can be adjusted 605 | locally for a group of benchmarks, e. g., 606 | 'localOption' ('FailIfSlower' 0.10). 607 | 608 | [@--svg@]: 609 | 610 | File to plot results in SVG format. 611 | 612 | [@--time-mode@]: 613 | 614 | Whether to measure CPU time (@cpu@, default) or wall-clock time 615 | (@wall@). 616 | 617 | [@+RTS -T@]: 618 | 619 | Estimate and report memory usage. 620 | 621 | === Custom command-line options 622 | 623 | As usual with @tasty@, it is easy to extend benchmarks with custom 624 | command-line options. Here is an example: 625 | 626 | > import Data.Proxy 627 | > import Test.Tasty.Bench 628 | > import Test.Tasty.Ingredients.Basic 629 | > import Test.Tasty.Options 630 | > import Test.Tasty.Runners 631 | > 632 | > newtype RandomSeed = RandomSeed Int 633 | > 634 | > instance IsOption RandomSeed where 635 | > defaultValue = RandomSeed 42 636 | > parseValue = fmap RandomSeed . 
safeRead 637 | > optionName = pure "seed" 638 | > optionHelp = pure "Random seed used in benchmarks" 639 | > 640 | > main :: IO () 641 | > main = do 642 | > let customOpts = [Option (Proxy :: Proxy RandomSeed)] 643 | > ingredients = includingOptions customOpts : benchIngredients 644 | > opts <- parseOptions ingredients benchmarks 645 | > let RandomSeed seed = lookupOption opts 646 | > defaultMainWithIngredients ingredients benchmarks 647 | > 648 | > benchmarks :: Benchmark 649 | > benchmarks = bgroup "All" [] 650 | 651 | -} 652 | {-# LANGUAGE BangPatterns #-} 653 | {-# LANGUAGE CPP #-} 654 | {-# LANGUAGE DeriveFunctor #-} 655 | {-# LANGUAGE FlexibleContexts #-} 656 | {-# LANGUAGE FlexibleInstances #-} 657 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 658 | {-# LANGUAGE ScopedTypeVariables #-} 659 | {-# LANGUAGE TupleSections #-} 660 | 661 | {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} 662 | {-# HLINT ignore "Unused LANGUAGE pragma" #-} 663 | 664 | module Test.Tasty.Bench 665 | ( 666 | #ifdef MIN_VERSION_tasty 667 | -- * Running 'Benchmark' 668 | defaultMain 669 | , Benchmark 670 | , bench 671 | , bgroup 672 | , bcompare 673 | , bcompareWithin 674 | , env 675 | , envWithCleanup 676 | , 677 | #endif 678 | -- * Creating t'Benchmarkable' 679 | Benchmarkable(..) 680 | , nf 681 | , whnf 682 | , nfIO 683 | , whnfIO 684 | , nfAppIO 685 | , whnfAppIO 686 | , measureCpuTime 687 | , measureCpuTimeAndStDev 688 | #ifdef MIN_VERSION_tasty 689 | -- * Ingredients 690 | , benchIngredients 691 | , consoleBenchReporter 692 | , csvReporter 693 | , svgReporter 694 | , RelStDev(..) 695 | , FailIfSlower(..) 696 | , FailIfFaster(..) 697 | , CsvPath(..) 698 | , BaselinePath(..) 699 | , SvgPath(..) 700 | , TimeMode(..) 701 | -- * Utilities 702 | , locateBenchmark 703 | , mapLeafBenchmarks 704 | #else 705 | , Timeout(..) 706 | , RelStDev(..) 
707 | #endif 708 | ) where 709 | 710 | import Prelude hiding (Int, Integer) 711 | import qualified Prelude 712 | import Control.Applicative 713 | import Control.Arrow (first, second) 714 | import Control.DeepSeq (NFData, force, rnf) 715 | import Control.Exception (bracket, bracket_, evaluate) 716 | import Control.Monad (void, unless, guard, (>=>), when) 717 | import Data.Foldable (foldMap, traverse_) 718 | import Data.Int (Int64) 719 | import Data.IORef 720 | import Data.List (intercalate, stripPrefix, isPrefixOf, genericLength, genericDrop, foldl1') 721 | import Data.Maybe (fromMaybe) 722 | import Data.Monoid (All(..), Any(..)) 723 | import Data.Proxy 724 | import Data.Traversable (forM) 725 | import Data.Word (Word64) 726 | import GHC.Conc 727 | import GHC.IO.Encoding 728 | import GHC.Stats 729 | #if MIN_VERSION_base(4,15,0) 730 | import GHC.Exts (SPEC(..)) 731 | #else 732 | import GHC.Types (SPEC(..)) 733 | #endif 734 | import System.CPUTime 735 | import System.Exit 736 | import System.IO 737 | import System.IO.Unsafe 738 | import System.Mem 739 | import Text.Printf 740 | 741 | #ifdef MIN_VERSION_tasty 742 | import Data.Semigroup (Semigroup(..)) 743 | import qualified Data.IntMap.Strict as IM 744 | import Data.IntMap (IntMap) 745 | import Data.Sequence (Seq, (<|)) 746 | import qualified Data.Sequence as Seq 747 | import qualified Data.Set as S 748 | import Test.Tasty hiding (defaultMain) 749 | import qualified Test.Tasty 750 | import Test.Tasty.Ingredients 751 | import Test.Tasty.Ingredients.ConsoleReporter 752 | import Test.Tasty.Options 753 | import Test.Tasty.Patterns.Eval (eval, asB, withFields) 754 | import Test.Tasty.Patterns.Types (Expr (And, Field, IntLit, NF, StringLit, Sub)) 755 | import qualified Test.Tasty.Patterns.Types as Patterns 756 | import Test.Tasty.Providers 757 | import Test.Tasty.Runners 758 | #endif 759 | 760 | #if MIN_VERSION_base(4,11,0) 761 | import GHC.Clock (getMonotonicTime) 762 | #else 763 | import Data.Time.Clock.POSIX 
(getPOSIXTime) 764 | #endif 765 | 766 | #if defined(mingw32_HOST_OS) 767 | import Data.Word (Word32) 768 | #endif 769 | 770 | #ifndef MIN_VERSION_tasty 771 | data Timeout 772 | = Timeout 773 | Prelude.Integer -- ^ number of microseconds (e. g., 200000) 774 | String -- ^ textual representation (e. g., @"0.2s"@) 775 | | NoTimeout 776 | deriving (Show) 777 | 778 | type Progress = () 779 | #endif 780 | 781 | 782 | -- | In addition to @--stdev@ command-line option, 783 | -- one can adjust target relative standard deviation 784 | -- for individual benchmarks and groups of benchmarks 785 | -- using 'adjustOption' and 'localOption'. 786 | -- 787 | -- E. g., set target relative standard deviation to 2% as follows: 788 | -- 789 | -- > import Test.Tasty (localOption) 790 | -- > localOption (RelStDev 0.02) (bgroup [...]) 791 | -- 792 | -- If you set t'RelStDev' to infinity, 793 | -- a benchmark will be executed 794 | -- only once and its standard deviation will be recorded as zero. 795 | -- This is rather a blunt approach, but it might be a necessary evil 796 | -- for extremely long benchmarks. If you wish to run all benchmarks 797 | -- only once, use command-line option @--stdev@ @Infinity@. 798 | -- 799 | -- @since 0.2 800 | newtype RelStDev = RelStDev Double 801 | deriving 802 | ( Eq 803 | -- ^ @since 0.4 804 | , Ord 805 | -- ^ @since 0.4 806 | , Show 807 | , Read 808 | , Num 809 | -- ^ @since 0.4 810 | , Fractional 811 | -- ^ @since 0.4 812 | ) 813 | 814 | -- | Whether to measure CPU time or wall-clock time. 815 | -- Normally 'CpuTime' is a better option (and default), 816 | -- but consider switching to 'WallTime' 817 | -- to measure multithreaded algorithms or time spent in external processes. 
818 | -- 819 | -- One can switch the default measurement mode globally 820 | -- using @--time-mode@ command-line option, 821 | -- but it is usually better to adjust the mode locally: 822 | -- 823 | -- > import Test.Tasty (localOption) 824 | -- > localOption WallTime (bgroup [...]) 825 | -- 826 | -- You can measure both times and report their ratio with the following gadget: 827 | -- 828 | -- @ 829 | -- bgroup \"Foo\" 830 | -- [ localOption WallTime $ bench \"WallTime\" foo 831 | -- , bcompare \"Foo.WallTime\" 832 | -- $ localOption CpuTime $ bench \"CPUTime\" foo 833 | -- ] 834 | -- @ 835 | -- 836 | -- @since 0.3.2 837 | data TimeMode = CpuTime 838 | -- ^ Measure CPU time. 839 | | WallTime 840 | -- ^ Measure wall-clock time. 841 | | MutatorCpuTime 842 | -- ^ Measure CPU time 843 | -- excluding garbage collection, known as "mutator time". 844 | -- 845 | -- @since 0.5 846 | | MutatorWallTime 847 | -- ^ Measure wall-clock time 848 | -- excluding garbage collection, known as "mutator time". 849 | -- 850 | -- @since 0.5 851 | | CustomTime (IO Word64) 852 | -- ^ Custom measurement action, returning time in picoseconds. 853 | 854 | #ifdef MIN_VERSION_tasty 855 | instance IsOption RelStDev where 856 | defaultValue = RelStDev 0.05 857 | parseValue = fmap RelStDev . parsePositivePercents 858 | optionName = pure "stdev" 859 | optionHelp = pure "Target relative standard deviation of measurements in percents (5 by default). Large values correspond to fast and loose benchmarks, and small ones to long and precise. If it takes far too long, consider setting --timeout, which will interrupt benchmarks, potentially before reaching the target deviation." 860 | 861 | -- | In addition to @--fail-if-slower@ command-line option, 862 | -- one can adjust an upper bound of acceptable slow down 863 | -- in comparison to baseline for 864 | -- individual benchmarks and groups of benchmarks 865 | -- using 'adjustOption' and 'localOption'. 866 | -- 867 | -- E. 
g., set upper bound of acceptable slow down to 10% as follows: 868 | -- 869 | -- > import Test.Tasty (localOption) 870 | -- > localOption (FailIfSlower 0.10) (bgroup [...]) 871 | -- 872 | -- @since 0.2 873 | newtype FailIfSlower = FailIfSlower Double 874 | deriving 875 | ( Eq 876 | -- ^ @since 0.4 877 | , Ord 878 | -- ^ @since 0.4 879 | , Show 880 | , Read 881 | , Num 882 | -- ^ @since 0.4 883 | , Fractional 884 | -- ^ @since 0.4 885 | ) 886 | 887 | instance IsOption FailIfSlower where 888 | defaultValue = FailIfSlower (1.0 / 0.0) 889 | parseValue = fmap FailIfSlower . parsePositivePercents 890 | optionName = pure "fail-if-slower" 891 | optionHelp = pure "Upper bound of acceptable slow down in percents. If a benchmark is unacceptably slower than baseline (see --baseline), it will be reported as failed." 892 | 893 | -- | In addition to @--fail-if-faster@ command-line option, 894 | -- one can adjust an upper bound of acceptable speed up 895 | -- in comparison to baseline for 896 | -- individual benchmarks and groups of benchmarks 897 | -- using 'adjustOption' and 'localOption'. 898 | -- 899 | -- E. g., set upper bound of acceptable speed up to 10% as follows: 900 | -- 901 | -- > import Test.Tasty (localOption) 902 | -- > localOption (FailIfFaster 0.10) (bgroup [...]) 903 | -- 904 | -- @since 0.2 905 | newtype FailIfFaster = FailIfFaster Double 906 | deriving 907 | ( Eq 908 | -- ^ @since 0.4 909 | , Ord 910 | -- ^ @since 0.4 911 | , Show 912 | , Read 913 | , Num 914 | -- ^ @since 0.4 915 | , Fractional 916 | -- ^ @since 0.4 917 | ) 918 | 919 | instance IsOption FailIfFaster where 920 | defaultValue = FailIfFaster (1.0 / 0.0) 921 | parseValue = fmap FailIfFaster . parsePositivePercents 922 | optionName = pure "fail-if-faster" 923 | optionHelp = pure "Upper bound of acceptable speed up in percents. If a benchmark is unacceptably faster than baseline (see --baseline), it will be reported as failed." 
924 | 925 | parsePositivePercents :: String -> Maybe Double 926 | parsePositivePercents xs = do 927 | x <- safeRead xs 928 | guard (x > 0) 929 | pure (x / 100) 930 | 931 | instance IsOption TimeMode where 932 | defaultValue = CpuTime 933 | parseValue v = case v of 934 | "cpu" -> Just CpuTime 935 | "wall" -> Just WallTime 936 | "mutcpu" -> Just MutatorCpuTime 937 | "mutwall" -> Just MutatorWallTime 938 | _ -> Nothing 939 | optionName = pure "time-mode" 940 | optionHelp = pure "Whether to measure total CPU time (\"cpu\"), total wall-clock time (\"wall\"), or time spent by the mutator (CPU \"mutcpu\" or wall-clock \"mutwall\")" 941 | showDefaultValue m = case m of 942 | CpuTime -> Just "cpu" 943 | WallTime -> Just "wall" 944 | MutatorCpuTime -> Just "mutcpu" 945 | MutatorWallTime -> Just "mutwall" 946 | CustomTime _ -> Nothing 947 | #endif 948 | 949 | -- | Something that can be benchmarked, produced by 'nf', 'whnf', 'nfIO', 'whnfIO', 950 | -- 'nfAppIO', 'whnfAppIO' below. 951 | -- 952 | -- Drop-in replacement for @Criterion.Benchmarkable@ and 953 | -- @Gauge.Benchmarkable@. 954 | -- 955 | -- @since 0.1 956 | newtype Benchmarkable = 957 | -- | @since 0.3 958 | Benchmarkable 959 | { unBenchmarkable :: Word64 -> IO () -- ^ Run benchmark given number of times. 960 | } 961 | 962 | #ifdef MIN_VERSION_tasty 963 | 964 | -- | 'defaultMain' forces encoding to 'utf8', but users might 965 | -- be running benchmarks outside of it (e. g., via 'defaultMainWithIngredients'). 966 | supportsUnicode :: Bool 967 | supportsUnicode = maybe False ((== "UTF") . take 3 . 
textEncodingName) enc 968 | #if defined(mingw32_HOST_OS) 969 | && unsafePerformIO getConsoleOutputCP == 65001 970 | #endif 971 | where 972 | enc = unsafePerformIO (hGetEncoding stdout) 973 | {-# NOINLINE supportsUnicode #-} 974 | 975 | mu :: Char 976 | mu = if supportsUnicode then 'μ' else 'u' 977 | 978 | pm :: String 979 | pm = if supportsUnicode then " ± " else " +-" 980 | 981 | -- | Show picoseconds, fitting number in 3 characters. 982 | showPicos3 :: Word64 -> String 983 | showPicos3 i 984 | | t < 995 = printf "%3.0f ps" t 985 | | t < 995e1 = printf "%3.1f ns" (t / 1e3) 986 | | t < 995e3 = printf "%3.0f ns" (t / 1e3) 987 | | t < 995e4 = printf "%3.1f %cs" (t / 1e6) mu 988 | | t < 995e6 = printf "%3.0f %cs" (t / 1e6) mu 989 | | t < 995e7 = printf "%3.1f ms" (t / 1e9) 990 | | t < 995e9 = printf "%3.0f ms" (t / 1e9) 991 | | otherwise = printf "%4.2f s" (t / 1e12) 992 | where 993 | t = word64ToDouble i 994 | 995 | -- | Show picoseconds, fitting number in 4 characters. 996 | showPicos4 :: Word64 -> String 997 | showPicos4 i 998 | | t < 995 = printf "%3.0f ps" t 999 | | t < 995e1 = printf "%4.2f ns" (t / 1e3) 1000 | | t < 995e2 = printf "%4.1f ns" (t / 1e3) 1001 | | t < 995e3 = printf "%3.0f ns" (t / 1e3) 1002 | | t < 995e4 = printf "%4.2f %cs" (t / 1e6) mu 1003 | | t < 995e5 = printf "%4.1f %cs" (t / 1e6) mu 1004 | | t < 995e6 = printf "%3.0f %cs" (t / 1e6) mu 1005 | | t < 995e7 = printf "%4.2f ms" (t / 1e9) 1006 | | t < 995e8 = printf "%4.1f ms" (t / 1e9) 1007 | | t < 995e9 = printf "%3.0f ms" (t / 1e9) 1008 | | otherwise = printf "%4.3f s" (t / 1e12) 1009 | where 1010 | t = word64ToDouble i 1011 | 1012 | showBytes :: Word64 -> String 1013 | showBytes i 1014 | | t < 1000 = printf "%3.0f B " t 1015 | | t < 10189 = printf "%3.1f KB" (t / 1024) 1016 | | t < 1023488 = printf "%3.0f KB" (t / 1024) 1017 | | t < 10433332 = printf "%3.1f MB" (t / 1048576) 1018 | | t < 1048051712 = printf "%3.0f MB" (t / 1048576) 1019 | | t < 10683731149 = printf "%3.1f GB" (t / 1073741824) 
1020 | | t < 1073204953088 = printf "%3.0f GB" (t / 1073741824) 1021 | | t < 10940140696372 = printf "%3.1f TB" (t / 1099511627776) 1022 | | t < 1098961871962112 = printf "%3.0f TB" (t / 1099511627776) 1023 | | t < 11202704073084108 = printf "%3.1f PB" (t / 1125899906842624) 1024 | | t < 1125336956889202624 = printf "%3.0f PB" (t / 1125899906842624) 1025 | | t < 11471568970838126592 = printf "%3.1f EB" (t / 1152921504606846976) 1026 | | otherwise = printf "%3.0f EB" (t / 1152921504606846976) 1027 | where 1028 | t = word64ToDouble i 1029 | #endif 1030 | 1031 | data Measurement = Measurement 1032 | { measTime :: !Word64 -- ^ time in picoseconds 1033 | , measAllocs :: !Word64 -- ^ allocations in bytes 1034 | , measCopied :: !Word64 -- ^ copied bytes 1035 | , measMaxMem :: !Word64 -- ^ max memory in use 1036 | } deriving (Show, Read) 1037 | 1038 | data Estimate = Estimate 1039 | { estMean :: !Measurement 1040 | , estStdev :: !Word64 -- ^ standard deviation in picoseconds 1041 | } deriving (Show, Read) 1042 | 1043 | #ifdef MIN_VERSION_tasty 1044 | 1045 | data WithLoHi a = WithLoHi 1046 | !a -- payload 1047 | !Double -- lower bound (e. g., 0.9 for -10% speedup) 1048 | !Double -- upper bound (e. 
g., 1.2 for +20% slowdown)
  deriving (Show, Read)

-- | Render an estimate as mean time plus-minus twice the standard
-- deviation (the latter omitted when it is zero).
prettyEstimate :: Estimate -> String
prettyEstimate (Estimate m stdev) =
  showPicos4 (measTime m)
  ++ (if stdev == 0 then " " else pm ++ showPicos3 (2 * stdev))

-- | Same as 'prettyEstimate', but also renders GC statistics:
-- allocated, copied and peak memory.
prettyEstimateWithGC :: Estimate -> String
prettyEstimateWithGC (Estimate m stdev) =
  showPicos4 (measTime m)
  ++ (if stdev == 0 then ", " else pm ++ showPicos3 (2 * stdev) ++ ", ")
  ++ showBytes (measAllocs m) ++ " allocated, "
  ++ showBytes (measCopied m) ++ " copied, "
  ++ showBytes (measMaxMem m) ++ " peak memory"

-- | Mean time and twice the standard deviation as raw CSV fields.
csvEstimate :: Estimate -> String
csvEstimate (Estimate m stdev) = show (measTime m) ++ "," ++ show (2 * stdev)

-- | Same as 'csvEstimate', but with GC statistics appended.
csvEstimateWithGC :: Estimate -> String
csvEstimateWithGC (Estimate m stdev) = show (measTime m) ++ "," ++ show (2 * stdev)
  ++ "," ++ show (measAllocs m) ++ "," ++ show (measCopied m) ++ "," ++ show (measMaxMem m)
#endif

-- | Given a measurement for one batch and a measurement for a double
-- batch, estimate the cost of a single batch together with its
-- standard deviation.
--
-- The mean @fit x1 x2 = (x1 + 2 * x2) / 5@ is the least-squares fit of
-- a line through the origin to the points (1, x1) and (2, x2); @d@ is
-- the squared distance of the actual measurements from that fit.
predict
  :: Measurement -- ^ time for one run
  -> Measurement -- ^ time for two runs
  -> Estimate
predict (Measurement t1 a1 c1 m1) (Measurement t2 a2 c2 m2) = Estimate
  { estMean = Measurement t (fit a1 a2) (fit c1 c2) (max m1 m2)
  , estStdev = truncate (sqrt d :: Double)
  }
  where
    fit x1 x2 = x1 `quot` 5 + 2 * (x2 `quot` 5)
    t = fit t1 t2
    sqr x = x * x
    d = sqr (word64ToDouble t1 - word64ToDouble t)
      + sqr (word64ToDouble t2 - 2 * word64ToDouble t)

-- | Same as 'predict', but accounts for the clock precision:
-- both measurements are perturbed up and down by the precision
-- and the worst-case standard deviation of the two pairings is taken.
predictPerturbed :: Measurement -> Measurement -> Estimate
predictPerturbed t1 t2 = Estimate
  { estMean = estMean (predict t1 t2)
  , estStdev = max
    (estStdev (predict (lo t1) (hi t2)))
    (estStdev (predict (hi t1) (lo t2)))
  }
  where
    prec = max (fromInteger cpuTimePrecision) 1000000000 -- 1 ms
    hi meas = meas { measTime = measTime meas + prec }
    lo meas = meas { measTime = if measTime meas > prec then measTime meas - prec else 0 }

-- | Whether RTS statistics are enabled (@+RTS -T@), which makes
-- GC counters meaningful.
hasGCStats :: Bool
#if MIN_VERSION_base(4,10,0)
hasGCStats = unsafePerformIO getRTSStatsEnabled
#else
hasGCStats = unsafePerformIO getGCStatsEnabled
#endif
{-# NOINLINE hasGCStats #-}

-- | Read cumulative (allocated bytes, copied bytes, max memory in use)
-- from the RTS, or zeros when GC statistics are not enabled.
getAllocsAndCopied :: IO (Word64, Word64, Word64)
getAllocsAndCopied = do
  if not hasGCStats then pure (0, 0, 0) else
#if MIN_VERSION_base(4,10,0)
    (\s -> (allocated_bytes s, copied_bytes s, max_mem_in_use_bytes s)) <$> getRTSStats
#else
    (\s -> (int64ToWord64 $ bytesAllocated s, int64ToWord64 $ bytesCopied s, int64ToWord64 $ peakMegabytesAllocated s * 1024 * 1024)) <$> getGCStats
#endif

-- | Wall-clock time in seconds: monotonic clock where available,
-- POSIX time as a fallback on old versions of base.
getWallTimeSecs :: IO Double
#if MIN_VERSION_base(4,11,0)
getWallTimeSecs = getMonotonicTime
#else
getWallTimeSecs = realToFrac <$> getPOSIXTime
#endif

-- | CPU time spent in the mutator (excluding GC), in picoseconds.
getMutatorCpuTime :: IO Word64
#if MIN_VERSION_base(4,10,0)
getMutatorCpuTime = (1000 *) . fromIntegral . mutator_cpu_ns <$> getRTSStats
#else
getMutatorCpuTime = round . (1e12 *) . mutatorCpuSeconds <$> getGCStats
#endif

-- | Wall-clock time spent in the mutator (excluding GC), in picoseconds.
getMutatorWallTime :: IO Word64
#if MIN_VERSION_base(4,10,0)
getMutatorWallTime = (1000 *) . fromIntegral . mutator_elapsed_ns <$> getRTSStats
#else
getMutatorWallTime = round . (1e12 *) . mutatorWallSeconds <$> getGCStats
#endif


-- | Sample the clock selected by 'TimeMode', in picoseconds.
getTimePicoSecs :: TimeMode -> IO Word64
getTimePicoSecs timeMode = case timeMode of
  CpuTime -> fromInteger <$> getCPUTime
  WallTime -> round . 
(1e12 *) <$> getWallTimeSecs
  MutatorCpuTime -> getMutatorCpuTime
  MutatorWallTime -> getMutatorWallTime
  CustomTime getCustomTime -> getCustomTime

-- | Run a benchmark for @n@ iterations and measure elapsed time
-- and GC statistics. 'performGC' beforehand and 'performMinorGC'
-- afterwards keep the RTS counters honest; note that the end time
-- is taken /before/ the minor GC, so GC bookkeeping does not inflate
-- the timing itself.
measure :: TimeMode -> Word64 -> Benchmarkable -> IO Measurement
measure timeMode n (Benchmarkable act) = do
  let getTimePicoSecs' = getTimePicoSecs timeMode
  performGC
  startTime <- getTimePicoSecs'
  (startAllocs, startCopied, startMaxMemInUse) <- getAllocsAndCopied
  act n
  endTime <- getTimePicoSecs'
  performMinorGC -- perform GC to update RTSStats
  (endAllocs, endCopied, endMaxMemInUse) <- getAllocsAndCopied
  let meas = Measurement
        { measTime = endTime - startTime
        , measAllocs = endAllocs - startAllocs
        , measCopied = endCopied - startCopied
        , measMaxMem = max endMaxMemInUse startMaxMemInUse
        }
  pure meas

-- | Keep doubling the number of iterations until the relative standard
-- deviation of the estimate falls below the target, or the timeout is
-- about to expire. An infinite positive target relative stdev means
-- "run exactly once" (first clause).
measureUntil
  :: (Progress -> IO ())
  -> TimeMode
  -> Timeout
  -> RelStDev
  -> Benchmarkable
  -> IO Estimate
measureUntil _ timeMode _ (RelStDev targetRelStDev) b
  | isInfinite targetRelStDev, targetRelStDev > 0 = do
    t1 <- measure timeMode 1 b
    pure $ Estimate { estMean = t1, estStdev = 0 }
measureUntil yieldProgress timeMode timeout (RelStDev targetRelStDev) b = do
  t1 <- measure' 1 b
  go 1 t1 0
  where
    measure' = measure timeMode

    -- n is the iteration count of t1; sumOfTs accumulates the total
    -- time spent so far (excluding t1), compared against the timeout.
    go :: Word64 -> Measurement -> Word64 -> IO Estimate
    go n t1 sumOfTs = do
      t2 <- measure' (2 * n) b

      let Estimate (Measurement meanN allocN copiedN maxMemN) stdevN = predictPerturbed t1 t2
          isTimeoutSoon = case timeout of
            NoTimeout -> False
            -- multiplying by 12/10 helps to avoid accidental timeouts
            -- (1000000 * 10 `quot` 12 = 833333 converts ps to a 1.2x-inflated μs)
            Timeout micros _ -> (sumOfTs' + 3 * measTime t2) `quot` (1000000 * 10 `quot` 12) >= fromInteger micros
          isStDevInTargetRange = stdevN < truncate (max 0 targetRelStDev * word64ToDouble meanN)
          -- estimates above are for a batch of n iterations;
          -- scale down to a single iteration
          scale = (`quot` n)
          sumOfTs' = sumOfTs + measTime t1

      let scaledEstimate = Estimate
            { estMean = Measurement (scale meanN) (scale allocN) (scale copiedN) maxMemN
            , estStdev = scale stdevN }

#ifdef MIN_VERSION_tasty
      yieldProgress $ Progress
        { progressText = prettyEstimate scaledEstimate
        , progressPercent = 0.0
        }
#else
      yieldProgress ()
#endif

      if isStDevInTargetRange || isTimeoutSoon
        then pure scaledEstimate
        else go (2 * n) t2 sumOfTs'

-- | An internal routine to measure CPU execution time in seconds
-- for a given timeout (put 'NoTimeout', or 'mkTimeout' 100000000 for 100 seconds)
-- and a target relative standard deviation
-- (put v'RelStDev' 0.05 for 5% or v'RelStDev' (1/0) to run only one iteration).
--
-- t'Timeout' takes soft priority over t'RelStDev': this function prefers
-- to finish in time even if at cost of precision. However, timeout is guidance
-- not guarantee: 'measureCpuTime' can take longer, if there is not enough time
-- to run at least thrice or an iteration takes unusually long.
--
-- @since 0.3
measureCpuTime :: Timeout -> RelStDev -> Benchmarkable -> IO Double
measureCpuTime = ((fmap fst .) .) . measureCpuTimeAndStDev

-- | Same as 'measureCpuTime', but returns both CPU execution time
-- and its standard deviation.
--
-- @since 0.3.4
measureCpuTimeAndStDev :: Timeout -> RelStDev -> Benchmarkable -> IO (Double, Double)
measureCpuTimeAndStDev
  = ((fmap (\x ->
    ( word64ToDouble (measTime (estMean x)) / 1e12
    , word64ToDouble (estStdev x) / 1e12
    )) .) .)
  . 
measureUntil (const $ pure ()) CpuTime

#ifdef MIN_VERSION_tasty

-- | Benchmarks are executed as ordinary tasty tests; the estimate is
-- smuggled to reporters as a 'Show'n 'WithLoHi' in the result description.
instance IsTest Benchmarkable where
  testOptions = pure
    [ Option (Proxy :: Proxy RelStDev)
    -- FailIfSlower and FailIfFaster must be options of a test provider rather
    -- than options of an ingredient to allow setting them on per-test level.
    , Option (Proxy :: Proxy FailIfSlower)
    , Option (Proxy :: Proxy FailIfFaster)
    , Option (Proxy :: Proxy TimeMode)
    ]
  run opts b yieldProgress = case getNumThreads (lookupOption opts) of
    1 -> do
      let timeMode = lookupOption opts
      est <- measureUntil yieldProgress timeMode (lookupOption opts) (lookupOption opts) b
      let FailIfSlower ifSlower = lookupOption opts
          FailIfFaster ifFaster = lookupOption opts
      pure $ testPassed $ show (WithLoHi est (1 - ifFaster) (1 + ifSlower))
    _ -> pure $ testFailed "Benchmarks must not be run concurrently. Please pass -j1 and/or avoid +RTS -N."

-- | Attach a name to t'Benchmarkable'.
--
-- This is actually a synonym of 'Test.Tasty.Providers.singleTest' to
-- provide an interface compatible with @Criterion.bench@
-- and @Gauge.bench@.
--
-- @since 0.1
bench :: String -> Benchmarkable -> Benchmark
bench = singleTest

-- | Attach a name to a group of 'Benchmark'.
--
-- This is actually a synonym of 'Test.Tasty.testGroup' to provide an
-- interface compatible with @Criterion.bgroup@ and
-- @Gauge.bgroup@.
--
-- @since 0.1
bgroup :: String -> [Benchmark] -> Benchmark
bgroup = testGroup

-- | Compare benchmarks, reporting relative speed up or slow down.
--
-- This function is a vague reminiscence of @bcompare@, which existed in pre-1.0
-- versions of @criterion@, but their types are incompatible. Under the hood
-- 'bcompare' is a thin wrapper over 'after'.
--
-- Here is a basic example:
--
-- > import Test.Tasty.Bench
-- >
-- > fibo :: Int -> Integer
-- > fibo n = if n < 2 then toInteger n else fibo (n - 1) + fibo (n - 2)
-- >
-- > main :: IO ()
-- > main = defaultMain
-- >   [ bgroup "Fibonacci numbers"
-- >     [ bcompare "tenth" $ bench "fifth" $ nf fibo 5
-- >     , bench "tenth" $ nf fibo 10
-- >     , bcompare "tenth" $ bench "twentieth" $ nf fibo 20
-- >     ]
-- >   ]
--
-- More complex examples:
--
-- * https://hackage.haskell.org/package/chimera-0.4.1.0/src/bench/Read.hs
-- * https://hackage.haskell.org/package/fast-digits-0.3.2.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/unicode-data-0.8.0/src/bench/Unicode/Char/Bench.hs
--
-- @since 0.2.4
bcompare
  :: String
  -- ^ @tasty@ pattern, which must unambiguously
  -- match a unique baseline benchmark. Consider using 'locateBenchmark' to construct it.
  -> Benchmark
  -- ^ Benchmark (or a group of benchmarks)
  -- to be compared against the baseline benchmark by dividing measured mean times.
  -- The result is reported by 'consoleBenchReporter', e. g., 0.50x or 1.25x.
  -> Benchmark
bcompare = bcompareWithin (-1/0) (1/0)

-- | Same as 'bcompare', but takes expected lower and upper bounds of
-- comparison. If the result is not within provided bounds, benchmark fails.
-- This allows to create portable performance tests: instead of comparing
-- to an absolute timeout or to previous runs, you can state that one implementation
-- of an algorithm must be faster than another.
--
-- E. g., 'bcompareWithin' 2.0 3.0 passes only if a benchmark is at least 2x
-- and at most 3x slower than a baseline.
--
-- Examples:
--
-- * https://hackage.haskell.org/package/text-2.1.3/src/benchmarks/haskell/Benchmarks/Micro.hs
-- * https://hackage.haskell.org/package/bluefin-algae-0.1.0.2/src/bench/quadratic-counter.hs
--
-- @since 0.3.1
bcompareWithin
  :: Double -- ^ Lower bound of relative speed up.
  -> Double -- ^ Upper bound of relative speed up.
  -> String -- ^ @tasty@ pattern to locate a baseline benchmark.
  -> Benchmark -- ^ Benchmark to compare against baseline.
  -> Benchmark
bcompareWithin lo hi s = case parseExpr s of
  Nothing -> error $ "Could not parse bcompare pattern " ++ s
  -- The bounds are smuggled to the reporter inside a fake pattern
  -- literal marked by 'bcomparePrefix', conjoined with the user's pattern.
  Just e -> after_ AllSucceed (And (StringLit (bcomparePrefix ++ show (lo, hi))) e)

-- | Magic marker distinguishing dependencies introduced by
-- 'bcompareWithin' from ordinary 'after' dependencies.
bcomparePrefix :: String
bcomparePrefix = "tasty-bench"

-- | Benchmarks are actually just a regular 'Test.Tasty.TestTree' in disguise.
--
-- This is a drop-in replacement for @Criterion.Benchmark@
-- and @Gauge.Benchmark@.
--
-- @since 0.1
type Benchmark = TestTree

-- | Run benchmarks and report results, providing an interface
-- compatible with @Criterion.defaultMain@ and
-- @Gauge.defaultMain@.
--
-- An unfortunate consequence of being a drop-in replacement for @criterion@
-- and @gauge@ is that this function diverges from @tasty@,
-- where 'Test.Tasty.defaultMain' takes a single input, not a list of inputs.
--
-- @since 0.1
defaultMain :: [Benchmark] -> IO ()
defaultMain bs = do
  let act = defaultMain' bs
  bracketUtf8 act

-- | Run an action with stdout switched to UTF-8 (and, on Windows,
-- the console output code page switched to 65001), restoring the
-- previous encoding afterwards even on exceptions.
bracketUtf8 :: IO a -> IO a
bracketUtf8 act = do
  prevStdoutEnc <- hGetEncoding stdout
#if defined(mingw32_HOST_OS)
  codePage <- getConsoleOutputCP
  bracket_
    (hSetEncoding stdout utf8
      >> setConsoleOutputCP 65001)
    (maybe (hSetBinaryMode stdout True) (hSetEncoding stdout) prevStdoutEnc
      >> setConsoleOutputCP codePage)
    act
#else
  bracket_
    (hSetEncoding stdout utf8)
    (maybe (hSetBinaryMode stdout True) (hSetEncoding stdout) prevStdoutEnc)
    act
#endif

-- | 'defaultMain' sans the encoding bracket: install signal handlers,
-- parse options, force single-threaded execution and run the ingredients.
defaultMain' :: [Benchmark] -> IO ()
defaultMain' bs = do
  installSignalHandlers
  let b = testGroup "All" bs
  opts <- parseOptions benchIngredients b
  let opts' = setOption (NumThreads 1) opts
#if MIN_VERSION_tasty(1,5,0)
      -- Suppress tasty's own duration reporting: tasty-bench renders times itself.
      opts'' = setOption (MinDurationToReport 1000000000000) opts'
#else
      opts'' = opts'
#endif
  case tryIngredients benchIngredients opts'' b of
    Nothing -> exitFailure
    Just act -> act >>= \x -> if x then exitSuccess else exitFailure

-- | List of default benchmark ingredients. This is what 'defaultMain' runs.
--
-- @since 0.2
benchIngredients :: [Ingredient]
benchIngredients = [listingTests, composeReporters consoleBenchReporter (composeReporters csvReporter svgReporter)]

#endif

-- | Turn a pure function application into a t'Benchmarkable' loop,
-- forcing each result with @frc@.
funcToBench :: forall a b c. (b -> c) -> (a -> b) -> a -> Benchmarkable
funcToBench frc = (Benchmarkable .) . 
funcToBenchLoop SPEC
  where
    -- Here we rely on the fact that GHC (unless spurred by
    -- -fstatic-argument-transformation) is not smart enough:
    -- it does not notice that `f` and `x` arguments are loop invariant
    -- and could be floated, and the whole `f x` expression shared.
    -- If we create a closure with `f` and `x` bound in the environment,
    -- then GHC is smart enough to share computation of `f x`.
    --
    -- For perspective, gauge and criterion < 1.4 mark similar functions as INLINE,
    -- while criterion >= 1.4 switches to NOINLINE.
    -- If we mark `funcToBenchLoop` NOINLINE then benchmark results are slightly larger
    -- (noticeable in bench-fibo), because the loop body is slightly bigger,
    -- since GHC does not unbox numbers or inline `Eq @Word64` dictionary.
    --
    -- This function is called `funcToBenchLoop` instead of, say, `go`,
    -- so it is easier to spot in Core dumps.
    --
    -- Forcing SpecConst optimization with SPEC makes the behaviour of benchmarks
    -- independent of -fspec-constr-count.
    funcToBenchLoop :: SPEC -> (a -> b) -> a -> Word64 -> IO ()
    funcToBenchLoop !_ f x n
      | n == 0 = pure ()
      | otherwise = do
        _ <- evaluate (frc (f x))
        funcToBenchLoop SPEC f x (n - 1)
{-# INLINE funcToBench #-}

-- | 'nf' @f@ @x@ measures time to compute
-- a normal form (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force')
-- of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Here is a textbook anti-pattern: 'nf' 'sum' @[1..1000000]@.
-- Since an input list is shared by multiple invocations of 'sum',
-- it will be allocated in memory in full, putting immense pressure
-- on garbage collector. Also no list fusion will happen.
-- A better approach is 'nf' (@\\n@ @->@ 'sum' @[1..n]@) @1000000@.
--
-- It is preferable that the return type of the function under measurement
-- is inhabited enough to depend genuinely on all computations and is not simply @b ~ ()@.
-- Otherwise GHC might get aggressive and optimise the payload away.
--
-- If you are measuring an inlinable function,
-- it is prudent to ensure that its invocation is fully saturated,
-- otherwise inlining will not happen. That's why one can often
-- see 'nf' (@\\n@ @->@ @f@ @n@) @x@ instead of 'nf' @f@ @x@.
-- Same applies to rewrite rules.
--
-- If you suspect that GHC overoptimizes / overspecializes the function call @f@,
-- try defeating it with 'GHC.Exts.noinline' @f@.
--
-- While @tasty-bench@ is capable to perform micro- and even nanobenchmarks,
-- such measurements are noisy and involve an overhead. Results are more reliable
-- when @f@ @x@ takes at least several milliseconds.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios (imagine benchmarking 'tail'),
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- 'nf' @f@ is equivalent to 'whnf' ('Control.DeepSeq.rnf' '.' @f@), but not to
-- 'whnf' ('Control.DeepSeq.force' '.' @f@). The latter retains the result
-- in memory until it is fully evaluated, while the former allows
-- evaluated parts of the result to be garbage-collected immediately.
--
-- For users of @{-# LANGUAGE LinearTypes #-}@: if @f@ is a linear function,
-- then 'nf' @f@ @x@ is ill-typed, but you can use 'nf' @(\\y -> f y)@ @x@
-- instead.
--
-- Drop-in replacement for @Criterion.nf@ and
-- @Gauge.nf@.
--
-- @since 0.1
nf :: NFData b => (a -> b) -> a -> Benchmarkable
nf = funcToBench rnf
{-# INLINE nf #-}

-- | 'whnf' @f@ @x@ measures time to compute
-- a weak head normal form of an application of @f@ to @x@.
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Beware that many educational materials contain examples with 'whnf':
-- this is a wrong default.
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nf' instead.
--
-- Here is a textbook anti-pattern: 'whnf' ('Data.List.replicate' @1000000@) @1@.
-- This will succeed in a matter of nanoseconds, because weak head
-- normal form forces only the first element of the list.
--
-- Drop-in replacement for @Criterion.whnf@ and @Gauge.whnf@.
--
-- @since 0.1
whnf :: (a -> b) -> a -> Benchmarkable
whnf = funcToBench id
{-# INLINE whnf #-}

-- | Build a t'Benchmarkable' which repeatedly runs an 'IO' action,
-- forcing each result with @frc@.
ioToBench :: (b -> c) -> IO b -> Benchmarkable
ioToBench frc act = Benchmarkable (ioToBenchLoop SPEC)
  where
    ioToBenchLoop :: SPEC -> Word64 -> IO ()
    ioToBenchLoop !_ n
      | n == 0 = pure ()
      | otherwise = do
        val <- act
        _ <- evaluate (frc val)
        ioToBenchLoop SPEC (n - 1)
{-# INLINE ioToBench #-}

-- | 'nfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its normal form
-- (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force').
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached. For example,
-- GHC is likely to float @x@ out of 'nfIO' ('pure' @x@) and
-- evaluate it only once, which leaves 'nfIO' to measure 'pure' only
-- with results in nanosecond range.
--
-- To avoid surprising results it is usually preferable
-- to use 'nfAppIO' instead. You can also try turning off
-- let floating by
-- [@-fno-full-laziness@](https://downloads.haskell.org/ghc/latest/docs/users_guide/using-optimisation.html#ghc-flag-ffull-laziness),
-- but this is likely to cause more problems than solve.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. In certain scenarios,
-- especially when 'NFData' instance is badly written,
-- this traversal may take non-negligible time and affect results.
--
-- A typical use case is 'nfIO' ('readFile' @"foo.txt"@).
-- However, if your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- One handy consequence of having access to `IO` is that you can
-- generate new random inputs for each run using @System.Random.randomIO@
-- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@.
--
-- Drop-in replacement for @Criterion.nfIO@ and @Gauge.nfIO@.
--
-- @since 0.1
nfIO :: NFData a => IO a -> Benchmarkable
nfIO = ioToBench rnf
{-# INLINE nfIO #-}

-- | 'whnfIO' @x@ measures time to evaluate side-effects of @x@
-- and compute its weak head normal form.
--
-- Pure subexpression of an effectful computation @x@
-- may be evaluated only once and get cached. For example,
-- GHC is likely to float @x@ out of 'whnfIO' ('pure' @x@) and
-- evaluate it only once, which leaves 'whnfIO' to measure 'pure' only
-- with results in nanosecond range.
--
-- To avoid surprising results it is usually preferable
-- to use 'whnfAppIO' instead. You can also try turning off
-- let floating by
-- [@-fno-full-laziness@](https://downloads.haskell.org/ghc/latest/docs/users_guide/using-optimisation.html#ghc-flag-ffull-laziness),
-- but this is likely to cause more problems than solve.
--
-- Computing only a weak head normal form is
-- rarely what intuitively is meant by "evaluation".
-- Unless you understand precisely, what is measured,
-- it is recommended to use 'nfIO' instead.
--
-- Lazy I\/O is treacherous.
-- If your goal is not to benchmark I\/O per se,
-- but just read input data from a file, it is cleaner to
-- use 'env' or 'withResource'.
--
-- One handy consequence of having access to `IO` is that you can
-- generate new random inputs for each run using @System.Random.randomIO@
-- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@.
--
-- Drop-in replacement for @Criterion.whnfIO@ and @Gauge.whnfIO@.
--
-- @since 0.1
whnfIO :: IO a -> Benchmarkable
whnfIO = ioToBench id
{-# INLINE whnfIO #-}

-- | Build a t'Benchmarkable' which repeatedly applies an 'IO' function
-- to a shared argument, forcing each result with @frc@.
ioFuncToBench :: forall a b c. (b -> c) -> (a -> IO b) -> a -> Benchmarkable
ioFuncToBench frc = (Benchmarkable .) . ioFuncToBenchLoop SPEC
  where
    ioFuncToBenchLoop :: SPEC -> (a -> IO b) -> a -> Word64 -> IO ()
    ioFuncToBenchLoop !_ f x n
      | n == 0 = pure ()
      | otherwise = do
        val <- f x
        _ <- evaluate (frc val)
        ioFuncToBenchLoop SPEC f x (n - 1)
{-# INLINE ioFuncToBench #-}

-- | 'nfAppIO' @f@ @x@ measures time to evaluate side-effects of
-- an application of @f@ to @x@
-- and compute its normal form
-- (by means of 'Control.DeepSeq.rnf', not 'Control.DeepSeq.force').
-- This does not include time to evaluate @f@ or @x@ themselves.
-- Ideally @x@ should be a primitive data type like 'Data.Int.Int'.
--
-- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate
-- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may
-- be an infinite structure. Thus @x@ will be evaluated in course of the first
-- application of @f@. This noisy measurement is to be discarded soon,
-- but if @x@ is not a primitive data type, consider forcing its evaluation
-- separately, e. g., via 'env' or 'withResource'.
--
-- Remember that forcing a normal form requires an additional
-- traverse of the structure. 
In certain scenarios, 1638 | -- especially when 'NFData' instance is badly written, 1639 | -- this traversal may take non-negligible time and affect results. 1640 | -- 1641 | -- A typical use case is 'nfAppIO' 'readFile' @"foo.txt"@. 1642 | -- However, if your goal is not to benchmark I\/O per se, 1643 | -- but just read input data from a file, it is cleaner to 1644 | -- use 'env' or 'withResource'. 1645 | -- 1646 | -- One handy consequence of having access to `IO` is that you can 1647 | -- generate new random inputs for each run using @System.Random.randomIO@ 1648 | -- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@. 1649 | -- 1650 | -- Drop-in replacement for @Criterion.nfAppIO@ and @Gauge.nfAppIO@. 1651 | -- 1652 | -- @since 0.1 1653 | nfAppIO :: NFData b => (a -> IO b) -> a -> Benchmarkable 1654 | nfAppIO = ioFuncToBench rnf 1655 | {-# INLINE nfAppIO #-} 1656 | 1657 | -- | 'whnfAppIO' @f@ @x@ measures time to evaluate side-effects of 1658 | -- an application of @f@ to @x@ 1659 | -- and compute its weak head normal form. 1660 | -- This does not include time to evaluate @f@ or @x@ themselves. 1661 | -- Ideally @x@ should be a primitive data type like 'Data.Int.Int'. 1662 | -- 1663 | -- The same thunk of @x@ is shared by multiple calls of @f@. We cannot evaluate 1664 | -- @x@ beforehand: there is no 'NFData' @a@ constraint, and potentially @x@ may 1665 | -- be an infinite structure. Thus @x@ will be evaluated in course of the first 1666 | -- application of @f@. This noisy measurement is to be discarded soon, 1667 | -- but if @x@ is not a primitive data type, consider forcing its evaluation 1668 | -- separately, e. g., via 'env' or 'withResource'. 1669 | -- 1670 | -- Computing only a weak head normal form is 1671 | -- rarely what intuitively is meant by "evaluation". 1672 | -- Unless you understand precisely, what is measured, 1673 | -- it is recommended to use 'nfAppIO' instead. 1674 | -- 1675 | -- Lazy I\/O is treacherous. 
1676 | -- If your goal is not to benchmark I\/O per se, 1677 | -- but just read input data from a file, it is cleaner to 1678 | -- use 'env' or 'withResource'. 1679 | -- 1680 | -- One handy consequence of having access to `IO` is that you can 1681 | -- generate new random inputs for each run using @System.Random.randomIO@ 1682 | -- or @System.Random.Stateful.uniformM@ @System.Random.Stateful.globalStdGen@. 1683 | -- 1684 | -- Drop-in replacement for @Criterion.whnfAppIO@ and @Gauge.whnfAppIO@. 1685 | -- 1686 | -- @since 0.1 1687 | whnfAppIO :: (a -> IO b) -> a -> Benchmarkable 1688 | whnfAppIO = ioFuncToBench id 1689 | {-# INLINE whnfAppIO #-} 1690 | 1691 | #ifdef MIN_VERSION_tasty 1692 | 1693 | -- | Run benchmarks in the given environment, usually reading large input data from file. 1694 | -- 1695 | -- One might wonder why 'env' is needed, 1696 | -- when we can simply read all input data 1697 | -- before calling 'defaultMain'. The reason is that large data 1698 | -- dangling in the heap causes longer garbage collection 1699 | -- and slows down all benchmarks, even those which do not use it at all. 1700 | -- 1701 | -- It is instrumental not only for proper 'IO' actions, 1702 | -- but also for a large statically-known data as well. Instead of a top-level 1703 | -- definition, which once evaluated will slow down garbage collection 1704 | -- during all subsequent benchmarks, 1705 | -- 1706 | -- > largeData :: String 1707 | -- > largeData = replicate 1000000 'a' 1708 | -- > 1709 | -- > main :: IO () 1710 | -- > main = defaultMain 1711 | -- > [ bench "large" $ nf length largeData, ... ] 1712 | -- 1713 | -- use 1714 | -- 1715 | -- > main :: IO () 1716 | -- > main = defaultMain 1717 | -- > [ env (pure (replicate 1000000 'a')) $ \largeData -> 1718 | -- > bench "large" $ nf length largeData, ... ] 1719 | -- 1720 | -- Even with 'env', it's advisable to store input data in as few heap objects 1721 | -- as possible. 
'Data.Array.ByteArray.ByteArray' (ideally pinned) 1722 | -- or unboxed @Vector@ are good, boxed arrays are worse, lists and trees are bad. 1723 | -- 1724 | -- @Test.Tasty.Bench.@'env' is provided only for the sake of 1725 | -- compatibility with @Criterion.env@ and 1726 | -- @Gauge.env@, and involves 'unsafePerformIO'. Consider using 1727 | -- 'withResource' instead. 1728 | -- 1729 | -- When working with a mutable environment, bear in mind that it is threaded 1730 | -- through all iterations of a benchmark. @tasty-bench@ does not roll it back 1731 | -- or reset, it's user's resposibility. You might have better luck 1732 | -- with @Criterion.perBatchEnv@ or @Criterion.perRunEnv@. 1733 | -- 1734 | -- 'defaultMain' requires that the hierarchy of benchmarks and their names is 1735 | -- independent of underlying 'IO' actions. While executing 'IO' inside 'bench' 1736 | -- via 'nfIO' is fine, and reading test data from files via 'env' is also fine, 1737 | -- using 'env' to choose benchmarks or their names depending on 'IO' side effects 1738 | -- will throw a rather cryptic error message: 1739 | -- 1740 | -- > Unhandled resource. Probably a bug in the runner you're using. 1741 | -- 1742 | -- Strict pattern-matching on resource is also prohibited. For 1743 | -- instance, if it is a tuple, the second argument of 'env' should use 1744 | -- a lazy pattern match @\\~(a, b) -> ...@ 1745 | -- 1746 | -- @since 0.2 1747 | env :: NFData env => IO env -> (env -> Benchmark) -> Benchmark 1748 | env res = envWithCleanup res (const $ pure ()) 1749 | 1750 | -- | Similar to 'env', but includes an additional argument 1751 | -- to clean up created environment. 1752 | -- 1753 | -- Provided only for the sake of compatibility with 1754 | -- @Criterion.envWithCleanup@ and 1755 | -- @Gauge.envWithCleanup@, and involves 1756 | -- 'unsafePerformIO'. Consider using 'withResource' instead. 
--
-- @since 0.2
envWithCleanup :: NFData env => IO env -> (env -> IO a) -> (env -> Benchmark) -> Benchmark
envWithCleanup res fin f = withResource
  -- Force the environment to normal form up front, so that its evaluation
  -- cost is not attributed to the benchmarks themselves.
  (res >>= evaluate . force)
  (void . fin)
  -- 'withResource' hands the resource over as an 'IO' action; 'env' promises
  -- a pure value, hence 'unsafePerformIO' (this is why 'withResource' is the
  -- recommended alternative).
  (f . unsafePerformIO)

-- | A path to write results in CSV format, populated by @--csv@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'. For example,
-- here is how to set a default CSV location:
--
-- @
-- import Data.Maybe
-- import System.Exit
-- import Test.Tasty.Bench
-- import Test.Tasty.Ingredients
-- import Test.Tasty.Options
-- import Test.Tasty.Runners
--
-- main :: IO ()
-- main = do
--   let benchmarks = bgroup \"All\" ...
--   opts <- parseOptions benchIngredients benchmarks
--   let opts' = changeOption (Just . fromMaybe (CsvPath "foo.csv")) opts
--   case tryIngredients benchIngredients opts' benchmarks of
--     Nothing -> exitFailure
--     Just mb -> mb >>= \\b -> if b then exitSuccess else exitFailure
-- @
--
-- @since 0.3
newtype CsvPath = CsvPath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe CsvPath) where
  defaultValue = Nothing
  parseValue = Just . Just . CsvPath
  optionName = pure "csv"
  optionHelp = pure "File to write results in CSV format"

-- | Run benchmarks and save results in CSV format.
-- It activates when @--csv@ @FILE@ command line option is specified.
--
-- @since 0.1
csvReporter :: Ingredient
csvReporter = TestReporter [Option (Proxy :: Proxy (Maybe CsvPath))] $
  \opts tree -> do
    -- The reporter is active only when --csv was supplied.
    CsvPath path <- lookupOption opts
    let names = testsNames opts tree
        namesMap = IM.fromDistinctAscList $ zip [0..] names
    pure $ \smap -> do
      -- CSV rows are keyed by benchmark name, so duplicates would be ambiguous.
      case findNonUniqueElement names of
        Nothing -> pure ()
        Just name -> do -- 'die' is not available before base-4.8
          hPutStrLn stderr $ "CSV report cannot proceed, because name '" ++ name ++ "' corresponds to two or more benchmarks. Please disambiguate them."
          exitFailure
      let augmented = IM.intersectionWith (,) namesMap smap
      -- 'bracket' guarantees the handle is closed even if output fails mid-way.
      bracket
        (do
          h <- openFile path WriteMode
          hSetBuffering h LineBuffering
          hPutStrLn h $ "Name,Mean (ps),2*Stdev (ps)" ++
            (if hasGCStats then ",Allocated,Copied,Peak Memory" else "")
          pure h
        )
        hClose
        (`csvOutput` augmented)
      pure $ const $ isSuccessful smap

-- | Return the first element that occurs more than once in the list,
-- or 'Nothing' if all elements are distinct.
findNonUniqueElement :: Ord a => [a] -> Maybe a
findNonUniqueElement = go S.empty
  where
    go _ [] = Nothing
    go acc (x : xs)
      | x `S.member` acc = Just x
      | otherwise = go (S.insert x acc) xs

-- | Block (via STM 'retry') until each benchmark is 'Done', then append
-- its row to the CSV handle.
csvOutput :: Handle -> IntMap (TestName, TVar Status) -> IO ()
csvOutput h = traverse_ $ \(name, tv) -> do
  let csv = if hasGCStats then csvEstimateWithGC else csvEstimate
  r <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure r; _ -> retry
  case safeRead (resultDescription r) of
    Nothing -> pure ()
    Just (WithLoHi est _ _) -> do
      msg <- formatMessage $ csv est
      hPutStrLn h (encodeCsv name ++ ',' : msg)

-- | Quote a CSV field if it contains a delimiter, quote or newline;
-- embedded quotes are doubled (RFC 4180 style).
encodeCsv :: String -> String
encodeCsv xs
  | any (`elem` xs) ",\"\n\r"
  = '"' : go xs -- opening quote
  | otherwise = xs
  where
    go [] = '"' : [] -- closing quote
    go ('"' : ys) = '"' : '"' : go ys
    go (y : ys) = y : go ys

-- | A path to plot results in SVG format, populated by @--svg@.
--
-- This is an option of 'svgReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'.
--
-- @since 0.3
newtype SvgPath = SvgPath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe SvgPath) where
  defaultValue = Nothing
  parseValue = Just . Just . SvgPath
  optionName = pure "svg"
  optionHelp = pure "File to plot results in SVG format"

-- | Run benchmarks and plot results in SVG format.
-- It activates when @--svg@ @FILE@ command line option is specified.
--
-- @since 0.2.4
svgReporter :: Ingredient
svgReporter = TestReporter [Option (Proxy :: Proxy (Maybe SvgPath))] $
  \opts tree -> do
    SvgPath path <- lookupOption opts
    let names = testsNames opts tree
        namesMap = IM.fromDistinctAscList $ zip [0..] names
    pure $ \smap -> do
      -- Results are prepended as they complete, hence the 'reverse' below.
      ref <- newIORef []
      svgCollect ref (IM.intersectionWith (,) namesMap smap)
      res <- readIORef ref
      writeFile path (svgRender (reverse res))
      pure $ const $ isSuccessful smap

-- | Block until every benchmark is 'Done' and report whether all succeeded.
isSuccessful :: StatusMap -> IO Bool
isSuccessful = go . IM.elems
  where
    go [] = pure True
    go (tv : tvs) = do
      b <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure (resultSuccessful r); _ -> retry
      if b then go tvs else pure False

-- | Wait for each benchmark to finish and accumulate (name, estimate)
-- pairs into the 'IORef' (in reverse completion order).
-- NOTE(review): 'modifyIORef'' would avoid building thunks here, though the
-- list is bounded by the number of benchmarks.
svgCollect :: IORef [(TestName, Estimate)] -> IntMap (TestName, TVar Status) -> IO ()
svgCollect ref = traverse_ $ \(name, tv) -> do
  r <- atomically $ readTVar tv >>= \s -> case s of Done r -> pure r; _ -> retry
  case safeRead (resultDescription r) of
    Nothing -> pure ()
    Just (WithLoHi est _ _) -> modifyIORef ref ((name, est) :)

-- | Render all collected estimates as one SVG document;
-- an empty input produces an empty document.
svgRender :: [(TestName, Estimate)] -> String
svgRender [] = ""
svgRender pairs = header ++ concat (zipWith
  (\i (name, est) -> svgRenderItem i l xMax (dropAllPrefix name) est)
  [0..]
  pairs) ++ footer
  where
    -- Strip the implicit "All." root group when every name carries it.
    dropAllPrefix
      | all (("All." `isPrefixOf`) . fst) pairs = drop 4
      | otherwise = id

    l = genericLength pairs
    -- The horizontal scale must fit mean + 2 * stdev of the slowest benchmark.
    findMaxX (Estimate m stdev) = measTime m + 2 * stdev
    xMax = word64ToDouble $ maximum $ minBound : map (findMaxX . snd) pairs
    -- NOTE(review): the XML markup of these templates appears to have been
    -- stripped from this copy of the source (only the printf placeholders
    -- and newlines survive); restore from upstream before relying on them.
    header = printf "\n\n" (svgItemOffset l - 15) svgCanvasWidth svgFontSize svgCanvasMargin
    footer = "\n\n"

-- Overall drawing width of the SVG canvas, in pixels.
svgCanvasWidth :: Double
svgCanvasWidth = 960

-- Margin on each side of the canvas, in pixels.
svgCanvasMargin :: Double
svgCanvasMargin = 10

-- | Vertical offset of the @i@-th item: 55 px per row plus a fixed top gap.
svgItemOffset :: Word64 -> Word64
svgItemOffset i = 22 + 55 * i

svgFontSize :: Word64
svgFontSize = 16

-- | Render a single benchmark as a labelled box-and-whiskers row.
-- @i@ is the row index, @iMax@ the total number of rows (used to pick a hue),
-- @xMax@ the full-scale time in picoseconds.
svgRenderItem :: Word64 -> Word64 -> Double -> TestName -> Estimate -> String
svgRenderItem i iMax xMax name est@(Estimate m stdev) =
  -- Put the label inside the box when it fits, otherwise above it.
  (if genericLength shortTextContent * glyphWidth < boxWidth then longText else shortText) ++ box
  where
    y = svgItemOffset i
    y' = y + (svgFontSize * 3) `quot` 8
    y1 = y' + whiskerMargin
    y2 = y' + boxHeight `quot` 2
    y3 = y' + boxHeight - whiskerMargin
    x1 = boxWidth - whiskerWidth
    x2 = boxWidth + whiskerWidth
    -- Hue (degrees) chosen by spreading rows evenly around the color wheel.
    deg = (i * 360) `quot` iMax
    glyphWidth = word64ToDouble svgFontSize / 2

    -- Convert picoseconds to canvas pixels.
    scale w = word64ToDouble w * (svgCanvasWidth - 2 * svgCanvasMargin) / xMax
    boxWidth = scale (measTime m)
    whiskerWidth = scale (2 * stdev)
    boxHeight = 22
    whiskerMargin = 5

    -- NOTE(review): as with 'svgRender', the XML of the templates below has
    -- been stripped from this copy; only placeholders/newlines remain.
    box = printf boxTemplate
      (prettyEstimate est)
      y' boxHeight boxWidth deg deg
      deg
      x1 x2 y2 y2
      x1 x1 y1 y3
      x2 x2 y1 y3
    boxTemplate
      = "\n%s\n"
      ++ "\n"
      ++ ""
      ++ "\n"
      ++ "\n"
      ++ "\n"
      ++ "\n\n"

    longText = printf longTextTemplate
      deg
      y (encodeSvg name)
      y boxWidth (showPicos4 (measTime m))
    longTextTemplate
      = "\n"
      ++ "%s\n"
      ++ "%s\n"
      ++ "\n"

    shortTextContent = encodeSvg name ++ " " ++ showPicos4 (measTime m)
    shortText = printf shortTextTemplate deg y shortTextContent
    shortTextTemplate = "%s\n"

-- | Escape text for embedding in SVG: '<' and '&' only,
-- which suffices for character data (not for attribute values).
encodeSvg :: String -> String
encodeSvg [] = []
encodeSvg ('<' : xs) = '&' : 'l' : 't' : ';' : encodeSvg xs
encodeSvg ('&' : xs) = '&' : 'a' : 'm' : 'p' : ';' : encodeSvg xs
encodeSvg (x : xs) = x : encodeSvg xs

-- | A path to read baseline results in CSV format, populated by @--baseline@.
--
-- This is an option of 'csvReporter' and can be set only globally.
-- Modifying it via 'adjustOption' or 'localOption' does not have any effect.
-- One can however pass it to 'tryIngredients' 'benchIngredients'.
--
-- @since 0.3
newtype BaselinePath = BaselinePath FilePath
  deriving
    ( Eq
      -- ^ @since 0.4
    , Ord
      -- ^ @since 0.4
    )

instance IsOption (Maybe BaselinePath) where
  defaultValue = Nothing
  parseValue = Just . Just . BaselinePath
  optionName = pure "baseline"
  optionHelp = pure "File with baseline results in CSV format to compare against"

-- | Run benchmarks and report results
-- in a manner similar to 'consoleTestReporter'.
--
-- If @--baseline@ @FILE@ command line option is specified,
-- compare results against an earlier run and mark
-- too slow / too fast benchmarks as failed in accordance to
-- bounds specified by @--fail-if-slower@ @PERCENT@ and @--fail-if-faster@ @PERCENT@.
--
-- @since 0.2
consoleBenchReporter :: Ingredient
consoleBenchReporter = modifyConsoleReporter [Option (Proxy :: Proxy (Maybe BaselinePath))] $ \opts -> do
  -- Read the baseline eagerly ('force') so the file handle is not held open
  -- lazily while benchmarks run.
  baseline <- case lookupOption opts of
    Nothing -> pure S.empty
    Just (BaselinePath path) -> S.fromList . joinQuotedFields . lines <$> (readFile path >>= evaluate . force)
  let pretty = if hasGCStats then prettyEstimateWithGC else prettyEstimate
  pure $ \name mDepR r -> case safeRead (resultDescription r) of
    Nothing -> r
    Just (WithLoHi est lowerBound upperBound) ->
      (if isAcceptable then id else forceFail)
      r { resultDescription = pretty est ++ bcompareMsg ++ formatSlowDown mSlowDown }
      where
        -- Fail if the benchmark is out of bounds either vs. the baseline file
        -- or vs. a 'bcompare' dependency.
        isAcceptable = isAcceptableVsBaseline && isAcceptableVsBcompare
        mSlowDown = compareVsBaseline baseline name est
        slowDown = fromMaybe 1 mSlowDown
        isAcceptableVsBaseline = slowDown >= lowerBound && slowDown <= upperBound
        (isAcceptableVsBcompare, bcompareMsg) = case mDepR of
          Nothing -> (True, "")
          Just (WithLoHi depR depLowerBound depUpperBound) -> case safeRead (resultDescription depR) of
            Nothing -> (True, "")
            Just (WithLoHi depEst _ _) -> let ratio = estTime est / estTime depEst in
              ( ratio >= depLowerBound && ratio <= depUpperBound
              , printf ", %.2fx" ratio
              )

-- | A well-formed CSV entry contains an even number of quotes: 0, 2 or more.
-- Glue physical lines back together when a quoted field spans several of them.
joinQuotedFields :: [String] -> [String]
joinQuotedFields [] = []
joinQuotedFields (x : xs)
  | areQuotesBalanced x = x : joinQuotedFields xs
  | otherwise = case span areQuotesBalanced xs of
    (_, []) -> [] -- malformed CSV
    (ys, z : zs) -> unlines (x : ys ++ [z]) : joinQuotedFields zs
  where
    areQuotesBalanced = even . length . filter (== '"')

-- | Mean time of an estimate, in picoseconds, as a 'Double'.
estTime :: Estimate -> Double
estTime = word64ToDouble . measTime . estMean

-- | Look up @name@ in the baseline CSV and return the slowdown ratio,
-- or 'Just' 1 when the difference is within noise (2 sigma of either run).
compareVsBaseline :: S.Set String -> TestName -> Estimate -> Maybe Double
compareVsBaseline baseline name (Estimate m stdev) = case mOld of
  Nothing -> Nothing
  Just (oldTime, oldDoubleSigma)
    -- time and oldTime must be signed integers to use 'abs'
    | abs (time - oldTime) < max (2 * word64ToInt64 stdev) oldDoubleSigma -> Just 1
    | otherwise -> Just $ int64ToDouble time / int64ToDouble oldTime
  where
    time = word64ToInt64 $ measTime m

    mOld :: Maybe (Int64, Int64)
    mOld = do
      let prefix = encodeCsv name ++ ","
      -- 'S.split' positions us at the first line >= prefix in the sorted set.
      (line, furtherLines) <- S.minView $ snd $ S.split prefix baseline

      case S.minView furtherLines of
        Nothing -> pure ()
        Just (nextLine, _) -> case stripPrefix prefix nextLine of
          Nothing -> pure ()
          -- If there are several lines matching prefix, skip them all.
          -- Should not normally happen, 'csvReporter' prohibits repeating test names.
          Just{} -> Nothing

      (timeCell, ',' : rest) <- span (/= ',') <$> stripPrefix prefix line
      let doubleSigmaCell = takeWhile (/= ',') rest
      (,) <$> safeRead timeCell <*> safeRead doubleSigmaCell

-- | Human-readable "% more/less than baseline" suffix;
-- empty when no baseline entry matched.
formatSlowDown :: Maybe Double -> String
formatSlowDown Nothing = ""
formatSlowDown (Just ratio) = case percents `compare` 0 of
  LT -> printf ", %2i%% less than baseline" (-percents)
  EQ -> ", same as baseline"
  GT -> printf ", %2i%% more than baseline" percents
  where
    percents :: Int64
    percents = truncate ((ratio - 1) * 100)

-- | Rewrite a result as a failure, keeping its description.
forceFail :: Result -> Result
forceFail r = r { resultOutcome = Failure TestFailed, resultShortDescription = "FAIL" }

-- | Zero, exactly one, or more than one occurrence —
-- a monoid that remembers whether a match was unique.
data Unique a = None | Unique !a | NotUnique
  deriving (Functor)

instance Semigroup (Unique a) where
  None <> a = a
  a <> None = a
  _ <> _ = NotUnique

instance Monoid (Unique a) where
  mempty = None
  mappend = (<>)

-- | Wrap 'consoleTestReporter', post-processing each finished 'Result'
-- with a function that may consult a 'bcompare' dependency's result.
modifyConsoleReporter
  :: [OptionDescription]
  -> (OptionSet -> IO (TestName -> Maybe (WithLoHi Result) -> Result -> Result))
  -> Ingredient
modifyConsoleReporter desc' iof = TestReporter (desc ++ desc') $ \opts tree ->
  let nameSeqs = IM.fromDistinctAscList $ zip [0..] $ testNameSeqs opts tree
      namesAndDeps = IM.fromDistinctAscList $ zip [0..] $ map (second isSingle)
        $ testNamesAndDeps nameSeqs opts tree
      modifySMap = (iof opts >>=) . flip postprocessResult
        . IM.intersectionWith (\(a, b) c -> (a, b, c)) namesAndDeps
  in (modifySMap >=>) <$> cb opts tree
  where
    (desc, cb) = case consoleTestReporter of
      TestReporter d c -> (d, c)
      _ -> error "modifyConsoleReporter: consoleTestReporter must be TestReporter"

    -- A 'bcompare' target is usable only when it matched exactly one benchmark.
    isSingle (Unique a) = Just a
    isSingle _ = Nothing

-- | Convert a test tree to a list of test names.
testNameSeqs :: OptionSet -> TestTree -> [Seq TestName]
testNameSeqs = foldTestTree trivialFold
  { foldSingle = const $ const . (:[]) . Seq.singleton
#if MIN_VERSION_tasty(1,5,0)
  , foldGroup = const $ (. concat) . map . (<|)
#else
  , foldGroup = const $ map . (<|)
#endif
  }

-- | Full stop-separated test names paired with their 'bcompare' dependency
-- (if any), resolved against the index of all test names.
testNamesAndDeps :: IntMap (Seq TestName) -> OptionSet -> TestTree -> [(TestName, Unique (WithLoHi IM.Key))]
testNamesAndDeps im = foldTestTree trivialFold
  { foldSingle = const $ const . (: []) . (, mempty)
#if MIN_VERSION_tasty(1,5,0)
  , foldGroup = const $ (. concat) . map . first . (++) . (++ ".")
#else
  , foldGroup = const $ map . first . (++) . (++ ".")
#endif
  , foldAfter = const foldDeps
  }
  where
    -- Recognise the pattern planted by 'bcompare' (a string literal carrying
    -- 'bcomparePrefix' and encoded lo/hi bounds) and attach the matched keys.
    foldDeps :: DependencyType -> Expr -> [(a, Unique (WithLoHi IM.Key))] -> [(a, Unique (WithLoHi IM.Key))]
    foldDeps AllSucceed (And (StringLit xs) p)
      | bcomparePrefix `isPrefixOf` xs
      , Just (lo :: Double, hi :: Double) <- safeRead $ drop (length bcomparePrefix) xs
      = map $ second $ mappend $ (\x -> WithLoHi x lo hi) <$> findMatchingKeys im p
    foldDeps _ _ = id

-- | Evaluate a tasty pattern against every test name;
-- 'Unique' only when exactly one name matches.
findMatchingKeys :: IntMap (Seq TestName) -> Expr -> Unique IM.Key
findMatchingKeys im pattern =
  foldMap (\(k, v) -> if withFields v pat == Right True then Unique k else mempty) $ IM.assocs im
  where
    pat = eval pattern >>= asB

-- | Mirror the incoming 'StatusMap' into fresh 'TVar's, applying @f@ to each
-- 'Done' result (with its dependency's result, when available) on the way.
postprocessResult
  :: (TestName -> Maybe (WithLoHi Result) -> Result -> Result)
  -> IntMap (TestName, Maybe (WithLoHi IM.Key), TVar Status)
  -> IO StatusMap
postprocessResult f src = do
  paired <- forM src $ \(name, mDepId, tv) -> (name, mDepId, tv,) <$> newTVarIO NotStarted
  let doUpdate = atomically $ do
        (Any anyUpdated, All allDone) <-
          getApp $ flip foldMap paired $ \(name, mDepId, newTV, oldTV) -> Ap $ do
            old <- readTVar oldTV
            case old of
              Done{} -> pure (Any False, All True)
              _ -> do
                new <- readTVar newTV
                case new of
                  Done res -> do
                    -- Fetch the dependency's finished result, if this
                    -- benchmark was registered via 'bcompare'.
                    depRes <- case mDepId of
                      Nothing -> pure Nothing
                      Just (WithLoHi depId lo hi) -> case IM.lookup depId src of
                        Nothing -> pure Nothing
                        Just (_, _, depTV) -> do
                          depStatus <- readTVar depTV
                          case depStatus of
                            Done dep -> pure $ Just (WithLoHi dep lo hi)
                            _ -> pure Nothing
                    writeTVar oldTV (Done (f name depRes res))
                    pure (Any True, All True)
#if MIN_VERSION_tasty(1,5,0)
                  Executing newProgr -> do
                    let updated = case old of
                          Executing oldProgr -> oldProgr /= newProgr
                          _ -> True
                    when updated $
                      writeTVar oldTV (Executing newProgr)
                    pure (Any updated, All False)
#else
                  Executing{} -> pure (Any False, All False)
#endif
                  NotStarted -> pure (Any False, All False)
        -- Retry until something changed; finish once every benchmark is Done.
        if anyUpdated || allDone then pure allDone else retry
      adNauseam = doUpdate >>= (`unless` adNauseam)
  _ <- forkIO adNauseam
  pure $ fmap (\(_, _, _, a) -> a) paired

int64ToDouble :: Int64 -> Double
int64ToDouble = fromIntegral

word64ToInt64 :: Word64 -> Int64
word64ToInt64 = fromIntegral

#endif

word64ToDouble :: Word64 -> Double
word64ToDouble = fromIntegral

#if !MIN_VERSION_base(4,10,0)
int64ToWord64 :: Int64 -> Word64
int64ToWord64 = fromIntegral
#endif


#if defined(mingw32_HOST_OS)

-- 32-bit Windows uses the stdcall convention for WinAPI, 64-bit uses ccall.
#if defined(i386_HOST_ARCH)
#define CCONV stdcall
#else
#define CCONV ccall
#endif

foreign import CCONV unsafe "windows.h GetConsoleOutputCP" getConsoleOutputCP :: IO Word32
foreign import CCONV unsafe "windows.h SetConsoleOutputCP" setConsoleOutputCP :: Word32 -> IO ()

#endif

#ifdef MIN_VERSION_tasty

-- | Map leaf benchmarks ('bench', not 'bgroup') with a provided function,
-- which has an access to leaf's reversed path.
--
-- This helper is useful for bulk application of 'bcompare'.
-- See also 'locateBenchmark'.
--
-- Real world examples:
--
-- * https://hackage.haskell.org/package/chimera-0.3.3.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/text-builder-linear-0.1.1/src/bench/Main.hs
--
-- @since 0.3.2
mapLeafBenchmarks :: ([String] -> Benchmark -> Benchmark) -> Benchmark -> Benchmark
mapLeafBenchmarks processLeaf = go mempty
  where
    -- @path@ accumulates group names innermost-first (reversed path).
    go :: [String] -> Benchmark -> Benchmark
    go path x = case x of
      SingleTest name t -> processLeaf (name : path) (SingleTest name t)
      TestGroup name tts -> TestGroup name (map (go (name : path)) tts)
      PlusTestOptions g tt -> PlusTestOptions g (go path tt)
      WithResource res f -> WithResource res (go path . f)
      AskOptions f -> AskOptions (go path . f)
      After dep expr tt -> After dep expr (go path tt)

-- | Construct an AWK expression to locate an individual element or elements in 'Benchmark'
-- by the suffix of the path. Names are listed in reverse order:
-- from 'bench'\'s own name to a name of the outermost 'bgroup'.
--
-- This function is meant to be used in conjunction with 'bcompare', e. g.,
-- 'bcompare' ('Test.Tasty.Patterns.Printer.printAwkExpr' ('locateBenchmark' @path@)).
-- See also 'mapLeafBenchmarks'.
--
-- Real world examples:
--
-- * https://hackage.haskell.org/package/chimera-0.3.3.0/src/bench/Bench.hs
-- * https://hackage.haskell.org/package/text-builder-linear-0.1.1/src/bench/Main.hs
--
-- @since 0.3.2
locateBenchmark :: [String] -> Expr
-- An empty path matches everything (the constant-true AWK expression).
locateBenchmark [] = IntLit 1
locateBenchmark path
  -- Match the i-th name from the end of the slash-separated test path:
  -- $(NF - i) == name, AND-ed over all supplied names.
  = foldl1' And
  $ zipWith (\i name -> Patterns.EQ (Field (Sub NF (IntLit i))) (StringLit name)) [0..] path

#endif