├── .ghci
├── .gitattributes
├── .github
└── workflows
│ └── haskell-ci.yml
├── .gitignore
├── .hlint.yaml
├── .projectile
├── ChangeLog.md
├── LICENSE
├── app
├── bench.hs
└── explore.hs
├── cabal.project
├── other
├── ExampleSum-1000-1000-MeasureTime-StatAverage.perf
├── RunExample-ExampleNoOp-1000-1000-MeasureTime-StatAverage.perf
├── RunExampleIO-MeasureTime-StatAverage.perf
├── RunExamples-1000-1000-MeasureTime-StatAverage.perf
├── perf.svg
├── perf20000.svg
└── perffuse.svg
├── perf.cabal
├── perf.org
├── readme.md
└── src
├── Perf.hs
└── Perf
├── Algos.hs
├── BigO.hs
├── Chart.hs
├── Count.hs
├── Measure.hs
├── Report.hs
├── Space.hs
├── Stats.hs
├── Time.hs
└── Types.hs
/.ghci:
--------------------------------------------------------------------------------
1 | :set -XOverloadedStrings
2 | :set -Wno-type-defaults
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | other/* linguist-documentation
2 | index.html linguist-documentation
3 |
--------------------------------------------------------------------------------
/.github/workflows/haskell-ci.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on: [push]
3 |
4 | # INFO: The following configuration block ensures that only one build runs per branch,
5 | # which may be desirable for projects with a costly build process.
6 | # Remove this block from the CI workflow to let each CI job run to completion.
7 | concurrency:
8 | group: build-${{ github.ref }}
9 | cancel-in-progress: true
10 |
11 | jobs:
12 | hlint:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v4
16 | - uses: haskell-actions/hlint-setup@v2
17 | - uses: haskell-actions/hlint-run@v2
18 | with:
19 | path: .
20 | fail-on: warning
21 | ormolu:
22 | runs-on: ubuntu-latest
23 | steps:
24 | - uses: actions/checkout@v4
25 | - uses: haskell-actions/run-ormolu@v16
26 | build:
27 | name: GHC ${{ matrix.ghc-version }} on ${{ matrix.os }}
28 | runs-on: ${{ matrix.os }}
29 | strategy:
30 | fail-fast: false
31 | matrix:
32 | os: [ubuntu-latest]
33 | ghc-version: ['9.10', '9.8', '9.6']
34 |
35 | include:
36 | - os: windows-latest
37 | ghc-version: '9.8'
38 | - os: macos-latest
39 | ghc-version: '9.8'
40 |
41 | steps:
42 | - uses: actions/checkout@v4
43 |
44 | - name: Set up GHC ${{ matrix.ghc-version }}
45 | uses: haskell-actions/setup@v2
46 | id: setup
47 | with:
48 | ghc-version: ${{ matrix.ghc-version }}
49 |
50 | - name: Configure the build
51 | run: |
52 | cabal configure --enable-tests --enable-benchmarks --disable-documentation
53 | cabal build --dry-run
54 | # The last step generates dist-newstyle/cache/plan.json for the cache key.
55 |
56 | - name: Restore cached dependencies
57 | uses: actions/cache/restore@v4
58 | id: cache
59 | env:
60 | key: ${{ runner.os }}-ghc-${{ steps.setup.outputs.ghc-version }}-cabal-${{ steps.setup.outputs.cabal-version }}
61 | with:
62 | path: ${{ steps.setup.outputs.cabal-store }}
63 | key: ${{ env.key }}-plan-${{ hashFiles('**/plan.json') }}
64 | restore-keys: ${{ env.key }}-
65 |
66 | - name: Install dependencies
67 | # If we had an exact cache hit, the dependencies will be up to date.
68 | if: steps.cache.outputs.cache-hit != 'true'
69 | run: cabal build all --only-dependencies
70 |
71 | # Cache dependencies already here, so that we do not have to rebuild them should the subsequent steps fail.
72 | - name: Save cached dependencies
73 | uses: actions/cache/save@v4
74 | # If we had an exact cache hit, trying to save the cache would error because of key clash.
75 | if: steps.cache.outputs.cache-hit != 'true'
76 | with:
77 | path: ${{ steps.setup.outputs.cabal-store }}
78 | key: ${{ steps.cache.outputs.cache-primary-key }}
79 |
80 | - name: Build
81 | run: cabal build all
82 |
83 | - name: Check cabal file
84 | run: cabal check
85 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.stack-work/
2 | .DS_Store
3 | /dist-newstyle/
4 | /stack.yaml.lock
5 | /.hie/
6 | /cabal.project.local*
7 | /.ghc.environment.*
8 | /.hkgr/
9 | /checklist.org
10 | /other/*.perf
11 |
--------------------------------------------------------------------------------
/.hlint.yaml:
--------------------------------------------------------------------------------
1 | - ignore: {name: Reduce duplication, within: Perf.Measure}
2 | - ignore: {name: Redundant Lambda}
3 | - ignore: {name: Eta reduce}
4 | - ignore: {name: Use if}
5 |
--------------------------------------------------------------------------------
/.projectile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyday567/perf/5f1cd67117f43511dd27064a933f2c10b69a96ce/.projectile
--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
1 | 0.14
2 | ===
3 | * Added charting
4 | * Added Perf.Chart
5 | * refactored reportMain
6 | * expanded ReportOptions
7 | * added more algorithms to Example
8 | * refactored Perf.BigO
9 | * added PerfDumpOptions
10 | * reportOrg2D ==> report2D
11 | * added reportBigO
12 | * added reportTasty'
13 | * added reportToConsole & parseClock
14 | * refactored SpaceStats
15 | * added tickIOWith, timesN & timesNWith
16 | * removed measureM from Measure
17 | * added multiN
18 |
19 |
20 | 0.13
21 | ===
22 | * replaced rdtsc with clocks library.
23 |
24 | 0.12.0.0
25 | ===
26 | * added reportMain and support for executable development.
27 | * refactored app/perf-explore
28 | * created app/perf-bench and aded as a `cabal bench` thing.
29 |
30 | 0.11.0.0
31 | ===
32 | * added Perf.Count
33 | * GHC 9.6.2 support
34 |
35 | 0.10.0
36 | ===
37 | * GHC 9.2.1 support
38 | * inclusion of BigO
39 |
40 | 0.9.0
41 | ===
42 | * Removed deepseq dependency
43 |
44 | 0.8.0
45 | ===
46 | * GHC 9.0.1 support
47 | * internal fixes to remove numhask dependency
48 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018, Tony Day
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of Tony Day nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/app/bench.hs:
--------------------------------------------------------------------------------
1 | -- | Sum example, measured using default settings.
2 | module Main where
3 |
4 | import Data.List qualified as List
5 | import Perf
6 | import Prelude
7 |
8 | main :: IO ()
9 | main = do
10 | let l = 1000
11 | let a = ExampleSum
12 | reportMain a defaultReportOptions (List.intercalate "-" [show a, show @Int l]) (testExample . examplePattern a)
13 |
--------------------------------------------------------------------------------
/app/explore.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedLabels #-}
2 | {-# LANGUAGE OverloadedStrings #-}
3 |
4 | -- | basic measurement and callibration
5 | module Main where
6 |
7 | import Control.DeepSeq
8 | import Control.Monad
9 | import Control.Monad.State.Lazy
10 | import Data.List (intercalate)
11 | import Data.Map.Strict qualified as Map
12 | import Data.Text (Text)
13 | import Data.Text qualified as Text
14 | import Data.Text.IO qualified as Text
15 | import GHC.Exts
16 | import GHC.Generics
17 | import Optics.Core
18 | import Options.Applicative
19 | import Options.Applicative.Help.Pretty
20 | import Perf
21 | import Prelude
22 |
23 | data Run = RunExample | RunExampleIO | RunSums | RunLengths | RunNoOps | RunTicks deriving (Eq, Show)
24 |
25 | data AppConfig = AppConfig
26 | { appRun :: Run,
27 | appExample :: Example,
28 | appReportOptions :: ReportOptions
29 | }
30 | deriving (Eq, Show, Generic)
31 |
32 | defaultAppConfig :: AppConfig
33 | defaultAppConfig = AppConfig RunExample ExampleSum defaultReportOptions
34 |
35 | parseRun :: Parser Run
36 | parseRun =
37 | flag' RunSums (long "sums" <> help "run on sum algorithms")
38 | <|> flag' RunLengths (long "lengths" <> help "run on length algorithms")
39 | <|> flag' RunExample (long "example" <> help "run on the example algorithm" <> style (annotate bold))
40 | <|> flag' RunExampleIO (long "exampleIO" <> help "exampleIO test")
41 | <|> flag' RunNoOps (long "noops" <> help "noops test")
42 | <|> flag' RunTicks (long "ticks" <> help "tick test")
43 | <|> pure RunExample
44 |
45 | appParser :: AppConfig -> Parser AppConfig
46 | appParser def =
47 | AppConfig
48 | <$> parseRun
49 | <*> parseExample
50 | <*> parseReportOptions (view #appReportOptions def)
51 |
52 | appConfig :: AppConfig -> ParserInfo AppConfig
53 | appConfig def =
54 | info
55 | (appParser def <**> helper)
56 | (fullDesc <> header "Examples of perf usage (defaults in bold)")
57 |
58 | -- | * exampleIO
59 | exampleIO :: (Semigroup t) => PerfT IO t ()
60 | exampleIO = do
61 | txt <- fam "file-read" (Text.readFile "src/Perf.hs")
62 | n <- ffap "length" Text.length txt
63 | fam "print-result" (Text.putStrLn $ "length of file is: " <> Text.pack (show n))
64 |
65 | -- | * sums
66 | -- | measure the various versions of a tick.
67 | statTicks :: (NFData t, NFData b) => Text -> (t -> b) -> t -> Int -> StatDType -> StateT (Map.Map [Text] Double) IO ()
68 | statTicks l f a n s = do
69 | addStat [l, "tick"] . statD s . fmap fromIntegral =<< lift (fst <$> multi tick n f a)
70 | addStat [l, "tickWHNF"] . statD s . fmap fromIntegral =<< lift (fst <$> multi tickWHNF n f a)
71 | addStat [l, "tickLazy"] . statD s . fmap fromIntegral =<< lift (fst <$> multi tickLazy n f a)
72 | addStat [l, "tickForce"] . statD s . fmap fromIntegral =<< lift (fst <$> multi tickForce n f a)
73 | addStat [l, "tickForceArgs"] . statD s . fmap fromIntegral =<< lift (fst <$> multi tickForceArgs n f a)
74 | addStat [l, "stepTime"] . statD s . fmap fromIntegral =<< lift (mconcat . fmap snd . take 1 . Map.toList <$> execPerfT (toMeasureN n stepTime) (f |$| a))
75 | addStat [l, "times"] . statD s . fmap fromIntegral =<< lift (mconcat . fmap snd . take 1 . Map.toList <$> execPerfT (times n) (f |$| a))
76 | addStat [l, "timesn"] . statD s . fmap fromIntegral =<< lift (mconcat . fmap snd . Map.toList <$> execPerfT (pure <$> timesN n) (f |$| a))
77 |
78 | statTicksSum :: (NFData b, Enum b, Num b) => SumPattern b -> Int -> StatDType -> StateT (Map.Map [Text] Double) IO ()
79 | statTicksSum (SumFuse label f a) n s = statTicks label f a n s
80 | statTicksSum (SumFusePoly label f a) n s = statTicks label f a n s
81 | statTicksSum (SumPoly label f a) n s = statTicks label f a n s
82 | statTicksSum (SumMono label f a) n s = statTicks label f a n s
83 |
84 | statTicksSums :: Int -> Int -> StatDType -> IO (Map.Map [Text] Double)
85 | statTicksSums n l s = flip execStateT Map.empty $ mapM_ (\x -> statTicksSum x n s) (allSums l)
86 |
87 | -- * no-op testing
88 |
89 | perfNoOps :: (Semigroup a) => Measure IO a -> IO (Map.Map Text a)
90 | perfNoOps meas =
91 | execPerfT meas $ do
92 | liftIO $ warmup 1000
93 | fap "const" (const ()) ()
94 | fam "pure" (pure ())
95 |
96 | main :: IO ()
97 | main = do
98 | o <- execParser (appConfig defaultAppConfig)
99 | let repOptions = appReportOptions o
100 | let n = reportN repOptions
101 | let s = reportStatDType repOptions
102 | let mt = reportMeasureType repOptions
103 | let c = reportClock repOptions
104 | let !l = reportLength repOptions
105 | let a = appExample o
106 | let r = appRun o
107 |
108 | case r of
109 | RunExample -> do
110 | reportMain
111 | a
112 | repOptions
113 | (intercalate "-" [show r, show a, show l])
114 | (testExample . examplePattern a)
115 | RunExampleIO -> do
116 | m1 <- execPerfT (measureDs mt c 1) exampleIO
117 | (_, (m', m2)) <- outer "outer-total" (measureDs mt c 1) (measureDs mt c 1) exampleIO
118 | let ms = mconcat [Map.mapKeys (\x -> ["normal", x]) m1, Map.mapKeys (\x -> ["outer", x]) (m2 <> m')]
119 | putStrLn ""
120 | let o' = replaceDefaultFilePath (intercalate "-" [show r, show mt, show s]) repOptions
121 | report o' (fmap (statD s) <$> ms)
122 | RunSums -> do
123 | m <- statSums n l (measureDs mt c)
124 | let o' = replaceDefaultFilePath (intercalate "-" [show r, show mt, show n, show l, show s]) repOptions
125 | report o' (statify s m)
126 | RunLengths -> do
127 | m <- statLengths n l (measureDs mt c)
128 | let o' = replaceDefaultFilePath (intercalate "-" [show r, show mt, show n, show l, show s]) repOptions
129 | report o' (statify s m)
130 | RunNoOps -> do
131 | m <- perfNoOps (measureDs mt c n)
132 | let o' = replaceDefaultFilePath (intercalate "-" [show r, show mt, show n]) repOptions
133 | report o' (allStats 4 (Map.mapKeys (: []) m))
134 | RunTicks -> do
135 | m <- statTicksSums n l s
136 | report2D m
137 |
--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
1 | packages:
2 | perf.cabal
3 |
--------------------------------------------------------------------------------
/other/ExampleSum-1000-1000-MeasureTime-StatAverage.perf:
--------------------------------------------------------------------------------
1 | fromList [(["sum","time"],13782.642)]
--------------------------------------------------------------------------------
/other/RunExample-ExampleNoOp-1000-1000-MeasureTime-StatAverage.perf:
--------------------------------------------------------------------------------
1 | fromList [(["noop","time"],169.004)]
--------------------------------------------------------------------------------
/other/RunExampleIO-MeasureTime-StatAverage.perf:
--------------------------------------------------------------------------------
1 | fromList [(["normal","file-read","time"],230708.0),(["normal","length","time"],2708.0),(["normal","print-result","time"],37459.0),(["outer","file-read","time"],60500.0),(["outer","length","time"],959.0),(["outer","outer-total","time"],73875.0),(["outer","print-result","time"],9792.0)]
--------------------------------------------------------------------------------
/other/RunExamples-1000-1000-MeasureTime-StatAverage.perf:
--------------------------------------------------------------------------------
1 | fromList [(["constFuse","time"],736.132),(["lengthF","time"],5132.127),(["mapInc","time"],125.111),(["noop","time"],110.868),(["sum","time"],10436.082),(["sumFuse","time"],1812.501)]
--------------------------------------------------------------------------------
/other/perf.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/perf.cabal:
--------------------------------------------------------------------------------
1 | cabal-version: 3.0
2 | name: perf
3 | version: 0.14.0.2
4 | license: BSD-3-Clause
5 | license-file: LICENSE
6 | copyright: Tony Day (c) 2018
7 | category: performance
8 | author: Tony Day, Marco Zocca
9 | maintainer: tonyday567@gmail.com
10 | homepage: https://github.com/tonyday567/perf#readme
11 | bug-reports: https://github.com/tonyday567/perf/issues
12 | synopsis: Performance methods and monad.
13 | description:
14 | A set of tools to measure performance of Haskell programs.
15 | See the Perf module and readme for an example and full API documentation.
16 |
17 | build-type: Simple
18 | tested-with:
19 | , GHC == 9.10.1
20 | , GHC == 9.6.5
21 | , GHC == 9.8.2
22 | extra-doc-files:
23 | ChangeLog.md
24 | readme.md
25 |
26 | source-repository head
27 | type: git
28 | location: https://github.com/tonyday567/perf
29 |
30 | common ghc-options-exe-stanza
31 | ghc-options:
32 | -fforce-recomp
33 | -funbox-strict-fields
34 | -rtsopts
35 | -threaded
36 | -with-rtsopts=-N
37 |
38 | common ghc-options-stanza
39 | ghc-options:
40 | -Wall
41 | -Wcompat
42 | -Widentities
43 | -Wincomplete-record-updates
44 | -Wincomplete-uni-patterns
45 | -Wpartial-fields
46 | -Wredundant-constraints
47 | -fproc-alignment=64
48 |
49 | common ghc2021-stanza
50 | default-language: GHC2021
51 |
52 | library
53 | import: ghc-options-stanza
54 | import: ghc2021-stanza
55 | hs-source-dirs: src
56 | build-depends:
57 | , base >=4.14 && <5
58 | , boxes >=0.1.5 && <0.2
59 | , chart-svg >=0.7 && <0.9
60 | , clock >=0.8 && <0.9
61 | , containers >=0.6 && <0.8
62 | , deepseq >=1.4.4 && <1.6
63 | , formatn >=0.2.1 && <0.4
64 | , mtl >=2.2.2 && <2.4
65 | , numhask-space >=0.10 && <0.14
66 | , optics-core >=0.4.1 && <0.5
67 | , optparse-applicative >=0.17 && <0.20
68 | , prettychart >=0.3 && <0.4
69 | , prettyprinter >=1.7.1 && <1.8
70 | , recursion-schemes >=5.2.2 && <5.3
71 | , tasty >=1.5.2 && <1.6
72 | , tasty-bench >=0.4 && <0.5
73 | , text >=1.2 && <2.2
74 | , vector >=0.12.3 && <0.14
75 | exposed-modules:
76 | Perf
77 | Perf.Algos
78 | Perf.BigO
79 | Perf.Chart
80 | Perf.Count
81 | Perf.Measure
82 | Perf.Report
83 | Perf.Space
84 | Perf.Stats
85 | Perf.Time
86 | Perf.Types
87 | ghc-options: -O2
88 |
89 | executable perf-explore
90 | import: ghc-options-exe-stanza
91 | import: ghc-options-stanza
92 | import: ghc2021-stanza
93 | main-is: explore.hs
94 | hs-source-dirs: app
95 | build-depends:
96 | , base >=4.14 && <5
97 | , containers >=0.6 && <0.8
98 | , deepseq >=1.4.4 && <1.6
99 | , mtl >=2.2.2 && <2.4
100 | , optics-core >=0.4.1 && <0.5
101 | , optparse-applicative >=0.17 && <0.20
102 | , perf
103 | , text >=1.2 && <2.2
104 | ghc-options: -O2
105 |
106 | benchmark perf-bench
107 | import: ghc-options-exe-stanza
108 | import: ghc-options-stanza
109 | import: ghc2021-stanza
110 | main-is: bench.hs
111 | hs-source-dirs: app
112 | build-depends:
113 | , base >=4.14 && <5
114 | , perf
115 | ghc-options: -O2
116 | type: exitcode-stdio-1.0
117 |
--------------------------------------------------------------------------------
/perf.org:
--------------------------------------------------------------------------------
1 | #+TITLE: perf
2 | #+PROPERTY: header-args :exports both
3 | #+PROPERTY: header-args :eval no-export
4 |
5 | [[https://hackage.haskell.org/package/perf][file:https://img.shields.io/hackage/v/perf.svg]] [[https://github.com/tonyday567/perf/actions][file:https://github.com/tonyday567/perf/workflows/haskell-ci.yml/badge.svg]]
6 |
7 | * Features
8 |
9 | ~perf~ is an experimental library with a focus on the low-level empirics of Haskell code performance. If you are looking for a quick and reliable performance benchmark, criterion and tasty-bench are both good choices. If your results are confounding, however, you may need to dig deeper, and this is the problem space of ~perf~.
10 |
11 | The library:
12 |
13 | - provides a monad transformer, ~PerfT~. The criterion API tends towards an atomistic approach - bust code up into snippets, copy-paste into a bench.hs and measure their isolated performance. In contrast, with ~PerfT~ performance can be measured within a code snippet's original context. Differing code points can be labelled and measured as part of a single run, encouraging a much faster observation - experimentation - refactor cycle.
14 |
15 | - is polymorphic to what, exactly, is being measured, so that concepts such as counters, debug checks, time and space performance can share treatment.
16 |
17 | - attempts to measure big O for algorithms that can be defined in terms of input size growth.
18 |
19 | - includes live charting of raw performance results via chart-svg and prettychart
20 |
21 | * Usage
22 |
23 | Probably the best introduction to ~perf~ is via the perf-explore executable:
24 |
25 | #+begin_src sh :results output
26 | perf-explore
27 | #+end_src
28 |
29 | #+RESULTS:
30 | : label1 label2 old result new result change
31 | :
32 | : sum time 9.93e3 7.57e3 improvement
33 |
34 | Summing [1..1000] took 9,930 nanoseconds, an improvement versus the on file performance previously measured.
35 |
36 | Live charts of raw performance measurement can be obtained via the prettychart library with:
37 |
38 | #+begin_src sh :results output
39 | prettychart-watch --watch --filepath other --port 3566
40 | #+end_src
41 |
42 | ... and pointer your browser at localhost:3566
43 |
44 | #+begin_src sh :file other/perf.svg :results output graphics file :exports both
45 | perf-explore -n 1000 --nocheck --chart
46 | #+end_src
47 |
48 | #+RESULTS:
49 | [[file:other/perf.svg]]
50 |
51 | In this particular measure, there was an improvement, dropping from about 10,000 nanos to 8,600 nanos. Increasing the number of measurements:
52 |
53 | #+begin_src sh :file other/perf20000.svg :results output graphics file :exports both
54 | perf-explore -n 20000 --nocheck --chart --chartpath other/perf20000.svg
55 | #+end_src
56 |
57 | #+RESULTS:
58 | [[file:other/perf20000.svg]]
59 |
60 | Improvements seem to continue as n increases before stabilising (after a GC perhaps) at 3,500 nanos
61 |
62 |
63 |
64 | #+begin_src sh :results output
65 | perf-explore -n 20000 --order --nocheck --tasty
66 | #+end_src
67 |
68 | #+RESULTS:
69 | : label1 label2 results
70 | :
71 | : sum time 3.51e3
72 | :
73 | : sum:time 3.5 * O(N1)
74 | : tasty:time: 3510
75 |
76 | The order of the computation (=\l -> fap sum [1 .. l]=) is O(N1) and the results are very close to the tasty-bench result.
77 |
78 | In comparsion, (\l -> fap (\x -> sum [1 .. x]) l):
79 |
80 | #+begin_src sh :file other/perffuse.svg :results output graphics file :exports both
81 | perf-explore --nocheck --sumFuse -n 100000 --chart --chartpath other/perffuse.svg --order
82 | #+end_src
83 |
84 | #+RESULTS:
85 | [[file:other/perffuse.svg]]
86 |
87 | #+begin_src sh :results output :exports both
88 | perf-explore --nocheck --sumFuse -n 100000 --order
89 | #+end_src
90 |
91 | #+RESULTS:
92 | : label1 label2 results
93 | :
94 | : sumFuse time 6.78e2
95 | :
96 | : sumFuse:time 0.66 * O(N1)
97 |
98 | ... is much faster. Hooray for list fusion!
99 |
100 | * Issues
101 |
102 | ** fragility
103 |
104 | Results, especially for simple computations, are fragile and can show large variance in performance characteristics in identical runs, and across differing compilations. Whether this is due to library flaws or is just the nature of ghc is an open question.
105 |
106 | ** Statistics
107 |
108 | #+begin_quote
109 | Obligatory disclaimer: statistics is a tricky matter, there is no one-size-fits-all approach. In the absence of a good theory simplistic approaches are as (un)sound as obscure ones. Those who seek statistical soundness should rather collect raw data and process it themselves using a proper statistical toolbox. Data reported by tasty-bench is only of indicative and comparative significance. ~ [[https://hackage.haskell.org/package/tasty-bench-0.4/docs/Test-Tasty-Bench.html#t:Benchmarkable][tasty-bench]]
110 | #+end_quote
111 |
112 | #+begin_quote
113 | variance introduced by outliers: 88% (severely inflated) ~ [[https://hackage.haskell.org/package/criterion][criterion]]
114 | #+end_quote
115 |
116 | The library default is to report the 10th percentile as a summary statistic, and this is a matter of taste, determined mostly by the purpose of the measurement.
117 |
118 | ** ffap and fap
119 |
120 | #+begin_src haskell-ng :results output
121 | :t ffap
122 | #+end_src
123 |
124 | #+RESULTS:
125 | : ffap
126 | : :: (Control.DeepSeq.NFData a, Control.DeepSeq.NFData b, MonadIO m,
127 | : Semigroup t) =>
128 | : Text.Text -> (a -> b) -> a -> PerfT m t b
129 |
130 | ffap and fap are broadly similar to criterion's nf and whnf respectively, but passes throught the results of the computation into the monad transformer, enabling in-context measurement.
131 |
132 | A fine-grained and detailed examination of the effect of measurement on laziness and on core details would be beneficial to the library.
133 |
134 | ** tasty
135 |
136 | The library was originally developed before tasty-bench, which does a great job of integrating into the tasty api, and a future refactor may integrate with this, rather than supply idiosyncratic methods.
137 |
138 | ** order
139 |
140 | BigOrder calculations tend to be fragile and sometimes differ from theory.
141 |
142 | * Development
143 |
144 | This org file has been used to develop and document library innovation and testing, and may be of use to users in understanding the library. Note that running ~perf~ via ghci is very slow compared with an external process which accesses the compiled version of the library.
145 |
146 | #+begin_src haskell-ng :results output
147 | :r
148 | :set -Wno-type-defaults
149 | :set -Wno-unused-do-bind
150 | :set -Wno-name-shadowing
151 | :set -XOverloadedStrings
152 | :set -XOverloadedLabels
153 | import Perf
154 | import Perf.Report
155 | import Data.FormatN
156 | import qualified Data.Text as Text
157 | import qualified Data.Text.IO as Text
158 | import qualified Data.Map.Strict as Map
159 | import Control.Monad
160 | import Data.Bifunctor
161 | import System.Clock
162 | import Data.List qualified as List
163 | import Control.Category ((>>>))
164 | import Optics.Core
165 | import Data.Foldable
166 | import NumHask.Space
167 | putStrLn "ok"
168 | import Chart hiding (tick)
169 | import Prettychart
170 | import Chart.Examples
171 | import Perf.Chart
172 | (disp,q) <- startChartServer Nothing
173 | disp lineExample
174 | import Prettyprinter
175 | import Control.Monad.State.Lazy
176 | import Text.PrettyPrint.Boxes
177 | #+end_src
178 |
179 | #+RESULTS:
180 | : Ok, 11 modules loaded.
181 | : ok
182 | : Setting phasegrhsc it>o stun... (poTrrtu e9
183 | : 160) (cgthrcli->c to quitg)h
184 |
185 | #+begin_src haskell-ng :results output
186 | l = 1000
187 | n = 1000
188 |
189 | :{
190 | p = do
191 | ffap "sum" sum [1 .. l]
192 | ffap "sumfuse" (\x -> sum [1 .. x]) l
193 | :}
194 | :t p
195 | run = runPerfT (times n) p
196 | :t run
197 | (res, m) <- run
198 | :t m
199 | median . fmap fromIntegral <$> m
200 | #+end_src
201 |
202 | #+RESULTS:
203 | : ghci| ghci| ghci| ghci| ghci> p :: (MonadIO m, Semigroup t, Control.DeepSeq.NFData b, Num b,
204 | : Enum b) =>
205 | : PerfT m t b
206 | : run
207 | : :: (Control.DeepSeq.NFData a, Num a, Enum a) =>
208 | : IO (a, Map.Map Text.Text [Nanos])
209 | : m :: Map.Map Text.Text [Nanos]
210 | : fromList [("sum",21978.1),("sumfuse",26710.18)]
211 |
212 | * Details
213 |
214 | ** System.Clock
215 |
216 | The default clock is MonoticRaw for linux & macOS, and ThreadCPUTime for Windows.
217 |
218 | *** resolution
219 |
220 | #+begin_src haskell-ng :results output
221 | getRes Monotonic
222 | getRes Realtime
223 | getRes ProcessCPUTime
224 | getRes ThreadCPUTime
225 | getRes MonotonicRaw
226 | #+end_src
227 |
228 | #+RESULTS:
229 | : TimeSpec {sec = 0, nsec = 1000}
230 | : TimeSpec {sec = 0, nsec = 1000}
231 | : TimeSpec {sec = 0, nsec = 1000}
232 | : TimeSpec {sec = 0, nsec = 42}
233 | : TimeSpec {sec = 0, nsec = 42}
234 |
235 | ** ticks
236 |
237 | The various versions of tick and a variety of algorithms are artifacts of ongoing exploration.
238 |
239 | #+begin_src sh :results output
240 | perf-explore -n 20000 --best --ticks
241 | #+end_src
242 |
243 | #+RESULTS:
244 | #+begin_example
245 | algo stepTime tick tickForce tickForceArgs tickLazy tickWHNF times timesn
246 | sumAux 3.11e3 3.11e3 3.11e3 3.11e3 5.13e0 3.11e3 3.11e3 3.10e3
247 | sumCata 3.11e3 3.11e3 3.11e3 3.11e3 5.11e0 3.11e3 3.11e3 3.14e3
248 | sumCo 3.11e3 3.11e3 3.11e3 3.11e3 5.06e0 3.11e3 3.11e3 3.08e3
249 | sumCoCase 3.11e3 3.11e3 3.11e3 3.11e3 5.11e0 3.11e3 3.11e3 3.08e3
250 | sumCoGo 3.11e3 3.11e3 3.11e3 3.11e3 5.06e0 3.11e3 3.11e3 3.12e3
251 | sumF 3.48e3 3.49e3 3.46e3 3.46e3 5.06e0 3.48e3 3.48e3 3.48e3
252 | sumFlip 3.48e3 3.48e3 3.45e3 3.45e3 5.03e0 3.48e3 3.48e3 3.48e3
253 | sumFlipLazy 3.48e3 3.48e3 3.45e3 3.45e3 4.96e0 3.48e3 3.48e3 3.45e3
254 | sumFoldr 3.11e3 3.11e3 3.11e3 3.11e3 5.13e0 3.11e3 3.11e3 3.11e3
255 | sumFuse 6.54e2 6.54e2 6.54e2 6.54e2 5.17e0 6.54e2 6.54e2 6.39e2
256 | sumFuseFoldl' 6.54e2 6.54e2 6.54e2 6.54e2 5.00e0 6.54e2 6.54e2 6.44e2
257 | sumFuseFoldr 9.93e2 9.92e2 9.92e2 9.92e2 5.13e0 9.92e2 9.93e2 9.63e2
258 | sumFusePoly 6.56e2 6.56e2 6.56e2 6.56e2 5.12e0 6.56e2 6.57e2 6.47e2
259 | sumLambda 3.48e3 3.49e3 3.48e3 3.48e3 5.12e0 3.48e3 3.48e3 3.55e3
260 | sumMono 3.48e3 3.48e3 3.46e3 3.46e3 5.00e0 3.48e3 3.48e3 3.50e3
261 | sumPoly 3.62e3 3.49e3 3.54e3 3.56e3 5.04e0 3.71e3 3.62e3 3.70e3
262 | sumSum 3.48e3 3.49e3 3.48e3 3.48e3 4.98e0 3.48e3 3.48e3 3.49e3
263 | sumTail 3.48e3 3.49e3 3.45e3 3.45e3 5.00e0 3.48e3 3.48e3 3.51e3
264 | sumTailLazy 3.48e3 3.48e3 3.45e3 3.45e3 5.16e0 3.48e3 3.48e3 3.49e3
265 |
266 | #+end_example
267 |
268 | ** Time
269 | *** What is a tick?
270 |
271 | A fundamental operation of Perf.Time is tick, which sandwiches a (strict) function application between two readings of a clock, and returns time in nanoseconds, and the computation result. In this way, the `Perf` monad can be inserted into the midst of a computation in an attempt to measure performance in-situ as opposed to sitting off in a separate and decontextualized process.
272 |
273 | #+begin_src haskell-ng :results output :exports both
274 | :t tick
275 | #+end_src
276 |
277 | #+RESULTS:
278 | : tick :: (a -> b) -> a -> IO (Nanos, b)
279 |
280 | =tick= returns in the IO monad, because reading a cycle counter is an IO effect. A trivial but fundamental point is that performance measurement effects the computation being measured.
281 |
282 |
283 | *** tick_
284 |
285 | tick_ measures the nanoseconds between two immediate clock reads.
286 |
287 | #+begin_src haskell-ng :results output :exports both
288 | :t tick_
289 | #+end_src
290 |
291 | #+RESULTS:
292 | : tick_ :: IO Nanos
293 |
294 |
295 | #+begin_src haskell-ng :results output :exports both
296 | replicateM 10 tick_
297 | #+end_src
298 |
299 | #+RESULTS:
300 | : [1833,500,416,416,416,375,375,416,416,416]
301 |
302 | *** multiple ticks
303 |
304 | #+begin_src haskell-ng :results output :exports both
305 | fmap (fmap (fst)) . replicateM 10 $ tick (const ()) ()
306 | #+end_src
307 |
308 | #+RESULTS:
309 | : [7000,2333,2000,2208,1958,1959,1959,2000,2000,1959]
310 |
311 | Here, ~const () ()~ was evaluated and took 7 micro-seconds for the first effect, reducing down to 2 msecs after 10 effects.
312 |
313 | *** tickIO
314 |
315 | ~tickIO~ measures the evaluation of an IO value.
316 |
317 | #+begin_src haskell-ng :results output :exports both
318 | :t tickIO
319 | #+end_src
320 |
321 | #+RESULTS:
322 | : tickIO :: IO a -> IO (Cycles, a)
323 |
324 | #+begin_src haskell-ng :results output :exports both
325 | fmap (fmap fst) . replicateM 10 $ tickIO (pure ())
326 | #+end_src
327 |
328 | #+RESULTS:
329 | : [5541,1625,1458,1833,1375,1416,1375,1375,1375,1375]
330 |
331 | *** sum example
332 |
333 | #+begin_src haskell-ng :exports both
334 | fmap (expt (Just 2) . fromIntegral) . fst <$> ticks 10 sum ([1..10000] :: [Double])
335 | #+end_src
336 |
337 | #+RESULTS:
338 | : ["5.0e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.5e5","2.4e5"]
339 |
340 |
341 | #+begin_src haskell-ng :results output :exports both
342 | ts <- ticks 10000 sum ([1..1000] :: [Double])
343 | print $ average (fmap fromIntegral $ fst ts)
344 | #+end_src
345 |
346 | #+RESULTS:
347 | : 10747.1975
348 |
349 | ** PerfT
350 |
351 | ~PerfT~ allows for multiple measurement points and is polymorphic in what is being measured. It returns a Map of results held in State.
352 |
353 | Compare a lower-level usage of ticks, measuring the average of summing to one thousand over one thousand trials:
354 |
355 | #+begin_src haskell-ng :results output :exports both
356 | first (average . fmap fromIntegral) <$> ticks 1000 sum [1..1000]
357 | #+end_src
358 |
359 | #+RESULTS:
360 | : (25947.635,500500)
361 |
362 | ... with PerfT usage
363 |
364 | #+begin_src haskell-ng :results output :exports both
365 | second (fmap (average . fmap fromIntegral)) <$> runPerfT (times 1000) (sum |$| [1..1000])
366 | #+end_src
367 |
368 | #+RESULTS:
369 | : (500500,fromList [("",26217.098)])
370 |
371 | An IO example
372 |
373 | #+begin_src haskell-ng
374 | exampleIO' :: IO ()
375 | exampleIO' = do
376 | txt <- Text.readFile "src/Perf.hs"
377 | let n = Text.length txt
378 | Text.putStrLn $ "length of file is: " <> Text.pack (show n)
379 | #+end_src
380 |
381 | #+begin_src haskell-ng :results output :exports both
382 | exampleIO = execPerfT time (do
383 | txt <- fam "file_read" (Text.readFile "src/Perf.hs")
384 | n <- fap "length" Text.length txt
385 | fam "print_result" (Text.putStrLn $ "length of file is: " <> Text.pack (show n)))
386 | #+end_src
387 |
388 | #+begin_src sh :results output :exports both
389 | perf-explore --exampleIO
390 | #+end_src
391 |
392 | #+RESULTS:
393 | #+begin_example
394 | length of file is: 1794
395 | length of file is: 1794
396 |
397 | label1 label2 label3 old result new result change
398 |
399 | normal file-read time 2.31e5 1.28e5 improvement
400 | normal length time 2.71e3 2.00e3 improvement
401 | normal print-result time 3.75e4 1.32e4 improvement
402 | outer file-read time 6.05e4 3.64e4 improvement
403 | outer length time 9.59e2 6.25e2 improvement
404 | outer outer-total time 7.39e4 4.02e4 improvement
405 | outer print-result time 9.79e3 1.71e3 improvement
406 | #+end_example
407 |
408 | ** Perf.BigO
409 |
410 | Perf.BigO represents functionality to determine the complexity order for a computation.
411 |
412 | We could do a regression and minimise the error term, but we know that the largest run contains the most information; we would need to weight the simulations according to some heuristic.
413 |
414 | Instead, we:
415 |
416 | - estimate the order factor for each possible Order, from N3 to N0, setting the highest n run constant factor to zero,
417 | - pick the order based on lowest absolute error result summed across all the runs,
418 |
419 | #+begin_src haskell-ng :results output :exports both
420 | import qualified Prelude as P
421 | import Data.List (nub)
422 | estOrder (\x -> sum $ nub [1..x]) 100 [10,100,1000,1000]
423 | #+end_src
424 |
425 | #+RESULTS:
426 | : BigOrder {bigOrder = N2, bigFactor = 3.187417}
427 |
428 | #+begin_src haskell-ng :results output :exports both
429 | import qualified Prelude as P
430 | import Data.List (nub)
431 | estOrder (\x -> sum $ [1..x]) 10 [1,10,100,1000]
432 | #+end_src
433 |
434 | #+RESULTS:
435 | : BigOrder {bigOrder = N12, bigFactor = 695.0370069284081, bigConstant = 0.0}
436 |
437 | ** References
438 |
439 | https://wiki.haskell.org/Performance/GHC
440 |
441 | [[https://github.com/haskell-perf/checklist][The Haskell performance checklist]]
442 |
443 | [[https://github.com/ndmitchell/spaceleak][ndmitchell/spaceleak: Notes on space leaks]]
444 |
445 | *** Core
446 |
447 | [[https://ghc.gitlab.haskell.org/ghc/doc/users_guide/debugging.html#options-debugging][5.13. Debugging the compiler]]
448 |
449 | #+begin_src sh
450 | ghc app/speed.hs -ddump-simpl -ddump-to-file -fforce-recomp -dlint -O
451 | #+end_src
452 |
453 | [[https://wiki.haskell.org/Performance/GHC#Looking_at_the_Core][haskell wiki: Looking at the Core]]
454 |
455 | [[https://godbolt.org/][godbolt]]
456 |
457 | [[https://gitlab.haskell.org/ghc/ghc/-/issues/15185][ghc issue 15185: Enum instance for IntX / WordX are inefficient]]
458 |
459 | [[https://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html][fixpt - All About Strictness Analysis (part 1)]]
460 |
461 | *** Profiling
462 | **** setup
463 | [[https://ghc.gitlab.haskell.org/ghc/doc/users_guide/profiling.html#prof-heap][8. Profiling]]
464 |
465 | A typical configuration step for profiling:
466 |
467 | #+begin_src sh :results output
468 | cabal configure --enable-library-profiling --enable-executable-profiling -fprof-auto -fprof -write-ghc-environment-files=always
469 | #+end_src
470 |
471 | A cabal.project.local with profiling enabled:
472 |
473 | #+begin_quote
474 | write-ghc-environment-files: always
475 | ignore-project: False
476 | flags: +prof +prof-auto
477 | library-profiling: True
478 | executable-profiling: True
479 | #+end_quote
480 |
481 | Examples from markup-parse R&D:
482 |
483 | Executable compilation:
484 |
485 | #+begin_src sh :results output
486 | ghc -prof -fprof-auto -rtsopts app/speed0.hs -threaded -fforce-recomp
487 | #+end_src
488 |
489 | Executable run:
490 |
491 | #+begin_src sh :results output
492 | app/speed0 +RTS -s -p -hc -l -RTS
493 | #+end_src
494 |
495 | **** Space usage output (-s)
496 |
497 | #+begin_example
498 | 885,263,472 bytes allocated in the heap
499 | 8,507,448 bytes copied during GC
500 | 163,200 bytes maximum residency (4 sample(s))
501 | 27,752 bytes maximum slop
502 | 6 MiB total memory in use (0 MiB lost due to fragmentation)
503 |
504 | Tot time (elapsed) Avg pause Max pause
505 | Gen 0 207 colls, 0 par 0.009s 0.010s 0.0001s 0.0002s
506 | Gen 1 4 colls, 0 par 0.001s 0.001s 0.0004s 0.0005s
507 |
508 | TASKS: 4 (1 bound, 3 peak workers (3 total), using -N1)
509 |
510 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)
511 |
512 | INIT time 0.006s ( 0.006s elapsed)
513 | MUT time 0.367s ( 0.360s elapsed)
514 | GC time 0.010s ( 0.011s elapsed)
515 | RP time 0.000s ( 0.000s elapsed)
516 | PROF time 0.000s ( 0.000s elapsed)
517 | EXIT time 0.001s ( 0.001s elapsed)
518 | Total time 0.384s ( 0.380s elapsed)
519 | #+end_example
520 |
521 | **** Cost center profile (-p)
522 |
523 | Dumped to speed0.prof
524 |
525 | #+begin_example
526 | COST CENTRE MODULE SRC %time %alloc
527 |
528 | token MarkupParse src/MarkupParse.hs:(259,1)-(260,20) 50.2 50.4
529 | wrappedQ' MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:(215,1)-(217,78) 20.8 23.1
530 | ws_ MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:(135,1)-(146,4) 14.3 5.5
531 | eq MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:243:1-30 10.6 11.1
532 | gather MarkupParse src/MarkupParse.hs:(420,1)-(428,100) 2.4 3.7
533 | runParser FlatParse.Basic src/FlatParse/Basic.hs:(217,1)-(225,24) 1.0 6.0
534 | #+end_example
535 |
536 | **** heap analysis (-hc -l)
537 |
538 | #+begin_src sh :results output
539 | eventlog2html speed0.eventlog
540 | #+end_src
541 |
542 | Produces speed0.eventlog.html which contains heap charts.
543 |
544 | *** Cache speed
545 |
546 | The average cycles per + operation can get down to about 0.7 cycles, and there are about 4 cache registers per cycle, so a sum pipeline uses 2.8 register instructions per +.
547 |
548 | | Cache | nsecs | Cycles |
549 | |-------------------+-------+--------------|
550 | | register | 0.1 | 4 per cycle |
551 | | L1 Cache access | 1 | 3-4 cycles |
552 | | L2 Cache access | 4 | 11-12 cycles |
553 | | L3 unified access | 14 | 30 - 40 |
554 | | DRAM hit | 80 | 195 cycles |
555 | | L1 miss | 16 | 40 cycles |
556 | | L2 miss | >250 | >600 cycles |
557 |
558 |
559 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | [](https://hackage.haskell.org/package/perf) [](https://github.com/tonyday567/perf/actions)
2 |
3 |
4 | # Features
5 |
6 | `perf` is an experimental library with a focus on the low-level empirics of Haskell code performance. If you are looking for a quick and reliable performance benchmark, criterion and tasty-bench are both good choices. If your results are confounding, however, you may need to dig deeper, and this is the problem space of `perf`.
7 |
8 | The library:
9 |
10 | - provides a monad transformer, `PerfT`. The criterion API tends towards an atomistic approach - bust code up into snippets, copy-paste into a bench.hs and measure their isolated performance. In contrast, with `PerfT` performance can be measured within a code snippet’s original context. Differing code points can be labelled and measured as part of a single run, encouraging a much faster observation - experimentation - refactor cycle.
11 |
12 | - is polymorphic to what, exactly, is being measured, so that concepts such as counters, debug checks, time and space performance can share treatment.
13 |
14 | - attempts to measure big O for algorithms that can be defined in terms of input size growth.
15 |
16 | - includes live charting of raw performance results via chart-svg and prettychart
17 |
18 |
19 | # Usage
20 |
21 | Probably the best introduction to `perf` is via the perf-explore executable:
22 |
23 | perf-explore
24 |
25 | label1 label2 old result new result change
26 |
27 | sum time 9.93e3 7.57e3 improvement
28 |
29 | Summing [1..1000] took 9,930 nanoseconds, an improvement versus the on file performance previously measured.
30 |
31 | Live charts of raw performance measurement can be obtained via the prettychart library with:
32 |
33 | prettychart-watch --watch --filepath other --port 3566
34 |
35 | … and pointer your browser at localhost:3566
36 |
37 | perf-explore -n 1000 --nocheck --chart
38 |
39 | 
40 |
41 | In this particular measure, there was an improvement, dropping from about 10,000 nanos to 8,600 nanos. Increasing the number of measurements:
42 |
43 | perf-explore -n 20000 --nocheck --chart --chartpath other/perf20000.svg
44 |
45 | 
46 |
47 | Improvements seem to continue as n increases before stabilising (after a GC perhaps) at 3,500 nanos
48 |
49 | perf-explore -n 20000 --order --nocheck --tasty
50 |
51 | label1 label2 results
52 |
53 | sum time 3.51e3
54 |
55 | sum:time 3.5 * O(N1)
56 | tasty:time: 3510
57 |
58 | The order of the computation (`\l -> fap sum [1 .. l]`) is O(N1) and the results are very close to the tasty-bench result.
59 |
60 | In comparsion, (\l -> fap (\x -> sum [1 .. x]) l):
61 |
62 | perf-explore --nocheck --sumFuse -n 100000 --chart --chartpath other/perffuse.svg --order
63 |
64 | 
65 |
66 | perf-explore --nocheck --sumFuse -n 100000 --order
67 |
68 | label1 label2 results
69 |
70 | sumFuse time 6.78e2
71 |
72 | sumFuse:time 0.66 * O(N1)
73 |
74 | … is much faster. Hooray for list fusion!
75 |
76 |
77 | # Issues
78 |
79 |
80 | ## fragility
81 |
82 | Results, especially for simple computations, are fragile and can show large variance in performance characteristics in identical runs, and across differing compilations. Whether this is due to library flaws or is just the nature of ghc is an open question.
83 |
84 |
85 | ## Statistics
86 |
87 | > Obligatory disclaimer: statistics is a tricky matter, there is no one-size-fits-all approach. In the absence of a good theory simplistic approaches are as (un)sound as obscure ones. Those who seek statistical soundness should rather collect raw data and process it themselves using a proper statistical toolbox. Data reported by tasty-bench is only of indicative and comparative significance. ~ [tasty-bench](https://hackage.haskell.org/package/tasty-bench-0.4/docs/Test-Tasty-Bench.html#t:Benchmarkable)
88 |
89 | > variance introduced by outliers: 88% (severely inflated) ~ [criterion](https://hackage.haskell.org/package/criterion)
90 |
91 | The library default is to report the 10th percentile as a summary statistic, and this is a matter of taste, determined mostly by the purpose of the measurement.
92 |
93 |
94 | ## ffap and fap
95 |
96 | :t ffap
97 |
98 | ffap
99 | :: (Control.DeepSeq.NFData a, Control.DeepSeq.NFData b, MonadIO m,
100 | Semigroup t) =>
101 | Text.Text -> (a -> b) -> a -> PerfT m t b
102 |
103 | ffap and fap are broadly similar to criterion’s nf and whnf respectively, but passes throught the results of the computation into the monad transformer, enabling in-context measurement.
104 |
105 | A fine-grained and detailed examination of the effect of measurement on laziness and on core details would be beneficial to the library.
106 |
107 |
108 | ## tasty
109 |
110 | The library was originally developed before tasty-bench, which does a great job of integrating into the tasty api, and a future refactor may integrate with this, rather than supply idiosyncratic methods.
111 |
112 |
113 | ## order
114 |
115 | BigOrder calculations tend to be fragile and sometimes differ from theory.
116 |
117 |
118 | # Development
119 |
120 | This org file has been used to develop and document library innovation and testing, and may be of use to users in understanding the library. Note that running `perf` via ghci is very slow compared with an external process which accesses the compiled version of the library.
121 |
122 | :r
123 | :set -Wno-type-defaults
124 | :set -Wno-unused-do-bind
125 | :set -Wno-name-shadowing
126 | :set -XOverloadedStrings
127 | :set -XOverloadedLabels
128 | import Perf
129 | import Perf.Report
130 | import Data.FormatN
131 | import qualified Data.Text as Text
132 | import qualified Data.Text.IO as Text
133 | import qualified Data.Map.Strict as Map
134 | import Control.Monad
135 | import Data.Bifunctor
136 | import System.Clock
137 | import Data.List qualified as List
138 | import Control.Category ((>>>))
139 | import Optics.Core
140 | import Data.Foldable
141 | import NumHask.Space
142 | putStrLn "ok"
143 | import Chart hiding (tick)
144 | import Prettychart
145 | import Chart.Examples
146 | import Perf.Chart
147 | (disp,q) <- startChartServer Nothing
148 | disp lineExample
149 | import Prettyprinter
150 | import Control.Monad.State.Lazy
151 | import Text.PrettyPrint.Boxes
152 |
153 | Ok, 11 modules loaded.
154 | ok
155 | Setting phasegrhsc it>o stun... (poTrrtu e9
156 | 160) (cgthrcli->c to quitg)h
157 |
158 | l = 1000
159 | n = 1000
160 |
161 | :{
162 | p = do
163 | ffap "sum" sum [1 .. l]
164 | ffap "sumfuse" (\x -> sum [1 .. x]) l
165 | :}
166 | :t p
167 | run = runPerfT (times n) p
168 | :t run
169 | (res, m) <- run
170 | :t m
171 | median . fmap fromIntegral <$> m
172 |
173 | ghci| ghci| ghci| ghci| ghci> p :: (MonadIO m, Semigroup t, Control.DeepSeq.NFData b, Num b,
174 | Enum b) =>
175 | PerfT m t b
176 | run
177 | :: (Control.DeepSeq.NFData a, Num a, Enum a) =>
178 | IO (a, Map.Map Text.Text [Nanos])
179 | m :: Map.Map Text.Text [Nanos]
180 | fromList [("sum",21978.1),("sumfuse",26710.18)]
181 |
182 |
183 | # Details
184 |
185 |
186 | ## System.Clock
187 |
188 | The default clock is MonoticRaw for linux & macOS, and ThreadCPUTime for Windows.
189 |
190 |
191 | ### resolution
192 |
193 | getRes Monotonic
194 | getRes Realtime
195 | getRes ProcessCPUTime
196 | getRes ThreadCPUTime
197 | getRes MonotonicRaw
198 |
199 | TimeSpec {sec = 0, nsec = 1000}
200 | TimeSpec {sec = 0, nsec = 1000}
201 | TimeSpec {sec = 0, nsec = 1000}
202 | TimeSpec {sec = 0, nsec = 42}
203 | TimeSpec {sec = 0, nsec = 42}
204 |
205 |
206 | ## ticks
207 |
208 | The various versions of tick and a variety of algorithms are artifacts of ongoing exploration.
209 |
210 | perf-explore -n 20000 --best --ticks
211 |
212 | algo stepTime tick tickForce tickForceArgs tickLazy tickWHNF times timesn
213 | sumAux 3.11e3 3.11e3 3.11e3 3.11e3 5.13e0 3.11e3 3.11e3 3.10e3
214 | sumCata 3.11e3 3.11e3 3.11e3 3.11e3 5.11e0 3.11e3 3.11e3 3.14e3
215 | sumCo 3.11e3 3.11e3 3.11e3 3.11e3 5.06e0 3.11e3 3.11e3 3.08e3
216 | sumCoCase 3.11e3 3.11e3 3.11e3 3.11e3 5.11e0 3.11e3 3.11e3 3.08e3
217 | sumCoGo 3.11e3 3.11e3 3.11e3 3.11e3 5.06e0 3.11e3 3.11e3 3.12e3
218 | sumF 3.48e3 3.49e3 3.46e3 3.46e3 5.06e0 3.48e3 3.48e3 3.48e3
219 | sumFlip 3.48e3 3.48e3 3.45e3 3.45e3 5.03e0 3.48e3 3.48e3 3.48e3
220 | sumFlipLazy 3.48e3 3.48e3 3.45e3 3.45e3 4.96e0 3.48e3 3.48e3 3.45e3
221 | sumFoldr 3.11e3 3.11e3 3.11e3 3.11e3 5.13e0 3.11e3 3.11e3 3.11e3
222 | sumFuse 6.54e2 6.54e2 6.54e2 6.54e2 5.17e0 6.54e2 6.54e2 6.39e2
223 | sumFuseFoldl' 6.54e2 6.54e2 6.54e2 6.54e2 5.00e0 6.54e2 6.54e2 6.44e2
224 | sumFuseFoldr 9.93e2 9.92e2 9.92e2 9.92e2 5.13e0 9.92e2 9.93e2 9.63e2
225 | sumFusePoly 6.56e2 6.56e2 6.56e2 6.56e2 5.12e0 6.56e2 6.57e2 6.47e2
226 | sumLambda 3.48e3 3.49e3 3.48e3 3.48e3 5.12e0 3.48e3 3.48e3 3.55e3
227 | sumMono 3.48e3 3.48e3 3.46e3 3.46e3 5.00e0 3.48e3 3.48e3 3.50e3
228 | sumPoly 3.62e3 3.49e3 3.54e3 3.56e3 5.04e0 3.71e3 3.62e3 3.70e3
229 | sumSum 3.48e3 3.49e3 3.48e3 3.48e3 4.98e0 3.48e3 3.48e3 3.49e3
230 | sumTail 3.48e3 3.49e3 3.45e3 3.45e3 5.00e0 3.48e3 3.48e3 3.51e3
231 | sumTailLazy 3.48e3 3.48e3 3.45e3 3.45e3 5.16e0 3.48e3 3.48e3 3.49e3
232 |
233 |
234 | ## Time
235 |
236 |
237 | ### What is a tick?
238 |
239 | A fundamental operation of Perf.Time is tick, which sandwiches a (strict) function application between two readings of a clock, and returns time in nanoseconds, and the computation result. In this way, the \`Perf\` monad can be inserted into the midst of a computation in an attempt to measure performance in-situ as opposed to sitting off in a separate and decontextualized process.
240 |
241 | :t tick
242 |
243 | tick :: (a -> b) -> a -> IO (Nanos, b)
244 |
245 | `tick` returns in the IO monad, because reading a cycle counter is an IO effect. A trivial but fundamental point is that performance measurement effects the computation being measured.
246 |
247 |
248 | ### tick\_
249 |
250 | tick\_ measures the nanoseconds between two immediate clock reads.
251 |
252 | :t tick_
253 |
254 | tick_ :: IO Nanos
255 |
256 | replicateM 10 tick_
257 |
258 | [1833,500,416,416,416,375,375,416,416,416]
259 |
260 |
261 | ### multiple ticks
262 |
263 | fmap (fmap (fst)) . replicateM 10 $ tick (const ()) ()
264 |
265 | [7000,2333,2000,2208,1958,1959,1959,2000,2000,1959]
266 |
267 | Here, `const () ()` was evaluated and took 7 micro-seconds for the first effect, reducing down to 2 msecs after 10 effects.
268 |
269 |
270 | ### tickIO
271 |
272 | `tickIO` measures the evaluation of an IO value.
273 |
274 | :t tickIO
275 |
276 | tickIO :: IO a -> IO (Cycles, a)
277 |
278 | fmap (fmap fst) . replicateM 10 $ tickIO (pure ())
279 |
280 | [5541,1625,1458,1833,1375,1416,1375,1375,1375,1375]
281 |
282 |
283 | ### sum example
284 |
285 | fmap (expt (Just 2) . fromIntegral) . fst <$> ticks 10 sum ([1..10000] :: [Double])
286 |
287 | ["5.0e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.4e5","2.5e5","2.4e5"]
288 |
289 | ts <- ticks 10000 sum ([1..1000] :: [Double])
290 | print $ average (fmap fromIntegral $ fst ts)
291 |
292 | 10747.1975
293 |
294 |
295 | ## PerfT
296 |
297 | `PerfT` allows for multiple measurement points and is polymorphic in what is being measured. It returns a Map of results held in State.
298 |
299 | Compare a lower-level usage of ticks, measuring the average of summing to one thousand over one thousand trials:
300 |
301 | first (average . fmap fromIntegral) <$> ticks 1000 sum [1..1000]
302 |
303 | (25947.635,500500)
304 |
305 | … with PerfT usage
306 |
307 | second (fmap (average . fmap fromIntegral)) <$> runPerfT (times 1000) (sum |$| [1..1000])
308 |
309 | (500500,fromList [("",26217.098)])
310 |
311 | An IO example
312 |
313 | exampleIO' :: IO ()
314 | exampleIO' = do
315 | txt <- Text.readFile "src/Perf.hs"
316 | let n = Text.length txt
317 | Text.putStrLn $ "length of file is: " <> Text.pack (show n)
318 |
319 | exampleIO = execPerfT time (do
320 | txt <- fam "file_read" (Text.readFile "src/Perf.hs")
321 | n <- fap "length" Text.length txt
322 | fam "print_result" (Text.putStrLn $ "length of file is: " <> Text.pack (show n)))
323 |
324 | perf-explore --exampleIO
325 |
326 | length of file is: 1794
327 | length of file is: 1794
328 |
329 | label1 label2 label3 old result new result change
330 |
331 | normal file-read time 2.31e5 1.28e5 improvement
332 | normal length time 2.71e3 2.00e3 improvement
333 | normal print-result time 3.75e4 1.32e4 improvement
334 | outer file-read time 6.05e4 3.64e4 improvement
335 | outer length time 9.59e2 6.25e2 improvement
336 | outer outer-total time 7.39e4 4.02e4 improvement
337 | outer print-result time 9.79e3 1.71e3 improvement
338 |
339 |
340 | ## Perf.BigO
341 |
342 | Perf.BigO represents functionality to determine the complexity order for a computation.
343 |
344 | We could do a regression and minimise the error term, but we know that the largest run contains the most information; we would need to weight the simulations according to some heuristic.
345 |
346 | Instead, we:
347 |
348 | - estimate the order factor for each possible Order, from N3 to N0, setting the highest n run constant factor to zero,
349 | - pick the order based on lowest absolute error result summed across all the runs,
350 |
351 | import qualified Prelude as P
352 | import Data.List (nub)
353 | estOrder (\x -> sum $ nub [1..x]) 100 [10,100,1000,1000]
354 |
355 | BigOrder {bigOrder = N2, bigFactor = 3.187417}
356 |
357 | import qualified Prelude as P
358 | import Data.List (nub)
359 | estOrder (\x -> sum $ [1..x]) 10 [1,10,100,1000]
360 |
361 | BigOrder {bigOrder = N12, bigFactor = 695.0370069284081, bigConstant = 0.0}
362 |
363 |
364 | ## References
365 |
366 |
367 |
368 | [The Haskell performance checklist](https://github.com/haskell-perf/checklist)
369 |
370 | [ndmitchell/spaceleak: Notes on space leaks](https://github.com/ndmitchell/spaceleak)
371 |
372 |
373 | ### Core
374 |
375 | [5.13. Debugging the compiler](https://ghc.gitlab.haskell.org/ghc/doc/users_guide/debugging.html#options-debugging)
376 |
377 | ghc app/speed.hs -ddump-simpl -ddump-to-file -fforce-recomp -dlint -O
378 |
379 | [haskell wiki: Looking at the Core](https://wiki.haskell.org/Performance/GHC#Looking_at_the_Core)
380 |
381 | [godbolt](https://godbolt.org/)
382 |
383 | [ghc issue 15185: Enum instance for IntX / WordX are inefficient](https://gitlab.haskell.org/ghc/ghc/-/issues/15185)
384 |
385 | [fixpt - All About Strictness Analysis (part 1)](https://fixpt.de/blog/2017-12-04-strictness-analysis-part-1.html)
386 |
387 |
388 | ### Profiling
389 |
390 | 1. setup
391 |
392 | [8. Profiling](https://ghc.gitlab.haskell.org/ghc/doc/users_guide/profiling.html#prof-heap)
393 |
394 | A typical configuration step for profiling:
395 |
396 | cabal configure --enable-library-profiling --enable-executable-profiling -fprof-auto -fprof -write-ghc-environment-files=always
397 |
398 | A cabal.project.local with profiling enabled:
399 |
400 | > write-ghc-environment-files: always
401 | > ignore-project: False
402 | > flags: +prof +prof-auto
403 | > library-profiling: True
404 | > executable-profiling: True
405 |
406 | Examples from markup-parse R&D:
407 |
408 | Executable compilation:
409 |
410 | ghc -prof -fprof-auto -rtsopts app/speed0.hs -threaded -fforce-recomp
411 |
412 | Executable run:
413 |
414 | app/speed0 +RTS -s -p -hc -l -RTS
415 |
416 | 2. Space usage output (-s)
417 |
418 | 885,263,472 bytes allocated in the heap
419 | 8,507,448 bytes copied during GC
420 | 163,200 bytes maximum residency (4 sample(s))
421 | 27,752 bytes maximum slop
422 | 6 MiB total memory in use (0 MiB lost due to fragmentation)
423 |
424 | Tot time (elapsed) Avg pause Max pause
425 | Gen 0 207 colls, 0 par 0.009s 0.010s 0.0001s 0.0002s
426 | Gen 1 4 colls, 0 par 0.001s 0.001s 0.0004s 0.0005s
427 |
428 | TASKS: 4 (1 bound, 3 peak workers (3 total), using -N1)
429 |
430 | SPARKS: 0 (0 converted, 0 overflowed, 0 dud, 0 GC'd, 0 fizzled)
431 |
432 | INIT time 0.006s ( 0.006s elapsed)
433 | MUT time 0.367s ( 0.360s elapsed)
434 | GC time 0.010s ( 0.011s elapsed)
435 | RP time 0.000s ( 0.000s elapsed)
436 | PROF time 0.000s ( 0.000s elapsed)
437 | EXIT time 0.001s ( 0.001s elapsed)
438 | Total time 0.384s ( 0.380s elapsed)
439 |
440 | 3. Cost center profile (-p)
441 |
442 | Dumped to speed0.prof
443 |
444 | COST CENTRE MODULE SRC %time %alloc
445 |
446 | token MarkupParse src/MarkupParse.hs:(259,1)-(260,20) 50.2 50.4
447 | wrappedQ' MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:(215,1)-(217,78) 20.8 23.1
448 | ws_ MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:(135,1)-(146,4) 14.3 5.5
449 | eq MarkupParse.FlatParse src/MarkupParse/FlatParse.hs:243:1-30 10.6 11.1
450 | gather MarkupParse src/MarkupParse.hs:(420,1)-(428,100) 2.4 3.7
451 | runParser FlatParse.Basic src/FlatParse/Basic.hs:(217,1)-(225,24) 1.0 6.0
452 |
453 | 4. heap analysis (-hc -l)
454 |
455 | eventlog2html speed0.eventlog
456 |
457 | Produces speed0.eventlog.html which contains heap charts.
458 |
459 |
460 | ### Cache speed
461 |
462 | The average cycles per + operation can get down to about 0.7 cycles, and there are about 4 cache registers per cycle, so a sum pipeline uses 2.8 register instructions per +.
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 | Cache |
477 | nsecs |
478 | Cycles |
479 |
480 |
481 |
482 |
483 | register |
484 | 0.1 |
485 | 4 per cycle |
486 |
487 |
488 |
489 | L1 Cache access |
490 | 1 |
491 | 3-4 cycles |
492 |
493 |
494 |
495 | L2 Cache access |
496 | 4 |
497 | 11-12 cycles |
498 |
499 |
500 |
501 | L3 unified access |
502 | 14 |
503 | 30 - 40 |
504 |
505 |
506 |
507 | DRAM hit |
508 | 80 |
509 | 195 cycles |
510 |
511 |
512 |
513 | L1 miss |
514 | 16 |
515 | 40 cycles |
516 |
517 |
518 |
519 | L2 miss |
520 | >250 |
521 | >600 cycles |
522 |
523 |
524 |
525 |
526 |
--------------------------------------------------------------------------------
/src/Perf.hs:
--------------------------------------------------------------------------------
1 | -- | == Introduction
2 | --
3 | -- @perf@ provides tools for measuring the runtime performance of Haskell functions. It includes:
4 | --
5 | -- - time measurement via the [clock](https://hackage.haskell.org/package/clock) library.
6 | --
7 | -- - a polymorphic approach to what a 'Measure' is so that a wide variety of measurements such as counting, space and time measurement can share the same API.
8 | --
9 | -- - 'PerfT' which is a monad transformer designed to add the collection of performance information to existing code. Running the code produces a tuple of the original computation results, and a Map of performance measurements that were specified.
10 | --
11 | -- - functionality to determine performance order, in 'Perf.BigO'
12 | --
13 | -- - reporting functionality encapsulated in 'Perf.Report'. @perf@ can be run via 'cabal bench' and will, for example, error on performance degradation; see the project's cabal file for an example.
14 | module Perf
15 | ( -- * re-exports
16 | module Perf.Types,
17 | -- | Representation of what a Performance 'Measure' is.
18 | module Perf.Measure,
19 | -- | Low-level time performance 'Measure' counting 'Nanos'
20 | module Perf.Time,
21 | -- | Low-level space performance 'Measure's based on GHC's allocation statistics.
22 | module Perf.Space,
23 | -- | Simple loop counter
24 | module Perf.Count,
25 | -- | Various (fast loop) algorithms that have been used for testing perf functionality.
26 | module Perf.Algos,
27 | -- | Order of complexity computations
28 | module Perf.BigO,
29 | -- | Reporting
30 | module Perf.Report,
31 | -- | Statistical support
32 | module Perf.Stats,
33 | )
34 | where
35 |
36 | import Perf.Algos
37 | import Perf.BigO
38 | import Perf.Count
39 | import Perf.Measure
40 | import Perf.Report
41 | import Perf.Space
42 | import Perf.Stats
43 | import Perf.Time
44 | import Perf.Types
45 | import Prelude hiding (cycle)
46 |
--------------------------------------------------------------------------------
/src/Perf/Algos.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE LambdaCase #-}
2 | {-# LANGUAGE OverloadedStrings #-}
3 | {-# OPTIONS_GHC -Wno-name-shadowing #-}
4 | {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
5 |
6 | {-# HLINT ignore "Redundant lambda" #-}
7 | {-# HLINT ignore "Avoid lambda" #-}
8 | {-# HLINT ignore "Use foldr" #-}
9 | {-# HLINT ignore "Use sum" #-}
10 |
11 | -- | Algorithms and functions for testing purposes
12 | module Perf.Algos
13 | ( -- * command-line options
14 | Example (..),
15 | parseExample,
16 | ExamplePattern (..),
17 | examplePattern,
18 | exampleLabel,
19 | testExample,
20 | tastyExample,
21 |
22 | -- * sum algorithms
23 | SumPattern (..),
24 | allSums,
25 | testSum,
26 | statSums,
27 | sumTail,
28 | sumTailLazy,
29 | sumFlip,
30 | sumFlipLazy,
31 | sumCo,
32 | sumCoGo,
33 | sumCoCase,
34 | sumAux,
35 | sumFoldr,
36 | sumCata,
37 | sumSum,
38 | sumMono,
39 | sumPoly,
40 | sumLambda,
41 | sumF,
42 | sumFuse,
43 | sumFusePoly,
44 | sumFuseFoldl',
45 | sumFuseFoldr,
46 |
47 | -- * length algorithms
48 | LengthPattern (..),
49 | allLengths,
50 | testLength,
51 | statLengths,
52 |
53 | -- * length
54 | lengthTail,
55 | lengthTailLazy,
56 | lengthFlip,
57 | lengthFlipLazy,
58 | lengthCo,
59 | lengthCoCase,
60 | lengthAux,
61 | lengthFoldr,
62 | lengthFoldrConst,
63 | lengthF,
64 | lengthFMono,
65 |
66 | -- * recursion patterns
67 | recurseTail,
68 | recurseTailLazy,
69 | recurseFlip,
70 | recurseFlipLazy,
71 | recurseCo,
72 | recurseCoLazy,
73 | recurseCata,
74 |
75 | -- * miscellaneous
76 | mapInc,
77 | constFuse,
78 | splitHalf,
79 | )
80 | where
81 |
82 | import Control.Monad (void)
83 | import Control.Monad.IO.Class (MonadIO (..))
84 | import Data.Bifunctor
85 | import Data.Foldable
86 | import Data.Functor.Foldable
87 | import Data.List qualified as List
88 | import Data.Map.Strict qualified as Map
89 | import Data.Text (Text)
90 | import Options.Applicative
91 | import Options.Applicative.Help.Pretty
92 | import Perf.Types
93 | import Test.Tasty.Bench
94 |
95 | -- | Algorithm examples for testing
96 | data Example = ExampleSumFuse | ExampleSum | ExampleLengthF | ExampleConstFuse | ExampleMapInc | ExampleNoOp | ExampleNub | ExampleFib deriving (Eq, Show)
97 |
98 | -- | Parse command-line options for algorithm examples.
99 | parseExample :: Parser Example
100 | parseExample =
101 | flag' ExampleSumFuse (long "sumFuse" <> help "fused sum pipeline")
102 | <|> flag' ExampleSum (long "sum" <> style (annotate bold) <> help "sum")
103 | <|> flag' ExampleLengthF (long "lengthF" <> help "foldr id length")
104 | <|> flag' ExampleConstFuse (long "constFuse" <> help "fused const pipeline")
105 | <|> flag' ExampleMapInc (long "mapInc" <> help "fmap (+1)")
106 | <|> flag' ExampleNoOp (long "noOp" <> help "const ()")
107 | <|> flag' ExampleFib (long "fib" <> help "fibonacci")
108 | <|> flag' ExampleNub (long "nub" <> help "List.nub")
109 | <|> pure ExampleSum
110 |
111 | -- | Unification of example function applications
112 | data ExamplePattern a
113 | = PatternSumFuse Text ((Num a) => (a -> a)) a
114 | | PatternSum Text ((Num a) => [a] -> a) [a]
115 | | PatternLengthF Text ([a] -> Int) [a]
116 | | PatternConstFuse Text (Int -> ()) Int
117 | | PatternMapInc Text ([Int] -> [Int]) [Int]
118 | | PatternNoOp Text (() -> ()) ()
119 | | PatternNub Text ([Int] -> [Int]) [Int]
120 | | PatternFib Text (Int -> Integer) Int
121 |
122 | -- | Labels
123 | exampleLabel :: ExamplePattern a -> Text
124 | exampleLabel (PatternSumFuse l _ _) = l
125 | exampleLabel (PatternSum l _ _) = l
126 | exampleLabel (PatternLengthF l _ _) = l
127 | exampleLabel (PatternConstFuse l _ _) = l
128 | exampleLabel (PatternMapInc l _ _) = l
129 | exampleLabel (PatternNoOp l _ _) = l
130 | exampleLabel (PatternNub l _ _) = l
131 | exampleLabel (PatternFib l _ _) = l
132 |
133 | -- | Convert an 'Example' to an 'ExamplePattern'.
134 | examplePattern :: Example -> Int -> ExamplePattern Int
135 | examplePattern ExampleSumFuse l = PatternSumFuse "sumFuse" sumFuse l
136 | examplePattern ExampleSum l = PatternSum "sum" sum [1 .. l]
137 | examplePattern ExampleLengthF l = PatternLengthF "lengthF" lengthF [1 .. l]
138 | examplePattern ExampleConstFuse l = PatternConstFuse "constFuse" constFuse l
139 | examplePattern ExampleMapInc l = PatternMapInc "mapInc" mapInc [1 .. l]
140 | examplePattern ExampleNoOp _ = PatternNoOp "noop" (const ()) ()
141 | examplePattern ExampleNub l = PatternNub "nub" List.nub [1 .. l]
142 | examplePattern ExampleFib l = PatternFib "fib" fib l
143 |
144 | -- | Convert an 'ExamplePattern' to a 'PerfT'.
145 | testExample :: (Semigroup a, MonadIO m) => ExamplePattern Int -> PerfT m a ()
146 | testExample (PatternSumFuse label f a) = void $ ffap label f a
147 | testExample (PatternSum label f a) = void $ ffap label f a
148 | testExample (PatternLengthF label f a) = void $ ffap label f a
149 | testExample (PatternConstFuse label f a) = void $ ffap label f a
150 | testExample (PatternMapInc label f a) = void $ ffap label f a
151 | testExample (PatternNoOp label f a) = void $ ffap label f a
152 | testExample (PatternNub label f a) = void $ ffap label f a
153 | testExample (PatternFib label f a) = void $ ffap label f a
154 |
155 | -- | Convert an 'ExamplePattern' to a tasty-bench run.
156 | tastyExample :: ExamplePattern Int -> Benchmarkable
157 | tastyExample (PatternSumFuse _ f a) = nf f a
158 | tastyExample (PatternSum _ f a) = nf f a
159 | tastyExample (PatternLengthF _ f a) = nf f a
160 | tastyExample (PatternConstFuse _ f a) = nf f a
161 | tastyExample (PatternMapInc _ f a) = nf f a
162 | tastyExample (PatternNoOp _ f a) = nf f a
163 | tastyExample (PatternNub _ f a) = nf f a
164 | tastyExample (PatternFib _ f a) = nf f a
165 |
166 | -- | Unification of sum function applications
167 | data SumPattern a
168 | = SumFuse Text (Int -> Int) Int
169 | | SumFusePoly Text ((Enum a, Num a) => a -> a) a
170 | | SumPoly Text ((Num a) => [a] -> a) [a]
171 | | SumMono Text ([Int] -> Int) [Int]
172 |
173 | -- | All the sum algorithms.
174 | allSums :: Int -> [SumPattern Int]
175 | allSums l =
176 | [ SumPoly "sumTail" sumTail [1 .. l],
177 | SumPoly "sumTailLazy" sumTailLazy [1 .. l],
178 | SumPoly "sumFlip" sumFlip [1 .. l],
179 | SumPoly "sumFlipLazy" sumFlipLazy [1 .. l],
180 | SumPoly "sumCo" sumCo [1 .. l],
181 | SumPoly "sumCoGo" sumCoGo [1 .. l],
182 | SumPoly "sumCoCase" sumCoCase [1 .. l],
183 | SumPoly "sumAux" sumAux [1 .. l],
184 | SumPoly "sumFoldr" sumFoldr [1 .. l],
185 | SumPoly "sumCata" sumCata [1 .. l],
186 | SumPoly "sumSum" sumSum [1 .. l],
187 | SumMono "sumMono" sumMono [1 .. l],
188 | SumPoly "sumPoly" sumPoly [1 .. l],
189 | SumPoly "sumLambda" sumLambda [1 .. l],
190 | SumPoly "sumF" sumF [1 .. l],
191 | SumFuse "sumFuse" sumFuse l,
192 | SumFusePoly "sumFusePoly" sumFusePoly l,
193 | SumFuse "sumFuseFoldl'" sumFuseFoldl' l,
194 | SumFuse "sumFuseFoldr" sumFuseFoldr l
195 | ]
196 |
197 | -- | Convert an 'SumPattern' to a 'PerfT'.
198 | testSum :: (Semigroup a, MonadIO m) => SumPattern Int -> PerfT m a Int
199 | testSum (SumFuse label f a) = fap label f a
200 | testSum (SumFusePoly label f a) = fap label f a
201 | testSum (SumMono label f a) = fap label f a
202 | testSum (SumPoly label f a) = fap label f a
203 |
204 | -- | Run a sum algorithm measurement.
205 | statSums :: (MonadIO m) => Int -> Int -> (Int -> Measure m [a]) -> m (Map.Map Text [a])
206 | statSums n l m = execPerfT (m n) $ mapM_ testSum (allSums l)
207 |
208 | -- | tail resursive
209 | sumTail :: (Num a) => [a] -> a
210 | sumTail = go 0
211 | where
212 | go acc [] = acc
213 | go acc (x : xs) = go (x + acc) $! xs
214 |
215 | -- | lazy recursion.
216 | sumTailLazy :: (Num a) => [a] -> a
217 | sumTailLazy = go 0
218 | where
219 | go acc [] = acc
220 | go acc (x : xs) = go (x + acc) $! xs
221 |
222 | -- | With argument order flipped
223 | sumFlip :: (Num a) => [a] -> a
224 | sumFlip xs0 = go xs0 0
225 | where
226 | go [] s = s
227 | go (x : xs) s = go xs $! x + s
228 |
229 | -- | Lazy with argument order flipped.
230 | sumFlipLazy :: (Num a) => [a] -> a
231 | sumFlipLazy xs0 = go xs0 0
232 | where
233 | go [] s = s
234 | go (x : xs) s = go xs $ x + s
235 |
236 | -- | Co-routine style
237 | sumCo :: (Num a) => [a] -> a
238 | sumCo [] = 0
239 | sumCo (x : xs) = x + sumCo xs
240 |
241 | -- | Co-routine, go style
242 | sumCoGo :: (Num a) => [a] -> a
243 | sumCoGo = go
244 | where
245 | go [] = 0
246 | go (x : xs) = x + go xs
247 |
248 | -- | Co-routine, case-style
249 | sumCoCase :: (Num a) => [a] -> a
250 | sumCoCase = \case
251 | [] -> 0
252 | (x : xs) -> x + sumCoCase xs
253 |
254 | -- | Auxillary style.
255 | sumAux :: (Num a) => [a] -> a
256 | sumAux = \case
257 | [] -> b
258 | (x : xs) -> f x (sumAux xs)
259 | where
260 | b = 0
261 | f x xs = x + xs
262 |
263 | -- | foldr style
264 | sumFoldr :: (Num a) => [a] -> a
265 | sumFoldr xs = foldr (+) 0 xs
266 |
267 | -- | cata style
268 | sumCata :: (Num a) => [a] -> a
269 | sumCata = cata $ \case
270 | Nil -> 0
271 | Cons x acc -> x + acc
272 |
273 | -- | sum
274 | sumSum :: (Num a) => [a] -> a
275 | sumSum xs = sum xs
276 |
277 | -- | Monomorphic sum
278 | sumMono :: [Int] -> Int
279 | sumMono xs = foldl' (+) 0 xs
280 |
281 | -- | Polymorphic sum
282 | sumPoly :: (Num a) => [a] -> a
283 | sumPoly xs = foldl' (+) 0 xs
284 |
285 | -- | Lambda-style sum
286 | sumLambda :: (Num a) => [a] -> a
287 | sumLambda = \xs -> foldl' (+) 0 xs
288 |
289 | sumF' :: (Num a) => a -> (a -> a) -> a -> a
290 | sumF' x r = \ !a -> r (x + a)
291 |
292 | -- | GHC-style foldr method.
293 | sumF :: (Num a) => [a] -> a
294 | sumF xs = foldr sumF' id xs 0
295 |
296 | -- | Fusion check
297 | sumFuse :: Int -> Int
298 | sumFuse x = sum [1 .. x]
299 |
300 | -- | Fusion under polymorph
301 | sumFusePoly :: (Enum a, Num a) => a -> a
302 | sumFusePoly x = sum [1 .. x]
303 |
304 | -- | foldl' fusion
305 | sumFuseFoldl' :: Int -> Int
306 | sumFuseFoldl' x = foldl' (+) 0 [1 .. x]
307 |
308 | -- | foldr fusion
309 | sumFuseFoldr :: Int -> Int
310 | sumFuseFoldr x = foldr (+) 0 [1 .. x]
311 |
312 | -- | Unification of length function applications
313 | data LengthPattern a
314 | = LengthPoly Text ([a] -> Int) [a]
315 | | LengthMono Text ([Int] -> Int) [Int]
316 |
317 | -- | All the length algorithms.
318 | allLengths :: Int -> [LengthPattern Int]
319 | allLengths l =
320 | [ LengthPoly "lengthTail" lengthTail [1 .. l],
321 | LengthPoly "lengthTailLazy" lengthTailLazy [1 .. l],
322 | LengthPoly "lengthFlip" lengthFlip [1 .. l],
323 | LengthPoly "lengthFlipLazy" lengthFlipLazy [1 .. l],
324 | LengthPoly "lengthCo" lengthCo [1 .. l],
325 | LengthPoly "lengthCoCase" lengthCoCase [1 .. l],
326 | LengthPoly "lengthAux" lengthAux [1 .. l],
327 | LengthPoly "lengthFoldr" lengthFoldr [1 .. l],
328 | LengthPoly "lengthFoldrConst" lengthFoldrConst [1 .. l],
329 | LengthPoly "lengthF" lengthF [1 .. l],
330 | LengthMono "lengthFMono" lengthFMono [1 .. l]
331 | ]
332 |
333 | -- | Convert an 'LengthPattern' to a 'PerfT'.
334 | testLength :: (Semigroup a, MonadIO m) => LengthPattern Int -> PerfT m a Int
335 | testLength (LengthMono label f a) = fap label f a
336 | testLength (LengthPoly label f a) = fap label f a
337 |
338 | -- | Run a lengths algorithm
339 | statLengths :: (MonadIO m) => Int -> Int -> (Int -> Measure m [a]) -> m (Map.Map Text [a])
340 | statLengths n l m = execPerfT (m n) $ mapM_ testLength (allLengths l)
341 |
342 | -- | tail resursive
343 | lengthTail :: [a] -> Int
344 | lengthTail xs0 = go 0 xs0
345 | where
346 | go s [] = s
347 | go s (_ : xs) = go (s + 1) $! xs
348 |
349 | -- | lazy recursion.
350 | lengthTailLazy :: [a] -> Int
351 | lengthTailLazy xs0 = go 0 xs0
352 | where
353 | go s [] = s
354 | go s (_ : xs) = go (s + 1) xs
355 |
356 | -- | With argument order flipped
357 | lengthFlip :: [a] -> Int
358 | lengthFlip xs0 = go xs0 0
359 | where
360 | go [] s = s
361 | go (_ : xs) s = go xs $! s + 1
362 |
363 | -- | Lazy with argument order flipped.
364 | lengthFlipLazy :: [a] -> Int
365 | lengthFlipLazy xs0 = go xs0 0
366 | where
367 | go [] s = s
368 | go (_ : xs) s = go xs $ s + 1
369 |
370 | -- | Co-routine style
371 | lengthCo :: [a] -> Int
372 | lengthCo [] = 0
373 | lengthCo (_ : xs) = 1 + lengthCo xs
374 |
375 | -- | Co-routine style as a Case statement.
376 | lengthCoCase :: [a] -> Int
377 | lengthCoCase = \case
378 | [] -> 0
379 | (_ : xs) -> 1 + lengthCoCase xs
380 |
381 | -- | Auxillary version.
382 | lengthAux :: [a] -> Int
383 | lengthAux = \case
384 | [] -> b
385 | (x : xs) -> f x (lengthAux xs)
386 | where
387 | b = 0
388 | f _ xs = 1 + xs
389 |
390 | -- | foldr style
391 | lengthFoldr :: [a] -> Int
392 | lengthFoldr = foldr f b
393 | where
394 | b = 0
395 | f _ xs = 1 + xs
396 |
397 | -- | foldr style with explicit const usage.
398 | lengthFoldrConst :: [a] -> Int
399 | lengthFoldrConst = foldr (const (1 +)) 0
400 |
401 | {-
402 | -- from base:
403 | -- https://hackage.haskell.org/package/base-4.16.0.0/docs/src/GHC.List.html#length
404 | -- The lambda form turns out to be necessary to make this inline
405 | -- when we need it to and give good performance.
406 | {-# INLINE [0] lengthFB #-}
407 | lengthFB :: x -> (Int -> Int) -> Int -> Int
408 | lengthFB _ r !a = r (a + 1)
409 |
410 | -}
411 | lengthF' :: (Num a) => x -> (a -> a) -> a -> a
412 | lengthF' _ r = \ !a -> r (a + 1)
413 |
414 | -- | GHC style
415 | lengthF :: [a] -> Int
416 | lengthF xs0 = foldr lengthF' id xs0 0
417 |
418 | -- | Monomorphic, GHC style
419 | lengthFMono :: [Int] -> Int
420 | lengthFMono xs0 = foldr lengthF' id xs0 0
421 |
422 | -- * recursion patterns
423 |
424 | -- | Tail recursion
425 | recurseTail :: (a -> b -> b) -> b -> [a] -> b
426 | recurseTail f = go
427 | where
428 | go s [] = s
429 | go s (x : xs) = go (f x s) $! xs
430 |
431 | -- | Lazy tail recursion
432 | recurseTailLazy :: (a -> b -> b) -> b -> [a] -> b
433 | recurseTailLazy f = go
434 | where
435 | go s [] = s
436 | go s (x : xs) = go (f x s) xs
437 |
438 | -- | Tail resursion with flipped argument order.
439 | recurseFlip :: (a -> b -> b) -> b -> [a] -> b
440 | recurseFlip f s0 xs0 = go xs0 s0
441 | where
442 | go [] s = s
443 | go (x : xs) s = go xs $! f x s
444 |
445 | -- | Lazy tail resursion with flipped argument order.
446 | recurseFlipLazy :: (a -> b -> b) -> b -> [a] -> b
447 | recurseFlipLazy f s0 xs0 = go xs0 s0
448 | where
449 | go [] s = s
450 | go (x : xs) s = go xs $ f x s
451 |
452 | -- | Coroutine
453 | recurseCo :: (a -> b -> b) -> b -> [a] -> b
454 | recurseCo f s0 = go
455 | where
456 | go [] = s0
457 | go (x : xs) = f x $! go xs
458 |
459 | -- | Lazy, coroutine
460 | recurseCoLazy :: (a -> b -> b) -> b -> [a] -> b
461 | recurseCoLazy f s0 = go
462 | where
463 | go [] = s0
464 | go (x : xs) = f x $ go xs
465 |
466 | -- | Cata style
467 | recurseCata :: (a -> b -> b) -> b -> [a] -> b
468 | recurseCata f s0 = cata $ \case
469 | Nil -> s0
470 | Cons x acc -> f x acc
471 |
472 | -- * miscellaneous
473 |
474 | -- | Test of const fusion
475 | constFuse :: Int -> ()
476 | constFuse x = foldl' const () [1 .. x]
477 |
478 | -- | Increment a list.
479 | mapInc :: [Int] -> [Int]
480 | mapInc xs = fmap (+ 1) xs
481 |
482 | -- | Split a list.
483 | splitHalf :: [a] -> ([a], [a])
484 | splitHalf xs = go xs xs
485 | where
486 | go (y : ys) (_ : _ : zs) = first (y :) (go ys zs)
487 | go ys _ = ([], ys)
488 |
489 | -- | Fibonnacci
490 | fib :: Int -> Integer
491 | fib n = if n < 2 then toInteger n else fib (n - 1) + fib (n - 2)
492 |
--------------------------------------------------------------------------------
/src/Perf/BigO.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 |
3 | -- | [Order of complexity](https://en.wikibooks.org/wiki/Optimizing_Code_for_Speed/Order_of_Complexity_Optimizations#:~:text=of%2DComplexity%20Reduction-,What%20is%20order%20of%20complexity%3F,*log(N)) calculations.
4 | module Perf.BigO
5 | ( O (..),
6 | olist,
7 | promote,
8 | promote1,
9 | promote_,
10 | demote,
11 | demote1,
12 | spectrum,
13 | Order (..),
14 | bigO,
15 | runtime,
16 | BigOrder (..),
17 | fromOrder,
18 | toOrder,
19 | order,
20 | diffs,
21 | bestO,
22 | estO,
23 | estOs,
24 | makeNs,
25 | OrderOptions (..),
26 | defaultOrderOptions,
27 | parseOrderOptions,
28 | )
29 | where
30 |
31 | import Data.Bool
32 | import Data.FormatN
33 | import Data.List qualified as List
34 | import Data.Maybe
35 | import Data.Monoid
36 | import Data.Vector qualified as V
37 | import GHC.Generics
38 | import Options.Applicative
39 | import Prettyprinter
40 | import Prelude
41 |
42 | -- $setup
43 | -- >>> import qualified Data.List as List
44 | -- >>> o = Order [0.0,1.0,100.0,0.0,0.0,0.0,0.0,0.0]
45 | -- >>> ms = [2805.0,3476.0,9989.0,92590.0,1029074.6947660954]
46 | -- >>> ns = [1,10,100,1000,10000]
47 |
48 | -- | order type
49 | data O
50 | = -- | cubic
51 | N3
52 | | -- | quadratic
53 | N2
54 | | -- | ^3/2
55 | N32
56 | | -- | N * log N
57 | NLogN
58 | | -- | linear
59 | N1
60 | | -- | sqrt N
61 | N12
62 | | -- | log N
63 | LogN
64 | | -- | constant
65 | N0
66 | deriving (Eq, Ord, Show, Generic, Enum)
67 |
68 | -- | enumeration of O types
69 | --
70 | -- >>> olist
71 | -- [N3,N2,N32,NLogN,N1,N12,LogN,N0]
72 | olist :: [O]
73 | olist = [N3 .. N0]
74 |
75 | -- | functions to compute performance measure
76 | --
77 | -- >>> fmap ($ 0) promote_
78 | -- [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]
79 | --
80 | -- >>> fmap ($ 1) promote_
81 | -- [1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]
82 | --
83 | -- Ordering makes sense around N=10
84 | --
85 | -- >>> fmap ($ 10) promote_
86 | -- [1000.0,100.0,31.622776601683793,23.02585092994046,10.0,3.1622776601683795,2.302585092994046,1.0]
87 | --
88 | -- Having NP may cause big num problems
89 | --
90 | -- >>> fmap ($ 1000) promote_
91 | -- [1.0e9,1000000.0,31622.776601683792,6907.755278982137,1000.0,31.622776601683793,6.907755278982137,1.0]
92 | promote_ :: [Double -> Double]
93 | promote_ =
94 | [ -- \n -> min maxBound (bool (2**n) zero (n<=zero)),
95 | (^ (3 :: Integer)),
96 | (^ (2 :: Integer)),
97 | (** 1.5),
98 | \n -> bool (bool (n * log n) 1 (n <= 1)) 0 (n <= 0),
99 | id,
100 | (** 0.5),
101 | \n -> bool (bool (log n) 1 (n <= 1)) 0 (n <= 0),
102 | \n -> bool 1 0 (n <= 0)
103 | ]
104 |
105 | -- | a set of factors for each order, which represents a full Order specification.
106 | newtype Order a = Order {factors :: [a]} deriving (Eq, Ord, Show, Generic, Functor)
107 |
108 | -- | create an Order
109 | --
110 | -- >>> order N2 1
111 | -- Order {factors = [0,1,0,0,0,0,0,0]}
112 | order :: (Num a) => O -> a -> Order a
113 | order o a = Order $ replicate n 0 <> [a] <> replicate (7 - n) 0
114 | where
115 | n = fromEnum o
116 |
117 | -- | Calculate the expected performance measure
118 | --
119 | -- >>> promote (order N2 1) 10
120 | -- 100.0
121 | promote :: Order Double -> Double -> Double
122 | promote (Order fs) n = sum (zipWith (*) fs (($ n) <$> promote_))
123 |
124 | -- | Calculate the expected performance measure per n
125 | --
126 | -- >>> promote (order N2 1) 10
127 | -- 100.0
128 | promote1 :: Order Double -> Double
129 | promote1 o = promote o 1
130 |
131 | -- | Calculate an Order from a given O, an n, and a total performance measurement
132 | --
133 | -- A measurement of 1e6 for n=1000 with an order of N2 is:
134 | --
135 | -- >>> demote N2 1000 1000000
136 | -- Order {factors = [0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0]}
137 | --
138 | -- > promote (demote N2 n m) n m == m
139 | demote :: O -> Double -> Double -> Order Double
140 | demote o n m = order o (m / (promote_ List.!! fromEnum o) n)
141 |
142 | -- | Calculate an Order from a measure, and an O
143 | --
144 | -- >>> demote1 N2 1000
145 | -- Order {factors = [0.0,1000.0,0.0,0.0,0.0,0.0,0.0,0.0]}
146 | --
147 | -- > demote1 N2 m == demote o 1 m
148 | demote1 :: O -> Double -> Order Double
149 | demote1 o m = demote o 1 m
150 |
151 | -- | find the dominant order, and it's factor
152 | --
153 | -- >>> bigO o
154 | -- (N2,1.0)
155 | bigO :: (Ord a, Num a) => Order a -> (O, a)
156 | bigO (Order os) = (toEnum b, os List.!! b)
157 | where
158 | b = fromMaybe 7 $ List.findIndex (> 0) os
159 |
160 | -- | compute the runtime component of an Order, defined as the
161 | -- difference between the dominant order and the total for a single run.
162 | --
163 | -- >>> runtime o
164 | -- 100.0
165 | runtime :: Order Double -> Double
166 | runtime (Order os) = promote (Order r) 1
167 | where
168 | b = fromMaybe 7 $ List.findIndex (> 0) os
169 | r = take b os <> [0] <> drop (b + 1) os
170 |
171 | instance (Num a) => Num (Order a) where
172 | -- 0 = Order $ replicate 9 0
173 | (+) (Order o) (Order o') =
174 | Order (zipWith (+) o o')
175 | negate (Order o) = Order $ negate <$> o
176 | (*) (Order o) (Order o') =
177 | Order (zipWith (*) o o')
178 | abs = undefined
179 | signum = undefined
180 | fromInteger x = Order $ replicate 9 (fromInteger x)
181 |
182 | -- | A set of factors consisting of the dominant order, the dominant order factor and a constant factor
183 | data BigOrder a = BigOrder {bigOrder :: O, bigFactor :: a} deriving (Eq, Ord, Show, Generic, Functor)
184 |
185 | instance Pretty (BigOrder Double) where
186 | pretty (BigOrder o f) = pretty (decimal (Just 2) f) <> " * O(" <> viaShow o <> ")"
187 |
188 | -- | compute the BigOrder
189 | --
190 | -- >>> fromOrder o
191 | -- BigOrder {bigOrder = N2, bigFactor = 1.0}
192 | fromOrder :: Order Double -> BigOrder Double
193 | fromOrder o' = BigOrder o f
194 | where
195 | (o, f) = bigO o'
196 |
197 | -- | convert a BigOrder to an Order.
198 | --
199 | -- toOrder . fromOrder is not a round trip iso.
200 | --
201 | -- >>> toOrder (fromOrder o)
202 | -- Order {factors = [0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0]}
203 | toOrder :: BigOrder Double -> Order Double
204 | toOrder (BigOrder o f) = order o f
205 |
206 | -- | The factor for each O given an n, and a measurement.
207 | --
208 | -- >>> spectrum 100 10000
209 | -- Order {factors = [1.0e-2,1.0,10.0,21.71472409516259,100.0,1000.0,2171.4724095162587,10000.0]}
210 | spectrum :: Double -> Double -> Order Double
211 | spectrum n m = Order ((m /) . ($ n) <$> promote_)
212 |
213 | -- | The errors for a list of n's and measurements, based on the spectrum of the last measurement.
214 | diffs :: [Double] -> [Double] -> [[Double]]
215 | diffs ns ms = List.transpose $ zipWith (\n m -> zipWith (\o' f -> m - promote (order o' f) n) olist fs) ns ms
216 | where
217 | fs = factors (spectrum (List.last ns) (List.last ms))
218 |
219 | -- | minimum error order for a list of measurements
220 | --
221 | -- >>> bestO ns ms
222 | -- N1
223 | bestO :: [Double] -> [Double] -> O
224 | bestO ns ms =
225 | toEnum $
226 | V.minIndex $
227 | V.fromList
228 | (sum <$> fmap (fmap abs) (diffs ns ms))
229 |
230 | -- | fit the best order for the last measurement and return it, and the error terms for the measurements
231 | --
232 | -- >>> estO ns ms
233 | -- (Order {factors = [0.0,0.0,0.0,0.0,102.90746947660953,0.0,0.0,0.0]},[2702.0925305233905,2446.9253052339045,-301.7469476609531,-10317.469476609534,0.0])
234 | estO :: [Double] -> [Double] -> (Order Double, [Double])
235 | estO [] _ = (0, [])
236 | estO ns ms = (lasto, diff)
237 | where
238 | diff = diffs ns ms List.!! fromEnum o
239 | o = bestO ns ms
240 | lasto = demote o (List.last ns) (List.last ms)
241 |
242 | -- | fit orders from the last measurement to the first, using the residuals at each step.
243 | --
244 | -- >>> estOs ns ms
245 | -- [Order {factors = [0.0,0.0,0.0,0.0,102.90746947660953,0.0,0.0,0.0]},Order {factors = [0.0,0.0,-0.32626703235351473,0.0,0.0,0.0,0.0,0.0]},Order {factors = [0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.520084692561625]},Order {factors = [0.0,0.0,0.0,0.0,0.0,0.0,0.0,2432.722690017952]},Order {factors = [0.0,0.0,0.0,0.0,0.0,0.0,0.0,245.1760228452299]}]
246 | estOs :: [Double] -> [Double] -> [Order Double]
247 | estOs ns ms = go [] ns ms
248 | where
249 | go os _ [] = os
250 | go os _ [m] = os <> [order N0 m]
251 | go os ns' ms' = let (o', res) = estO ns' ms' in go (os <> [o']) (List.init ns') (List.init res)
252 |
253 | makeNs :: Int -> Double -> Int -> [Int]
254 | makeNs n0 d low = reverse $ go (next n0) [n0]
255 | where
256 | next n = floor (fromIntegral n / d)
257 | go :: Int -> [Int] -> [Int]
258 | go n acc = bool (go (next n) (acc <> [n])) acc (low >= n)
259 |
260 | data OrderOptions = OrderOptions
261 | { doOrder :: Bool,
262 | orderLow :: Int,
263 | orderDivisor :: Double
264 | }
265 | deriving (Eq, Show, Generic)
266 |
267 | defaultOrderOptions :: OrderOptions
268 | defaultOrderOptions = OrderOptions False 10 9
269 |
270 | parseOrderOptions :: OrderOptions -> Parser OrderOptions
271 | parseOrderOptions def =
272 | OrderOptions
273 | <$> switch (long "order" <> short 'o' <> help "calculate order")
274 | <*> option auto (value (orderLow def) <> long "orderlowest" <> showDefaultWith show <> metavar "DOUBLE" <> help "smallest order test")
275 | <*> option auto (value (orderDivisor def) <> long "orderdivisor" <> showDefaultWith show <> metavar "DOUBLE" <> help "divisor for order computation")
276 |
--------------------------------------------------------------------------------
/src/Perf/Chart.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedLabels #-}
2 | {-# LANGUAGE OverloadedStrings #-}
3 |
4 | module Perf.Chart where
5 |
6 | import Chart
7 | import Control.Category ((>>>))
8 | import Data.Bifunctor
9 | import Data.Bool
10 | import Data.List qualified as List
11 | import Data.Map.Strict qualified as Map
12 | import Data.Maybe
13 | import Data.Text (Text)
14 | import Data.Text qualified as Text
15 | import GHC.Generics
16 | import Optics.Core
17 | import Options.Applicative
18 | import Perf.Stats as Perf
19 | import Prettychart
20 |
21 | -- m <- fromDump defaultPerfDumpOptions
22 | data PerfChartOptions
23 | = PerfChartOptions
24 | { doChart :: Bool,
25 | chartFilepath :: FilePath,
26 | truncateAt :: Double,
27 | doSmallChart :: Bool,
28 | doBigChart :: Bool,
29 | doHistChart :: Bool,
30 | doAveragesLegend :: Bool,
31 | averagesStyle :: Style,
32 | averagesPaletteStart :: Int,
33 | averagesLegend :: LegendOptions,
34 | smallStyle :: Style,
35 | smallHud :: HudOptions,
36 | bigStyle :: Style,
37 | bigHud :: HudOptions,
38 | titleSize :: Double,
39 | histGrain :: Int,
40 | bigWidth :: Double,
41 | excludeZeros :: Bool
42 | }
43 | deriving (Eq, Show, Generic)
44 |
45 | defaultPerfChartOptions :: PerfChartOptions
46 | defaultPerfChartOptions = PerfChartOptions False "other/perf.svg" 10 True True True True (defaultGlyphStyle & set #size 0.05) 2 (defaultLegendOptions & set #place PlaceBottom & set #numStacks 3 & set #scaleChartsBy 0.2 & set #legendSize 0.3 & set #alignCharts AlignLeft & set #hgap (-0.2) & set #vgap (-0.1)) (defaultGlyphStyle & set #size 0.01 & over #color (rgb (palette 0)) & set (#color % opac') 0.3 & set (#borderColor % opac') 0.3 & set #glyphShape (gpalette 0)) defaultHudOptions (defaultGlyphStyle & set #size 0.06 & over #color (rgb (palette 0)) & set #glyphShape (gpalette 0) & set (#color % opac') 0.3 & set (#borderColor % opac') 1) (defaultHudOptions & set (#axes % each % #item % #ticks % #textTick %? #style % #size) 0.07 & over #axes (drop 1) & set (#axes % each % #item % #ticks % #tick % numTicks') (Just 2)) 0.08 100 0.2 True
47 |
48 | -- | Parse charting options.
49 | parsePerfChartOptions :: PerfChartOptions -> Parser PerfChartOptions
50 | parsePerfChartOptions def =
51 | (\c fp trunAt small big hist avs -> PerfChartOptions c fp trunAt small big hist avs (view #averagesStyle def) (view #averagesPaletteStart def) (view #averagesLegend def) (view #smallStyle def) (view #smallHud def) (view #bigStyle def) (view #bigHud def) (view #titleSize def) (view #histGrain def) (view #bigWidth def) (view #excludeZeros def))
52 | <$> switch (long "chart" <> short 'c' <> help "chart the result")
53 | <*> option str (value (view #chartFilepath def) <> showDefault <> long "chartpath" <> metavar "FILE" <> help "chart file name")
54 | <*> option auto (value (view #truncateAt def) <> showDefaultWith show <> long "truncateat" <> help "truncate chart data (multiple of median)")
55 | <*> switch (long "small")
56 | <*> switch (long "big")
57 | <*> switch (long "histogram")
58 | <*> switch (long "averages")
59 |
60 | perfCharts :: PerfChartOptions -> Maybe [Text] -> Map.Map Text [[Double]] -> ChartOptions
61 | perfCharts cfg labels m = bool (stackCO stackn AlignLeft NoAlign 0.1 cs) (head cs) (length cs == 1)
62 | where
63 | stackn = length cs & fromIntegral & sqrt @Double & ceiling
64 | cs = uncurry (perfChart cfg) <$> ps'
65 | ps = mconcat $ fmap (uncurry zip . bimap (\t -> fmap ((t <> ": ") <>) (fromMaybe (Text.pack . show @Int <$> [0 ..]) labels)) List.transpose) (Map.toList m)
66 | ps' = filter ((> 0) . sum . snd) ps
67 |
68 | perfChart :: PerfChartOptions -> Text -> [Double] -> ChartOptions
69 | perfChart cfg t xs = finalChart
70 | where
71 | xsSmall = xs & xify & filter (_y >>> (< upperCutoff)) & filter (\x -> view #excludeZeros cfg && (_y x > 0))
72 | xsBig = xs & xify & filter (_y >>> (>= upperCutoff))
73 | med = median xs
74 | best = tenth xs
75 | av = Perf.average xs
76 | upperCutoff = view #truncateAt cfg * med
77 |
78 | labels =
79 | [ "average: " <> comma (Just 3) av,
80 | "median: " <> comma (Just 3) med,
81 | "best: " <> comma (Just 3) best
82 | ]
83 | (Rect _ _ y' w') = fromMaybe one $ space1 xsSmall
84 | (Range x' z') = Range zero (fromIntegral $ length xs)
85 | rectx = BlankChart defaultStyle [Rect x' z' y' w']
86 | averagesCT = named "averages" $ zipWith (\x i -> GlyphChart (view #averagesStyle cfg & set #color (palette i) & set #borderColor (palette i) & set #glyphShape (gpalette i)) [Point zero x]) [av, med, best] [(view #averagesPaletteStart cfg) ..]
87 |
88 | (smallDot, smallHist) = dotHistChart (view #histGrain cfg) (view #smallStyle cfg) (mempty @ChartOptions & set #chartTree (averagesCT <> named "xrange" [rectx]) & set #hudOptions (view #smallHud cfg)) xsSmall
89 |
90 | minb = minimum (_y <$> xsBig)
91 | bigrange = Rect x' z' (bool minb zero (length xsBig == 1)) minb
92 | (bigDot, bigHist) = dotHistChart (view #histGrain cfg) (view #bigStyle cfg) (mempty @ChartOptions & set #hudOptions (view #bigHud cfg) & set #chartTree (named "xrange" [BlankChart defaultStyle [bigrange]])) xsBig
93 |
94 | (Rect bdX bdW _ _) = fromMaybe one $ view styleBox' (asChartTree bigDot)
95 | bdr = Just $ Rect bdX bdW (-(view #bigWidth cfg)) (view #bigWidth cfg)
96 | (Rect bdhX bdhW _ _) = fromMaybe one $ view styleBox' (asChartTree bigHist)
97 | bhr = Just $ Rect bdhX bdhW (-(view #bigWidth cfg)) (view #bigWidth cfg)
98 |
99 | finalChart =
100 | mempty @ChartOptions
101 | & set
102 | #chartTree
103 | ( stack
104 | 2
105 | NoAlign
106 | NoAlign
107 | 0
108 | ( bool (asChartTree bigDot & set styleBox' bdr & pure) mempty (null xsBig)
109 | <> bool (asChartTree bigHist & set styleBox' bhr & pure) mempty (null xsBig)
110 | <> [ asChartTree smallDot,
111 | asChartTree smallHist
112 | ]
113 | )
114 | )
115 | & set
116 | (#hudOptions % #legends)
117 | [Priority 10 (view #averagesLegend cfg & set #legendCharts (zipWith (\t' c -> (t', [c])) labels (toListOf chart' averagesCT)))]
118 | & set
119 | (#hudOptions % #titles)
120 | [Priority 5 (defaultTitleOptions t & set (#style % #size) (view #titleSize cfg))]
121 |
122 | dotHistChart :: Int -> Style -> ChartOptions -> [Point Double] -> (ChartOptions, ChartOptions)
123 | dotHistChart grain gstyle co xs = (dotCO, histCO)
124 | where
125 | dotCT = named "dot" [GlyphChart gstyle xs]
126 | ys = fmap _y xs
127 | (Range l u) = fromMaybe one (space1 ys)
128 | r' = bool (Range l u) (Range 0 l) (l == u)
129 | r = computeRangeTick r' (fromMaybe defaultTick (co & preview (#hudOptions % #axes % ix 1 % #item % #ticks % #tick)))
130 | (y, w) = let (Range y' w') = r in bool (y', w') (y' - 0.5, y' + 0.5) (y' == w')
131 |
132 | histCO = hhistChart r grain ys & set (#markupOptions % #chartAspect) (CanvasAspect 0.3) & over #chartTree (<> unnamed [BlankChart defaultStyle [Rect 0 0 y w]])
133 | dotCO = co & over #chartTree (dotCT <>)
134 |
135 | compareCharts :: [(PerfChartOptions, Text, [Double])] -> ChartOptions
136 | compareCharts xs = finalChart
137 | where
138 | xs' = xs & fmap (\(_, _, x) -> x)
139 | cfg' = xs & fmap (\(x, _, _) -> x)
140 | t' = xs & fmap (\(_, x, _) -> x)
141 | cfg = head cfg'
142 | xsSmall = xs' & fmap (xify >>> filter (_y >>> (< upperCutoff)) >>> filter (\x -> view #excludeZeros cfg && (_y x > 0)))
143 | xsBig = xs' & fmap (xify >>> filter (_y >>> (>= upperCutoff)))
144 | med = median <$> xs'
145 | upperCutoff = view #truncateAt cfg * maximum med
146 |
147 | (Rect _ _ y' w') = fromMaybe one $ space1 $ mconcat xsSmall
148 | (Range x' z') = Range zero (fromIntegral $ maximum (length <$> xs'))
149 | rectx = BlankChart defaultStyle [Rect x' z' y' w']
150 |
151 | (smallDot, smallHist) = dotHistCharts (view #histGrain cfg) (mempty @ChartOptions & set #hudOptions (view #smallHud cfg) & set #chartTree (unnamed [rectx])) (zip (view #smallStyle <$> cfg') xsSmall)
152 |
153 | minb = minimum (_y <$> mconcat xsBig)
154 | bigrange = Rect x' z' (bool minb zero (length (mconcat xsBig) == 1)) minb
155 | (bigDot, bigHist) = dotHistCharts (view #histGrain cfg) (mempty @ChartOptions & set #hudOptions (view #bigHud cfg) & set #chartTree (named "xrange" [BlankChart defaultStyle [bigrange]])) (zip (view #bigStyle <$> cfg') xsBig)
156 |
157 | (Rect bdX bdW _ _) = fromMaybe one $ view styleBox' (asChartTree bigDot)
158 | bdr = Just $ Rect bdX bdW (-(view #bigWidth cfg)) (view #bigWidth cfg)
159 | (Rect bdhX bdhW _ _) = fromMaybe one $ view styleBox' (asChartTree bigHist)
160 | bhr = Just $ Rect bdhX bdhW (-(view #bigWidth cfg)) (view #bigWidth cfg)
161 |
162 | finalChart =
163 | mempty @ChartOptions
164 | & set
165 | #chartTree
166 | ( stack
167 | 2
168 | NoAlign
169 | NoAlign
170 | 0
171 | ( bool (asChartTree bigDot & set styleBox' bdr & pure) mempty (null xsBig)
172 | <> bool (asChartTree bigHist & set styleBox' bhr & pure) mempty (null xsBig)
173 | <> [ asChartTree smallDot,
174 | asChartTree smallHist
175 | ]
176 | )
177 | )
178 | & set
179 | (#hudOptions % #legends)
180 | [Priority 10 (view #averagesLegend cfg & set #legendCharts (zipWith (\t'' c -> (t'', [c])) t' (toListOf (#chartTree % chart') smallDot)))]
181 |
182 | dotHistCharts :: Int -> ChartOptions -> [(Style, [Point Double])] -> (ChartOptions, ChartOptions)
183 | dotHistCharts grain co xs = (dotCO, histCO)
184 | where
185 | dotCTs = named "dot" (uncurry GlyphChart <$> xs)
186 | ys = fmap _y . snd <$> xs
187 | (Range l u) = fromMaybe one (space1 (mconcat ys))
188 | r' = bool (Range l u) (Range 0 l) (l == u)
189 | r = computeRangeTick r' (fromMaybe defaultTick (co & preview (#hudOptions % #axes % ix 1 % #item % #ticks % #tick)))
190 | (y, w) = let (Range y' w') = r in bool (y', w') (y' - 0.5, y' + 0.5) (y' == w')
191 |
192 | histCO = hhistCharts r grain (zip (fst <$> xs) ys) & set (#markupOptions % #chartAspect) (CanvasAspect 0.3) & over #chartTree (<> unnamed [BlankChart defaultStyle [Rect 0 0 y w]])
193 | dotCO = co & over #chartTree (dotCTs <>)
194 |
--------------------------------------------------------------------------------
/src/Perf/Count.hs:
--------------------------------------------------------------------------------
1 | -- | Simple counter.
2 | module Perf.Count
3 | ( count,
4 | countN,
5 | )
6 | where
7 |
8 | import Perf.Types
9 | import Prelude
10 |
11 | -- | Register 1 as a performance measure
12 | count :: (Applicative m) => StepMeasure m Int
13 | count = StepMeasure start stop
14 | where
15 | start = pure ()
16 | stop _ = pure 1
17 | {-# INLINEABLE count #-}
18 |
19 | -- | Count the number of times measured.
20 | countN :: Int -> Measure IO Int
21 | countN n = sum <$> toMeasureN n count
22 | {-# INLINEABLE countN #-}
23 |
--------------------------------------------------------------------------------
/src/Perf/Measure.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 |
3 | -- | Unification of the various different performance measure types, mostly to unify reporting and data management.
4 | module Perf.Measure
5 | ( MeasureType (..),
6 | parseMeasure,
7 | measureDs,
8 | measureLabels,
9 | measureFinalStat,
10 | )
11 | where
12 |
13 | import Data.Text (Text)
14 | import Options.Applicative
15 | import Options.Applicative.Help.Pretty qualified as OA
16 | import Perf.Count
17 | import Perf.Space
18 | import Perf.Stats
19 | import Perf.Time
20 | import Perf.Types
21 | import System.Clock
22 | import Prelude hiding (cycle)
23 |
24 | -- | Command-line measurement options.
25 | data MeasureType = MeasureTime | MeasureNTime | MeasureSpace | MeasureSpaceTime | MeasureAllocation | MeasureCount deriving (Eq, Show)
26 |
27 | -- | Parse command-line 'MeasureType' options.
28 | parseMeasure :: Parser MeasureType
29 | parseMeasure =
30 | flag' MeasureTime (long "time" <> style (OA.annotate OA.bold) <> help "measure time performance")
31 | <|> flag' MeasureNTime (long "ntime" <> help "measure n*time performance")
32 | <|> flag' MeasureSpace (long "space" <> help "measure space performance")
33 | <|> flag' MeasureSpaceTime (long "spacetime" <> help "measure both space and time performance")
34 | <|> flag' MeasureAllocation (long "allocation" <> help "measure bytes allocated")
35 | <|> flag' MeasureCount (long "count" <> help "measure count")
36 | <|> pure MeasureTime
37 |
38 | -- | unification of the different measurements to being a list of doubles.
39 | measureDs :: MeasureType -> Clock -> Int -> Measure IO [[Double]]
40 | measureDs mt c n =
41 | case mt of
42 | MeasureTime -> fmap ((: []) . fromIntegral) <$> timesWith c n
43 | MeasureNTime -> pure . pure . fromIntegral <$> timesNWith c n
44 | MeasureSpace -> toMeasureN n (ssToList <$> space False)
45 | MeasureSpaceTime -> toMeasureN n ((\x y -> ssToList x <> [fromIntegral y]) <$> space False <*> stepTime)
46 | MeasureAllocation -> fmap ((: []) . fromIntegral) <$> toMeasureN n (allocation False)
47 | MeasureCount -> (: []) . fmap fromIntegral <$> toMeasureN n count
48 |
49 | -- | unification of measurement labels
50 | measureLabels :: MeasureType -> [Text]
51 | measureLabels mt =
52 | case mt of
53 | MeasureTime -> ["time"]
54 | MeasureNTime -> ["ntime"]
55 | MeasureSpace -> spaceLabels
56 | MeasureSpaceTime -> spaceLabels <> ["time"]
57 | MeasureAllocation -> ["allocation"]
58 | MeasureCount -> ["count"]
59 |
60 | -- | How to fold the list of performance measures.
61 | measureFinalStat :: MeasureType -> Int -> [Double] -> Double
62 | measureFinalStat mt n =
63 | case mt of
64 | MeasureTime -> average
65 | MeasureNTime -> (/ fromIntegral n) . sum
66 | MeasureSpace -> average
67 | MeasureSpaceTime -> average
68 | MeasureAllocation -> average
69 | MeasureCount -> sum
70 |
--------------------------------------------------------------------------------
/src/Perf/Report.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | {-# LANGUAGE OverloadedLabels #-}
3 | {-# LANGUAGE OverloadedStrings #-}
4 |
5 | -- | Reporting on performance, potentially checking versus a canned results.
6 | module Perf.Report
7 | ( Name,
8 | Header (..),
9 | parseHeader,
10 | CompareLevels (..),
11 | defaultCompareLevels,
12 | parseCompareLevels,
13 | ReportOptions (..),
14 | defaultReportOptions,
15 | parseReportOptions,
16 | PerfDumpOptions (..),
17 | defaultPerfDumpOptions,
18 | parsePerfDumpOptions,
19 | fromDump,
20 | report,
21 | reportMain,
22 | writeResult,
23 | readResult,
24 | CompareResult (..),
25 | compareNote,
26 | report2D,
27 | Golden (..),
28 | defaultGolden,
29 | parseGolden,
30 | replaceDefaultFilePath,
31 | parseClock,
32 | reportToConsole,
33 | )
34 | where
35 |
36 | import Chart
37 | import Control.Exception
38 | import Control.Monad
39 | import Data.Bool
40 | import Data.Foldable
41 | import Data.List (intercalate)
42 | import Data.List qualified as List
43 | import Data.Map.Merge.Strict
44 | import Data.Map.Strict qualified as Map
45 | import Data.Text (Text)
46 | import Data.Text qualified as Text
47 | import Data.Text.IO qualified as Text
48 | import GHC.Generics
49 | import Optics.Core
50 | import Options.Applicative as OA
51 | import Options.Applicative.Help.Pretty
52 | import Perf.Algos
53 | import Perf.BigO
54 | import Perf.Chart
55 | import Perf.Measure
56 | import Perf.Stats
57 | import Perf.Time (defaultClock)
58 | import Perf.Types
59 | import Prettyprinter.Render.Text qualified as PP
60 | import System.Clock
61 | import System.Exit
62 | import System.Mem
63 | import Test.Tasty
64 | import Test.Tasty.Bench
65 | import Text.PrettyPrint.Boxes qualified as B
66 | import Text.Printf hiding (parseFormat)
67 | import Text.Read
68 |
69 | -- | Benchmark name
70 | type Name = String
71 |
72 | -- | Whether to include header information.
73 | data Header = Header | NoHeader deriving (Eq, Show, Generic)
74 |
75 | -- | Command-line parser for 'Header'
76 | parseHeader :: Parser Header
77 | parseHeader =
78 | flag' Header (long "header" <> help "include headers in reporting")
79 | <|> flag' NoHeader (long "noheader" <> help "dont include headers in reporting")
80 | <|> pure Header
81 |
82 | -- | Options for production of a performance report.
83 | data ReportOptions = ReportOptions
84 | { -- | Number of times to run a benchmark.
85 | reportN :: Int,
86 | reportLength :: Int,
87 | reportClock :: Clock,
88 | reportStatDType :: StatDType,
89 | reportMeasureType :: MeasureType,
90 | reportGolden :: Golden,
91 | reportHeader :: Header,
92 | reportCompare :: CompareLevels,
93 | reportChart :: PerfChartOptions,
94 | reportDump :: PerfDumpOptions,
95 | reportGC :: Bool,
96 | reportOrder :: OrderOptions,
97 | reportTasty :: Bool
98 | }
99 | deriving (Eq, Show, Generic)
100 |
101 | -- | Default reporting options
102 | defaultReportOptions :: ReportOptions
103 | defaultReportOptions =
104 | ReportOptions
105 | 1000
106 | 1000
107 | defaultClock
108 | StatAverage
109 | MeasureTime
110 | defaultGolden
111 | Header
112 | defaultCompareLevels
113 | defaultPerfChartOptions
114 | defaultPerfDumpOptions
115 | False
116 | defaultOrderOptions
117 | False
118 |
119 | -- | Command-line parser for 'ReportOptions'
120 | parseReportOptions :: ReportOptions -> Parser ReportOptions
121 | parseReportOptions def =
122 | ReportOptions
123 | <$> option auto (value (view #reportN def) <> showDefaultWith show <> long "runs" <> short 'n' <> metavar "INT" <> help "number of runs to perform")
124 | <*> option auto (value (view #reportLength def) <> long "length" <> showDefaultWith show <> short 'l' <> metavar "INT" <> help "length-like variable eg, used to alter list length and compute order")
125 | <*> parseClock
126 | <*> parseStatD
127 | <*> parseMeasure
128 | <*> parseGolden
129 | <*> parseHeader
130 | <*> parseCompareLevels defaultCompareLevels
131 | <*> parsePerfChartOptions defaultPerfChartOptions
132 | <*> parsePerfDumpOptions defaultPerfDumpOptions
133 | <*> switch (long "gc" <> help "run the GC prior to measurement")
134 | <*> parseOrderOptions defaultOrderOptions
135 | <*> switch (long "tasty" <> help "run tasty-bench")
136 |
137 | -- | Parse command-line 'Clock' options.
138 | parseClock :: Parser Clock
139 | parseClock =
140 | flag' Monotonic (long "Monotonic" <> OA.style (annotate bold) <> help "use Monotonic clock")
141 | <|> flag' Realtime (long "Realtime" <> help "use Realtime clock")
142 | <|> flag' ProcessCPUTime (long "ProcessCPUTime" <> help "use ProcessCPUTime clock")
143 | <|> flag' ThreadCPUTime (long "ThreadCPUTime" <> help "use ThreadCPUTime clock")
144 | #ifdef mingw32_HOST_OS
145 | <|> pure ThreadCPUTime
146 | #else
147 | <|> flag' MonotonicRaw (long "MonotonicRaw" <> help "use MonotonicRaw clock")
148 | <|> pure MonotonicRaw
149 | #endif
150 |
151 | data PerfDumpOptions = PerfDumpOptions {dumpFilepath :: FilePath, doDump :: Bool} deriving (Eq, Show, Generic)
152 |
153 | defaultPerfDumpOptions :: PerfDumpOptions
154 | defaultPerfDumpOptions = PerfDumpOptions "other/perf.map" False
155 |
156 | -- | Parse charting options.
157 | parsePerfDumpOptions :: PerfDumpOptions -> Parser PerfDumpOptions
158 | parsePerfDumpOptions def =
159 | PerfDumpOptions
160 | <$> option str (value (view #dumpFilepath def) <> showDefaultWith show <> long "dumppath" <> metavar "FILE" <> help "dump file name")
161 | <*> switch (long "dump" <> help "dump raw performance data as a Map Text [[Double]]")
162 |
163 | fromDump :: PerfDumpOptions -> IO (Map.Map Text [[Double]])
164 | fromDump cfg = read <$> readFile (view #dumpFilepath cfg)
165 |
166 | -- | Run and report a benchmark with the specified reporting options.
167 | reportMain :: Example -> ReportOptions -> Name -> (Int -> PerfT IO [[Double]] a) -> IO a
168 | reportMain ex o name t = do
169 | let !n = reportN o
170 | let l = reportLength o
171 | let s = reportStatDType o
172 | let c = reportClock o
173 | let mt = reportMeasureType o
174 | let o' = replaceDefaultFilePath (intercalate "-" [name, show n, show mt, show s]) o
175 | when (reportGC o) performGC
176 | (a, m) <- runPerfT (measureDs mt c n) (t l)
177 | report o' (statify s m)
178 | (\cfg -> when (view #doChart cfg) (writeChartOptions (view #chartFilepath cfg) (perfCharts cfg (Just (measureLabels mt)) m))) (reportChart o)
179 | (\cfg -> when (view #doDump cfg) (writeFile (view #dumpFilepath cfg) (show m))) (reportDump o)
180 | when (view (#reportOrder % #doOrder) o) (reportBigO o t)
181 | when (view #reportTasty o) (reportTasty' ex o)
182 | pure a
183 |
184 | -- | Levels of geometric difference in compared performance that triggers reporting.
185 | data CompareLevels = CompareLevels {errorLevel :: Double, warningLevel :: Double, improvedLevel :: Double} deriving (Eq, Show)
186 |
187 | -- |
188 | -- >>> defaultCompareLevels
189 | -- CompareLevels {errorLevel = 0.2, warningLevel = 5.0e-2, improvedLevel = 5.0e-2}
190 | defaultCompareLevels :: CompareLevels
191 | defaultCompareLevels = CompareLevels 0.2 0.05 0.05
192 |
193 | -- | Command-line parser for 'CompareLevels'
194 | parseCompareLevels :: CompareLevels -> Parser CompareLevels
195 | parseCompareLevels c =
196 | CompareLevels
197 | <$> option auto (value (errorLevel c) <> showDefaultWith show <> long "error" <> metavar "DOUBLE" <> help "report an error if performance degrades by more than this")
198 | <*> option auto (value (warningLevel c) <> showDefaultWith show <> long "warning" <> metavar "DOUBLE" <> help "report a warning if performance degrades by more than this")
199 | <*> option auto (value (improvedLevel c) <> showDefaultWith show <> long "improved" <> metavar "DOUBLE" <> help "report if performance improves by more than this")
200 |
201 | -- | Write results to file
202 | writeResult :: FilePath -> Map.Map [Text] Double -> IO ()
203 | writeResult f m = writeFile f (show m)
204 |
205 | -- | Read results from a file.
206 | readResult :: FilePath -> IO (Either String (Map.Map [Text] Double))
207 | readResult f = do
208 | a :: Either SomeException String <- try (readFile f)
209 | pure $ either (Left . show) readEither a
210 |
211 | -- | Comparison data between two results.
212 | data CompareResult = CompareResult {oldResult :: Maybe Double, newResult :: Maybe Double, noteResult :: Text} deriving (Show, Eq)
213 |
214 | hasDegraded :: Map.Map a CompareResult -> Bool
215 | hasDegraded m = any (((== "degraded") . noteResult) . snd) (Map.toList m)
216 |
217 | -- | Compare two results and produce some notes given level triggers.
218 | compareNote :: (Ord a) => CompareLevels -> Map.Map a Double -> Map.Map a Double -> Map.Map a CompareResult
219 | compareNote cfg x y =
220 | merge
221 | (mapMissing (\_ x' -> CompareResult Nothing (Just x') "new result"))
222 | (mapMissing (\_ x' -> CompareResult (Just x') Nothing "old result not found"))
223 | ( zipWithMatched
224 | ( \_ x' y' ->
225 | CompareResult (Just x') (Just y') (note' x' y')
226 | )
227 | )
228 | x
229 | y
230 | where
231 | note' x' y'
232 | | y' / x' > 1 + errorLevel cfg = "degraded"
233 | | y' / x' > 1 + warningLevel cfg = "slightly-degraded"
234 | | y' / x' < (1 - improvedLevel cfg) = "improvement"
235 | | otherwise = ""
236 |
237 | -- | Console-style header information.
238 | formatHeader :: Map.Map [Text] a -> [Text] -> [Text]
239 | formatHeader m ts =
240 | [mconcat $ Text.pack . printf "%-16s" <$> ((("label" <>) . Text.pack . show <$> [1 .. labelCols]) <> ts), mempty]
241 | where
242 | labelCols = maximum $ length <$> Map.keys m
243 |
244 | -- | Format a comparison.
245 | formatCompare :: Header -> Map.Map [Text] CompareResult -> [Text]
246 | formatCompare h m =
247 | bool [] (formatHeader m ["old result", "new result", "change"]) (h == Header)
248 | <> Map.elems (Map.mapWithKey (\k a -> Text.pack . mconcat $ printf "%-16s" <$> (k <> compareReport a)) m)
249 | where
250 | compareReport (CompareResult x y n) =
251 | [ maybe mempty (expt (Just 3)) x,
252 | maybe mempty (expt (Just 3)) y,
253 | n
254 | ]
255 |
256 | -- | Format a result as lines of text.
257 | formatText :: Header -> Map.Map [Text] Text -> [Text]
258 | formatText h m =
259 | bool [] (formatHeader m ["results"]) (h == Header)
260 | <> Map.elems (Map.mapWithKey (\k a -> Text.pack . mconcat $ printf "%-16s" <$> (k <> [a])) m)
261 |
262 | -- | Format a result as a table.
263 | report2D :: Map.Map [Text] Double -> IO ()
264 | report2D m = putStrLn $ B.render $ B.hsep 1 B.left $ cs' : rs'
265 | where
266 | rs = List.nub ((List.!! 1) . fst <$> Map.toList m)
267 | cs = List.nub ((List.!! 0) . fst <$> Map.toList m)
268 | bx = B.text . Text.unpack
269 | xs = (\c -> (\r -> m Map.! [c, r]) <$> rs) <$> cs
270 | xs' = fmap (fmap (bx . expt (Just 3))) xs
271 | cs' = B.vcat B.left (bx <$> ("algo" : cs))
272 | rs' = B.vcat B.right <$> zipWith (:) (bx <$> rs) (List.transpose xs')
273 |
274 | reportToConsole :: [Text] -> IO ()
275 | reportToConsole xs = traverse_ Text.putStrLn xs
276 |
277 | -- | Golden file options.
278 | data Golden = Golden {golden :: FilePath, check :: CheckGolden, record :: RecordGolden} deriving (Generic, Eq, Show)
279 |
280 | -- | Whether to check against a golden file
281 | data CheckGolden = CheckGolden | NoCheckGolden deriving (Eq, Show, Generic)
282 |
283 | -- | Whether to overwrite a golden file
284 | data RecordGolden = RecordGolden | NoRecordGolden deriving (Eq, Show, Generic)
285 |
286 | -- | Default is Golden "other/bench.perf" CheckGolden NoRecordGolden
287 | defaultGolden :: Golden
288 | defaultGolden = Golden "other/bench.perf" CheckGolden NoRecordGolden
289 |
290 | -- | Replace the golden file name stem if it's the default.
291 | replaceGoldenDefault :: FilePath -> Golden -> Golden
292 | replaceGoldenDefault s g = bool g g {golden = s} (golden g == golden defaultGolden)
293 |
294 | defaultGoldenPath :: FilePath -> FilePath
295 | defaultGoldenPath fp = "other/" <> fp <> ".perf"
296 |
297 | -- | Replace the Golden file path with the suggested stem, but only if the user did not specify a specific file path at the command line.
298 | replaceDefaultFilePath :: FilePath -> ReportOptions -> ReportOptions
299 | replaceDefaultFilePath fp o =
300 | o {reportGolden = replaceGoldenDefault (defaultGoldenPath fp) (reportGolden o)}
301 |
302 | -- | Parse command-line golden file options.
303 | parseGolden :: Parser Golden
304 | parseGolden =
305 | Golden
306 | <$> option str (value (golden defaultGolden) <> showDefaultWith show <> long "golden" <> short 'g' <> metavar "FILE" <> help "golden file name")
307 | -- True is the default for 'check'.
308 | <*> (bool NoCheckGolden CheckGolden <$> flag True False (long "nocheck" <> help "do not check versus the golden file"))
309 | <*> (bool NoRecordGolden RecordGolden <$> switch (long "record" <> short 'r' <> help "record the result to the golden file"))
310 |
311 | reportConsoleNoCompare :: Header -> Map.Map [Text] Double -> IO ()
312 | reportConsoleNoCompare h m =
313 | reportToConsole (formatText h (expt (Just 3) <$> m))
314 |
315 | reportConsoleCompare :: Header -> Map.Map [Text] CompareResult -> IO ()
316 | reportConsoleCompare h m =
317 | reportToConsole (formatCompare h m)
318 |
319 | -- | Report results
320 | --
321 | -- If a goldenFile is checked, and performance has degraded, the function will exit with 'ExitFailure' so that 'cabal bench' and other types of processes can signal performance issues.
322 | report :: ReportOptions -> Map.Map [Text] [Double] -> IO ()
323 | report o m = do
324 | when
325 | ((== RecordGolden) $ record (reportGolden o))
326 | (writeResult (golden (reportGolden o)) m')
327 | case check (reportGolden o) of
328 | NoCheckGolden -> reportConsoleNoCompare (reportHeader o) m'
329 | CheckGolden -> do
330 | mOrig <- readResult (golden (reportGolden o))
331 | case mOrig of
332 | Left _ -> do
333 | reportConsoleNoCompare (reportHeader o) m'
334 | unless
335 | ((RecordGolden ==) $ record (reportGolden o))
336 | (putStrLn "No golden file found. To create one, run with '-r'")
337 | Right orig -> do
338 | let n = compareNote (reportCompare o) orig m'
339 | _ <- reportConsoleCompare (reportHeader o) n
340 | when (hasDegraded n) (exitWith $ ExitFailure 1)
341 | where
342 | m' = Map.fromList $ mconcat $ (\(ks, xss) -> zipWith (\x l -> (ks <> [l], x)) xss (measureLabels (reportMeasureType o))) <$> Map.toList m
343 |
344 | reportBigO :: ReportOptions -> (Int -> PerfT IO [[Double]] a) -> IO ()
345 | reportBigO o p = do
346 | m <- mapM (execPerfT (measureDs (view #reportMeasureType o) (view #reportClock o) (view #reportN o)) . p) ns
347 | putStrLn mempty
348 | reportToConsole $ PP.renderStrict . layoutPretty defaultLayoutOptions <$> os'' m
349 | pure ()
350 | where
351 | l = view #reportLength o
352 | ns = makeNs l (view (#reportOrder % #orderDivisor) o) (view (#reportOrder % #orderLow) o)
353 | ms m' = fmap (fmap (statD (view #reportStatDType o)) . List.transpose) <$> m'
354 | os m' = fmap (fmap (pretty . fromOrder . fst . estO (fromIntegral <$> ns)) . List.transpose) (Map.unionsWith (<>) (fmap (fmap (: [])) (ms m')))
355 | os' m' = mconcat $ (\(ks, xss) -> zipWith (\x l' -> ([ks] <> [l'], x)) xss (measureLabels (reportMeasureType o))) <$> Map.toList (os m')
356 | os'' m' = (\(k, v) -> (pretty . Text.intercalate ":") k <> " " <> v) <$> os' m'
357 |
358 | reportTasty' :: Example -> ReportOptions -> IO ()
359 | reportTasty' ex o = do
360 | t <- measureCpuTime (mkTimeout 1000000) (RelStDev 0.05) (tastyExample (examplePattern ex (view #reportLength o)))
361 | Text.putStrLn $ "tasty:time: " <> decimal (Just 3) (t * 1e9)
362 |
--------------------------------------------------------------------------------
/src/Perf/Space.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 |
3 | -- | Space performance measurement.
4 | module Perf.Space
5 | ( SpaceStats (..),
6 | ssToList,
7 | spaceLabels,
8 | space,
9 | allocation,
10 | Bytes (..),
11 | )
12 | where
13 |
14 | import Control.Monad
15 | import Data.Text (Text)
16 | import Data.Word
17 | import GHC.Stats
18 | import Perf.Types
19 | import System.Mem
20 | import Prelude hiding (cycle)
21 |
22 | -- | GHC allocation statistics.
23 | data SpaceStats = SpaceStats {allocated :: Word64, copied :: Word64, maxmem :: Word64, minorgcs :: Word32, majorgcs :: Word32} deriving (Read, Show, Eq)
24 |
25 | -- | Convert 'SpaceStats' to a list of numbers.
26 | ssToList :: (Num a) => SpaceStats -> [a]
27 | ssToList (SpaceStats x1 x2 x3 x4 x5) = [fromIntegral x1, fromIntegral x2, fromIntegral x3, fromIntegral x4, fromIntegral x5]
28 |
29 | instance Semigroup SpaceStats where
30 | (<>) = addSpace
31 |
32 | instance Monoid SpaceStats where
33 | mempty = SpaceStats 0 0 0 0 0
34 |
35 | instance Num SpaceStats where
36 | (+) = addSpace
37 | (-) = diffSpace
38 | (*) = error "SpaceStats times"
39 | abs = error "SpaceStats abs"
40 | signum = error "SpaceStats signum"
41 | fromInteger n = SpaceStats (fromIntegral n) (fromIntegral n) (fromIntegral n) (fromIntegral n) (fromIntegral n)
42 |
43 | diffSpace :: SpaceStats -> SpaceStats -> SpaceStats
44 | diffSpace (SpaceStats x1 x2 x3 x4 x5) (SpaceStats x1' x2' x3' x4' x5') = SpaceStats (x1' - x1) (x2' - x2) (x3' - x3) (x4' - x4) (x5' - x5)
45 |
46 | addSpace :: SpaceStats -> SpaceStats -> SpaceStats
47 | addSpace (SpaceStats x1 x2 x3 x4 x5) (SpaceStats x1' x2' x3' x4' x5') = SpaceStats (x1' + x1) (x2' + x2) (x3' + x3) (x4' + x4) (x5' + x5)
48 |
49 | getSpace :: RTSStats -> SpaceStats
50 | getSpace s = SpaceStats (allocated_bytes s) (copied_bytes s) (max_mem_in_use_bytes s) (gcs s) (major_gcs s)
51 |
52 | -- | Labels for 'SpaceStats'.
53 | spaceLabels :: [Text]
54 | spaceLabels = ["allocated", "copied", "maxmem", "minorgcs", "majorgcs"]
55 |
56 | -- | A allocation 'StepMeasure' with a flag to determine if 'performGC' should run prior to the measurement.
57 | space :: Bool -> StepMeasure IO SpaceStats
58 | space p = StepMeasure (start p) stop
59 | where
60 | start p' = do
61 | when p' performGC
62 | getSpace <$> getRTSStats
63 | stop s = do
64 | s' <- getSpace <$> getRTSStats
65 | pure $ diffSpace s s'
66 | {-# INLINEABLE space #-}
67 |
68 | -- | Number of bytes
69 | newtype Bytes = Bytes {unbytes :: Word64}
70 | deriving (Show, Read, Eq, Ord, Num, Real, Enum, Integral)
71 |
72 | instance Semigroup Bytes where
73 | (<>) = (+)
74 |
75 | instance Monoid Bytes where
76 | mempty = 0
77 |
78 | -- | Measure memory allocation, with a flag to run 'performGC' prior to the measurement.
79 | allocation :: Bool -> StepMeasure IO Bytes
80 | allocation p = StepMeasure (start p) stop
81 | where
82 | start p' = do
83 | when p' performGC
84 | Bytes . allocated_bytes <$> getRTSStats
85 | stop s = do
86 | s' <- Bytes . allocated_bytes <$> getRTSStats
87 | pure $ s' - s
88 | {-# INLINEABLE allocation #-}
89 |
--------------------------------------------------------------------------------
/src/Perf/Stats.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 |
3 | -- | Statistical choices for multiple performance measurements.
4 | module Perf.Stats
5 | ( average,
6 | median,
7 | tenth,
8 | averageI,
9 | StatDType (..),
10 | statD,
11 | statDs,
12 | parseStatD,
13 | -- stat reporting
14 | addStat,
15 | ordy,
16 | allStats,
17 | statify,
18 | )
19 | where
20 |
21 | import Control.Monad.State.Lazy
22 | import Data.List qualified as List
23 | import Data.Map.Strict qualified as Map
24 | import Data.Text (Text, pack)
25 | import NumHask.Space (quantile)
26 | import Options.Applicative
27 | import Options.Applicative.Help.Pretty
28 |
29 | -- | Compute the median
30 | median :: [Double] -> Double
31 | median = quantile 0.5
32 |
33 | -- | Compute the average
34 | average :: [Double] -> Double
35 | average xs = sum xs / (fromIntegral . length $ xs)
36 |
37 | -- | Compute the tenth percentile
38 | tenth :: [Double] -> Double
39 | tenth = quantile 0.1
40 |
41 | -- | Compute the average of an Integral
42 | averageI :: (Integral a) => [a] -> Double
43 | averageI xs = sum (fromIntegral <$> xs) / (fromIntegral . length $ xs)
44 |
45 | -- | Command-line options for type of statistic.
46 | data StatDType = StatAverage | StatMedian | StatBest deriving (Eq, Show)
47 |
48 | -- | Compute a statistic.
49 | statD :: StatDType -> [Double] -> Double
50 | statD StatBest = tenth
51 | statD StatMedian = median
52 | statD StatAverage = average
53 |
54 | -- | Compute a list of statistics.
55 | statDs :: StatDType -> [[Double]] -> [Double]
56 | statDs StatBest = fmap tenth . List.transpose
57 | statDs StatMedian = fmap median . List.transpose
58 | statDs StatAverage = fmap average . List.transpose
59 |
60 | -- | Parse command-line 'StatDType' options.
61 | parseStatD :: Parser StatDType
62 | parseStatD =
63 | flag' StatBest (long "best" <> style (annotate bold) <> help "report upper decile")
64 | <|> flag' StatMedian (long "median" <> help "report median")
65 | <|> flag' StatAverage (long "average" <> help "report average")
66 | <|> pure StatBest
67 |
68 | -- | Add a statistic to a State Map
69 | addStat :: (Ord k, Monad m) => k -> s -> StateT (Map.Map k s) m ()
70 | addStat label s = do
71 | modify (Map.insert label s)
72 |
73 | -- | Linguistic conversion of an ordinal
74 | ordy :: Int -> [Text]
75 | ordy f = zipWith (\x s -> (pack . show) x <> s) [1 .. f] (["st", "nd", "rd"] <> repeat "th")
76 |
77 | -- | Compute all stats.
78 | allStats :: Int -> Map.Map [Text] [[Double]] -> Map.Map [Text] [Double]
79 | allStats f m =
80 | Map.fromList $
81 | mconcat
82 | [ mconcat ((\(ks, xss) -> zipWith (\l xs -> (ks <> [l], xs)) (ordy f) xss) <$> mlist),
83 | (\(ks, xss) -> (ks <> ["best"], quantile 0.1 <$> List.transpose xss)) <$> mlist,
84 | (\(ks, xss) -> (ks <> ["median"], quantile 0.5 <$> List.transpose xss)) <$> mlist,
85 | (\(ks, xss) -> (ks <> ["average"], av <$> List.transpose xss)) <$> mlist
86 | ]
87 | where
88 | mlist = Map.toList m
89 | av xs = sum xs / (fromIntegral . length $ xs)
90 |
91 | -- | Convert a Map of performance result to a statistic.
92 | statify :: (Ord a) => StatDType -> Map.Map a [[Double]] -> Map.Map [a] [Double]
93 | statify s m = fmap (statD s) . List.transpose <$> Map.mapKeys (: []) m
94 |
--------------------------------------------------------------------------------
/src/Perf/Time.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE CPP #-}
2 | {-# LANGUAGE ViewPatterns #-}
3 |
4 | -- | Use of 'System.Clock' from the [clock](https://hackage.haskell.org/package/clock) library to measure time performance of a computation.
5 | module Perf.Time
6 | ( Nanos,
7 | defaultClock,
8 | toSecs,
9 | nanosWith,
10 | nanos,
11 | tick_,
12 | warmup,
13 | tickWith,
14 | tick,
15 | tickWHNF,
16 | tickLazy,
17 | tickForce,
18 | tickForceArgs,
19 | tickIO,
20 | tickIOWith,
21 | ticks,
22 | ticksIO,
23 | time,
24 | times,
25 | timesWith,
26 | timesN,
27 | timesNWith,
28 | stepTime,
29 | )
30 | where
31 |
32 | import Control.DeepSeq
33 | import Control.Monad (replicateM_)
34 | import Perf.Types
35 | import System.Clock
36 | import Prelude
37 |
38 | -- | A performance measure of number of nanoseconds.
39 | type Nanos = Integer
40 |
41 | -- | Convert 'Nanos' to seconds.
42 | toSecs :: Nanos -> Double
43 | toSecs ns = fromIntegral ns / 1e9
44 |
45 | -- | 'MonotonicRaw' is the default for macOS & linux, at around 42 nano time resolution, and a 'tick_' measurement of around 170 nanos. For Windows, 'ThreadCPUTime' has a similar time resolution at 42 nanos and a 'tick_' of around 500 nanos.
46 | defaultClock :: Clock
47 |
48 | #ifdef mingw32_HOST_OS
49 | defaultClock = ThreadCPUTime
50 | #else
51 | defaultClock = MonotonicRaw
52 | #endif
53 |
54 | -- | A single 'defaultClock' reading (note that the absolute value is not meaningful).
55 | nanos :: IO Nanos
56 | nanos = nanosWith defaultClock
57 |
58 | -- | A single reading of a specific 'Clock'.
59 | nanosWith :: Clock -> IO Nanos
60 | nanosWith c = toNanoSecs <$> getTime c
61 |
62 | -- | tick_ measures the number of nanos it takes to read the clock.
63 | tick_ :: IO Nanos
64 | tick_ = do
65 | t <- nanos
66 | t' <- nanos
67 | pure (t' - t)
68 |
69 | -- | Warm up the clock, to avoid a high first measurement. Without a warmup, one or more larger values can occur at the start of a measurement spree, and often are in the zone of an L2 miss.
70 | warmup :: Int -> IO ()
71 | warmup n = replicateM_ n tick_
72 |
73 | -- | tick from a specific 'Clock'
74 | tickWith :: Clock -> (a -> b) -> a -> IO (Nanos, b)
75 | tickWith c !f !a = do
76 | !t <- nanosWith c
77 | !a' <- pure $! f a
78 | !t' <- nanosWith c
79 | pure (t' - t, a')
80 | {-# INLINEABLE tickWith #-}
81 |
82 | -- | /tick f a/
83 | --
84 | -- - strictly evaluates f and a to WHNF
85 | -- - reads the clock
86 | -- - strictly evaluates f a to WHNF
87 | -- - reads the clock
88 | -- - returns (nanos, f a)
89 | tick :: (a -> b) -> a -> IO (Nanos, b)
90 | tick !f !a = do
91 | !t <- nanos
92 | !a' <- pure $! f a
93 | !t' <- nanos
94 | pure (t' - t, a')
95 | {-# INLINEABLE tick #-}
96 |
97 | -- | /tickWHNF f a/
98 | --
99 | -- - reads the clock
100 | -- - strictly evaluates f a to WHNF (this may also kick off thunk evaluation in f or a which will also be captured in the cycle count)
101 | -- - reads the clock
102 | -- - returns (nanos, f a)
103 | tickWHNF :: (a -> b) -> a -> IO (Nanos, b)
104 | tickWHNF f a = do
105 | !t <- nanos
106 | !a' <- pure $! f a
107 | !t' <- nanos
108 | pure (t' - t, a')
109 | {-# INLINEABLE tickWHNF #-}
110 |
111 | -- | /tickLazy f a/
112 | --
113 | -- - reads the clock
114 | -- - lazily evaluates f a
115 | -- - reads the clock
116 | -- - returns (nanos, f a)
117 | tickLazy :: (a -> b) -> a -> IO (Nanos, b)
118 | tickLazy f a = do
119 | t <- nanos
120 | let a' = f a
121 | t' <- nanos
122 | pure (t' - t, a')
123 | {-# INLINEABLE tickLazy #-}
124 |
125 | -- | /tickForce f a/
126 | --
127 | -- - deeply evaluates f and a,
128 | -- - reads the clock
129 | -- - deeply evaluates f a
130 | -- - reads the clock
131 | -- - returns (nanos, f a)
132 | tickForce :: (NFData a, NFData b) => (a -> b) -> a -> IO (Nanos, b)
133 | tickForce (force -> !f) (force -> !a) = do
134 | !t <- nanos
135 | !a' <- pure (force (f a))
136 | !t' <- nanos
137 | pure (t' - t, a')
138 | {-# INLINEABLE tickForce #-}
139 |
140 | -- | /tickForceArgs f a/
141 | --
142 | -- - deeply evaluates f and a,
143 | -- - reads the clock
144 | -- - strictly evaluates f a to WHNF
145 | -- - reads the clock
146 | -- - returns (nanos, f a)
147 | tickForceArgs :: (NFData a) => (a -> b) -> a -> IO (Nanos, b)
148 | tickForceArgs (force -> !f) (force -> !a) = do
149 | !t <- nanos
150 | !a' <- pure $! f a
151 | !t' <- nanos
152 | pure (t' - t, a')
153 | {-# INLINEABLE tickForceArgs #-}
154 |
155 | -- | measures an /IO a/
156 | tickIO :: IO a -> IO (Nanos, a)
157 | tickIO a = do
158 | !t <- nanos
159 | !a' <- a
160 | !t' <- nanos
161 | pure (t' - t, a')
162 | {-# INLINEABLE tickIO #-}
163 |
164 | -- | measures an /IO a/
165 | tickIOWith :: Clock -> IO a -> IO (Nanos, a)
166 | tickIOWith c a = do
167 | !t <- nanosWith c
168 | !a' <- a
169 | !t' <- nanosWith c
170 | pure (t' - t, a')
171 | {-# INLINEABLE tickIOWith #-}
172 |
173 | -- | n measurements of a tick
174 | --
175 | -- returns a list of Nanos and the last evaluated f a
176 | ticks :: Int -> (a -> b) -> a -> IO ([Nanos], b)
177 | ticks = multi tick
178 | {-# INLINEABLE ticks #-}
179 |
180 | -- | n measurements of a tickIO
181 | --
182 | -- returns an IO tuple; list of Nanos and the last evaluated f a
183 | ticksIO :: Int -> IO a -> IO ([Nanos], a)
184 | ticksIO = multiM tickIO
185 | {-# INLINEABLE ticksIO #-}
186 |
187 | -- | tick as a 'StepMeasure'
188 | stepTime :: StepMeasure IO Nanos
189 | stepTime = StepMeasure start stop
190 | where
191 | start = nanos
192 | stop r = fmap (\x -> x - r) nanos
193 | {-# INLINEABLE stepTime #-}
194 |
195 | -- | tick as a 'Measure'
196 | time :: Measure IO Nanos
197 | time = Measure tick
198 | {-# INLINEABLE time #-}
199 |
200 | -- | tick as a multi-Measure
201 | times :: Int -> Measure IO [Nanos]
202 | times n = Measure (ticks n)
203 | {-# INLINEABLE times #-}
204 |
205 | -- | tickWith as a multi-Measure
206 | timesWith :: Clock -> Int -> Measure IO [Nanos]
207 | timesWith c n = repeated n (Measure (tickWith c))
208 | {-# INLINEABLE timesWith #-}
209 |
210 | -- | tickWith for n repeated applications
211 | timesN :: Int -> Measure IO Nanos
212 | timesN n = Measure (tickNWith defaultClock n)
213 | {-# INLINEABLE timesN #-}
214 |
215 | -- | tickWith for n repeated applications
216 | timesNWith :: Clock -> Int -> Measure IO Nanos
217 | timesNWith c n = Measure (tickNWith c n)
218 | {-# INLINEABLE timesNWith #-}
219 |
220 | tickNWith :: Clock -> Int -> (a -> b) -> a -> IO (Nanos, b)
221 | tickNWith c n !f !a = do
222 | !t <- nanosWith c
223 | !a' <- multiN id f a n
224 | !t' <- nanosWith c
225 | pure (floor @Double (fromIntegral (t' - t) / fromIntegral n), a')
226 | {-# INLINEABLE tickNWith #-}
227 |
--------------------------------------------------------------------------------
/src/Perf/Types.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 | {-# OPTIONS_GHC -Wno-x-partial #-}
3 |
4 | -- | Abstract types of performance measurement.
5 | module Perf.Types
6 | ( -- * Measure
7 | Measure (..),
8 | repeated,
9 | StepMeasure (..),
10 | toMeasure,
11 | toMeasureN,
12 | step,
13 | stepM,
14 | multi,
15 | multiM,
16 | multiN,
17 |
18 | -- * function application
19 | fap,
20 | afap,
21 | ffap,
22 | fan,
23 | fam,
24 | (|$|),
25 | ($|),
26 | (|+|),
27 |
28 | -- * PerfT monad
29 | PerfT (..),
30 | Perf,
31 | runPerfT,
32 | evalPerfT,
33 | execPerfT,
34 | outer,
35 | slop,
36 | slops,
37 | )
38 | where
39 |
40 | import Control.DeepSeq
41 | import Control.Monad
42 | import Control.Monad.State.Lazy
43 | import Data.Bifunctor
44 | import Data.Functor.Identity
45 | import Data.Map.Strict qualified as Map
46 | import Data.Text (Text)
47 | import GHC.Exts
48 | import GHC.IO hiding (liftIO)
49 | import Prelude
50 |
51 | -- | Abstraction of a performance measurement within a monadic context.
52 | --
53 | -- - measure applies a function to a value, returning a tuple of the performance measure, and the computation result.
54 | -- - measureM evaluates a monadic value and returns a performance-result tuple.
55 | newtype Measure m t = Measure
56 | { measure :: forall a b. (a -> b) -> a -> m (t, b)
57 | }
58 |
59 | instance (Functor m) => Functor (Measure m) where
60 | fmap f (Measure m) =
61 | Measure
62 | (\f' a' -> fmap (first f) (m f' a'))
63 |
64 | -- | An inefficient application that runs the inner action twice.
65 | instance (Applicative m) => Applicative (Measure m) where
66 | pure t = Measure (\f a -> pure (t, f a))
67 | (Measure mf) <*> (Measure mt) =
68 | Measure
69 | (\f a -> (\(nf', fa') (t', _) -> (nf' t', fa')) <$> mf f a <*> mt f a)
70 |
71 | -- | Convert a Measure into a multi measure.
72 | repeated :: (Applicative m) => Int -> Measure m t -> Measure m [t]
73 | repeated n (Measure p) =
74 | Measure
75 | (\f a -> fmap (\xs -> (fmap fst xs, snd (head xs))) (replicateM n (p f a)))
76 | {-# INLINEABLE repeated #-}
77 |
78 | -- | Abstraction of a performance measurement with a pre and a post step wrapping the computation.
79 | data StepMeasure m t = forall i. StepMeasure {pre :: m i, post :: i -> m t}
80 |
81 | instance (Functor m) => Functor (StepMeasure m) where
82 | fmap f (StepMeasure start stop) = StepMeasure start (fmap f . stop)
83 |
84 | instance (Applicative m) => Applicative (StepMeasure m) where
85 | pure t = StepMeasure (pure ()) (const (pure t))
86 | (<*>) (StepMeasure fstart fstop) (StepMeasure start stop) =
87 | StepMeasure ((,) <$> fstart <*> start) (\(fi, i) -> fstop fi <*> stop i)
88 |
89 | -- | Convert a StepMeasure into a Measure
90 | toMeasure :: (Monad m) => StepMeasure m t -> Measure m t
91 | toMeasure (StepMeasure pre' post') = Measure (step pre' post')
92 | {-# INLINEABLE toMeasure #-}
93 |
94 | -- | Convert a StepMeasure into a Measure running the computation multiple times.
95 | toMeasureN :: (Monad m) => Int -> StepMeasure m t -> Measure m [t]
96 | toMeasureN n (StepMeasure pre' post') = Measure (multi (step pre' post') n)
97 | {-# INLINEABLE toMeasureN #-}
98 |
99 | -- | A single step measurement.
100 | step :: (Monad m) => m i -> (i -> m t) -> (a -> b) -> a -> m (t, b)
101 | step pre' post' !f !a = do
102 | !p <- pre'
103 | !b <- pure $! f a
104 | !t <- post' p
105 | pure (t, b)
106 | {-# INLINEABLE step #-}
107 |
108 | -- | A single step measurement.
109 | stepM :: (Monad m) => m i -> (i -> m t) -> m a -> m (t, a)
110 | stepM pre' post' a = do
111 | !p <- pre'
112 | !ma <- a
113 | !t <- post' p
114 | pure (t, ma)
115 | {-# INLINEABLE stepM #-}
116 |
117 | multi1 :: (Monad m) => ((a -> b) -> a -> m (t, b)) -> Int -> (a -> b) -> a -> m [(t, b)]
118 | multi1 action n !f !a = sequence $ replicate n $! action f a
119 | {-# INLINEABLE multi1 #-}
120 |
121 | -- | Return one result but multiple measurements.
122 | multi :: (Monad m) => ((a -> b) -> a -> m (t, b)) -> Int -> (a -> b) -> a -> m ([t], b)
123 | multi action n !f !a = do
124 | xs <- multi1 action n f a
125 | pure (fmap fst xs, snd (head xs))
126 | {-# INLINEABLE multi #-}
127 |
128 | -- | Multiple measurements
129 | multiM :: (Monad m) => (m a -> m (t, a)) -> Int -> m a -> m ([t], a)
130 | multiM action n a =
131 | fmap (\xs -> (fmap fst xs, head $! fmap snd xs)) (replicateM n (action a))
132 | {-# INLINEABLE multiM #-}
133 |
134 | multiN :: (b -> t) -> (a -> b) -> a -> Int -> IO t
135 | multiN frc = multiNLoop SPEC
136 | where
137 | multiNLoop !_ f x n
138 | | n == 1 = evaluate (frc (f x))
139 | | otherwise = do
140 | _ <- evaluate (frc (f x))
141 | multiNLoop SPEC f x (n - 1)
142 | {-# INLINE multiN #-}
143 |
144 | -- | Performance measurement transformer storing a 'Measure' and a map of named results.
145 | newtype PerfT m t a = PerfT
146 | { measurePerf :: StateT (Measure m t, Map.Map Text t) m a
147 | }
148 | deriving (Functor, Applicative, Monad)
149 |
150 | -- | The transformer over Identity
151 | type Perf t a = PerfT Identity t a
152 |
153 | instance (MonadIO m) => MonadIO (PerfT m t) where
154 | liftIO = PerfT . liftIO
155 |
156 | -- | Lift an application to a PerfT m, providing a label and a 'Measure'.
157 | --
158 | -- Measurements with the same label will be mappended
159 | fap :: (MonadIO m, Semigroup t) => Text -> (a -> b) -> a -> PerfT m t b
160 | fap label f a =
161 | PerfT $ do
162 | m <- fst <$> get
163 | (t, fa) <- lift $ measure m f a
164 | modify $ second (Map.insertWith (<>) label t)
165 | return fa
166 | {-# INLINEABLE fap #-}
167 |
168 | -- | Lift an application to a PerfT m, forcing the argument.
169 | afap :: (NFData a, MonadIO m, Semigroup t) => Text -> (a -> b) -> a -> PerfT m t b
170 | afap label f a = fap label f (force a)
171 | {-# INLINEABLE afap #-}
172 |
173 | -- | Lift an application to a PerfT m, forcing argument and result.
174 | ffap :: (NFData a, NFData b, MonadIO m, Semigroup t) => Text -> (a -> b) -> a -> PerfT m t b
175 | ffap label f a = fap label (force . f) (force a)
176 | {-# INLINEABLE ffap #-}
177 |
178 | -- | Lift a number to a PerfT m, providing a label, function, and input.
179 | --
180 | -- Measurements with the same label will be added
181 | fan :: (MonadIO m, Num t) => Text -> (a -> b) -> a -> PerfT m t b
182 | fan label f a =
183 | PerfT $ do
184 | m <- fst <$> get
185 | (t, fa) <- lift $ measure m f a
186 | modify $ second (Map.insertWith (+) label t)
187 | return fa
188 | {-# INLINEABLE fan #-}
189 |
190 | -- | Lift a monadic value to a PerfT m, providing a label and a 'Measure'.
191 | --
192 | -- Measurements with the same label will be added
193 | fam :: (MonadIO m, Semigroup t) => Text -> m a -> PerfT m t a
194 | fam label a =
195 | PerfT $ do
196 | m <- fst <$> get
197 | (t, !ma) <- lift $ measure m (const a) ()
198 | modify $ second (Map.insertWith (<>) label t)
199 | lift ma
200 | {-# INLINEABLE fam #-}
201 |
202 | -- | lift a pure, unnamed function application to PerfT
203 | (|$|) :: (Semigroup t) => (a -> b) -> a -> PerfT IO t b
204 | (|$|) f a = fap "" f a
205 | {-# INLINEABLE (|$|) #-}
206 |
207 | -- | lift a monadic, unnamed function application to PerfT
208 | ($|) :: (Semigroup t) => IO a -> PerfT IO t a
209 | ($|) a = fam "" a
210 | {-# INLINEABLE ($|) #-}
211 |
212 | -- | lift an unnamed numeric measure to PerfT
213 | (|+|) :: (Num t) => (a -> b) -> a -> PerfT IO t b
214 | (|+|) f a = fan "" f a
215 | {-# INLINEABLE (|+|) #-}
216 |
217 | -- | Run the performance measure, returning (computational result, measurement).
218 | runPerfT :: (Functor m) => Measure m t -> PerfT m t a -> m (a, Map.Map Text t)
219 | runPerfT m p = fmap (second snd) <$> flip runStateT (m, Map.empty) $ measurePerf p
220 | {-# INLINEABLE runPerfT #-}
221 |
222 | -- | Consume the PerfT layer and return the original monadic result.
223 | -- Fingers crossed, PerfT structure should be completely compiled away.
224 | evalPerfT :: (Monad m) => Measure m t -> PerfT m t a -> m a
225 | evalPerfT m p = fmap fst <$> flip runStateT (m, Map.empty) $ measurePerf p
226 | {-# INLINEABLE evalPerfT #-}
227 |
228 | -- | Consume a PerfT layer and return the measurement.
229 | execPerfT :: (Monad m) => Measure m t -> PerfT m t a -> m (Map.Map Text t)
230 | execPerfT m p = fmap snd <$> flip execStateT (m, Map.empty) $ measurePerf p
231 | {-# INLINEABLE execPerfT #-}
232 |
233 | -- | run a PerfT and also calculate performance over the entire computation
234 | outer :: (MonadIO m, Semigroup s) => Text -> Measure m s -> Measure m t -> PerfT m t a -> m (a, (Map.Map Text s, Map.Map Text t))
235 | outer label outerm meas p =
236 | (\((a, m), m') -> (a, (m', m)))
237 | <$> runPerfT
238 | outerm
239 | (fam label (runPerfT meas p))
240 |
241 | -- | run a PerfT and calculate excess performance over the entire computation
242 | slop :: (MonadIO m, Num t, Semigroup t) => Text -> Measure m t -> PerfT m t a -> m (a, Map.Map Text t)
243 | slop l meas p =
244 | (\((a, m), m') -> (a, m <> Map.insert "slop" (m' Map.! l - Map.foldl' (+) 0 m) m'))
245 | <$> runPerfT
246 | meas
247 | (fam l (runPerfT meas p))
248 |
249 | -- | run a multi PerfT and calculate excess performance over the entire computation
250 | slops :: (MonadIO m, Num t, Semigroup t) => Int -> Measure m t -> PerfT m [t] a -> m (a, (Map.Map Text t, Map.Map Text [t]))
251 | slops n meas p =
252 | (\((a, ms), m') -> (a, (Map.insert "slop" (m' Map.! "outer" - Map.foldl' (+) 0 (fmap sum ms)) m', ms)))
253 | <$> runPerfT
254 | meas
255 | (fam "outer" (runPerfT (repeated n meas) p))
256 |
--------------------------------------------------------------------------------