├── Setup.hs ├── ChangeLog.md ├── .gitignore ├── README.md ├── stack.yaml.lock ├── src ├── Types.hs ├── Style.hs ├── CsvParse.hs ├── Plot.hs └── CriterionCompare.hs ├── LICENSE ├── criterion-compare.cabal └── stack.yaml /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 4 | ## 0.1 Initial hackage release 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.hi 2 | *.o 3 | dist-newstyle/ 4 | .stack-work/* 5 | .ghc.environment* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Criterion-compare 2 | 3 | A tool for quick comparisons between different criterion runs via their csv files. 4 | 5 | Use like this: 6 | ``` 7 | $ criterion-compare 8 | ``` 9 | 10 | Which will generate the files `comparison.html` and `comparison.svg`. 11 | 12 | ### Attributions 13 | 14 | This tool was initially written by Ben Gamari (bgamari), extended by Brandon Simmons (jberryman) and most recently 15 | updated by Andreas Klebinger (AndreasPK). -------------------------------------------------------------------------------- /stack.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | size: 500539 10 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/13/29.yaml 11 | sha256: 006398c5e92d1d64737b7e98ae4d63987c36808814504d1451f56ebd98093f75 12 | original: lts-13.29 13 | -------------------------------------------------------------------------------- /src/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 2 | 3 | module Types where 4 | 5 | import Data.Csv 6 | 7 | -- | The name of a set of benchmark results from a single run. 8 | newtype RunName = RunName {getRunName :: String} 9 | deriving (Eq, Ord, Show, FromField) 10 | 11 | -- | The name of a benchmark 12 | newtype BenchName = BenchName {getBenchName :: String} 13 | deriving (Eq, Ord, Show, FromField) 14 | 15 | data Stats = Stats { statsMean, statsMeanLB, statsMeanUB :: Double 16 | , statsStd, statsStdLB, statsStdUB :: Double 17 | } 18 | deriving (Show) 19 | -------------------------------------------------------------------------------- /src/Style.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Style (style) where 3 | 4 | import Control.Monad (forM_) 5 | import qualified Data.Text as T 6 | import qualified Data.Text.Lazy as TL 7 | import Clay hiding (round, style, min) 8 | 9 | style :: T.Text 10 | style = TL.toStrict $ render style' 11 | 12 | style' :: Css 13 | style' = do 14 | ".stddev" ? do 15 | fontSizeCustom xSmall 16 | "td" ? do 17 | paddingLeft (em 1) 18 | paddingRight (em 1) 19 | 20 | forM_ [0..10] $ \n -> do 21 | star # byClass (T.pack $ "stat-p"++show (round n)) ? do backgroundColor $ shade 0 n 22 | star # byClass (T.pack $ "stat-n"++show (round n)) ? do backgroundColor $ shade 128 n 23 | 24 | shade h n = hsl h 60 (100 - (5*n)) 25 | -------------------------------------------------------------------------------- /src/CsvParse.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE TupleSections #-} 4 | 5 | module CsvParse (readResults) where 6 | 7 | import qualified Data.Map.Strict as M 8 | import Control.Applicative 9 | import Data.Csv 10 | import qualified Data.Vector as V 11 | import qualified Data.ByteString.Lazy.Char8 as BSL 12 | 13 | import Types 14 | 15 | data BenchResult = BenchResult { benchName :: BenchName 16 | , benchStats :: Stats 17 | } 18 | 19 | instance FromRecord BenchResult where 20 | parseRecord v 21 | | V.length v == 7 = 22 | bench <$> v .! 0 23 | <*> v .! 1 24 | <*> v .! 2 25 | <*> v .! 3 26 | <*> v .! 4 27 | <*> v .! 5 28 | <*> v .! 6 29 | | otherwise = empty 30 | where bench a b c d e f g = BenchResult a (Stats b c d e f g) 31 | 32 | readResults :: FilePath -> IO [(BenchName, Stats)] 33 | readResults fname = do 34 | mxs <- parseResults <$> BSL.readFile fname 35 | case mxs of 36 | Left err -> fail err 37 | Right xs -> return $ map (\(BenchResult a b) -> (a, b)) $ V.toList xs 38 | 39 | parseResults :: BSL.ByteString -> Either String (V.Vector BenchResult) 40 | parseResults = 41 | decode NoHeader 42 | . BSL.unlines 43 | . filter (not . ("Name,Mean" `BSL.isPrefixOf`)) 44 | . BSL.lines 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Ben Gamari 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Ben Gamari nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /criterion-compare.cabal: -------------------------------------------------------------------------------- 1 | name: criterion-compare 2 | version: 0.1.0.0 3 | x-revision: 1 4 | synopsis: A simple tool for visualising differences in Criterion benchmark results 5 | description: Create a html file containing a quick comparison of results for the input files. 6 | homepage: http://github.com/AndreasPK/criterion-compare 7 | license: BSD3 8 | license-file: LICENSE 9 | author: Ben Gamari 10 | maintainer: ben@well-typed.com, klebinger.andreas@gmx.at 11 | copyright: (c) 2016 Ben Gamari 12 | category: Development 13 | build-type: Simple 14 | extra-source-files: ChangeLog.md, README.md 15 | cabal-version: >=1.10 16 | 17 | executable criterion-compare 18 | main-is: CriterionCompare.hs 19 | other-modules: CsvParse, Plot, Style, Types 20 | hs-source-dirs: src 21 | other-extensions: GeneralizedNewtypeDeriving, RecordWildCards, OverloadedStrings, FlexibleContexts, TupleSections 22 | default-language: Haskell2010 23 | build-depends: base >=4.9 && < 4.22, 24 | cassava >=0.5 && <0.6, 25 | containers >=0.6 && <0.9, 26 | Chart >=1.6 && < 1.10, 27 | data-default >=0.5 && < 0.8.1, 28 | lens >=4.13 && < 5.4, 29 | colour >=2.3 && < 2.4, 30 | text >=1.2 && < 2.2, 31 | filepath >=1.4 && <1.6, 32 | lucid >=2.9 && < 2.12, 33 | Chart-diagrams >=1.6 && < 1.10, 34 | optparse-applicative >=0.12 && < 0.19, 35 | clay >=0.10 && < 0.16, 36 | vector >=0.11 && < 0.14, 37 | bytestring >=0.10 && < 0.13 38 | 39 | source-repository head 40 | type: git 41 | location: https://github.com/bgamari/criterion-compare.git 42 | -------------------------------------------------------------------------------- /src/Plot.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards #-} 2 | 3 | module Plot where 4 | 5 | import qualified Data.Map as M 6 | 7 | import Graphics.Rendering.Chart 8 | import Data.Default 9 | import Data.List 10 | import Control.Lens 11 | import Types 12 | import Data.Colour 13 | import qualified Data.Colour.Names as N 14 | 15 | plot :: M.Map RunName (M.Map BenchName Stats) -> Layout PlotIndex Double 16 | plot results = layout 17 | where 18 | idxs' :: [(PlotIndex, Maybe (BenchName, RunName))] 19 | idxs' = addIndexes 20 | $ intercalate (replicate 3 Nothing) 21 | $ transpose 22 | [ [ Just (benchName, runName) 23 | | (benchName, _stats) <- M.assocs runs 24 | ] 25 | | (runName, runs) <- M.assocs results 26 | ] 27 | 28 | idxs :: M.Map (RunName, BenchName) PlotIndex 29 | idxs = M.unions [ M.singleton (runName, benchName) idx 30 | | (idx, Just (benchName, runName)) <- idxs' 31 | ] 32 | 33 | plotRun :: AlphaColour Double 34 | -> (RunName, M.Map BenchName Stats) 35 | -> PlotErrBars PlotIndex Double 36 | plotRun color (runName, benchmarks) = 37 | plot_errbars_title .~ getRunName runName 38 | $ plot_errbars_line_style . line_color .~ color 39 | $ plot_errbars_values .~ [ ErrPoint ex ey 40 | | (benchName, Stats{..}) <- M.assocs benchmarks 41 | , let Just idx = M.lookup (runName, benchName) idxs 42 | , let ex = ErrValue idx idx idx 43 | , let ey = ErrValue statsMeanLB statsMean statsMeanUB 44 | ] 45 | $ def 46 | 47 | colors :: [AlphaColour Double] 48 | colors = map opaque $ cycle [N.red, N.blue, N.purple, N.yellow, N.brown, N.green, N.cyan] 49 | 50 | plots :: [Plot PlotIndex Double] 51 | plots = map toPlot $ zipWith plotRun colors (M.assocs results) 52 | 53 | labels = map (\(BenchName name, idx) -> (idx, name)) $ M.assocs $ M.mapKeys snd idxs 54 | 55 | layout :: Layout PlotIndex Double 56 | layout = layout_title .~ "Criterion comparison" 57 | $ layout_plots .~ plots 58 | $ layout_x_axis . laxis_override .~ (axis_labels .~ [labels]) 59 | $ def 60 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # 15 | # The location of a snapshot can be provided as a file or url. Stack assumes 16 | # a snapshot provided as a file might change, whereas a url resource does not. 17 | # 18 | # resolver: ./custom-snapshot.yaml 19 | # resolver: https://example.com/snapshots/2018-01-01.yaml 20 | resolver: lts-13.29 21 | 22 | # User packages to be built. 23 | # Various formats can be used as shown in the example below. 24 | # 25 | # packages: 26 | # - some-directory 27 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 28 | # subdirs: 29 | # - auto-update 30 | # - wai 31 | packages: 32 | - . 33 | # Dependency packages to be pulled from upstream that are not in the resolver. 34 | # These entries can reference officially published versions as well as 35 | # forks / in-progress versions pinned to a git hash. For example: 36 | # 37 | # extra-deps: 38 | # - acme-missiles-0.3 39 | # - git: https://github.com/commercialhaskell/stack.git 40 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 41 | # 42 | # extra-deps: [] 43 | 44 | # Override default flag values for local packages and extra-deps 45 | # flags: {} 46 | 47 | # Extra package databases containing global packages 48 | # extra-package-dbs: [] 49 | 50 | # Control whether we use the GHC we find on the path 51 | # system-ghc: true 52 | # 53 | # Require a specific version of stack, using version ranges 54 | # require-stack-version: -any # Default 55 | # require-stack-version: ">=2.1" 56 | # 57 | # Override the architecture used by stack, especially useful on Windows 58 | # arch: i386 59 | # arch: x86_64 60 | # 61 | # Extra directories used by stack for building 62 | # extra-include-dirs: [/path/to/dir] 63 | # extra-lib-dirs: [/path/to/dir] 64 | # 65 | # Allow a newer minor version of GHC than the snapshot specifies 66 | # compiler-check: newer-minor 67 | -------------------------------------------------------------------------------- /src/CriterionCompare.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE FlexibleContexts #-} 3 | {-# LANGUAGE RecordWildCards #-} 4 | {-# LANGUAGE TupleSections #-} 5 | 6 | import qualified Data.Map.Strict as M 7 | import qualified Data.Text as T 8 | import Control.Applicative 9 | import Control.Monad (forM_) 10 | import Data.Maybe (fromMaybe) 11 | import System.FilePath (takeFileName, dropExtension, (<.>)) 12 | import Numeric 13 | import Lucid 14 | import Lucid.Base 15 | import Graphics.Rendering.Chart (toRenderable) 16 | import Graphics.Rendering.Chart.Backend.Diagrams (renderableToFile) 17 | import Data.Default 18 | import Options.Applicative hiding (style) 19 | 20 | import Style 21 | import Plot 22 | import CsvParse 23 | import Types 24 | 25 | invert :: (Ord a, Ord b) => M.Map a (M.Map b v) -> M.Map b (M.Map a v) 26 | invert runs = 27 | M.unionsWith M.union [ M.singleton bench $ M.singleton run stats 28 | | (run, results) <- M.assocs runs 29 | , (bench, stats) <- M.assocs results ] 30 | 31 | toTable :: [(BenchName,Int)] -> M.Map BenchName (M.Map RunName (Either (Html ()) (Attribute,Double))) -> Html () 32 | toTable orderOrig results = 33 | table_ $ do 34 | thead_ $ 35 | tr_ $ do 36 | th_ "Benchmark" 37 | forM_ (M.keys $ head $ M.elems results) $ \(RunName runName) -> 38 | th_ $ toHtml runName 39 | 40 | -- for list.js 41 | tbody_ [class_ "list"] $ 42 | forM_ (M.assocs results) $ \(bn@(BenchName benchName), runs) -> tr_ $ do 43 | let o = fromMaybe maxBound $ lookup bn orderOrig 44 | td_ [class_ "benchName"] $ toHtml benchName 45 | forM_ (M.assocs runs) $ \(RunName runName, ec) -> do 46 | let content = either id (\(cls,n) -> span_ [cls] $ toHtml $ showGFloat (Just 1) n "%") ec 47 | td_ [class_ $ T.pack runName] content 48 | let significance = sum $ map (either (const 0) (abs . snd)) $ M.elems runs 49 | -- hidden tds to let us sort by original benchmark order: 50 | td_ [class_ "orderOrig", style_ "display:none;"] (toHtml $ show o) 51 | td_ [class_ "significance", style_ "display:none;"] 52 | -- subtract as stupid hack to get reverse order on first click 53 | (toHtml $ show (99999 - significance)) 54 | -- TODO "largest regression", "largest improvement", etc. Then we 55 | -- must add those names to the JS snippet at the bottom 56 | 57 | tabulateAbsolute :: M.Map BenchName (M.Map RunName Stats) 58 | -> M.Map BenchName (M.Map RunName (Html ())) 59 | tabulateAbsolute = fmap $ fmap cell 60 | where 61 | cell stats = 62 | let mean = showGFloat (Just 2) (statsMean stats) "" 63 | std = showString " ± " $ showGFloat (Just 2) (statsStd stats) "" 64 | in td_ $ do 65 | toHtml mean 66 | span_ [class_ "stddev"] $ toHtml std 67 | 68 | tabulateRelative :: RunName -> M.Map BenchName (M.Map RunName Stats) 69 | -> M.Map BenchName (M.Map RunName (Either (Html ()) (Attribute,Double))) 70 | tabulateRelative refRun results = 71 | M.mapWithKey (\bench -> M.mapWithKey (cell bench)) results 72 | where 73 | cell bench run stats 74 | | run == refRun 75 | = showAbs stats 76 | | Just refStats <- M.lookup bench results >>= M.lookup refRun 77 | = let rel = (statsMean stats - statsMean refStats) / statsMean refStats 78 | cls = T.pack $ "stat-"++sign++show (abs n) 79 | where sign = if rel > 0 then "p" else "n" 80 | n = min 10 $ max (-10) $ round $ rel / 0.025 :: Int 81 | -- in span_ [class_ cls] $ toHtml $ showGFloat (Just 1) (100*rel) "%" 82 | in Right (class_ cls, 100*rel) 83 | | otherwise 84 | = showAbs stats 85 | 86 | showAbs stats = Left $ toHtml $ showGFloat (Just 2) (statsMean stats) "" 87 | 88 | data Options = Options { optRunNames :: [RunName] 89 | , optOutput :: FilePath 90 | , optRunPaths :: [FilePath] 91 | } 92 | 93 | options :: Options.Applicative.Parser Options 94 | options = 95 | Options <$> many (option (RunName <$> str) $ short 'l' <> long "label" <> help "label") 96 | <*> option str (short 'o' <> long "output" <> metavar "FILE" <> help "output file name" <> value "comparison") 97 | <*> many (argument str $ metavar "FILE" <> help "CSV file name") 98 | 99 | addGeoMean :: [(RunName, M.Map BenchName Stats)] -> [(RunName, M.Map BenchName Stats)] 100 | addGeoMean input 101 | -- Inputs have different benchmarks - geoMean makes no sense then. 102 | | not (all (== head benchNames) benchNames) = input 103 | | otherwise = map (\(run,stats) -> (run,mean_stat stats)) input 104 | 105 | where 106 | mean_stat stats = M.insert (BenchName "GeoMean (calculated)") 107 | (Stats { statsMean = (gm stats/head_mean) 108 | , statsMeanLB = 0, statsMeanUB = 0 109 | , statsStd = 0, statsStdLB = 0 110 | , statsStdUB = 0 }) 111 | stats 112 | head_mean = gm . snd . head $ input 113 | 114 | product = (M.foldl' (\total s -> statsMean s * total) 1.0) 115 | entries m = fromIntegral $ M.size m :: Double 116 | gm m = (product m) ** (1.0/entries m) :: Double 117 | 118 | benchNames = map (M.keys . snd) input :: [[BenchName]] 119 | 120 | 121 | 122 | 123 | 124 | 125 | main :: IO () 126 | main = do 127 | Options{..} <- execParser $ info (helper <*> options) mempty 128 | results <- sequence [ (name',) . M.fromList <$> readResults path 129 | | (name, path) <- zip (map Just optRunNames ++ repeat Nothing) optRunPaths 130 | , let name' = fromMaybe (RunName $ dropExtension $ takeFileName path) name 131 | ] :: IO [(RunName, M.Map BenchName Stats)] 132 | 133 | let resultWithMean = addGeoMean results 134 | 135 | orderOrig <- zipWith (\i (nm,_)-> (nm,i)) [0..] <$> (readResults $ head optRunPaths) :: IO [(BenchName, Int)] 136 | renderableToFile def (optOutput <.> "svg") $ toRenderable $ plot $ M.fromList resultWithMean 137 | --let table = tabulateAbsolute $ invert $ M.unions results 138 | let table = tabulateRelative (fst $ head resultWithMean) $ invert $ M.fromList resultWithMean 139 | 140 | renderToFile (optOutput <.> "html") $ doctypehtml_ $ do 141 | head_ $ do 142 | title_ "Criterion comparison" 143 | meta_ [ charset_ "UTF-8" ] 144 | style_ style 145 | body_ $ 146 | -- for list.js: 147 | div_ [id_ "bench"] $ do 148 | input_ [class_ "search", placeholder_ "Filter by name"] 149 | span_ "Sort by: " 150 | button_ [class_ "sort", makeAttribute "data-sort" "orderOrig"] "original order" 151 | button_ [class_ "sort", makeAttribute "data-sort" "significance"] "significance" 152 | button_ [class_ "sort", makeAttribute "data-sort" "benchName"] "name" 153 | toTable orderOrig table 154 | -- http://listjs.com : 155 | script_ [ src_ "http://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"] (""::T.Text) 156 | script_ "new List(\"bench\", {valueNames: [\"orderOrig\", \"benchName\",\"significance\"]});" 157 | --------------------------------------------------------------------------------