├── tests
    ├── __init__.py
    ├── requirements.txt
    └── requirements-linting.txt
├── pytest_speed
    ├── version.py
    ├── save.py
    ├── cli.py
    ├── utils.py
    ├── __init__.py
    └── benchmark.py
├── .gitignore
├── README.md
├── .pre-commit-config.yaml
├── Makefile
└── pyproject.toml

/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/pytest_speed/version.py:
--------------------------------------------------------------------------------
VERSION = '0.3.3'
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
coverage[toml]==6.4.1
poetry==1.2.0a2
pytest==7.1.2
pytest-sugar==0.9.4
--------------------------------------------------------------------------------
/tests/requirements-linting.txt:
--------------------------------------------------------------------------------
black==22.6.0
flake8==4.0.1
flake8-quotes==3.3.1
isort[colors]==5.10.1
mypy==0.961
pre-commit==2.19.0
pycodestyle==2.8.0
pyflakes==2.4.0
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.py[cod]
.idea/
env/
.coverage
.cache/
htmlcov/
media/
sandbox/
.pytest_cache/
*.egg-info/
/build/
/dist/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
/TODO.md
/.mypy_cache/
/scratch/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pytest-speed

Modern benchmarking library for Python with pytest integration.

`pytest-speed` is mostly backwards compatible with `pytest-benchmark` but adds:
* clearer output using rich
* clearer comparison of benchmarks
* some extra functionality, e.g. running multiple benchmarks from a single test
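
For example, benchmarks are written as ordinary pytest tests using the `bench` fixture.
The following is a minimal sketch based on the fixture defined in `pytest_speed/__init__.py`;
the test and the functions being timed are purely illustrative:

```python
def test_sum_implementations(bench):
    data = list(range(1_000))

    # direct form: bench(func, *args, name=..., group=...)
    bench(sum, data, name='builtin_sum', group='sum')

    # decorator form: with no positional arguments, bench(...) returns a decorator
    # that benchmarks a zero-argument function
    @bench(name='loop_sum', group='sum')
    def loop_sum():
        total = 0
        for value in data:
            total += value
        return total
```

Benchmarks only run when enabled with `--bench` (or the `pytest-benchmark` compatible
`--benchmark-enable`); add `--bench-save` to save results for later comparison with the
`pytest-speed` CLI.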
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: check-yaml
        args: ['--unsafe']
      - id: end-of-file-fixer

  - repo: local
    hooks:
      - id: lint
        name: Lint
        entry: make lint
        types: [python]
        language: system
      - id: mypy
        name: Mypy
        entry: make mypy
        types: [python]
        language: system
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.DEFAULT_GOAL := all
isort = isort pytest_speed tests
black = black pytest_speed tests

.PHONY: install
install:
	pip install -r tests/requirements.txt
	pip install -r tests/requirements-linting.txt
	poetry install
	pre-commit install

.PHONY: format
format:
	$(isort)
	$(black)

.PHONY: lint
lint:
	flake8 --max-complexity 12 --max-line-length 120 --ignore E203,W503 pytest_speed tests
	$(isort) --check-only --df
	$(black) --check

.PHONY: test
test:
	coverage run -m pytest

.PHONY: testcov
testcov: test
	@coverage report --show-missing
	@coverage html

.PHONY: mypy
mypy:
	mypy pytest_speed

.PHONY: all
all: lint mypy testcov
--------------------------------------------------------------------------------
/pytest_speed/save.py:
--------------------------------------------------------------------------------
import json
import re
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Tuple

from .utils import GitSummary

if TYPE_CHECKING:
    from .benchmark import Benchmark, BenchmarkConfig

__all__ = 'save_benchmarks', 'load_all_benchmarks', 'load_benchmark', 'BenchmarkSummary'
benchmark_save_dir = Path('.benchmarks/speed')


def save_benchmarks(benchmarks: 'List[Benchmark]', config: 'BenchmarkConfig', git: GitSummary) -> Tuple[int, str]:
    """
    Save benchmarks to file.
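
    Results are written as JSON, together with the git state and config, to
    `.benchmarks/speed/bench{id:03d}.json`; the new benchmark ID and the file path are returned.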
    """
    data: Dict[str, Any] = {
        'timestamp': datetime.now().isoformat(),
        'git_info': asdict(git),
        'config': asdict(config),
        'benchmarks': [asdict(bm) for bm in benchmarks],
    }
    if benchmark_save_dir.exists():
        bm_id = sum(1 for _ in benchmark_save_dir.glob('bench*')) + 1
    else:
        bm_id = 1
        benchmark_save_dir.mkdir(parents=True)

    data['id'] = bm_id
    path = benchmark_save_dir / f'bench{bm_id:03d}.json'
    with path.open('w') as f:
        json.dump(data, f, indent=2)
    return bm_id, str(path)


@dataclass
class BenchmarkSummary:
    id: int
    timestamp: datetime
    config: 'BenchmarkConfig'
    git: GitSummary
    benchmarks: 'List[Benchmark]'


def load_all_benchmarks() -> List[BenchmarkSummary]:
    benchmark_summaries = []
    for path in benchmark_save_dir.glob('bench*'):
        m = re.search(r'bench(\d+)', path.name)
        if m:
            benchmark_id = int(m.group(1))
            benchmark_summaries.append(load_benchmark(benchmark_id))
    return benchmark_summaries


def load_benchmark(benchmark_id: int) -> BenchmarkSummary:
    from .benchmark import Benchmark, BenchmarkConfig

    path = benchmark_save_dir / f'bench{benchmark_id:03d}.json'
    with path.open() as f:
        data = json.load(f)
    return BenchmarkSummary(
        id=data['id'],
        timestamp=datetime.fromisoformat(data['timestamp']),
        config=BenchmarkConfig(**data['config']),
        git=GitSummary(**data['git_info']),
        benchmarks=[Benchmark(**bm) for bm in data['benchmarks']],
    )
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ['setuptools', 'setuptools-scm']
build-backend = 'setuptools.build_meta'

[project]
name = 'pytest-speed'
version = '0.4.0'
description = 'Modern benchmarking library for Python with pytest integration.'
authors = [{name = 'Samuel Colvin', email = 's@muelcolvin.com'}]
readme = 'README.md'
license = {text = 'MIT'}
classifiers = [
    'Development Status :: 3 - Alpha',
    'Framework :: Pytest',
    'Intended Audience :: Developers',
    'Intended Audience :: Education',
    'Intended Audience :: Information Technology',
    'Intended Audience :: Science/Research',
    'Intended Audience :: System Administrators',
    'Operating System :: Unix',
    'Operating System :: POSIX :: Linux',
    'Environment :: Console',
    'Environment :: MacOS X',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3 :: Only',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: 3.11',
    'Topic :: Software Development :: Libraries :: Python Modules',
    'Topic :: Internet',
    'Typing :: Typed',
]
requires-python = '>=3.7'
dependencies = [
    'pytest>=7',
    'click>=7',
    'rich>=12',
]
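
# register the plugin with pytest via the 'pytest11' entry point group so it is
# loaded automatically once installed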
[project.entry-points.pytest11]
speed = 'pytest_speed'

[project.scripts]
pytest-speed = "pytest_speed.cli:cli"

[project.urls]
repository = 'https://github.com/samuelcolvin/pytest-speed'

[tool.pytest.ini_options]
testpaths = 'tests'
filterwarnings = 'error'

[tool.coverage.run]
source = ['pytest_speed']
branch = true

[tool.coverage.report]
precision = 2
exclude_lines = [
    'pragma: no cover',
    'raise NotImplementedError',
    'raise NotImplemented',
    'if TYPE_CHECKING:',
    '@overload',
]

[tool.black]
color = true
line-length = 120
target-version = ['py37', 'py38', 'py39', 'py310']
skip-string-normalization = true
skip-magic-trailing-comma = true

[tool.isort]
line_length = 120
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
combine_as_imports = true
color_output = true

[tool.mypy]
strict = true
warn_return_any = false
show_error_codes = true
--------------------------------------------------------------------------------
/pytest_speed/cli.py:
--------------------------------------------------------------------------------
from datetime import datetime

import click
from rich.console import Console
from rich.table import Table

from .benchmark import BenchmarkTable, compare_benchmarks
from .save import BenchmarkSummary, load_all_benchmarks, load_benchmark
from .utils import format_ts
from .version import VERSION


@click.group()
@click.version_option(VERSION)
def cli() -> None:
    """
    CLI for pytest-speed; can be used to list, display and compare saved benchmark runs.
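
    e.g. `pytest-speed list`, `pytest-speed display 3` and `pytest-speed compare 3 4`.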
    """
    pass


@cli.command(name='list')
def list_() -> None:
    """
    List all saved benchmarks.
    """
    benchmark_summaries = load_all_benchmarks()
    console = Console()
    table = Table(title='Saved Benchmarks', padding=(0, 2), border_style='cyan')

    table.add_column('ID', style='bold', justify='right')
    table.add_column('Timestamp')
    table.add_column('Branch')
    table.add_column('Commit SHA')
    table.add_column('Commit Message')
    table.add_column('Benchmarks', justify='right')

    now = datetime.now()
    benchmark_summaries.sort(key=lambda bs_: bs_.id)
    for bs in benchmark_summaries:
        table.add_row(
            f'{bs.id:d}',
            format_ts(bs.timestamp, now),
            bs.git.branch,
            f'{bs.git.commit[:7]}{" [dirty]" if bs.git.dirty else ""}',
            bs.git.short_message(),
            f'{len(bs.benchmarks):,}',
        )
    console.print(table)


@cli.command()
@click.argument('benchmark_id', type=int)
def display(benchmark_id: int) -> None:
    """
    Display a table summarising a single benchmark run.

    Same table as is printed after a run.
    """
    bms = get_benchmark(benchmark_id)
    BenchmarkTable(Console(), bms.git, bms.benchmarks).print()


@cli.command()
@click.argument('id_before', type=int)
@click.argument('id_after', type=int)
def compare(id_before: int, id_after: int) -> None:
    """
    Load two benchmarks and compare them.

    IDs should match those from the "ID" column of `pytest-speed list`.
    """
    before = get_benchmark(id_before)
    after = get_benchmark(id_after)

    compare_benchmarks(before, after)


def get_benchmark(benchmark_id: int) -> BenchmarkSummary:
    try:
        return load_benchmark(benchmark_id)
    except FileNotFoundError:
        raise click.UsageError(f'No benchmark with ID {benchmark_id}')
--------------------------------------------------------------------------------
/pytest_speed/utils.py:
--------------------------------------------------------------------------------
import re
import subprocess
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from textwrap import shorten
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

if TYPE_CHECKING:
    from .benchmark import Benchmark


@dataclass
class GitSummary:
    found: bool
    branch: str = ''
    commit: str = ''
    commit_message: str = ''
    dirty: bool = False
    # TODO parent and commit timestamp

    def __str__(self) -> str:
        if self.found:
            s = f'{self.branch} ({self.commit[:7]})'
            if self.dirty:
                s += ' [dirty]'
            return s
        else:
            return ''

    @classmethod
    def build(cls) -> 'GitSummary':
        if not Path('.git').exists():
            return GitSummary(found=False)
        p = subprocess.run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], check=True, stdout=subprocess.PIPE, text=True)
        branch = p.stdout.strip()
        p = subprocess.run(
            ['git', 'describe', '--dirty', '--always', '--long', '--abbrev=40'],
            check=True,
            stdout=subprocess.PIPE,
            text=True,
        )
        dirty = '-dirty' in p.stdout
        if dirty:
            commit = p.stdout.strip().split('-', 1)[0]
        else:
            commit = p.stdout.strip()
        p = subprocess.run(
            ['git', 'log', '--format=%B', '-n', '1', commit], check=True, stdout=subprocess.PIPE, text=True
        )
        commit_message = p.stdout.strip()
        return cls(True, branch, commit, commit_message, dirty)

    def short_message(self) -> str:
        comment = re.sub(r'^\s*(\*\s*)?', '', self.commit_message, flags=re.M)
        return shorten(re.sub(r'\n\s*', ' ', comment), 50, placeholder='…')


def render_time(time_ns: float, units: str, div: int) -> str:
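    """
    Render a time in ns using the units and divisor from calc_div_units,
    e.g. render_time(3_500, 'µs', 1_000) -> '3.50µs'.
    """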
    value = time_ns / div
    if value < 1:
        dp = 3
    else:
        dp = 2 if value < 100 else 1
    return f'{value:_.{dp}f}{units}'


def benchmark_change(before: float, after: float) -> str:
    if after > before * 2:
        return f'x{after / before:0.2f}'
    else:
        return f'{(after - before) / before:+0.2%}'


def group_benchmarks(benchmarks: 'List[Benchmark]') -> 'Dict[Optional[str], List[Benchmark]]':
    groups: 'Dict[Optional[str], List[Benchmark]]' = {}
    for bm in benchmarks:
        group = groups.get(bm.group)
        if group:
            group.append(bm)
        else:
            groups[bm.group] = [bm]
    return groups


def calc_div_units(time_ns: float) -> Tuple[str, int]:
    if time_ns < 1_000:
        return 'ns', 1
    elif time_ns < 1_000_000:
        return 'µs', 1_000
    elif time_ns < 1_000_000_000:
        return 'ms', 1_000_000
    else:
        return 's', 1_000_000_000


def format_ts(ts: datetime, now: datetime) -> str:
    if ts.date() == now.date():
        diff = now - ts
        if diff.seconds < 60:
            ago = f'{diff.seconds} seconds'
        else:
            mins = round(diff.seconds / 60)
            if diff.seconds < 3600:
                ago = f'{mins:.0f} mins'
            else:
                ago = f'{mins // 60} hours, {mins % 60} mins'
        return f'{ts:%H:%M} ({ago} ago)'
    else:
        return f'{ts:%Y-%m-%d %H:%M}'
--------------------------------------------------------------------------------
/pytest_speed/__init__.py:
--------------------------------------------------------------------------------
import re
from typing import Any, Callable, Optional, Protocol, Union

import pytest
import rich

from .benchmark import Benchmark, BenchmarkCollection, BenchmarkConfig
from .version import VERSION

__version__ = VERSION


def pytest_configure(config: Any) -> None:
    config.addinivalue_line(
        'markers', 'benchmark: pytest-speed marker to define benchmark groups (compatible with pytest-benchmark)'
    )
    config.addinivalue_line('markers', 'speed: pytest-speed marker to define benchmark groups')


stub_help = 'pytest-speed stub for pytest-benchmark, ignored'


def pytest_addoption(parser: Any) -> None:
    parser.addoption('--benchmark-columns', action='store', default='', help=stub_help)
    parser.addoption('--benchmark-group-by', action='store', default='', help=stub_help)
    parser.addoption('--benchmark-warmup', action='store', default='', help=stub_help)
    parser.addoption('--benchmark-disable', action='store_true', default='', help=stub_help)
    parser.addoption(
        '--benchmark-save',
        action='store',
        default='',
        help='pytest-speed stub for pytest-benchmark, value is ignored, but if set, benchmarks are saved',
    )
    parser.addoption(
        '--benchmark-enable',
        dest='bench',
        action='store_true',
        default=False,
        help='alias for "--bench", compatible with pytest-benchmark - enable benchmarks',
    )
    parser.addoption('--bench', action='store_true', default=False, help='enable benchmarks')
    parser.addoption('--bench-save', action='store_true', default=False, help='save benchmarks')


benchmarks: Optional[BenchmarkCollection] = None


class RunBench(Protocol):
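    """
    Call signature of the `bench` fixture: call it directly as
    `bench(func, *args, name=..., group=...)` to run one benchmark, or with keyword
    arguments only to get a decorator that benchmarks a zero-argument function.
    """
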
    def __call__(
        self, *args: Any, name: Optional[str] = None, group: Optional[str] = None
    ) -> Union[None, Benchmark, Callable[[Callable[[], None]], Optional[Benchmark]]]:
        ...


@pytest.fixture(scope='session')
def benchmark_collection(request: Any) -> Optional[BenchmarkCollection]:
    global benchmarks

    save = any(request.config.getoption(opt) for opt in ('bench_save', 'benchmark_save'))
    if request.config.getoption('bench') or save:
        benchmarks = BenchmarkCollection(BenchmarkConfig(), save)
        return benchmarks
    else:
        return None


@pytest.fixture
def bench(request: Any, capsys: Any, benchmark_collection: Optional[BenchmarkCollection]) -> RunBench:
    verbose_level = request.config.getoption('verbose')
    call_index = 0

    def benchmark_func_logic(
        func: Callable[..., Any], *args: Any, name: Optional[str] = None, group: Optional[str] = None
    ) -> Optional[Benchmark]:
        nonlocal call_index
        if benchmark_collection is None:
            # benchmarks not enabled, just run the function and return
            func(*args)
            return None

        test_name = re.sub('^test_', '', request.node.name)
        if name is not None:
            name = name.format(test=test_name, index=call_index)
        elif call_index == 0:
            name = test_name
        else:
            name = f'{test_name}_{call_index}'

        if group is None:
            group = next((m.kwargs['group'] for m in request.node.iter_markers('speed')), None)
        if group is None:
            group = next((m.kwargs['group'] for m in request.node.iter_markers('benchmark')), None)

        call_index += 1
        benchmark = benchmark_collection.run_benchmark(name, group, func, *args)
        if verbose_level > 0:
            with capsys.disabled():
                rich.print(benchmark.summary(), end='')
        return benchmark

    def benchmark_func_wrapper(
        *args: Any, name: Optional[str] = None, group: Optional[str] = None
    ) -> Union[None, Benchmark, Callable[[Callable[[], None]], Optional[Benchmark]]]:
        if args:
            return benchmark_func_logic(*args, name=name, group=group)
        else:

            def benchmark_wrapper(func: Callable[[], None]) -> Optional[Benchmark]:
                return benchmark_func_logic(func, name=name, group=group)

            return benchmark_wrapper

    return benchmark_func_wrapper


@pytest.fixture
def benchmark(bench: RunBench) -> RunBench:
    """
    Compatibility with pytest-benchmark
    """
    return bench


def pytest_terminal_summary() -> None:
    if benchmarks:
        benchmarks.finish()
--------------------------------------------------------------------------------
/pytest_speed/benchmark.py:
--------------------------------------------------------------------------------
from dataclasses import dataclass
from datetime import datetime
from statistics import mean, stdev
from time import perf_counter_ns
from typing import Any, Callable, List, Optional, Sequence, Tuple, Union, cast

from rich.console import Console
from rich.markup import escape
from rich.table import Table
from rich.text import Text

from .save import BenchmarkSummary, save_benchmarks
from .utils import GitSummary, benchmark_change, calc_div_units, format_ts, group_benchmarks, render_time

__all__ = 'BenchmarkConfig', 'Benchmark', 'BenchmarkCollection', 'BenchmarkTable', 'compare_benchmarks'


@dataclass
class BenchmarkConfig:
    """
    Store configuration info for benchmarking
    """

    warmup_time_ns: int = 1_000_000_000
    warmup_max_iterations: int = 5_000
    max_rounds: int = 10_000
    max_time_ns: int = 3_000_000_000
    outlier_percentage: int = 10

    ideal_rounds: int = 100
    min_rounds: int = 10
    ideal_iterations: int = 10_000
    min_iterations: int = 200


@dataclass
class Benchmark:
    """
    Store results of a single benchmark.
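
    best_ns, worse_ns, mean_ns and stddev_ns are measured per round of iter_per_round
    iterations (bench_time_ns is the total run time); per-iteration times are obtained
    by dividing by iter_per_round.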
    """

    name: str
    group: Optional[str]
    best_ns: int
    worse_ns: int
    mean_ns: float
    stddev_ns: float
    bench_time_ns: int
    rounds: int
    iter_per_round: int
    outlier_rounds: int
    outlier_prop: float
    warnings: Sequence[str] = ()

    def summary(self) -> str:
        best_ns = self.best_ns / self.iter_per_round
        units, div = calc_div_units(best_ns)
        parts = dict(
            group=self.group,
            name=self.name,
            best=render_time(best_ns, units, div),
            stdev=render_time(self.stddev_ns / self.iter_per_round, units, div),
            iterations=f'{self.rounds * self.iter_per_round:,}',
            warnings=', '.join(self.warnings),
        )
        return ' '.join(f'[blue]{k}[/blue]=[green]{v}[/green]' for k, v in parts.items() if v)


class BenchmarkCollection:
    """
    Manage a benchmark run and store data about it.
    """

    def __init__(self, config: BenchmarkConfig, save: bool):
        self.config = config
        self.save = save
        self.benchmarks: List[Benchmark] = []
        self.git = GitSummary.build()

    def run_benchmark(self, name: str, group: Optional[str], func: Callable[..., Any], *args: Any) -> Benchmark:
        """
        Run a single benchmark and record data about it.
        """
        warnings: List[str] = []
        iter_per_round, rounds = self._warmup(func, args)

        times = []
        loop_range = range(iter_per_round)
        start_time = perf_counter_ns()
        toc = start_time

        for _ in range(rounds):
            tic = perf_counter_ns()
            for _ in loop_range:
                func(*args)
            toc = perf_counter_ns()
            times.append(toc - tic)
            if toc - start_time > self.config.max_time_ns * 2:
                warnings.append('Benchmark timed out')
                break

        bench_time_ns = toc - start_time
        best_ns = min(times)
        outlier_threshold = int(best_ns * (1 + self.config.outlier_percentage / 100))
        outlier_rounds = sum(1 for t in times if t > outlier_threshold)

        outlier_prop = outlier_rounds / len(times)
        if outlier_prop > 0.1:
            warnings.append(f'{outlier_prop:0.0%} high outliers')

        benchmark = Benchmark(
            name=name,
            group=group,
            best_ns=best_ns,
            worse_ns=max(times),
            mean_ns=mean(times),
            stddev_ns=stdev(times),
            bench_time_ns=bench_time_ns,
            rounds=len(times),
            iter_per_round=iter_per_round,
            outlier_rounds=outlier_rounds,
            outlier_prop=outlier_prop,
            warnings=warnings,
        )
        self.benchmarks.append(benchmark)
        return benchmark

    def _warmup(self, func: Callable[..., Any], args: Sequence[Any]) -> Tuple[int, int]:
        """
        Run warmup iterations and return tuple of (iter_per_round, rounds).
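
        The function is timed repeatedly (up to warmup_max_iterations calls or roughly
        warmup_time_ns) to estimate its cost, then rounds and iterations per round are
        sized so the whole benchmark should finish within roughly max_time_ns.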
        """
        times = []
        start_time = perf_counter_ns()
        for _ in range(self.config.warmup_max_iterations):
            tic = perf_counter_ns()
            func(*args)
            toc = perf_counter_ns()
            times.append(toc - tic)
            if toc - start_time > self.config.warmup_time_ns:
                break

        mean_warmup = mean(times)
        del times
        # we want to run ideal_rounds rounds of iterations, each round consisting of up to ideal_iterations iterations
        # we want them to finish in less than max_time_ns
        # that means each round should take max_time_ns / ideal_rounds

        round_time = self.config.max_time_ns / self.config.ideal_rounds
        iter_per_round = min(self.config.ideal_iterations, int(round_time / mean_warmup))

        rounds = self.config.ideal_rounds
        if iter_per_round < self.config.min_iterations:
            rounds = self.config.min_rounds
            round_time = self.config.max_time_ns / rounds
            iter_per_round = max(self.config.min_iterations, int(round_time / mean_warmup))
        return iter_per_round, rounds

    def finish(self) -> None:
        if not self.benchmarks:
            print('No benchmarks run')
            return

        console = Console()
        if self.save:
            bm_id, save_path = save_benchmarks(self.benchmarks, self.config, self.git)
            console.print(f'[italic][dim]Saved benchmarks to [/dim][cyan]{escape(save_path)}[/cyan][dim].[/dim]')
        else:
            bm_id = None

        BenchmarkTable(console, self.git, self.benchmarks, bm_id).print()


class BenchmarkTable:
    """
    Logic for printing a table summarising benchmarks.
    """

    def __init__(self, console: Console, git: GitSummary, benchmarks: List[Benchmark], bm_id: Optional[int] = None):
        self.console = console
        title = ['Benchmarks', str(git)]
        if bm_id is not None:
            title.append(f'Save ID: [blue]{bm_id:d}[/blue]')

        self.table = Table(title=' '.join(t for t in title if t), padding=(0, 2), expand=True, border_style='cyan')
        self.benchmarks = benchmarks
        min_time = min(bm.best_ns / bm.iter_per_round for bm in benchmarks)
        self.units, self.div = calc_div_units(min_time)
        self.group_best: Optional[float] = None

    def print(self) -> None:
        show_groups = any(bm.group for bm in self.benchmarks)

        if show_groups:
            self.table.add_column('Group', style='bold')
        self.table.add_column('Test Name')
        self.table.add_column(f'Best ({self.units}/iter)', justify='right')
        if show_groups:
            self.table.add_column('Relative', justify='right')
        self.table.add_column(f'Stddev ({self.units}/iter)', justify='right')
        self.table.add_column('Iterations', justify='right')
        self.table.add_column('Note')

        if show_groups:
            for bm_group in group_benchmarks(self.benchmarks).values():
                group_len = len(bm_group)
                bm_group.sort(key=lambda bm: bm.best_ns / bm.iter_per_round)
                for index, bm in enumerate(bm_group):
                    self._add_group_row(index == 0, index + 1 == group_len, bm)

            self.benchmarks.sort(key=lambda bm: (bm.group, bm.best_ns / bm.iter_per_round))
        else:
            for bm in self.benchmarks:
                self._add_no_group_row(bm)

        self.console.print(self.table)

    def _add_group_row(self, first_in_group: bool, last_in_group: bool, benchmark: Benchmark) -> None:
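        """
        Add a row for a benchmark within a group: rows are added fastest first, the
        fastest benchmark sets the baseline for the 'Relative' column and the slowest
        benchmark in a multi-benchmark group is shown in red.
        """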
        best_ns = benchmark.best_ns / benchmark.iter_per_round
        if first_in_group:
            # new group
            self.group_best = best_ns
            group_col = benchmark.group
            rel = ''
            # if just one item in the group, no style
            row_style = 'normal' if last_in_group else 'green'
        else:
            # show the worse result in red
            row_style = 'red' if last_in_group else 'cyan'
            group_col = ''
            rel = benchmark_change(cast(float, self.group_best), best_ns)

        self.table.add_row(
            group_col,
            Text(benchmark.name or '(no name)', style=row_style),
            Text(self._render_time(best_ns), style=row_style),
            Text(rel, style=row_style),
            Text(self._render_time(benchmark.stddev_ns / benchmark.iter_per_round), style=row_style),
            Text(f'{benchmark.rounds * benchmark.iter_per_round:,}', style=row_style),
            self._row_note(benchmark),
            end_section=last_in_group,
        )

    def _add_no_group_row(self, benchmark: Benchmark) -> None:
        self.table.add_row(
            benchmark.name or '(no name)',
            self._render_time(benchmark.best_ns / benchmark.iter_per_round),
            self._render_time(benchmark.stddev_ns / benchmark.iter_per_round),
            f'{benchmark.rounds * benchmark.iter_per_round:,}',
            self._row_note(benchmark),
        )

    def _render_time(self, ns: float) -> str:
        return render_time(ns, '', self.div)

    @staticmethod
    def _row_note(benchmark: Benchmark) -> Union[str, Text]:
        if benchmark.warnings:
            return Text('\n'.join(benchmark.warnings), style='red')
        else:
            return ''


def compare_benchmarks(before: BenchmarkSummary, after: BenchmarkSummary) -> None:
    """
    Compare two sets of benchmarks.
    """
    now = datetime.now()
    console = Console()
    table = Table(title='Benchmarks being compared', title_justify='left', padding=(0, 2), border_style='cyan')
    table.add_column('', style='bold')
    table.add_column('Before')
    table.add_column('After')
    table.add_row('ID', f'{before.id:03d}', f'{after.id:03d}')
    table.add_row('Branch', before.git.branch, after.git.branch)
    table.add_row('Commit SHA', before.git.commit[:7], after.git.commit[:7])
    table.add_row('Commit Message', before.git.commit_message, after.git.commit_message)
    table.add_row('Benchmark Timestamp', format_ts(before.timestamp, now), format_ts(after.timestamp, now))

    console.print('')
    console.print(table)

    min_time = min(
        [bm.best_ns / bm.iter_per_round for bm in before.benchmarks]
        + [bm.best_ns / bm.iter_per_round for bm in after.benchmarks]
    )
    units, div = calc_div_units(min_time)

    table = Table(title='Benchmarks Comparison', title_justify='left', padding=(0, 2), border_style='cyan')
    table.add_column('Group', style='bold')
    table.add_column('Benchmark')
    table.add_column(f'Before ({units}/iter)', justify='right')
    table.add_column(f'After ({units}/iter)', justify='right')
    table.add_column('Change', justify='right')

    test_keys = set()
    after_lookup = {benchmark_key(bm): bm for bm in after.benchmarks}
    before_not_after = 0

    for bm_group in group_benchmarks(before.benchmarks).values():
        for index, bm in enumerate(bm_group):
            key = benchmark_key(bm)
            after_bm = after_lookup.get(key)
            test_keys.add(key)
            before_ns = bm.best_ns / bm.iter_per_round

            group_name = (bm.group or '') if index == 0 else ''
            end_section = index == len(bm_group) - 1
            if after_bm:
                after_ns = after_bm.best_ns / after_bm.iter_per_round
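                # highlight changes of more than ±10%: red for slower, green for faster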
                style = None
                if after_ns > before_ns * 1.1:
                    style = 'red'
                elif after_ns < before_ns * 0.9:
                    style = 'green'
                table.add_row(
                    group_name,
                    bm.name,
                    render_time(before_ns, '', div),
                    render_time(after_ns, '', div),
                    benchmark_change(before_ns, after_ns),
                    style=style,
                    end_section=end_section,
                )
            else:
                before_not_after += 1

    console.print('')
    console.print(table)
    if before_not_after:
        console.print(f'{before_not_after} benchmarks in before but not after.', style='red')
    after_not_before = sum(benchmark_key(bm) not in test_keys for bm in after.benchmarks)
    if after_not_before:
        console.print(f'{after_not_before} benchmarks in after but not before.', style='red')


def benchmark_key(bm: 'Benchmark') -> str:
    if bm.group:
        return f'{bm.group}:{bm.name}'
    else:
        return bm.name
--------------------------------------------------------------------------------