├── .gitattributes ├── .github └── workflows │ ├── bench.yaml │ └── test.yaml ├── .gitignore ├── .typos.toml ├── Cargo.toml ├── LICENSE ├── README.md ├── assets └── sample.png ├── examples ├── Cargo.toml ├── benches │ ├── common.rs │ ├── input.txt │ ├── search-btree.rs │ ├── search-ord.rs │ ├── search-vec.rs │ ├── tango-async.rs │ ├── tango-faster.rs │ ├── tango-slower.rs │ └── test_funcs.rs ├── build.rs ├── criterion-ordsearch.sh └── ordsearch.sh ├── notebooks ├── bootstrap.ipynb ├── empirical-observations.ipynb ├── exp.ipynb └── ordsearch.ipynb ├── pair-test.gnuplot ├── render-plot.sh ├── rust-toolchain.toml ├── scripts ├── aws-bench.sh ├── aws-results.sh ├── calibrate.sh ├── criterion.sh ├── describe.py ├── linear-sampling-test.sh ├── ordsearch.sh ├── sensitivity-test.sh └── tango.sh ├── tango-bench ├── Cargo.toml ├── benches │ └── tango.rs ├── build.rs └── src │ ├── cli.rs │ ├── dylib.rs │ ├── lib.rs │ ├── linux.rs │ └── plot.gnuplot └── test.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/*.ipynb linguist-detectable=false 2 | -------------------------------------------------------------------------------- /.github/workflows/bench.yaml: -------------------------------------------------------------------------------- 1 | name: Benchmarks 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | bench: 7 | runs-on: ubuntu-22.04 8 | steps: 9 | - uses: actions/checkout@v3 10 | 11 | - name: Prepare Environment 12 | run: | 13 | rustup update nightly 14 | rustup default nightly 15 | cargo install cargo-export --version 0.2.0 16 | 17 | - uses: actions/cache@v3 18 | with: 19 | path: | 20 | ~/.cargo/bin/ 21 | ~/.cargo/registry/index/ 22 | ~/.cargo/registry/cache/ 23 | ~/.cargo/git/db/ 24 | ./target/ 25 | ./baseline-branch/target/ 26 | key: Bench/${{ runner.os }} 27 | 28 | - name: Building Benchmarks 29 | run: cargo export target/benchmarks -- bench --bench='search-*' 30 | 31 | - name: Run Benchmarks 32 | run: | 33 
| set -eo pipefail 34 | 35 | mkdir -p target/dumps 36 | target/benchmarks/search_ord --color=never compare target/benchmarks/search_ord \ 37 | -t 1 -o -d target/dumps | tee target/benchmark.txt 38 | 39 | - uses: actions/upload-artifact@v3 40 | with: 41 | name: benchmark-results 42 | path: | 43 | target/benchmark.txt 44 | target/dumps/*.csv 45 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: push 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-22.04 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: dtolnay/rust-toolchain@stable 11 | with: 12 | components: "clippy, rustfmt" 13 | - uses: olix0r/cargo-action-fmt/setup@v2 14 | - uses: Swatinem/rust-cache@v2 15 | - name: Running clippy 16 | run: cargo clippy --all-targets --all-features -p tango-bench --message-format=json | cargo-action-fmt 17 | - name: Checking formatting 18 | run: cargo fmt -- --check --color always 19 | - name: Typo 20 | uses: crate-ci/typos@master 21 | test: 22 | strategy: 23 | matrix: 24 | include: 25 | - os: ubuntu-22.04 26 | - os: macos-12 27 | - os: windows-2019 28 | runs-on: ${{ matrix.os }} 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: dtolnay/rust-toolchain@stable 32 | - uses: Swatinem/rust-cache@v2 33 | 34 | - name: Run Tests 35 | run: cargo test 36 | 37 | bench: 38 | needs: [test] 39 | runs-on: ubuntu-22.04 40 | steps: 41 | - uses: actions/checkout@v3 42 | - uses: actions/checkout@v3 43 | with: 44 | ref: dev 45 | path: baseline-branch 46 | - uses: dtolnay/rust-toolchain@stable 47 | - uses: Swatinem/rust-cache@v2 48 | - uses: taiki-e/install-action@v2 49 | with: 50 | tool: cargo-export 51 | 52 | - name: Building Benchmarks 53 | run: | 54 | cargo export target/benchmarks -- bench --bench=tango 55 | cd baseline-branch 56 | cargo export target/benchmarks -- bench --bench=tango 57 | 58 | - name: Run Benchmarks 59 
| run: | 60 | set -eo pipefail 61 | 62 | target/benchmarks/tango --color=never compare baseline-branch/target/benchmarks/tango \ 63 | -v -t 1 --fail-threshold 10 | tee target/benchmark.txt 64 | 65 | - uses: actions/upload-artifact@v3 66 | with: 67 | name: benchmark.txt 68 | path: target/benchmark.txt 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | Cargo.lock 3 | **/*.rs.bk 4 | *.svg 5 | /graphs 6 | /*.csv 7 | .ipynb_checkpoints 8 | /local 9 | /.fleet 10 | /.vscode 11 | /.ttr.yaml 12 | /*.drawio 13 | -------------------------------------------------------------------------------- /.typos.toml: -------------------------------------------------------------------------------- 1 | [files] 2 | extend-exclude = ["examples/benches/input.txt", "notebooks/*.ipynb"] 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = [ 5 | "tango-bench", 6 | "examples" 7 | ] 8 | 9 | [profile.bench] 10 | debug = true 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Denis Bazhenov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 
included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tango.rs 2 | 3 |
4 | Tango Bench 5 | Tango Bench 6 |
7 | 8 | It used to be that benchmarking required a significant amount of time and numerous iterations to arrive at meaningful results, which was particularly arduous when trying to detect subtle changes, such as those within the range of a few percentage points. 9 | 10 | Introducing Tango.rs, a novel benchmarking framework that employs [paired benchmarking](https://www.bazhenov.me/posts/paired-benchmarking/) to assess code performance. This approach capitalizes on the fact that it's far more efficient to measure the performance difference between two simultaneously executing functions compared to two functions executed consecutively. 11 | 12 | ![](assets/sample.png) 13 | 14 | Features: 15 | 16 | - very high sensitivity to changes which allows to converge on results quicker than traditional (pointwise) approach. Often the fraction of a second is enough; 17 | - ability to compare different versions of the same code from different VCS commits (A/B-benchmarking); 18 | - async support using tokio.rs; 19 | - macOS, Linux and Windows support; 20 | 21 | ## 1 second, 1 percent, 1 error 22 | 23 | Compared to traditional pointwise benchmarking, paired benchmarking is significantly more sensitive to changes. This heightened sensitivity enables the early detection of statistically significant performance variations. 24 | 25 | Tango is designed to have the capability to detect a 1% change in performance within just 1 second in at least 9 out of 10 test runs. 26 | 27 | ## Prerequirements 28 | 29 | 1. Rust and Cargo toolchain installed (Rust stable is supported on Linux/macOS, nightly is required for Windows) 30 | 1. (_Optional_) [`cargo-export`](https://github.com/bazhenov/cargo-export) installed 31 | 32 | ## Getting started 33 | 34 | 1. Add cargo dependency and create new benchmark: 35 | 36 | ```toml 37 | [dev-dependencies] 38 | tango-bench = "0.6" 39 | 40 | [[bench]] 41 | name = "factorial" 42 | harness = false 43 | ``` 44 | 45 | 1. 
allows rustc to export symbols for dynamic linking from benchmarks 46 | 47 | - **(Linux/macOS)** Add build script (`build.rs`) with following content 48 | 49 | ```rust,ignore 50 | fn main() { 51 | println!("cargo:rustc-link-arg-benches=-rdynamic"); 52 | println!("cargo:rerun-if-changed=build.rs"); 53 | } 54 | ``` 55 | 56 | - **(Windows, nightly required)** Add following code to cargo config (`.cargo/config`) 57 | 58 | ```toml 59 | [build] 60 | rustflags = ["-Zexport-executable-symbols"] 61 | ``` 62 | 63 | 1. Add `benches/factorial.rs` with the following content: 64 | 65 | ```rust,no_run 66 | use std::hint::black_box; 67 | use tango_bench::{benchmark_fn, tango_benchmarks, tango_main, IntoBenchmarks}; 68 | 69 | pub fn factorial(mut n: usize) -> usize { 70 | let mut result = 1usize; 71 | while n > 0 { 72 | result = result.wrapping_mul(black_box(n)); 73 | n -= 1; 74 | } 75 | result 76 | } 77 | 78 | fn factorial_benchmarks() -> impl IntoBenchmarks { 79 | [ 80 | benchmark_fn("factorial", |b| b.iter(|| factorial(500))), 81 | ] 82 | } 83 | 84 | tango_benchmarks!(factorial_benchmarks()); 85 | tango_main!(); 86 | ``` 87 | 88 | 1. Build and export benchmark to `target/benchmarks` directory: 89 | 90 | ```console 91 | $ cargo export target/benchmarks -- bench --bench=factorial 92 | ``` 93 | 94 | 1. Now let's try to modify `factorial.rs` and make factorial faster :) 95 | 96 | ```rust,ignore 97 | fn factorial_benchmarks() -> impl IntoBenchmarks { 98 | [ 99 | benchmark_fn("factorial", |b| b.iter(|| factorial(495))), 100 | ] 101 | } 102 | ``` 103 | 104 | 1. Now we can compare new version with already built one: 105 | 106 | ```console 107 | $ cargo bench -q --bench=factorial -- compare target/benchmarks/factorial 108 | factorial [ 375.5 ns ... 369.0 ns ] -1.58%* 109 | ``` 110 | The result shows that there is indeed a ~1% difference between `factorial(500)` and `factorial(495)`. 111 | 112 | Additional examples are available in `examples` directory. 
113 | 114 | ## Async support 115 | 116 | To use Tango.rs in an asynchronous setup, follow these steps: 117 | 118 | 1. Add `tokio` and `tango-bench` dependencies to your `Cargo.toml`: 119 | 120 | ```toml 121 | [dev-dependencies] 122 | tango-bench = { version = "0.6", features = ["async-tokio"] } 123 | 124 | [[bench]] 125 | name = "async_factorial" 126 | harness = false 127 | ``` 128 | 129 | 2. Create `benches/async_factorial.rs` with the following content: 130 | 131 | ```rust,no_run 132 | use std::hint::black_box; 133 | use tango_bench::{ 134 | async_benchmark_fn, asynchronous::tokio::TokioRuntime, tango_benchmarks, tango_main, 135 | IntoBenchmarks, 136 | }; 137 | 138 | pub async fn factorial(mut n: usize) -> usize { 139 | let mut result = 1usize; 140 | while n > 0 { 141 | result = result.wrapping_mul(black_box(n)); 142 | n -= 1; 143 | } 144 | result 145 | } 146 | 147 | fn benchmarks() -> impl IntoBenchmarks { 148 | [async_benchmark_fn("async_factorial", TokioRuntime, |b| { 149 | b.iter(|| async { factorial(500).await }) 150 | })] 151 | } 152 | 153 | tango_benchmarks!(benchmarks()); 154 | tango_main!(); 155 | ``` 156 | 157 | 3. Build and use benchmarks as you do in synchronous case 158 | 159 | ```console 160 | $ cargo bench -q --bench=async_factorial -- compare 161 | ``` 162 | 163 | ## Runner arguments 164 | 165 | There are several arguments you can pass to the `compare` command to change its behavior 166 | 167 | - `-t`, `--time` – how long to run each benchmark (in seconds) 168 | - `-s`, `--samples` – how many samples to gather from each benchmark 169 | - `-f` – filter benchmarks by name. Glob patterns are supported (eg. 
`*/bench_name/{2,4,8}/**`) 170 | - `-d [path]` – dump CSV with raw samples in a given directory 171 | - `--gnuplot` – generate plot for each benchmark (requires gnuplot to be installed) 172 | - `-o`, `--filter-outliers` – additionally filter outliers 173 | - `-p`, `--parallel` - run base/candidate functions in 2 different threads instead of interleaving in a single thread 174 | - `--fail-threshold` – do fail if new version is slower than baseline on a given percentage 175 | - `--fail-fast` - do fail after first benchmark exceeding fail threshold, not after the whole suite 176 | 177 | 178 | ## Contributing 179 | 180 | The project is in its early stages so any help will be appreciated. Here are some ideas you might find interesting 181 | 182 | - find a way to provide a more user friendly API for registering functions in the system 183 | - if you're a library author, trying out tango and providing feedback will be very useful 184 | -------------------------------------------------------------------------------- /assets/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bazhenov/tango/2704f1fd0a74d5d33eb7beaeb35d525695aa9d0e/assets/sample.png -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tango-examples" 3 | version = "0.2.0" 4 | edition = "2021" 5 | autobenches = false 6 | 7 | [dependencies] 8 | tango-bench = { path = "../tango-bench" } 9 | rand = { version = "0.8", features = ["small_rng"] } 10 | 11 | [dev-dependencies] 12 | ordsearch = { version = "0.2.5" } 13 | num-traits = "0.2" 14 | 15 | [[bench]] 16 | name = "search-ord" 17 | harness = false 18 | 19 | [[bench]] 20 | name = "search-vec" 21 | harness = false 22 | 23 | [[bench]] 24 | name = "search-btree" 25 | harness = false 26 | 27 | [[bench]] 28 | name = "tango-faster" 29 | 
harness = false 30 | 31 | [[bench]] 32 | name = "tango-slower" 33 | harness = false 34 | 35 | [[bench]] 36 | name = "tango-async" 37 | harness = false 38 | required-features = ["async-tokio"] 39 | 40 | [features] 41 | prefetch = ["ordsearch/nightly"] 42 | align = [] 43 | async-tokio = [] 44 | -------------------------------------------------------------------------------- /examples/benches/common.rs: -------------------------------------------------------------------------------- 1 | extern crate tango_bench; 2 | 3 | use std::{any::type_name, convert::TryFrom, fmt::Debug, iter, marker::PhantomData}; 4 | use tango_bench::{benchmark_fn, IntoBenchmarks, MeasurementSettings, DEFAULT_SETTINGS}; 5 | 6 | const SIZES: [usize; 14] = [ 7 | 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 8 | ]; 9 | 10 | struct Lcg(usize); 11 | 12 | impl Lcg { 13 | fn next>(&mut self, max_value: usize) -> T { 14 | self.0 = self.0.wrapping_mul(1664525).wrapping_add(1013904223); 15 | T::try_from((self.0 >> 32) % max_value).ok().unwrap() 16 | } 17 | } 18 | 19 | pub struct RandomCollection { 20 | rng: Lcg, 21 | size: usize, 22 | value_dup_factor: usize, 23 | phantom: PhantomData, 24 | } 25 | 26 | impl RandomCollection 27 | where 28 | C::Item: Ord + Copy + TryFrom, 29 | { 30 | pub fn new(size: usize, value_dup_factor: usize, seed: u64) -> Self { 31 | Self { 32 | rng: Lcg(seed as usize), 33 | size, 34 | value_dup_factor, 35 | phantom: PhantomData, 36 | } 37 | } 38 | } 39 | 40 | impl RandomCollection 41 | where 42 | C::Item: Ord + Copy + TryFrom + Debug, 43 | usize: TryFrom, 44 | { 45 | fn random_collection(&mut self) -> Sample { 46 | let vec = generate_sorted_vec(self.size, self.value_dup_factor); 47 | let max = usize::try_from(*vec.last().unwrap()).ok().unwrap(); 48 | 49 | Sample { 50 | collection: C::from_sorted_vec(vec), 51 | max_value: max, 52 | } 53 | } 54 | 55 | fn next_needle(&mut self, sample: &Sample) -> C::Item { 56 | self.rng.next(sample.max_value + 1) 57 | } 58 
| } 59 | 60 | fn generate_sorted_vec(size: usize, dup_factor: usize) -> Vec 61 | where 62 | T: Ord + Copy + TryFrom, 63 | { 64 | (0..) 65 | .map(|v| 2 * v) 66 | .map(|v| T::try_from(v)) 67 | .map_while(Result::ok) 68 | .flat_map(|v| iter::repeat(v).take(dup_factor)) 69 | .take(size) 70 | .collect() 71 | } 72 | 73 | pub struct Sample { 74 | collection: C, 75 | max_value: usize, 76 | } 77 | 78 | impl AsRef for Sample { 79 | fn as_ref(&self) -> &C { 80 | &self.collection 81 | } 82 | } 83 | 84 | pub trait FromSortedVec { 85 | type Item; 86 | fn from_sorted_vec(v: Vec) -> Self; 87 | } 88 | 89 | impl FromSortedVec for Vec { 90 | type Item = T; 91 | 92 | fn from_sorted_vec(v: Vec) -> Self { 93 | v 94 | } 95 | } 96 | 97 | /// Generate benchmarks for searching in a collection. 98 | pub fn search_benchmarks(f: F) -> impl IntoBenchmarks 99 | where 100 | C: FromSortedVec + 'static, 101 | F: Fn(&C, C::Item) -> Option + Copy + 'static, 102 | C::Item: Copy + Ord + TryFrom + Debug, 103 | usize: TryFrom, 104 | { 105 | let mut benchmarks = vec![]; 106 | for size in SIZES { 107 | let name = format!("{}/{}/nodup", type_name::(), size); 108 | benchmarks.push(benchmark_fn(name, move |b| { 109 | let mut rnd = RandomCollection::::new(size, 1, b.seed); 110 | let input = rnd.random_collection(); 111 | b.iter(move || f(&input.collection, rnd.next_needle(&input))) 112 | })); 113 | } 114 | benchmarks 115 | } 116 | 117 | pub const SETTINGS: MeasurementSettings = MeasurementSettings { 118 | samples_per_haystack: usize::MAX, 119 | max_iterations_per_sample: 10_000, 120 | ..DEFAULT_SETTINGS 121 | }; 122 | -------------------------------------------------------------------------------- /examples/benches/search-btree.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 3 | use common::{search_benchmarks, FromSortedVec}; 4 | use std::{collections::BTreeSet, ops::Bound}; 5 | use tango_bench::{tango_benchmarks, 
tango_main}; 6 | 7 | mod common; 8 | 9 | impl FromSortedVec for BTreeSet { 10 | type Item = T; 11 | 12 | fn from_sorted_vec(v: Vec) -> Self { 13 | BTreeSet::from_iter(v) 14 | } 15 | } 16 | 17 | #[cfg_attr(feature = "align", repr(align(32)))] 18 | #[cfg_attr(feature = "align", inline(never))] 19 | fn search_btree(haystack: &BTreeSet, needle: T) -> Option { 20 | haystack 21 | .range((Bound::Included(needle), Bound::Unbounded)) 22 | .next() 23 | .copied() 24 | } 25 | 26 | tango_benchmarks!( 27 | search_benchmarks(search_btree::), 28 | search_benchmarks(search_btree::), 29 | search_benchmarks(search_btree::), 30 | search_benchmarks(search_btree::) 31 | ); 32 | tango_main!(common::SETTINGS); 33 | -------------------------------------------------------------------------------- /examples/benches/search-ord.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 3 | use common::{search_benchmarks, FromSortedVec}; 4 | use ordsearch::OrderedCollection; 5 | use tango_bench::{tango_benchmarks, tango_main}; 6 | 7 | mod common; 8 | 9 | impl FromSortedVec for OrderedCollection { 10 | type Item = T; 11 | fn from_sorted_vec(v: Vec) -> Self { 12 | OrderedCollection::from_sorted_iter(v) 13 | } 14 | } 15 | 16 | #[cfg_attr(feature = "align", repr(align(32)))] 17 | #[cfg_attr(feature = "align", inline(never))] 18 | fn search_ord(haystack: &OrderedCollection, needle: T) -> Option { 19 | haystack.find_gte(needle).copied() 20 | } 21 | 22 | tango_benchmarks!( 23 | search_benchmarks(search_ord::), 24 | search_benchmarks(search_ord::), 25 | search_benchmarks(search_ord::), 26 | search_benchmarks(search_ord::) 27 | ); 28 | tango_main!(common::SETTINGS); 29 | -------------------------------------------------------------------------------- /examples/benches/search-vec.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 
3 | use common::search_benchmarks; 4 | use tango_bench::{tango_benchmarks, tango_main}; 5 | 6 | mod common; 7 | 8 | #[cfg_attr(feature = "align", repr(align(32)))] 9 | #[cfg_attr(feature = "align", inline(never))] 10 | #[allow(clippy::ptr_arg)] 11 | fn search_vec(haystack: &Vec, needle: T) -> Option { 12 | haystack 13 | .binary_search(&needle) 14 | .ok() 15 | .and_then(|idx| haystack.get(idx)) 16 | .copied() 17 | } 18 | 19 | tango_benchmarks!( 20 | search_benchmarks(search_vec::), 21 | search_benchmarks(search_vec::), 22 | search_benchmarks(search_vec::), 23 | search_benchmarks(search_vec::) 24 | ); 25 | 26 | tango_main!(common::SETTINGS); 27 | -------------------------------------------------------------------------------- /examples/benches/tango-async.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 3 | use crate::test_funcs::factorial; 4 | use tango_bench::{ 5 | async_benchmark_fn, asynchronous::tokio::TokioRuntime, tango_benchmarks, tango_main, 6 | IntoBenchmarks, 7 | }; 8 | 9 | mod test_funcs; 10 | 11 | fn num_benchmarks() -> impl IntoBenchmarks { 12 | [async_benchmark_fn("factorial_async", TokioRuntime, |b| { 13 | b.iter(|| async { factorial(500) }) 14 | })] 15 | } 16 | 17 | tango_benchmarks!(num_benchmarks()); 18 | tango_main!(); 19 | -------------------------------------------------------------------------------- /examples/benches/tango-faster.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 3 | use crate::test_funcs::{factorial, sum}; 4 | use std::rc::Rc; 5 | use tango_bench::{benchmark_fn, tango_benchmarks, tango_main, IntoBenchmarks}; 6 | use test_funcs::{ 7 | create_str_benchmark, sort_unstable, str_count, str_take, vec_benchmarks, IndexedString, 8 | INPUT_TEXT, 9 | }; 10 | 11 | mod test_funcs; 12 | 13 | fn num_benchmarks() -> impl IntoBenchmarks { 14 | [ 15 | 
benchmark_fn("sum", |b| b.iter(|| sum(4950))), 16 | benchmark_fn("factorial", |b| b.iter(|| factorial(495))), 17 | ] 18 | } 19 | 20 | fn str_benchmarks() -> impl IntoBenchmarks { 21 | let input = Rc::new(IndexedString::from(INPUT_TEXT)); 22 | [ 23 | create_str_benchmark("str_length/random", &input, str_count), 24 | create_str_benchmark("str_length/random_limited", &input, |s| str_take(4950, s)), 25 | ] 26 | } 27 | 28 | tango_benchmarks!( 29 | str_benchmarks(), 30 | num_benchmarks(), 31 | vec_benchmarks(sort_unstable) 32 | ); 33 | tango_main!(); 34 | -------------------------------------------------------------------------------- /examples/benches/tango-slower.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(feature = "align", feature(fn_align))] 2 | 3 | use crate::test_funcs::{factorial, sum}; 4 | use std::rc::Rc; 5 | use tango_bench::{benchmark_fn, tango_benchmarks, tango_main, IntoBenchmarks}; 6 | use test_funcs::{ 7 | create_str_benchmark, sort_stable, str_count_rev, str_take, vec_benchmarks, IndexedString, 8 | INPUT_TEXT, 9 | }; 10 | 11 | mod test_funcs; 12 | 13 | fn num_benchmarks() -> impl IntoBenchmarks { 14 | [ 15 | benchmark_fn("sum", |b| b.iter(|| sum(5000))), 16 | benchmark_fn("factorial", |b| b.iter(|| factorial(500))), 17 | ] 18 | } 19 | 20 | fn str_benchmarks() -> impl IntoBenchmarks { 21 | let input = Rc::new(IndexedString::from(INPUT_TEXT)); 22 | [ 23 | create_str_benchmark("str_length/random", &input, str_count_rev), 24 | create_str_benchmark("str_length/random_limited", &input, |s| str_take(5000, s)), 25 | ] 26 | } 27 | 28 | tango_benchmarks!( 29 | str_benchmarks(), 30 | num_benchmarks(), 31 | vec_benchmarks(sort_stable) 32 | ); 33 | tango_main!(); 34 | -------------------------------------------------------------------------------- /examples/benches/test_funcs.rs: -------------------------------------------------------------------------------- 1 | use rand::{distributions::Standard, 
rngs::SmallRng, Rng, SeedableRng}; 2 | use std::{hint::black_box, rc::Rc}; 3 | use tango_bench::{benchmark_fn, Benchmark, IntoBenchmarks}; 4 | 5 | /// HTML page with a lot of chinese text to test UTF8 decoding speed 6 | #[allow(unused)] 7 | pub const INPUT_TEXT: &str = include_str!("./input.txt"); 8 | 9 | #[allow(unused)] 10 | pub(crate) fn create_str_benchmark( 11 | name: &'static str, 12 | input: &Rc, 13 | f: fn(&str) -> usize, 14 | ) -> Benchmark { 15 | let input = Rc::clone(input); 16 | benchmark_fn(name, move |b| { 17 | let mut rng = SmallRng::seed_from_u64(b.seed); 18 | let input = Rc::clone(&input); 19 | b.iter(move || f(random_substring(&input, &mut rng))) 20 | }) 21 | } 22 | 23 | fn random_substring<'a>(input: &'a IndexedString, rng: &mut impl Rng) -> &'a str { 24 | let length = 50_000; 25 | let indices = &input.indices; 26 | let start = rng.gen_range(0..indices.len() - length); 27 | let range = indices[start]..indices[start + length]; 28 | &input.string[range] 29 | } 30 | 31 | pub(crate) struct IndexedString { 32 | string: String, 33 | indices: Vec, 34 | } 35 | 36 | impl From<&str> for IndexedString { 37 | fn from(value: &str) -> Self { 38 | Self { 39 | string: value.to_owned(), 40 | indices: build_char_indices(value), 41 | } 42 | } 43 | } 44 | 45 | fn build_char_indices(text: &str) -> Vec { 46 | text.char_indices().map(|(idx, _)| idx).collect() 47 | } 48 | 49 | #[cfg_attr(feature = "align", repr(align(32)))] 50 | #[cfg_attr(feature = "align", inline(never))] 51 | #[allow(unused)] 52 | pub fn sum(n: usize) -> usize { 53 | let mut sum = 0; 54 | for i in 0..black_box(n) { 55 | sum += black_box(i); 56 | } 57 | sum 58 | } 59 | 60 | #[cfg_attr(feature = "align", repr(align(32)))] 61 | #[cfg_attr(feature = "align", inline(never))] 62 | #[allow(unused)] 63 | pub fn factorial(mut n: usize) -> usize { 64 | let mut result = 1usize; 65 | while n > 0 { 66 | result = result.wrapping_mul(black_box(n)); 67 | n -= 1; 68 | } 69 | result 70 | } 71 | 72 | #[cfg_attr(feature 
= "align", repr(align(32)))] 73 | #[cfg_attr(feature = "align", inline(never))] 74 | #[allow(unused)] 75 | #[allow(clippy::ptr_arg)] 76 | pub fn str_count_rev(s: &str) -> usize { 77 | let mut l = 0; 78 | for _ in s.chars().rev() { 79 | l += 1; 80 | } 81 | l 82 | } 83 | 84 | #[cfg_attr(feature = "align", repr(align(32)))] 85 | #[cfg_attr(feature = "align", inline(never))] 86 | #[allow(unused)] 87 | #[allow(clippy::ptr_arg)] 88 | pub fn str_count(s: &str) -> usize { 89 | let mut l = 0; 90 | for _ in s.chars() { 91 | l += 1; 92 | } 93 | l 94 | } 95 | 96 | #[cfg_attr(feature = "align", repr(align(32)))] 97 | #[cfg_attr(feature = "align", inline(never))] 98 | #[allow(unused)] 99 | #[allow(clippy::ptr_arg)] 100 | pub fn str_take(n: usize, s: &str) -> usize { 101 | s.chars().take(black_box(n)).count() 102 | } 103 | 104 | #[cfg_attr(feature = "align", repr(align(32)))] 105 | #[cfg_attr(feature = "align", inline(never))] 106 | #[allow(unused)] 107 | #[allow(clippy::ptr_arg)] 108 | pub fn sort_unstable(input: &Vec) -> T { 109 | let mut input = input.clone(); 110 | input.sort_unstable(); 111 | input[input.len() / 2] 112 | } 113 | 114 | #[cfg_attr(feature = "align", repr(align(32)))] 115 | #[cfg_attr(feature = "align", inline(never))] 116 | #[allow(unused)] 117 | #[allow(clippy::ptr_arg)] 118 | pub fn sort_stable(input: &Vec) -> T { 119 | let mut input = input.clone(); 120 | input.sort(); 121 | input[input.len() / 2] 122 | } 123 | 124 | #[allow(unused)] 125 | pub fn vec_benchmarks(f: impl Fn(&Vec) -> u64 + Copy + 'static) -> impl IntoBenchmarks { 126 | let mut benches = vec![]; 127 | for size in [100, 1_000, 10_000, 100_000] { 128 | benches.push(benchmark_fn(format!("sort/{}", size), move |b| { 129 | let input: Vec = SmallRng::seed_from_u64(b.seed) 130 | .sample_iter(Standard) 131 | .take(1000) 132 | .collect(); 133 | b.iter(move || f(&input)) 134 | })) 135 | } 136 | benches 137 | } 138 | -------------------------------------------------------------------------------- 
/examples/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-link-arg-benches=-rdynamic"); 3 | println!("cargo:rerun-if-changed=build.rs"); 4 | } 5 | -------------------------------------------------------------------------------- /examples/criterion-ordsearch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script is running ordsearch criterion benchmarks and producing log in target folder 4 | # Should be execute in the ordsearch directory 5 | 6 | set -eo pipefail 7 | 8 | FILE=./target/criterion.txt 9 | 10 | if [ -f "${FILE}" ]; then 11 | rm -f "${FILE}" 12 | fi 13 | 14 | for i in {1..30}; do 15 | cargo +nightly bench \ 16 | --bench=search_comparison \ 17 | --features=nightly \ 18 | "Search u8/(sorted_vec|ordsearch)/8$" >> "${FILE}" 19 | done 20 | 21 | for NAME in "u8/sorted_vec/8" "u8/ordsearch/8"; do 22 | echo "${NAME}" 23 | cat "${FILE}" | grep "${NAME}" | grep 'time:' | awk '{print $6}' 24 | done 25 | 26 | 27 | -------------------------------------------------------------------------------- /examples/ordsearch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eo pipefail 4 | 5 | cargo export target/benchmarks -- bench --bench='search-*' 6 | 7 | echo "OrderedCollection vs Vec" 8 | target/benchmarks/search_vec compare target/benchmarks/search_ord $@ 9 | 10 | echo "OrderedCollection vs BTree" 11 | target/benchmarks/search_btree compare target/benchmarks/search_ord $@ 12 | -------------------------------------------------------------------------------- /notebooks/empirical-observations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "cbfc36d7-9096-4fb4-bbc8-ae2758a2e4db", 7 | "metadata": {}, 8 | "outputs": [], 
9 | "source": [ 10 | "import scipy.stats as st\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import pandas as pd\n", 14 | "%config InlineBackend.figure_formats = ['svg']\n", 15 | "\n", 16 | "#def cum_var(input):\n", 17 | "# return np.array([np.var(input[:i+1]) for i in np.arange(0, len(input))])\n", 18 | "\n", 19 | "def cum_var(input):\n", 20 | " s = 0\n", 21 | " m = 0\n", 22 | " n = 0\n", 23 | " result = [0]\n", 24 | "\n", 25 | " for value in input:\n", 26 | " n += 1\n", 27 | " m_p = m;\n", 28 | " m += (value - m) / n\n", 29 | " s += (value - m) * (value - m_p)\n", 30 | " if n > 1:\n", 31 | " result.append(s / (n - 1))\n", 32 | " return np.array(result)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "id": "cef6ca34-a74c-4175-b415-fce63bb04906", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "d = pd.read_csv(\"../sum_50000-sum_50000.csv\", header=None, names=[\"base\", \"candidate\"])\n", 43 | "#d = pd.read_csv(\"../factorial_500-factorial_495.csv\", header=None, names=[\"base\", \"candidate\"])\n", 44 | "\n", 45 | "d" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "e5a882b1-86fc-44b4-8ec3-fccbe5146be6", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "fig = plt.figure()\n", 56 | "ax = fig.add_subplot()\n", 57 | "\n", 58 | "x = [np.min(d), np.max(d)]\n", 59 | "ax.set_yscale('log')\n", 60 | "#ax.set_xscale('log')\n", 61 | "ax.hist(d['candidate'] - d['base'], bins = 500)\n", 62 | "\n", 63 | "fig.show()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": "534303db-4627-459c-accc-ff5195beac03", 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "fig = plt.figure()\n", 74 | "ax = fig.add_subplot()\n", 75 | "\n", 76 | "x = [np.min(d), np.max(d)]\n", 77 | "ax.set_yscale('log')\n", 78 | "ax.set_xscale('log')\n", 79 | "ax.plot(x, x, color='red', linewidth=0.3)\n", 80 | 
"ax.scatter(d['base'], d['candidate'], s=2)\n", 81 | "\n", 82 | "fig.show()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "id": "578fca8c-e1b2-48e2-a175-a8d98c185d89", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "diff = np.abs(d['candidate'] - d['base'])\n", 93 | "diff = diff[np.abs(diff).argsort()]\n", 94 | "\n", 95 | "forward_var = cum_var(diff)\n", 96 | "ratio = np.roll(forward_var, -1) / forward_var" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "id": "3ba7aa8a-7ae8-4eae-864f-95ec16183647", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "#ax[1].set_ylim(0.9, 1.5)\n", 107 | "fig = plt.figure()\n", 108 | "ax = fig.subplots(2, 1)\n", 109 | "\n", 110 | "x = np.arange(0, len(diff))\n", 111 | "last_idx = 200\n", 112 | "#ax.set_yscale('log')\n", 113 | "#ax.set_xscale('log')\n", 114 | "#ax[0].set_ylim(-1000, 1000)\n", 115 | "factor = 10\n", 116 | "ax[0].plot(x[x % factor == 0], forward_var[x % factor == 0], linewidth=0.3)\n", 117 | "\n", 118 | "ax[1].plot(x[x % factor == 0][-1000:], ratio[x % factor == 0], linewidth=0.5, color='red')\n", 119 | "\n", 120 | "print(np.max(ratio[-100:]))\n", 121 | "\n", 122 | "fig.show()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "6e901144-0f45-450b-8f0d-e20ce89dfd7c", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "print(ratio[ratio > 1.05])\n", 133 | "print(x[ratio > 1.05])\n", 134 | "#print(ratio[-last_idx:])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "46b772c6-377d-4615-a849-078b77f1713b", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "print(diff[-800:])" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "79837283-9a2c-49b8-bf61-a2142f13b9d1", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "d = 
pd.read_csv(\"../sum_50000-sum_50000.csv\", header=None, names=[\"base\", \"candidate\"])\n", 155 | "#d = pd.read_csv(\"../factorial_500-factorial_500.csv\", header=None, names=[\"base\", \"candidate\"])\n", 156 | "\n", 157 | "diff = np.abs(d['candidate'] - d['base'])\n", 158 | "diff = diff[np.abs(diff).argsort()]\n", 159 | "\n", 160 | "forward_var = cum_var(diff)\n", 161 | "ratio = np.roll(forward_var, -1) / forward_var\n", 162 | "\n", 163 | "fig = plt.figure()\n", 164 | "ax = fig.subplots(1, 1)\n", 165 | "\n", 166 | "x = np.arange(0, len(diff))\n", 167 | "\n", 168 | "last = len(diff) // 1\n", 169 | "\n", 170 | "x = x[-last:]\n", 171 | "y = np.maximum(ratio[-last:], 1)\n", 172 | "y = y - 1\n", 173 | "ax.set_yscale('log')\n", 174 | "ax.plot(x, y, linewidth=0.5, color='red')\n", 175 | "ax.plot(x, 1 / (x - 1) * 10, linewidth=0.5, color='blue')\n", 176 | "\n", 177 | "fig.show()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "9079380c-8157-48f0-80b2-c8cf18b7b5b3", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "np.sqrt(len(d))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "3cbc0903-8425-41ba-8f93-e71bb470ffbc", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "len(d[d['base'] - d['candidate'] == 0])/len(d)" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3 (ipykernel)", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.11.5" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 5 222 | } 223 | -------------------------------------------------------------------------------- 
/notebooks/ordsearch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "e5023e12-a161-4793-899c-27f73e3833ad", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import scipy.stats as st\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%config InlineBackend.figure_formats = ['svg']" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 152, 19 | "id": "78d35383-8799-4d53-935a-933bae392108", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "D=pd.read_csv(\"../test-data\", sep=\";\", header=None)\n", 24 | "\n", 25 | "grouped = {}\n", 26 | "for row in D.iterrows():\n", 27 | " key = row[1][0]\n", 28 | " value = float(row[1][1])\n", 29 | " if not key in grouped:\n", 30 | " grouped[key] = []\n", 31 | " grouped[key].append(value)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 153, 37 | "id": "61488a42-ac86-41a8-9ef1-241c30db2fd1", 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "image/svg+xml": [ 43 | "\n", 44 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " 2023-12-06T04:08:18.804890\n", 52 | " image/svg+xml\n", 53 | " \n", 54 | " \n", 55 | " Matplotlib v3.8.0, https://matplotlib.org/\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 110 | " \n", 120 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " 
\n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 417 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | 
" \n", 531 | " \n", 532 | " \n", 553 | " \n", 570 | " \n", 583 | " \n", 604 | " \n", 629 | " \n", 650 | " \n", 669 | " \n", 676 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 763 | " \n", 797 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 
| " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | "\n" 1145 | ], 1146 | "text/plain": [ 1147 | "
" 1148 | ] 1149 | }, 1150 | "metadata": {}, 1151 | "output_type": "display_data" 1152 | } 1153 | ], 1154 | "source": [ 1155 | "names = list(grouped.keys())\n", 1156 | "box_plot_data = list(grouped.values())\n", 1157 | "\n", 1158 | "fig, ax = plt.subplots(figsize=(6, 2))\n", 1159 | "\n", 1160 | "#fig.title(\"Fruit Growth Distribution\")\n", 1161 | "ax.boxplot(box_plot_data, labels=names, whis=(0.5,99.5), showfliers=False, vert=0, widths=0.8, sym='.', flierprops=dict(markersize=4))\n", 1162 | "# ax.set_xlim([-75,-64])\n", 1163 | "# ax.set_xticks(list(range(-90, -60, 1)))\n", 1164 | "plt.grid(visible=True, which='major', axis='x', color='grey', linestyle='--', linewidth=0.5)\n", 1165 | "#ax.set_yticks()\n", 1166 | "plt.plot();" 1167 | ] 1168 | } 1169 | ], 1170 | "metadata": { 1171 | "kernelspec": { 1172 | "display_name": "Python 3 (ipykernel)", 1173 | "language": "python", 1174 | "name": "python3" 1175 | }, 1176 | "language_info": { 1177 | "codemirror_mode": { 1178 | "name": "ipython", 1179 | "version": 3 1180 | }, 1181 | "file_extension": ".py", 1182 | "mimetype": "text/x-python", 1183 | "name": "python", 1184 | "nbconvert_exporter": "python", 1185 | "pygments_lexer": "ipython3", 1186 | "version": "3.11.5" 1187 | } 1188 | }, 1189 | "nbformat": 4, 1190 | "nbformat_minor": 5 1191 | } 1192 | -------------------------------------------------------------------------------- /pair-test.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg enhanced size 1200,400 lw 1.5 2 | set output ARG2 3 | set grid 4 | 5 | set datafile separator ', 6 | 7 | set multiplot 8 | 9 | set ylabel "time (us.)" 10 | set xlabel "observation no" 11 | 12 | set title "Execution time" 13 | set size 0.6,1 14 | set origin 0,0 15 | plot ARG1 using ($1/$3 / 1000) title "base" with linespoints pt 1 ps 0.3 lw 0.8 lc 'dark-red', \ 16 | ARG1 using (-$2/$3 / 1000) title "-candidate" with linespoints pt 1 ps 0.3 lw 0.8 lc 'dark-green', \ 17 | ARG1 using (($2 - 
$1) / $3 / 1000) title "(candidate-baseline)" with lines lw 0.8 lc 'navy' 18 | 19 | set xtics autofreq 20 | set ytics autofreq 21 | 22 | set ylabel "time (us.) - candidate" 23 | set xlabel "time (us.) - base" 24 | 25 | f(x) = x 26 | 27 | unset title 28 | set size 0.4,1 29 | set origin 0.6,0 30 | unset key 31 | 32 | plot f(x) notitle with lines linestyle 1 lc "red" dt 4 lw 1, \ 33 | ARG1 using ($1 / $3 / 1000):($2 / $3 / 1000) title "time to execute" with points pt 1 ps 0.5 lc rgb 'dark-red' 34 | -------------------------------------------------------------------------------- /render-plot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gnuplot -c plot.gnuplot "$1" "plot.svg" && osascript -e 'tell application "Google Chrome" to tell the active tab of its first window to reload' 4 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly" 3 | -------------------------------------------------------------------------------- /scripts/aws-bench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This scripts is automating experiment on an AWS virtual machine 4 | # 5 | # The goal of an experiment is to measure performance variance reported by both harnesses (tango/criterion). 6 | # UTF8 counting routine is used as a test function. The first one is counting up to 5000 characters in a string 7 | # the second is up to 4950. We are expecting to see 1% difference in performance of those two functions 8 | 9 | CRITERION=./target/criterion.txt 10 | TANGO=./target/tango.txt 11 | TANGO_FILTERED=./target/tango-filtered.txt 12 | 13 | # Building and exporting all benchmarks. Align feature is used to disable inlining and to force 32-byte aligning 14 | # of a tested functions. 
Without this trick, the performance of the functions on Intel platforms is heavily influenced
-------------------------------------------------------------------------------- /scripts/calibrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script runs tango-faster/tango-slower pair of benchmarks several times 4 | # and reports how many times the results were statistically significant. Because 5 | # those benchmarks was intentianlly constructed with performance difference, 6 | # the bigger results the better (like [10/10]). 7 | 8 | set -eo pipefail 9 | 10 | cargo export target/benchmarks -- bench --bench='tango-*' --features=align 11 | 12 | CMD="target/benchmarks/tango_faster compare target/benchmarks/tango_slower $@" 13 | OUTPUT="" 14 | ITERATIONS=10 15 | 16 | for (( i=1; i<=ITERATIONS; i++ )) 17 | do 18 | echo -n "." 19 | OUTPUT=$(paste <(echo "$OUTPUT") <($CMD)) 20 | done 21 | echo 22 | 23 | echo "Results:" 24 | echo "$OUTPUT" | awk -v iter="$ITERATIONS" -F ' {2,}' '{printf(" [%3d/%3d] %s\n", gsub(/\*/,"", $0), iter, $1)}' 25 | -------------------------------------------------------------------------------- /scripts/criterion.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | FILE=./target/criterion.txt 5 | 6 | if [ -f "${FILE}" ]; then 7 | rm -f "${FILE}" 8 | fi 9 | 10 | cargo export ./target/benchmarks -- bench --bench=criterion 11 | 12 | time ( 13 | for i in {1..30}; do 14 | ./target/benchmarks/criterion --bench str_length_495 \ 15 | --warm-up-time 1 --measurement-time 1 >> "${FILE}" 16 | ./target/benchmarks/criterion --bench str_length_500 \ 17 | --warm-up-time 1 --measurement-time 1 >> "${FILE}" 18 | done 19 | ) 20 | 21 | paste \ 22 | <(cat "${FILE}" | grep -A1 "str_length_500" | grep 'time:' | awk '{print $5}') \ 23 | <(cat "${FILE}" | grep -A1 "str_length_495" | grep 'time:' | awk '{print $5}') | \ 24 | awk '{print ($2 - $1) / $1 * 100}' 25 | 
-------------------------------------------------------------------------------- /scripts/describe.py: -------------------------------------------------------------------------------- 1 | import colorama 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | from colorama import Fore, Back, Style 6 | 7 | def main(): 8 | colorama.init() 9 | max_length = [len(name) for name in sys.argv[1:]] 10 | for file in sys.argv[1:]: 11 | df = pd.read_csv(file, header=None) 12 | time = (df[1] - df[0]) / df[2] 13 | 14 | file_name = (file + " ").ljust(max(max_length) + 2, '.') 15 | 16 | print(f"{Fore.LIGHTBLACK_EX}{file_name} {Style.RESET_ALL}", end="") 17 | q1 = np.percentile(time, 25) 18 | q3 = np.percentile(time, 75) 19 | print(f"{np.percentile(time, 5):>10.1f} {time.mean():>10.1f} {np.percentile(time, 95):>10.1f} [IQR:{q3 - q1:>10.1f}]") 20 | 21 | if __name__ == "__main__": 22 | main(); 23 | -------------------------------------------------------------------------------- /scripts/linear-sampling-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | cargo +nightly export ./target/benchmarks -- bench --bench='search-*' 5 | 6 | mkdir -p target/dump 7 | rm -f target/dump/*.csv 8 | 9 | for i in {1..30}; do 10 | for sampler in flat linear random; do 11 | printf "%10s : " "$sampler" 12 | ./target/benchmarks/search_ord compare ./target/benchmarks/search_vec -t 1 \ 13 | -f 'search/u32/1024/nodup' -d target/dump --sampler="$sampler" 14 | mv "target/dump/search-u32-1024-nodup.csv" "target/dump/$sampler-$i.csv" 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /scripts/ordsearch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | cargo +nightly export target/benchmarks -- bench --bench="search-*" 5 | 6 | pushd ../ordsearch/ 7 | cargo +nightly export 
../tango/target/benchmarks -- bench --bench=search_comparison --features=nightly 8 | popd 9 | 10 | rm -f target/tango.txt 11 | rm -f target/criterion.txt 12 | 13 | # Patching PIE executable if needed 14 | target/benchmarks/search_ord compare target/benchmarks/search_vec -f "*/u32/1024/nodup" -t 0.1 > /dev/null 15 | if [ -f target/benchmarks/search_vec.patched ]; then 16 | mv target/benchmarks/search_vec.patched target/benchmarks/search_vec 17 | chmod +x target/benchmarks/search_vec 18 | fi 19 | 20 | for i in {1..1000}; do 21 | # Tango benchmarks 22 | ( 23 | for time in 0.1 0.3 0.5 1.0; do 24 | target/benchmarks/search_ord compare target/benchmarks/search_vec -f "*/u32/1024/nodup" -t "$time" -o \ 25 | | awk -v OFS=';' -v FS=" " -v time="$time" '{print "tango/u32/1024/" time "s", $NF}' | tr -d '%*' 26 | done 27 | ) | tee -a target/tango.txt 28 | 29 | # Criterion benchmarks 30 | target/benchmarks/search_comparison --bench "Search u32/(ordsearch|sorted_vec)/1024" \ 31 | | tee -a target/criterion.txt 32 | done 33 | 34 | # Reporting code 35 | paste \ 36 | <(cat target/criterion.txt | grep -A1 'sorted_vec' | grep 'time:' | awk '{print $4}') \ 37 | <(cat target/criterion.txt | grep -A1 'ordsearch' | grep 'time:' | awk '{print $4}') \ 38 | | awk 'OFS=";" {print "criterion/u32/1024", ($2 - $1) / $1 * 100}' > target/criterion_u32_1024.txt 39 | (cat target/criterion_u32_1024.txt; cat target/tango.txt) > target/results.txt 40 | 41 | (cat target/criterion.txt | grep 'change:' | awk 'OFS=";" {print "criterion/u32/1024", $3}'; cat target/tango.txt ) | tr -d '%' > target/results.txt 42 | -------------------------------------------------------------------------------- /scripts/sensitivity-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | cargo +nightly export ./target/benchmarks -- bench --bench='tango-*' 5 | 6 | mkdir -p target/dump 7 | rm -f target/dump/*.csv 8 | 9 | 
TARGET=target/benchmarks/tango_faster 10 | 11 | "$TARGET" compare "$TARGET" -d target/dump $@ 12 | python3 ./scripts/describe.py target/dump/*.csv 13 | -------------------------------------------------------------------------------- /scripts/tango.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | FILE=./target/tango.txt 5 | 6 | if [ -f "${FILE}" ]; then 7 | rm -f "${FILE}" 8 | fi 9 | 10 | cargo export target/benchmarks -- bench --bench='tango-*' 11 | 12 | time ( 13 | for i in {1..30}; do 14 | ./target/benchmarks/tango_faster compare ./target/benchmarks/tango_slower \ 15 | -t 1 -f 'str_length_limit' >> "${FILE}" 16 | done 17 | ) 18 | 19 | cat "${FILE}" | awk '{print $(NF)}' | sed 's/%//' 20 | -------------------------------------------------------------------------------- /tango-bench/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tango-bench" 3 | version = "0.6.0" 4 | edition = "2021" 5 | license = "MIT" 6 | description = "Tango benchmarking harness" 7 | homepage = "https://github.com/bazhenov/tango" 8 | documentation = "https://docs.rs/tango-bench" 9 | repository = "https://github.com/bazhenov/tango" 10 | readme = "../README.md" 11 | categories = ["development-tools", "development-tools::profiling"] 12 | keywords = ["benchmarks", "performance"] 13 | 14 | [dependencies] 15 | anyhow = "1.0.75" 16 | clap = { version = "4.4.11", features = ["derive"] } 17 | colorz = { version = "1.1", features = ["supports-color"] } 18 | glob-match = "0.2" 19 | libloading = "0.8" 20 | log = "0.4.20" 21 | num-traits = "0.2" 22 | rand = { version = "0.8", features = ["small_rng"] } 23 | thiserror = "1.0.50" 24 | alloca = "0.4" 25 | tokio = { version = "1.37.0", features = ["rt"], optional = true } 26 | 27 | [target.'cfg(target_os = "linux")'.dependencies] 28 | goblin = "0.7.1" 29 | scroll = "0.11" 30 | tempfile = "3.8" 31 | 32 | 
[features] 33 | hw-timer = [] 34 | async = [] 35 | async-tokio = ['async', 'dep:tokio'] 36 | 37 | [[bench]] 38 | name = "tango" 39 | harness = false 40 | -------------------------------------------------------------------------------- /tango-bench/benches/tango.rs: -------------------------------------------------------------------------------- 1 | use rand::{distributions::Standard, rngs::SmallRng, Rng, SeedableRng}; 2 | use tango_bench::{ 3 | benchmark_fn, iqr_variance_thresholds, tango_benchmarks, tango_main, IntoBenchmarks, Summary, 4 | }; 5 | 6 | fn summary_benchmarks() -> impl IntoBenchmarks { 7 | [benchmark_fn("summary", move |b| { 8 | let rnd = SmallRng::seed_from_u64(b.seed); 9 | let input: Vec = rnd.sample_iter(Standard).take(1000).collect(); 10 | b.iter(move || Summary::from(&input)) 11 | })] 12 | } 13 | 14 | fn iqr_interquartile_range_benchmarks() -> impl IntoBenchmarks { 15 | [benchmark_fn("iqr", move |b| { 16 | let rnd = SmallRng::seed_from_u64(b.seed); 17 | let input: Vec = rnd.sample_iter(Standard).take(1000).collect(); 18 | b.iter(move || iqr_variance_thresholds(input.clone())) 19 | })] 20 | } 21 | 22 | fn empty_benchmarks() -> impl IntoBenchmarks { 23 | [benchmark_fn("measure_empty_function", move |p| { 24 | let mut bench = benchmark_fn("_", |b| b.iter(|| 42)); 25 | let mut state = bench.prepare_state(p.seed); 26 | p.iter(move || state.measure(1)) 27 | })] 28 | } 29 | 30 | tango_benchmarks!( 31 | empty_benchmarks(), 32 | summary_benchmarks(), 33 | iqr_interquartile_range_benchmarks() 34 | ); 35 | tango_main!(); 36 | -------------------------------------------------------------------------------- /tango-bench/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("cargo:rustc-link-arg-benches=-rdynamic"); 3 | println!("cargo:rerun-if-changed=build.rs"); 4 | } 5 | -------------------------------------------------------------------------------- /tango-bench/src/cli.rs: 
-------------------------------------------------------------------------------- 1 | //! Contains functionality of a `cargo bench` harness 2 | use crate::{ 3 | dylib::{FunctionIdx, Spi, SpiModeKind}, 4 | CacheFirewall, Error, FlatSampleLength, LinearSampleLength, MeasurementSettings, 5 | RandomSampleLength, SampleLength, SampleLengthKind, 6 | }; 7 | use anyhow::{bail, Context}; 8 | use clap::Parser; 9 | use colorz::mode::{self, Mode}; 10 | use core::fmt; 11 | use glob_match::glob_match; 12 | use std::{ 13 | env::{self, args, temp_dir}, 14 | fmt::Display, 15 | fs, 16 | io::{stderr, Write}, 17 | num::NonZeroUsize, 18 | path::{Path, PathBuf}, 19 | process::{Command, ExitCode, Stdio}, 20 | str::FromStr, 21 | time::Duration, 22 | }; 23 | 24 | pub type Result = anyhow::Result; 25 | pub(crate) type StdResult = std::result::Result; 26 | 27 | #[derive(Parser, Debug)] 28 | enum BenchmarkMode { 29 | List { 30 | #[command(flatten)] 31 | bench_flags: CargoBenchFlags, 32 | }, 33 | Compare(PairedOpts), 34 | Solo(SoloOpts), 35 | } 36 | 37 | #[derive(Parser, Debug)] 38 | struct PairedOpts { 39 | #[command(flatten)] 40 | bench_flags: CargoBenchFlags, 41 | 42 | /// Path to the executable to test against. 
Tango will test against itself if no executable given 43 | path: Option, 44 | 45 | /// write CSV dumps of all the measurements in a given location 46 | #[arg(short = 'd', long = "dump")] 47 | path_to_dump: Option, 48 | 49 | /// generate gnuplot graphs for each test (requires --dump [path] to be specified) 50 | #[arg(short = 'g', long = "gnuplot")] 51 | gnuplot: bool, 52 | 53 | /// seed for the random number generator or omit to use a random seed 54 | #[arg(long = "seed")] 55 | seed: Option, 56 | 57 | /// Number of samples to take for each test 58 | #[arg(short = 's', long = "samples")] 59 | samples: Option, 60 | 61 | /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random) 62 | #[arg(long = "sampler")] 63 | sampler: Option, 64 | 65 | /// Duration of each sample in seconds 66 | #[arg(short = 't', long = "time")] 67 | time: Option, 68 | 69 | /// Fail if the difference between the two measurements is greater than the given threshold in percent 70 | #[arg(long = "fail-threshold")] 71 | fail_threshold: Option, 72 | 73 | /// Should we terminate early if --fail-threshold is exceed 74 | #[arg(long = "fail-fast")] 75 | fail_fast: bool, 76 | 77 | /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance 78 | /// (size in Kbytes) 79 | #[arg(long = "cache-firewall")] 80 | cache_firewall: Option, 81 | 82 | /// Perform a randomized offset to the stack frame for each sample. 83 | /// (size in bytes) 84 | #[arg(long = "randomize-stack")] 85 | randomize_stack: Option, 86 | 87 | /// Delegate control back to the OS before each sample 88 | #[arg(long = "yield-before-sample")] 89 | yield_before_sample: Option, 90 | 91 | /// Filter tests by name (eg. 
'*/{sorted,unsorted}/[0-9]*') 92 | #[arg(short = 'f', long = "filter")] 93 | filter: Option, 94 | 95 | /// Report only statistically significant results 96 | #[arg(short = 'g', long = "significant-only", default_value_t = false)] 97 | significant_only: bool, 98 | 99 | /// Enable outlier detection 100 | #[arg(short = 'o', long = "filter-outliers")] 101 | filter_outliers: bool, 102 | 103 | /// Perform warmup iterations before taking measurements (1/10 of sample iterations) 104 | #[arg(long = "warmup")] 105 | warmup_enabled: Option, 106 | 107 | #[arg(short = 'p', long = "parallel")] 108 | parallel: bool, 109 | 110 | /// Quiet mode 111 | #[arg(short = 'q')] 112 | quiet: bool, 113 | 114 | #[arg(short = 'v', long = "verbose", default_value_t = false)] 115 | verbose: bool, 116 | } 117 | 118 | #[derive(Parser, Debug)] 119 | struct SoloOpts { 120 | #[command(flatten)] 121 | bench_flags: CargoBenchFlags, 122 | 123 | /// seed for the random number generator or omit to use a random seed 124 | #[arg(long = "seed")] 125 | seed: Option, 126 | 127 | /// Number of samples to take for each test 128 | #[arg(short = 's', long = "samples")] 129 | samples: Option, 130 | 131 | /// The strategy to decide the number of iterations to run for each sample (values: flat, linear, random) 132 | #[arg(long = "sampler")] 133 | sampler: Option, 134 | 135 | /// Duration of each sample in seconds 136 | #[arg(short = 't', long = "time")] 137 | time: Option, 138 | 139 | /// Perform a read of a dummy data between samsples to minimize the effect of cache on the performance 140 | /// (size in Kbytes) 141 | #[arg(long = "cache-firewall")] 142 | cache_firewall: Option, 143 | 144 | /// Perform a randomized offset to the stack frame for each sample. 
145 | /// (size in bytes) 146 | #[arg(long = "randomize-stack")] 147 | randomize_stack: Option, 148 | 149 | /// Delegate control back to the OS before each sample 150 | #[arg(long = "yield-before-sample")] 151 | yield_before_sample: Option, 152 | 153 | /// Filter tests by name (eg. '*/{sorted,unsorted}/[0-9]*') 154 | #[arg(short = 'f', long = "filter")] 155 | filter: Option, 156 | 157 | /// Perform warmup iterations before taking measurements (1/10 of sample iterations) 158 | #[arg(long = "warmup")] 159 | warmup_enabled: Option, 160 | 161 | /// Quiet mode 162 | #[arg(short = 'q')] 163 | quiet: bool, 164 | 165 | #[arg(short = 'v', long = "verbose", default_value_t = false)] 166 | verbose: bool, 167 | } 168 | 169 | #[derive(Parser, Debug)] 170 | #[command(author, version, about, long_about = None)] 171 | struct Opts { 172 | #[command(subcommand)] 173 | subcommand: Option, 174 | 175 | #[command(flatten)] 176 | bench_flags: CargoBenchFlags, 177 | 178 | #[arg(long = "color", default_value = "detect")] 179 | coloring_mode: String, 180 | } 181 | 182 | impl FromStr for SampleLengthKind { 183 | type Err = Error; 184 | 185 | fn from_str(s: &str) -> StdResult { 186 | match s { 187 | "flat" => Ok(SampleLengthKind::Flat), 188 | "linear" => Ok(SampleLengthKind::Linear), 189 | "random" => Ok(SampleLengthKind::Random), 190 | _ => Err(Error::UnknownSamplerType), 191 | } 192 | } 193 | } 194 | 195 | /// Definition of the flags required to comply with `cargo bench` calling conventions. 
196 | #[derive(Parser, Debug, Clone)] 197 | struct CargoBenchFlags { 198 | #[arg(long = "bench", default_value_t = true)] 199 | bench: bool, 200 | } 201 | 202 | pub fn run(settings: MeasurementSettings) -> Result { 203 | let opts = Opts::parse(); 204 | 205 | match Mode::from_str(&opts.coloring_mode) { 206 | Ok(coloring_mode) => mode::set_coloring_mode(coloring_mode), 207 | Err(_) => eprintln!("[WARN] Invalid coloring mode: {}", opts.coloring_mode), 208 | } 209 | 210 | let subcommand = opts.subcommand.unwrap_or(BenchmarkMode::List { 211 | bench_flags: opts.bench_flags, 212 | }); 213 | 214 | match subcommand { 215 | BenchmarkMode::List { bench_flags: _ } => { 216 | let spi = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?; 217 | for func in spi.tests() { 218 | println!("{}", func.name); 219 | } 220 | Ok(ExitCode::SUCCESS) 221 | } 222 | BenchmarkMode::Compare(opts) => paired_test::run_test(opts, settings), 223 | BenchmarkMode::Solo(opts) => solo_test::run_test(opts, settings), 224 | } 225 | } 226 | 227 | // Automatically removes a file when goes out of scope 228 | struct AutoDelete(PathBuf); 229 | 230 | impl std::ops::Deref for AutoDelete { 231 | type Target = PathBuf; 232 | 233 | fn deref(&self) -> &Self::Target { 234 | &self.0 235 | } 236 | } 237 | 238 | impl Drop for AutoDelete { 239 | fn drop(&mut self) { 240 | if let Err(e) = fs::remove_file(&self.0) { 241 | eprintln!("Failed to delete file {}: {}", self.0.display(), e); 242 | } 243 | } 244 | } 245 | 246 | fn create_loop_mode(samples: Option, time: Option) -> Result { 247 | let loop_mode = match (samples, time) { 248 | (Some(samples), None) => LoopMode::Samples(samples.into()), 249 | (None, Some(time)) => LoopMode::Time(Duration::from_millis((time * 1000.) 
as u64)), 250 | (None, None) => LoopMode::Time(Duration::from_millis(100)), 251 | (Some(_), Some(_)) => bail!("-t and -s are mutually exclusive"), 252 | }; 253 | Ok(loop_mode) 254 | } 255 | 256 | #[derive(Clone, Copy)] 257 | enum LoopMode { 258 | Samples(usize), 259 | Time(Duration), 260 | } 261 | 262 | impl LoopMode { 263 | fn should_continue(&self, iter_no: usize, loop_time: Duration) -> bool { 264 | match self { 265 | LoopMode::Samples(samples) => iter_no < *samples, 266 | LoopMode::Time(duration) => loop_time < *duration, 267 | } 268 | } 269 | } 270 | 271 | mod solo_test { 272 | use super::*; 273 | use crate::{dylib::Spi, CacheFirewall, Summary}; 274 | use alloca::with_alloca; 275 | use rand::{distributions, rngs::SmallRng, Rng, SeedableRng}; 276 | use std::thread; 277 | 278 | pub(super) fn run_test(opts: SoloOpts, mut settings: MeasurementSettings) -> Result { 279 | let SoloOpts { 280 | bench_flags: _, 281 | quiet: _, 282 | verbose: _, 283 | filter, 284 | samples, 285 | time, 286 | seed, 287 | sampler, 288 | cache_firewall, 289 | yield_before_sample, 290 | warmup_enabled, 291 | randomize_stack, 292 | } = opts; 293 | 294 | let mut spi_self = Spi::for_self(SpiModeKind::Synchronous).ok_or(Error::SpiSelfWasMoved)?; 295 | 296 | settings.cache_firewall = cache_firewall; 297 | settings.randomize_stack = randomize_stack; 298 | 299 | if let Some(warmup_enabled) = warmup_enabled { 300 | settings.warmup_enabled = warmup_enabled; 301 | } 302 | if let Some(yield_before_sample) = yield_before_sample { 303 | settings.yield_before_sample = yield_before_sample; 304 | } 305 | if let Some(sampler) = sampler { 306 | settings.sampler_type = sampler; 307 | } 308 | 309 | let filter = filter.as_deref().unwrap_or(""); 310 | let loop_mode = create_loop_mode(samples, time)?; 311 | 312 | let test_names = spi_self 313 | .tests() 314 | .iter() 315 | .map(|t| &t.name) 316 | .cloned() 317 | .collect::>(); 318 | for func_name in test_names { 319 | if !filter.is_empty() && !glob_match(filter, 
&func_name) { 320 | continue; 321 | } 322 | 323 | let result = run_solo_test(&mut spi_self, &func_name, settings, seed, loop_mode)?; 324 | 325 | reporting::default_reporter_solo(&func_name, &result); 326 | } 327 | 328 | Ok(ExitCode::SUCCESS) 329 | } 330 | 331 | fn run_solo_test( 332 | spi: &mut Spi, 333 | test_name: &str, 334 | settings: MeasurementSettings, 335 | seed: Option, 336 | loop_mode: LoopMode, 337 | ) -> Result> { 338 | const TIME_SLICE_MS: u32 = 10; 339 | 340 | let firewall = settings 341 | .cache_firewall 342 | .map(|s| s * 1024) 343 | .map(CacheFirewall::new); 344 | let baseline_func = spi.lookup(test_name).ok_or(Error::InvalidTestName)?; 345 | 346 | let mut spi_func = TestedFunction::new(spi, baseline_func.idx); 347 | 348 | let seed = seed.unwrap_or_else(rand::random); 349 | 350 | spi_func.prepare_state(seed); 351 | let mut iterations_per_sample = (spi_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1); 352 | let mut sampler = create_sampler(&settings, seed); 353 | 354 | let mut rng = SmallRng::seed_from_u64(seed); 355 | let stack_offset_distr = settings 356 | .randomize_stack 357 | .map(|offset| distributions::Uniform::new(0, offset)); 358 | 359 | let mut i = 0; 360 | 361 | let mut sample_iterations = vec![]; 362 | 363 | if let LoopMode::Samples(samples) = loop_mode { 364 | sample_iterations.reserve(samples); 365 | spi_func.samples.reserve(samples); 366 | } 367 | 368 | let mut loop_time = Duration::from_secs(0); 369 | let mut loop_iterations = 0; 370 | while loop_mode.should_continue(i, loop_time) { 371 | if loop_time > Duration::from_millis(100) { 372 | // correcting time slice estimates 373 | iterations_per_sample = 374 | loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize; 375 | } 376 | let iterations = sampler.next_sample_iterations(i, iterations_per_sample); 377 | loop_iterations += iterations; 378 | let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1)); 379 | 380 | if 
settings.yield_before_sample {
                thread::yield_now();
            }

            // Re-seed the haystack only every `samples_per_haystack` samples.
            let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed);

            prepare_func(
                prepare_state_seed,
                &mut spi_func,
                warmup_iterations,
                firewall.as_ref(),
            );

            // Allocate a custom stack frame during runtime, to try to offset alignment of the stack.
            if let Some(distr) = stack_offset_distr {
                with_alloca(rng.sample(distr), |_| {
                    spi_func.measure(iterations);
                });
            } else {
                spi_func.measure(iterations);
            }

            loop_time += Duration::from_nanos(spi_func.read_sample());
            sample_iterations.push(iterations);
            i += 1;
        }

        // Normalize each sample to time-per-iteration before summarizing.
        let samples = spi_func
            .samples
            .iter()
            .zip(sample_iterations.iter())
            .map(|(sample, iterations)| *sample as f64 / *iterations as f64)
            .collect::<Vec<_>>();
        Ok(Summary::from(&samples).unwrap())
    }
}

mod paired_test {
    use super::*;
    use crate::{calculate_run_result, CacheFirewall, RunResult};
    use alloca::with_alloca;
    use fs::File;
    use rand::{distributions, rngs::SmallRng, Rng, SeedableRng};
    use std::{
        io::{self, BufWriter},
        mem, thread,
    };

    /// Runs the paired (baseline vs candidate) comparison for every matching test.
    pub(super) fn run_test(
        opts: PairedOpts,
        mut settings: MeasurementSettings,
    ) -> Result<ExitCode> {
        let PairedOpts {
            bench_flags: _,
            path,
            verbose,
            filter,
            samples,
            time,
            filter_outliers,
            path_to_dump,
            gnuplot,
            fail_threshold,
            fail_fast,
            significant_only,
            seed,
            sampler,
            cache_firewall,
            yield_before_sample,
            warmup_enabled,
            parallel,
            quiet,
            randomize_stack,
        } = opts;
        // Default to comparing against our own executable when no path is given.
        let mut path = path
            .or_else(|| args().next().map(PathBuf::from))
            .expect("No path given");
        if path.is_relative() {
            // Resolving paths relative to PWD if given
            if let Ok(pwd) = env::var("PWD") {
                path = PathBuf::from(pwd).join(path)
            }
        };

        #[cfg(target_os = "linux")]
        let path = crate::linux::patch_pie_binary_if_needed(&path)?.unwrap_or(path);

        let mode = if parallel {
            SpiModeKind::Asynchronous
        } else {
            SpiModeKind::Synchronous
        };

        let mut spi_self = Spi::for_self(mode).ok_or(Error::SpiSelfWasMoved)?;
        let mut spi_lib = Spi::for_library(path, mode);

        settings.filter_outliers = filter_outliers;
        settings.cache_firewall = cache_firewall;
        settings.randomize_stack = randomize_stack;

        if let Some(warmup_enabled) = warmup_enabled {
            settings.warmup_enabled = warmup_enabled;
        }
        if let Some(yield_before_sample) = yield_before_sample {
            settings.yield_before_sample = yield_before_sample;
        }
        if let Some(sampler) = sampler {
            settings.sampler_type = sampler;
        }

        let filter = filter.as_deref().unwrap_or("");
        let loop_mode = create_loop_mode(samples, time)?;

        let mut exit_code = ExitCode::SUCCESS;

        if let Some(path) = &path_to_dump {
            if !path.exists() {
                fs::create_dir_all(path)?;
            }
        }
        if gnuplot && path_to_dump.is_none() {
            eprintln!("warn: --gnuplot requires -d to be specified. No plots will be generated")
        }

        let mut sample_dumps = vec![];

        let test_names = spi_self
            .tests()
            .iter()
            .map(|t| &t.name)
            .cloned()
            .collect::<Vec<_>>();
        for func_name in test_names {
            if !filter.is_empty() && !glob_match(filter, &func_name) {
                continue;
            }

            // Skip tests that exist only in one of the two executables.
            if spi_lib.lookup(&func_name).is_none() {
                if !quiet {
                    writeln!(stderr(), "{} skipped...", &func_name)?;
                }
                continue;
            }

            let (result, sample_dump) = run_paired_test(
                &mut spi_lib,
                &mut spi_self,
                &func_name,
                settings,
                seed,
                loop_mode,
                path_to_dump.as_ref(),
            )?;

            if let Some(dump) = sample_dump {
                sample_dumps.push(dump);
            }

            if result.diff_estimate.significant || !significant_only {
                if verbose {
                    reporting::verbose_reporter(&result);
                } else {
                    reporting::default_reporter(&result);
                }
            }

            if result.diff_estimate.significant {
                if let Some(threshold) = fail_threshold {
                    if result.diff_estimate.pct >= threshold {
                        eprintln!(
                            "[ERROR] Performance regressed {:+.1}% >= {:.1}% - test: {}",
                            result.diff_estimate.pct, threshold, func_name
                        );
                        if fail_fast {
                            return Ok(ExitCode::FAILURE);
                        } else {
                            exit_code = ExitCode::FAILURE;
                        }
                    }
                }
            }
        }

        if let Some(path_to_dump) = path_to_dump {
            if gnuplot && !sample_dumps.is_empty() {
                generate_plots(&path_to_dump, sample_dumps.as_slice())?;
            }
        }

        Ok(exit_code)
    }

    /// Measure the difference in performance of two functions
    ///
    /// Provides a way to save a raw dump of measurements into directory
    ///
    /// The format is as follows
    /// ```txt
    /// b_1,c_1
    /// b_2,c_2
    /// ...
581 | /// b_n,c_n 582 | /// ``` 583 | /// where `b_1..b_n` are baseline absolute time (in nanoseconds) measurements 584 | /// and `c_1..c_n` are candidate time measurements 585 | /// 586 | /// Returns a statistical results of a test run and path to raw samples of sample dump was requested 587 | fn run_paired_test( 588 | baseline: &mut Spi, 589 | candidate: &mut Spi, 590 | test_name: &str, 591 | settings: MeasurementSettings, 592 | seed: Option, 593 | loop_mode: LoopMode, 594 | samples_dump_path: Option<&PathBuf>, 595 | ) -> Result<(RunResult, Option)> { 596 | const TIME_SLICE_MS: u32 = 10; 597 | 598 | let firewall = settings 599 | .cache_firewall 600 | .map(|s| s * 1024) 601 | .map(CacheFirewall::new); 602 | let baseline_func = baseline.lookup(test_name).ok_or(Error::InvalidTestName)?; 603 | let candidate_func = candidate.lookup(test_name).ok_or(Error::InvalidTestName)?; 604 | 605 | let mut baseline = TestedFunction::new(baseline, baseline_func.idx); 606 | let mut candidate = TestedFunction::new(candidate, candidate_func.idx); 607 | 608 | let mut a_func = &mut baseline; 609 | let mut b_func = &mut candidate; 610 | 611 | let seed = seed.unwrap_or_else(rand::random); 612 | 613 | a_func.prepare_state(seed); 614 | let a_estimate = (a_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1); 615 | 616 | b_func.prepare_state(seed); 617 | let b_estimate = (b_func.estimate_iterations(TIME_SLICE_MS) / 2).max(1); 618 | 619 | let mut iterations_per_sample = a_estimate.min(b_estimate); 620 | let mut sampler = create_sampler(&settings, seed); 621 | 622 | let mut rng = SmallRng::seed_from_u64(seed); 623 | let stack_offset_distr = settings 624 | .randomize_stack 625 | .map(|offset| distributions::Uniform::new(0, offset)); 626 | 627 | let mut i = 0; 628 | let mut switch_counter = 0; 629 | 630 | let mut sample_iterations = vec![]; 631 | 632 | if let LoopMode::Samples(samples) = loop_mode { 633 | sample_iterations.reserve(samples); 634 | a_func.samples.reserve(samples); 635 | 
b_func.samples.reserve(samples); 636 | } 637 | 638 | let mut loop_time = Duration::from_secs(0); 639 | let mut loop_iterations = 0; 640 | while loop_mode.should_continue(i, loop_time) { 641 | if loop_time > Duration::from_millis(100) { 642 | // correcting time slice estimates 643 | iterations_per_sample = 644 | loop_iterations * TIME_SLICE_MS as usize / loop_time.as_millis() as usize; 645 | } 646 | let iterations = sampler.next_sample_iterations(i, iterations_per_sample); 647 | loop_iterations += iterations; 648 | let warmup_iterations = settings.warmup_enabled.then(|| (iterations / 10).max(1)); 649 | 650 | // !!! IMPORTANT !!! 651 | // Algorithms should be called in different order on each new iteration. 652 | // This equalize the probability of facing unfortunate circumstances like cache misses or page faults 653 | // for both functions. Although both algorithms are from distinct shared objects and therefore 654 | // must be fully self-contained in terms of virtual address space (each shared object has its own 655 | // generator instances, static variables, memory mappings, etc.) it might be the case that 656 | // on the level of physical memory both of them rely on the same memory-mapped test data, for example. 657 | // In that case first function will experience the larger amount of major page faults. 658 | { 659 | mem::swap(&mut a_func, &mut b_func); 660 | switch_counter += 1; 661 | } 662 | 663 | if settings.yield_before_sample { 664 | thread::yield_now(); 665 | } 666 | 667 | let prepare_state_seed = (i % settings.samples_per_haystack == 0).then_some(seed); 668 | let mut sample_time = 0; 669 | 670 | prepare_func( 671 | prepare_state_seed, 672 | a_func, 673 | warmup_iterations, 674 | firewall.as_ref(), 675 | ); 676 | prepare_func( 677 | prepare_state_seed, 678 | b_func, 679 | warmup_iterations, 680 | firewall.as_ref(), 681 | ); 682 | 683 | // Allocate a custom stack frame during runtime, to try to offset alignment of the stack. 
684 | if let Some(distr) = stack_offset_distr { 685 | with_alloca(rng.sample(distr), |_| { 686 | a_func.measure(iterations); 687 | b_func.measure(iterations); 688 | }); 689 | } else { 690 | a_func.measure(iterations); 691 | b_func.measure(iterations); 692 | } 693 | 694 | let a_sample_time = a_func.read_sample(); 695 | let b_sample_time = b_func.read_sample(); 696 | sample_time += a_sample_time.max(b_sample_time); 697 | 698 | loop_time += Duration::from_nanos(sample_time); 699 | sample_iterations.push(iterations); 700 | i += 1; 701 | } 702 | 703 | // If we switched functions odd number of times then we need to swap them back so that 704 | // the first function is always the baseline. 705 | if switch_counter % 2 != 0 { 706 | mem::swap(&mut a_func, &mut b_func); 707 | } 708 | 709 | let run_result = calculate_run_result( 710 | test_name, 711 | &a_func.samples, 712 | &b_func.samples, 713 | &sample_iterations, 714 | settings.filter_outliers, 715 | ) 716 | .ok_or(Error::NoMeasurements)?; 717 | 718 | let samples_path = if let Some(path) = samples_dump_path { 719 | let file_path = write_samples(path, test_name, a_func, b_func, sample_iterations)?; 720 | Some(file_path) 721 | } else { 722 | None 723 | }; 724 | 725 | Ok((run_result, samples_path)) 726 | } 727 | 728 | fn write_samples( 729 | path: &Path, 730 | test_name: &str, 731 | a_func: &TestedFunction, 732 | b_func: &TestedFunction, 733 | iterations: Vec, 734 | ) -> Result { 735 | let file_name = format!("{}.csv", test_name.replace('/', "-")); 736 | let file_path = path.join(file_name); 737 | let s_samples = a_func.samples.iter().copied(); 738 | let b_samples = b_func.samples.iter().copied(); 739 | let values = s_samples 740 | .zip(b_samples) 741 | .zip(iterations.iter().copied()) 742 | .map(|((a, b), c)| (a, b, c)); 743 | write_csv(&file_path, values).context("Unable to write raw measurements")?; 744 | Ok(file_path) 745 | } 746 | 747 | fn write_csv( 748 | path: impl AsRef, 749 | values: impl IntoIterator, 750 | ) -> 
io::Result<()> {
        // Buffered writes: one syscall per flush rather than per row.
        let mut file = BufWriter::new(File::create(path)?);
        for (a, b, c) in values {
            writeln!(&mut file, "{},{},{}", a, b, c)?;
        }
        Ok(())
    }

    /// Renders an SVG plot for every CSV dump by invoking an external `gnuplot`
    /// with a script unpacked to a temporary (auto-deleted) file.
    fn generate_plots(path: &Path, sample_dumps: &[PathBuf]) -> Result<()> {
        let _ = path;
        let gnuplot_file = AutoDelete(temp_dir().join("tango-plot.gnuplot"));
        fs::write(&*gnuplot_file, include_bytes!("plot.gnuplot"))?;
        let gnuplot_file_str = gnuplot_file.to_str().unwrap();

        for input in sample_dumps {
            let csv_input = input.to_str().unwrap();
            let svg_path = input.with_extension("svg");
            let cmd = Command::new("gnuplot")
                .args([
                    "-c",
                    gnuplot_file_str,
                    csv_input,
                    svg_path.to_str().unwrap(),
                ])
                .stdin(Stdio::null())
                .stdout(Stdio::inherit())
                .stderr(Stdio::inherit())
                .status()
                .context("Failed to execute gnuplot")?;

            if !cmd.success() {
                bail!("gnuplot command failed");
            }
        }
        Ok(())
    }
}

mod reporting {
    use crate::cli::{colorize, HumanTime};
    use crate::{RunResult, Summary};
    use colorz::{mode::Stream, Colorize};

    /// Multi-line report with mean/min/max/std.dev for baseline, candidate and diff.
    pub(super) fn verbose_reporter(results: &RunResult) {
        let base = results.baseline;
        let candidate = results.candidate;

        let significant = results.diff_estimate.significant;

        println!(
            "{} (n: {}, outliers: {})",
            results.name.bold().stream(Stream::Stdout),
            results.diff.n,
            results.outliers
        );

        println!(
            " {:12} {:>15} {:>15} {:>15}",
            "",
            "baseline".bold().stream(Stream::Stdout),
            "candidate".bold().stream(Stream::Stdout),
            "∆".bold().stream(Stream::Stdout),
        );
        println!(
            " {:12} ╭────────────────────────────────────────────────",
            ""
        );
        println!(
            " {:12} │ {:>15} {:>15} {:>15} {:+4.2}{}{}",
            "mean",
            HumanTime(base.mean),
            HumanTime(candidate.mean),
            colorize(
                HumanTime(results.diff.mean),
                significant,
                results.diff.mean < 0.
            ),
            colorize(
                results.diff_estimate.pct,
                significant,
                results.diff.mean < 0.
            ),
            colorize("%", significant, results.diff.mean < 0.),
            if significant { "*" } else { "" },
        );
        println!(
            " {:12} │ {:>15} {:>15} {:>15}",
            "min",
            HumanTime(base.min),
            HumanTime(candidate.min),
            HumanTime(candidate.min - base.min)
        );
        println!(
            " {:12} │ {:>15} {:>15} {:>15}",
            "max",
            HumanTime(base.max),
            HumanTime(candidate.max),
            HumanTime(candidate.max - base.max),
        );
        println!(
            " {:12} │ {:>15} {:>15} {:>15}",
            "std. dev.",
            HumanTime(base.variance.sqrt()),
            HumanTime(candidate.variance.sqrt()),
            HumanTime(results.diff.variance.sqrt()),
        );
        println!();
    }

    /// One-line report; a negative diff means the candidate is faster.
    pub(super) fn default_reporter(results: &RunResult) {
        let base = results.baseline;
        let candidate = results.candidate;
        let diff = results.diff;

        let significant = results.diff_estimate.significant;

        let speedup = results.diff_estimate.pct;
        let candidate_faster = diff.mean < 0.;
        println!(
            "{:50} [ {:>8} ... {:>8} ] {:>+7.2}{}{}",
            colorize(&results.name, significant, candidate_faster),
            HumanTime(base.mean),
            colorize(HumanTime(candidate.mean), significant, candidate_faster),
            colorize(speedup, significant, candidate_faster),
            colorize("%", significant, candidate_faster),
            if significant { "*" } else { "" },
        )
    }

    /// One-line `min ... mean ... max` report for solo runs.
    pub(super) fn default_reporter_solo(name: &str, results: &Summary<f64>) {
        println!(
            "{:50} [ {:>8} ... {:>8} ... {:>8} ] stddev: {:>8}",
            name,
            HumanTime(results.min),
            HumanTime(results.mean),
            HumanTime(results.max),
            HumanTime(results.variance.sqrt()),
        )
    }
}

/// A function under measurement together with the raw samples collected for it.
struct TestedFunction<'a> {
    pub(crate) spi: &'a mut Spi,
    pub(crate) samples: Vec<u64>,
}

impl<'a> TestedFunction<'a> {
    pub(crate) fn new(spi: &'a mut Spi, func: FunctionIdx) -> Self {
        spi.select(func);
        TestedFunction {
            spi,
            samples: Vec::new(),
        }
    }

    pub(crate) fn measure(&mut self, iterations: usize) {
        self.spi.measure(iterations);
    }

    /// Reads the last measurement, recording it in `samples`.
    pub(crate) fn read_sample(&mut self) -> u64 {
        let sample = self.spi.read_sample();
        self.samples.push(sample);
        sample
    }

    /// Runs without recording a sample (used for warmup).
    pub(crate) fn run(&mut self, iterations: usize) -> u64 {
        self.spi.run(iterations)
    }

    pub(crate) fn prepare_state(&mut self, seed: u64) {
        self.spi.prepare_state(seed);
    }

    pub(crate) fn estimate_iterations(&mut self, time_ms: u32) -> usize {
        self.spi.estimate_iterations(time_ms)
    }
}

/// Optionally re-seeds the function state, flushes the cache firewall and runs warmup.
fn prepare_func(
    prepare_state_seed: Option<u64>,
    f: &mut TestedFunction,
    warmup_iterations: Option<usize>,
    firewall: Option<&CacheFirewall>,
) {
    if let Some(seed) = prepare_state_seed {
        f.prepare_state(seed);
        if let Some(firewall) = firewall {
            firewall.issue_read();
        }
    }
    if let Some(warmup_iterations) = warmup_iterations {
        f.run(warmup_iterations);
    }
}

/// Instantiates the sampler strategy selected in the settings.
fn create_sampler(settings: &MeasurementSettings, seed: u64) -> Box<dyn SampleLength> {
    match settings.sampler_type {
        SampleLengthKind::Flat => Box::new(FlatSampleLength::new(settings)),
        SampleLengthKind::Linear => Box::new(LinearSampleLength::new(settings)),
        SampleLengthKind::Random => Box::new(RandomSampleLength::new(settings, seed)),
    }
}

/// Paints `value` green (improved) or red (regressed) when `do_paint` is set.
fn colorize<T: Display>(value: T,
do_paint: bool, is_improved: bool) -> impl Display {
    use colorz::{ansi, mode::Stream::Stdout, Colorize, Style};

    const RED: Style = Style::new().fg(ansi::Red).const_into_runtime_style();
    const GREEN: Style = Style::new().fg(ansi::Green).const_into_runtime_style();
    const DEFAULT: Style = Style::new().const_into_runtime_style();

    if do_paint {
        if is_improved {
            value.into_style_with(GREEN).stream(Stdout)
        } else {
            value.into_style_with(RED).stream(Stdout)
        }
    } else {
        value.into_style_with(DEFAULT).stream(Stdout)
    }
}

/// Nanosecond duration pretty-printer (`f64` nanoseconds in, human unit out).
struct HumanTime(f64);

impl fmt::Display for HumanTime {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const USEC: f64 = 1_000.;
        const MSEC: f64 = USEC * 1_000.;
        const SEC: f64 = MSEC * 1_000.;

        // `abs` so negative diffs pick the same unit as their positive counterpart.
        if self.0.abs() > SEC {
            f.pad(&format!("{:.1} s", self.0 / SEC))
        } else if self.0.abs() > MSEC {
            f.pad(&format!("{:.1} ms", self.0 / MSEC))
        } else if self.0.abs() > USEC {
            f.pad(&format!("{:.1} us", self.0 / USEC))
        } else if self.0 == 0. {
            f.pad("0 ns")
        } else {
            f.pad(&format!("{:.1} ns", self.0))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn check_human_time() {
        assert_eq!(format!("{}", HumanTime(0.1)), "0.1 ns");
        assert_eq!(format!("{:>5}", HumanTime(0.)), " 0 ns");

        assert_eq!(format!("{}", HumanTime(120.)), "120.0 ns");

        assert_eq!(format!("{}", HumanTime(1200.)), "1.2 us");

        assert_eq!(format!("{}", HumanTime(1200000.)), "1.2 ms");

        assert_eq!(format!("{}", HumanTime(1200000000.)), "1.2 s");

        assert_eq!(format!("{}", HumanTime(-1200000.)), "-1.2 ms");
    }

    // Sanity checking some simple patterns
    #[test]
    fn check_glob() {
        let patterns = vec!["a/*/*", "a/**", "*/32/*", "**/b", "a/{32,64}/*"];
        let input = "a/32/b";
        for pattern in patterns {
            assert!(
                glob_match(pattern, input),
                "failed to match {} against {}",
                pattern,
                input
            );
        }
    }
}
--------------------------------------------------------------------------------
/tango-bench/src/dylib.rs:
--------------------------------------------------------------------------------
//!
//! Loading and resolving symbols from .dylib/.so libraries

use self::ffi::{VTable, SELF_VTABLE};
use crate::{Benchmark, ErasedSampler, Error};
use anyhow::Context;
use libloading::{Library, Symbol};
use std::{
    ffi::{c_char, c_ulonglong},
    path::Path,
    ptr::{addr_of, null},
    slice, str,
    sync::mpsc::{channel, Receiver, Sender},
    thread::{self, JoinHandle},
};

pub type FunctionIdx = usize;

#[derive(Debug, Clone)]
pub struct NamedFunction {
    pub name: String,

    /// Function index in FFI API
    pub idx: FunctionIdx,
}

/// Dispatcher over the FFI vtable of either our own executable or a loaded library.
pub(crate) struct Spi {
    tests: Vec<NamedFunction>,
    selected_function: Option<FunctionIdx>,
    mode: SpiMode,
}

#[derive(PartialEq, Eq, Clone, Copy)]
pub enum SpiModeKind {
    // Benchmarks are executed synchronously when calling SPI
    //
    // Dispatcher switches between baseline and candidate after each sample
    Synchronous,

    // Benchmarks are executed in different threads
    //
    // Dispatcher creates a separate thread for baseline and candidate, but synchronizes them after each benchmark
    Asynchronous,
}

enum SpiMode {
    Synchronous {
        vt: Box<dyn VTable>,
        last_measurement: u64,
    },
    Asynchronous {
        worker: Option<JoinHandle<()>>,
        tx: Sender<SpiRequest>,
        rx: Receiver<SpiReply>,
    },
}

impl Spi {
    pub(crate) fn for_library(path: impl AsRef<Path>, mode: SpiModeKind) -> Spi {
        // SAFETY-relevant: loading a library runs its initializers; the path is
        // expected to be a tango-enabled benchmark binary.
        let lib = unsafe { Library::new(path.as_ref()) }
            .with_context(|| format!("Unable to open library: {}", path.as_ref().display()))
            .unwrap();
        spi_handle_for_vtable(ffi::LibraryVTable::new(lib).unwrap(), mode)
    }

    /// Returns `None` if the self-vtable was already taken (can only be taken once).
    pub(crate) fn for_self(mode: SpiModeKind) -> Option<Spi> {
        unsafe { SELF_VTABLE.take() }.map(|vt| spi_handle_for_vtable(vt, mode))
    }

    pub(crate) fn tests(&self) -> &[NamedFunction] {
        &self.tests
    }

    pub(crate) fn lookup(&self, name: &str) -> Option<&NamedFunction> {
        self.tests.iter().find(|f| f.name == name)
    }

    /// Runs the selected function and returns the elapsed time synchronously.
    pub(crate) fn run(&mut self, iterations: usize) -> u64 {
        match &self.mode {
            SpiMode::Synchronous { vt, .. } => vt.run(iterations as c_ulonglong),
            SpiMode::Asynchronous { worker: _, tx, rx } => {
                tx.send(SpiRequest::Run { iterations }).unwrap();
                match rx.recv().unwrap() {
                    SpiReply::Run(time) => time,
                    r => panic!("Unexpected response: {:?}", r),
                }
            }
        }
    }

    /// Starts a measurement; in async mode the result is collected later by
    /// [`Spi::read_sample`], allowing both functions to run concurrently.
    pub(crate) fn measure(&mut self, iterations: usize) {
        match &mut self.mode {
            SpiMode::Synchronous {
                vt,
                last_measurement,
            } => {
                *last_measurement = vt.run(iterations as c_ulonglong);
            }
            SpiMode::Asynchronous { tx, .. } => {
                tx.send(SpiRequest::Measure { iterations }).unwrap();
            }
        }
    }

    pub(crate) fn read_sample(&mut self) -> u64 {
        match &self.mode {
            SpiMode::Synchronous {
                last_measurement, ..
            } => *last_measurement,
            SpiMode::Asynchronous { rx, .. } => match rx.recv().unwrap() {
                SpiReply::Measure(time) => time,
                r => panic!("Unexpected response: {:?}", r),
            },
        }
    }

    pub(crate) fn estimate_iterations(&mut self, time_ms: u32) -> usize {
        match &self.mode {
            SpiMode::Synchronous { vt, .. } => vt.estimate_iterations(time_ms) as usize,
            SpiMode::Asynchronous { tx, rx, .. } => {
                tx.send(SpiRequest::EstimateIterations { time_ms }).unwrap();
                match rx.recv().unwrap() {
                    SpiReply::EstimateIterations(iters) => iters,
                    r => panic!("Unexpected response: {:?}", r),
                }
            }
        }
    }

    pub(crate) fn prepare_state(&mut self, seed: u64) {
        match &self.mode {
            SpiMode::Synchronous { vt, .. } => vt.prepare_state(seed),
            SpiMode::Asynchronous { tx, rx, .. } => {
                tx.send(SpiRequest::PrepareState { seed }).unwrap();
                match rx.recv().unwrap() {
                    SpiReply::PrepareState => {}
                    r => panic!("Unexpected response: {:?}", r),
                }
            }
        }
    }

    pub(crate) fn select(&mut self, idx: usize) {
        match &self.mode {
            SpiMode::Synchronous { vt, .. } => vt.select(idx as c_ulonglong),
            SpiMode::Asynchronous { tx, rx, .. } => {
                tx.send(SpiRequest::Select { idx }).unwrap();
                match rx.recv().unwrap() {
                    // NOTE(review): `selected_function` is only updated in the
                    // asynchronous arm — confirm the synchronous arm should not
                    // record the selection as well.
                    SpiReply::Select => self.selected_function = Some(idx),
                    r => panic!("Unexpected response: {:?}", r),
                }
            }
        }
    }
}

impl Drop for Spi {
    fn drop(&mut self) {
        // Gracefully shut down the worker thread in asynchronous mode.
        if let SpiMode::Asynchronous { worker, tx, .. } = &mut self.mode {
            if let Some(worker) = worker.take() {
                tx.send(SpiRequest::Shutdown).unwrap();
                worker.join().unwrap();
            }
        }
    }
}

/// Worker-thread loop: translates channel requests into vtable calls.
fn spi_worker(vt: &dyn VTable, rx: Receiver<SpiRequest>, tx: Sender<SpiReply>) {
    use SpiReply as Rp;
    use SpiRequest as Rq;

    while let Ok(req) = rx.recv() {
        let reply = match req {
            Rq::EstimateIterations { time_ms } => {
                Rp::EstimateIterations(vt.estimate_iterations(time_ms) as usize)
            }
            Rq::PrepareState { seed } => {
                vt.prepare_state(seed);
                Rp::PrepareState
            }
            Rq::Select { idx } => {
                vt.select(idx as c_ulonglong);
                Rp::Select
            }
            Rq::Run { iterations } => Rp::Run(vt.run(iterations as c_ulonglong)),
            Rq::Measure { iterations } => Rp::Measure(vt.run(iterations as c_ulonglong)),
            Rq::Shutdown => break,
        };
        tx.send(reply).unwrap();
    }
}

/// Initializes the vtable, enumerates its tests and wraps it in a [`Spi`].
fn spi_handle_for_vtable(vtable: impl VTable + Send + 'static, mode: SpiModeKind) -> Spi {
    vtable.init();
    let tests = enumerate_tests(&vtable).unwrap();

    match mode {
        SpiModeKind::Asynchronous => {
            let (request_tx, request_rx) = channel();
            let
(reply_tx, reply_rx) = channel(); 200 | let worker = thread::spawn(move || { 201 | spi_worker(&vtable, request_rx, reply_tx); 202 | }); 203 | 204 | Spi { 205 | tests, 206 | selected_function: None, 207 | mode: SpiMode::Asynchronous { 208 | worker: Some(worker), 209 | tx: request_tx, 210 | rx: reply_rx, 211 | }, 212 | } 213 | } 214 | SpiModeKind::Synchronous => Spi { 215 | tests, 216 | selected_function: None, 217 | mode: SpiMode::Synchronous { 218 | vt: Box::new(vtable), 219 | last_measurement: 0, 220 | }, 221 | }, 222 | } 223 | } 224 | 225 | fn enumerate_tests(vt: &dyn VTable) -> Result, Error> { 226 | let mut tests = vec![]; 227 | for idx in 0..vt.count() { 228 | vt.select(idx); 229 | 230 | let mut length = 0; 231 | let name_ptr: *const c_char = null(); 232 | vt.get_test_name(addr_of!(name_ptr) as _, &mut length); 233 | if length == 0 { 234 | continue; 235 | } 236 | let slice = unsafe { slice::from_raw_parts(name_ptr as *const u8, length as usize) }; 237 | let name = str::from_utf8(slice) 238 | .map_err(Error::InvalidFFIString)? 239 | .to_string(); 240 | let idx = idx as usize; 241 | tests.push(NamedFunction { name, idx }); 242 | } 243 | Ok(tests) 244 | } 245 | 246 | enum SpiRequest { 247 | EstimateIterations { time_ms: u32 }, 248 | PrepareState { seed: u64 }, 249 | Select { idx: usize }, 250 | Run { iterations: usize }, 251 | Measure { iterations: usize }, 252 | Shutdown, 253 | } 254 | 255 | #[derive(Debug)] 256 | enum SpiReply { 257 | EstimateIterations(usize), 258 | PrepareState, 259 | Select, 260 | Run(u64), 261 | Measure(u64), 262 | } 263 | 264 | /// State which holds the information about list of benchmarks and which one is selected. 265 | /// Used in FFI API (`tango_*` functions). 
266 | struct State { 267 | benchmarks: Vec, 268 | selected_function: Option<(usize, Option>)>, 269 | } 270 | 271 | impl State { 272 | fn selected(&self) -> &Benchmark { 273 | &self.benchmarks[self.ensure_selected()] 274 | } 275 | 276 | fn ensure_selected(&self) -> usize { 277 | self.selected_function 278 | .as_ref() 279 | .map(|(idx, _)| *idx) 280 | .expect("No function was selected. Call tango_select() first") 281 | } 282 | 283 | fn selected_state_mut(&mut self) -> Option<&mut Box> { 284 | self.selected_function 285 | .as_mut() 286 | .and_then(|(_, state)| state.as_mut()) 287 | } 288 | } 289 | 290 | /// Global state of the benchmarking library 291 | static mut STATE: Option = None; 292 | 293 | /// `tango_init()` implementation 294 | /// 295 | /// This function is not exported from the library, but is used by the `tango_init()` functions 296 | /// generated by the `tango_benchmark!()` macro. 297 | pub fn __tango_init(benchmarks: Vec) { 298 | unsafe { 299 | if STATE.is_none() { 300 | STATE = Some(State { 301 | benchmarks, 302 | selected_function: None, 303 | }); 304 | } 305 | } 306 | } 307 | 308 | /// Defines all the foundation types and exported symbols for the FFI communication API between two 309 | /// executables. 310 | /// 311 | /// Tango execution model implies simultaneous execution of the code from two binaries. To achieve that 312 | /// Tango benchmark is compiled in a way that executable is also a shared library (.dll, .so, .dylib). This 313 | /// way two executables can coexist in the single process at the same time. 
314 | pub mod ffi { 315 | use super::*; 316 | use std::{ 317 | ffi::{c_uint, c_ulonglong}, 318 | mem, 319 | os::raw::c_char, 320 | ptr::null, 321 | }; 322 | 323 | /// Signature types of all FFI API functions 324 | pub type InitFn = unsafe extern "C" fn(); 325 | type CountFn = unsafe extern "C" fn() -> c_ulonglong; 326 | type GetTestNameFn = unsafe extern "C" fn(*mut *const c_char, *mut c_ulonglong); 327 | type SelectFn = unsafe extern "C" fn(c_ulonglong); 328 | type RunFn = unsafe extern "C" fn(c_ulonglong) -> u64; 329 | type EstimateIterationsFn = unsafe extern "C" fn(c_uint) -> c_ulonglong; 330 | type PrepareStateFn = unsafe extern "C" fn(c_ulonglong); 331 | type FreeFn = unsafe extern "C" fn(); 332 | 333 | /// This block of constants is checking that all exported tango functions are of valid type according to the API. 334 | /// Those constants are not meant to be used at runtime in any way 335 | #[allow(unused)] 336 | mod type_check { 337 | use super::*; 338 | 339 | const TANGO_COUNT: CountFn = tango_count; 340 | const TANGO_SELECT: SelectFn = tango_select; 341 | const TANGO_GET_TEST_NAME: GetTestNameFn = tango_get_test_name; 342 | const TANGO_RUN: RunFn = tango_run; 343 | const TANGO_ESTIMATE_ITERATIONS: EstimateIterationsFn = tango_estimate_iterations; 344 | const TANGO_FREE: FreeFn = tango_free; 345 | } 346 | 347 | #[no_mangle] 348 | unsafe extern "C" fn tango_count() -> c_ulonglong { 349 | STATE 350 | .as_ref() 351 | .map(|s| s.benchmarks.len() as c_ulonglong) 352 | .unwrap_or(0) 353 | } 354 | 355 | #[no_mangle] 356 | unsafe extern "C" fn tango_select(idx: c_ulonglong) { 357 | if let Some(s) = STATE.as_mut() { 358 | let idx = idx as usize; 359 | assert!(idx < s.benchmarks.len()); 360 | 361 | s.selected_function = Some(match s.selected_function.take() { 362 | // Preserving state if the same function is selected 363 | Some((selected, state)) if selected == idx => (selected, state), 364 | _ => (idx, None), 365 | }); 366 | } 367 | } 368 | 369 | #[no_mangle] 370 
| unsafe extern "C" fn tango_get_test_name(name: *mut *const c_char, length: *mut c_ulonglong) { 371 | if let Some(s) = STATE.as_ref() { 372 | let n = s.selected().name(); 373 | *name = n.as_ptr() as _; 374 | *length = n.len() as c_ulonglong; 375 | } else { 376 | *name = null(); 377 | *length = 0; 378 | } 379 | } 380 | 381 | #[no_mangle] 382 | unsafe extern "C" fn tango_run(iterations: c_ulonglong) -> u64 { 383 | if let Some(s) = STATE.as_mut() { 384 | s.selected_state_mut() 385 | .expect("no tango_prepare_state() was called") 386 | .measure(iterations as usize) 387 | } else { 388 | 0 389 | } 390 | } 391 | 392 | #[no_mangle] 393 | unsafe extern "C" fn tango_estimate_iterations(time_ms: c_uint) -> c_ulonglong { 394 | if let Some(s) = STATE.as_mut() { 395 | s.selected_state_mut() 396 | .expect("no tango_prepare_state() was called") 397 | .as_mut() 398 | .estimate_iterations(time_ms) as c_ulonglong 399 | } else { 400 | 0 401 | } 402 | } 403 | 404 | #[no_mangle] 405 | unsafe extern "C" fn tango_prepare_state(seed: c_ulonglong) { 406 | if let Some(s) = STATE.as_mut() { 407 | let Some((idx, state)) = &mut s.selected_function else { 408 | panic!("No tango_select() was called") 409 | }; 410 | *state = Some(s.benchmarks[*idx].prepare_state(seed)); 411 | } 412 | } 413 | 414 | #[no_mangle] 415 | unsafe extern "C" fn tango_free() { 416 | STATE.take(); 417 | } 418 | 419 | pub(super) trait VTable { 420 | fn init(&self); 421 | fn count(&self) -> c_ulonglong; 422 | fn select(&self, func_idx: c_ulonglong); 423 | fn get_test_name(&self, ptr: *mut *const c_char, len: *mut c_ulonglong); 424 | fn run(&self, iterations: c_ulonglong) -> c_ulonglong; 425 | fn estimate_iterations(&self, time_ms: c_uint) -> c_ulonglong; 426 | fn prepare_state(&self, seed: c_ulonglong); 427 | } 428 | 429 | pub(super) static mut SELF_VTABLE: Option = Some(SelfVTable); 430 | 431 | /// FFI implementation for the current executable. 
432 | /// 433 | /// Used to communicate with FFI API of the executable bypassing dynamic linking. 434 | /// # Safety 435 | /// Instances of this type should not be created directory. The single instance [`SELF_SPI`] should be used instead 436 | pub(super) struct SelfVTable; 437 | 438 | impl VTable for SelfVTable { 439 | fn init(&self) { 440 | // In executable mode `tango_init` is already called by the main function 441 | } 442 | 443 | fn count(&self) -> c_ulonglong { 444 | unsafe { tango_count() } 445 | } 446 | 447 | fn select(&self, func_idx: c_ulonglong) { 448 | unsafe { tango_select(func_idx) } 449 | } 450 | 451 | fn get_test_name(&self, ptr: *mut *const c_char, len: *mut c_ulonglong) { 452 | unsafe { tango_get_test_name(ptr, len) } 453 | } 454 | 455 | fn run(&self, iterations: c_ulonglong) -> u64 { 456 | unsafe { tango_run(iterations) } 457 | } 458 | 459 | fn estimate_iterations(&self, time_ms: c_uint) -> c_ulonglong { 460 | unsafe { tango_estimate_iterations(time_ms) } 461 | } 462 | 463 | fn prepare_state(&self, seed: u64) { 464 | unsafe { tango_prepare_state(seed) } 465 | } 466 | } 467 | 468 | impl Drop for SelfVTable { 469 | fn drop(&mut self) { 470 | unsafe { 471 | tango_free(); 472 | } 473 | } 474 | } 475 | 476 | pub(super) struct LibraryVTable { 477 | /// SAFETY: using static here is sound because 478 | /// (1) this struct is private and field can not be accessed outside 479 | /// (2) rust has drop order guarantee (fields are dropped in declaration order) 480 | init_fn: Symbol<'static, InitFn>, 481 | count_fn: Symbol<'static, CountFn>, 482 | select_fn: Symbol<'static, SelectFn>, 483 | get_test_name_fn: Symbol<'static, GetTestNameFn>, 484 | run_fn: Symbol<'static, RunFn>, 485 | estimate_iterations_fn: Symbol<'static, EstimateIterationsFn>, 486 | prepare_state_fn: Symbol<'static, PrepareStateFn>, 487 | free_fn: Symbol<'static, FreeFn>, 488 | 489 | /// SAFETY: This field should be last because it should be dropped last 490 | _library: Box, 491 | } 492 | 493 
| impl LibraryVTable { 494 | pub(super) fn new(library: Library) -> Result { 495 | // SAFETY: library is boxed and not moved here, therefore we can safley construct self-referential 496 | // struct here 497 | let library = Box::new(library); 498 | let init_fn = lookup_symbol::(&library, "tango_init")?; 499 | let count_fn = lookup_symbol::(&library, "tango_count")?; 500 | let select_fn = lookup_symbol::(&library, "tango_select")?; 501 | let get_test_name_fn = lookup_symbol::(&library, "tango_get_test_name")?; 502 | let run_fn = lookup_symbol::(&library, "tango_run")?; 503 | let estimate_iterations_fn = 504 | lookup_symbol::(&library, "tango_estimate_iterations")?; 505 | let prepare_state_fn = 506 | lookup_symbol::(&library, "tango_prepare_state")?; 507 | let free_fn = lookup_symbol::(&library, "tango_free")?; 508 | Ok(Self { 509 | _library: library, 510 | init_fn, 511 | count_fn, 512 | select_fn, 513 | get_test_name_fn, 514 | run_fn, 515 | estimate_iterations_fn, 516 | prepare_state_fn, 517 | free_fn, 518 | }) 519 | } 520 | } 521 | 522 | impl VTable for LibraryVTable { 523 | fn init(&self) { 524 | unsafe { (self.init_fn)() } 525 | } 526 | 527 | fn count(&self) -> c_ulonglong { 528 | unsafe { (self.count_fn)() } 529 | } 530 | 531 | fn select(&self, func_idx: c_ulonglong) { 532 | unsafe { (self.select_fn)(func_idx) } 533 | } 534 | 535 | fn get_test_name(&self, ptr: *mut *const c_char, len: *mut c_ulonglong) { 536 | unsafe { (self.get_test_name_fn)(ptr, len) } 537 | } 538 | 539 | fn run(&self, iterations: c_ulonglong) -> u64 { 540 | unsafe { (self.run_fn)(iterations) } 541 | } 542 | 543 | fn estimate_iterations(&self, time_ms: c_uint) -> c_ulonglong { 544 | unsafe { (self.estimate_iterations_fn)(time_ms) } 545 | } 546 | 547 | fn prepare_state(&self, seed: c_ulonglong) { 548 | unsafe { (self.prepare_state_fn)(seed) } 549 | } 550 | } 551 | 552 | impl Drop for LibraryVTable { 553 | fn drop(&mut self) { 554 | unsafe { (self.free_fn)() } 555 | } 556 | } 557 | 558 | fn 
lookup_symbol<'l, T>( 559 | library: &'l Library, 560 | name: &'static str, 561 | ) -> Result, Error> { 562 | unsafe { 563 | let symbol = library 564 | .get(name.as_bytes()) 565 | .map_err(Error::UnableToLoadSymbol)?; 566 | Ok(mem::transmute::, Symbol<'static, T>>(symbol)) 567 | } 568 | } 569 | } 570 | -------------------------------------------------------------------------------- /tango-bench/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "async")] 2 | pub use asynchronous::async_benchmark_fn; 3 | use core::ptr; 4 | use num_traits::ToPrimitive; 5 | use rand::{rngs::SmallRng, Rng, SeedableRng}; 6 | use std::{ 7 | cmp::Ordering, 8 | hint::black_box, 9 | io, mem, 10 | ops::{Deref, RangeInclusive}, 11 | str::Utf8Error, 12 | time::Duration, 13 | }; 14 | use thiserror::Error; 15 | use timer::{ActiveTimer, Timer}; 16 | 17 | pub mod cli; 18 | pub mod dylib; 19 | #[cfg(target_os = "linux")] 20 | pub mod linux; 21 | 22 | #[derive(Debug, Error)] 23 | pub enum Error { 24 | #[error("No measurements given")] 25 | NoMeasurements, 26 | 27 | #[error("Invalid string pointer from FFI")] 28 | InvalidFFIString(Utf8Error), 29 | 30 | #[error("Spi::self() was already called")] 31 | SpiSelfWasMoved, 32 | 33 | #[error("Unable to load library symbol")] 34 | UnableToLoadSymbol(#[source] libloading::Error), 35 | 36 | #[error("Unknown sampler type. Available options are: flat and linear")] 37 | UnknownSamplerType, 38 | 39 | #[error("Invalid test name given")] 40 | InvalidTestName, 41 | 42 | #[error("IO Error")] 43 | IOError(#[from] io::Error), 44 | } 45 | 46 | /// Registers benchmark in the system 47 | /// 48 | /// Macros accepts a list of functions that produce any [`IntoBenchmarks`] type. All of the benchmarks 49 | /// created by those functions are registered in the harness. 
50 | /// 51 | /// ## Example 52 | /// ```rust 53 | /// use std::time::Instant; 54 | /// use tango_bench::{benchmark_fn, IntoBenchmarks, tango_benchmarks}; 55 | /// 56 | /// fn time_benchmarks() -> impl IntoBenchmarks { 57 | /// [benchmark_fn("current_time", |b| b.iter(|| Instant::now()))] 58 | /// } 59 | /// 60 | /// tango_benchmarks!(time_benchmarks()); 61 | /// ``` 62 | #[macro_export] 63 | macro_rules! tango_benchmarks { 64 | ($($func_expr:expr),+) => { 65 | /// Type checking tango_init() function 66 | const TANGO_INIT: $crate::dylib::ffi::InitFn = tango_init; 67 | 68 | /// Exported function for initializing the benchmark harness 69 | #[no_mangle] 70 | unsafe extern "C" fn tango_init() { 71 | let mut benchmarks = vec![]; 72 | $(benchmarks.extend($crate::IntoBenchmarks::into_benchmarks($func_expr));)* 73 | $crate::dylib::__tango_init(benchmarks) 74 | } 75 | 76 | }; 77 | } 78 | 79 | /// Main entrypoint for benchmarks 80 | /// 81 | /// This macro generate `main()` function for the benchmark harness. Can be used in a form with providing 82 | /// measurement settings: 83 | /// ```rust 84 | /// use tango_bench::{tango_main, tango_benchmarks, MeasurementSettings}; 85 | /// 86 | /// // Register benchmarks 87 | /// tango_benchmarks!([]); 88 | /// 89 | /// tango_main!(MeasurementSettings { 90 | /// samples_per_haystack: 1000, 91 | /// min_iterations_per_sample: 10, 92 | /// max_iterations_per_sample: 10_000, 93 | /// ..Default::default() 94 | /// }); 95 | /// ``` 96 | #[macro_export] 97 | macro_rules! tango_main { 98 | ($settings:expr) => { 99 | fn main() -> $crate::cli::Result { 100 | // Initialize Tango for SelfVTable usage 101 | unsafe { tango_init() }; 102 | $crate::cli::run($settings) 103 | } 104 | }; 105 | () => { 106 | tango_main! 
{$crate::MeasurementSettings::default()} 107 | }; 108 | } 109 | 110 | pub struct BenchmarkParams { 111 | pub seed: u64, 112 | } 113 | 114 | pub struct Bencher { 115 | params: BenchmarkParams, 116 | } 117 | 118 | impl Deref for Bencher { 119 | type Target = BenchmarkParams; 120 | 121 | fn deref(&self) -> &Self::Target { 122 | &self.params 123 | } 124 | } 125 | 126 | impl Bencher { 127 | pub fn iter O + 'static>(self, func: F) -> Box { 128 | Box::new(Sampler(func)) 129 | } 130 | } 131 | 132 | struct Sampler(F); 133 | 134 | pub trait ErasedSampler { 135 | /// Measures the performance if the function 136 | /// 137 | /// Returns the cumulative execution time (all iterations) with nanoseconds precision, 138 | /// but not necessarily accuracy. Usually this time is get by `clock_gettime()` call or some other 139 | /// platform-specific call. 140 | /// 141 | /// This method should use the same arguments for measuring the test function unless [`prepare_state()`] 142 | /// method is called. Only then new set of input arguments should be generated. It is NOT allowed 143 | /// to call this method without first calling [`prepare_state()`]. 144 | /// 145 | /// [`prepare_state()`]: Self::prepare_state() 146 | fn measure(&mut self, iterations: usize) -> u64; 147 | 148 | /// Estimates the number of iterations achievable within given time. 149 | /// 150 | /// Time span is given in milliseconds (`time_ms`). Estimate can be an approximation and it is important 151 | /// for implementation to be fast (in the order of 10 ms). 152 | /// If possible the same input arguments should be used when building the estimate. 153 | /// If the single call of a function is longer than provided timespan the implementation should return 0. 
154 | fn estimate_iterations(&mut self, time_ms: u32) -> usize { 155 | let mut iters = 1; 156 | let time_ns = Duration::from_millis(time_ms as u64).as_nanos() as u64; 157 | 158 | for _ in 0..5 { 159 | // Never believe short measurements because they are very unreliable. Pretending that 160 | // measurement at least took 1us guarantees that we won't end up with an unreasonably large number 161 | // of iterations 162 | let time = self.measure(iters).max(1_000); 163 | let time_per_iteration = (time / iters as u64).max(1); 164 | let new_iters = (time_ns / time_per_iteration) as usize; 165 | 166 | // Do early stop if new estimate has the same order of magnitude. It is good enough. 167 | if new_iters < 2 * iters { 168 | return new_iters; 169 | } 170 | 171 | iters = new_iters; 172 | } 173 | 174 | iters 175 | } 176 | } 177 | 178 | impl O> ErasedSampler for Sampler { 179 | fn measure(&mut self, iterations: usize) -> u64 { 180 | let start = ActiveTimer::start(); 181 | for _ in 0..iterations { 182 | black_box((self.0)()); 183 | } 184 | ActiveTimer::stop(start) 185 | } 186 | } 187 | 188 | pub struct Benchmark { 189 | name: String, 190 | sampler_factory: Box, 191 | } 192 | 193 | pub fn benchmark_fn Box + 'static>( 194 | name: impl Into, 195 | sampler_factory: F, 196 | ) -> Benchmark { 197 | let name = name.into(); 198 | assert!(!name.is_empty()); 199 | Benchmark { 200 | name, 201 | sampler_factory: Box::new(SyncSampleFactory(sampler_factory)), 202 | } 203 | } 204 | 205 | pub trait SamplerFactory { 206 | fn create_sampler(&mut self, params: BenchmarkParams) -> Box; 207 | } 208 | 209 | struct SyncSampleFactory(F); 210 | 211 | impl Box> SamplerFactory for SyncSampleFactory { 212 | fn create_sampler(&mut self, params: BenchmarkParams) -> Box { 213 | (self.0)(Bencher { params }) 214 | } 215 | } 216 | 217 | impl Benchmark { 218 | /// Generates next haystack for the measurement 219 | /// 220 | /// Calling this method should update internal haystack used for measurement. 
221 | /// Returns `true` if update happens, `false` if implementation doesn't support haystack generation. 222 | /// Haystack/Needle distinction is described in [`Generator`] trait. 223 | pub fn prepare_state(&mut self, seed: u64) -> Box { 224 | self.sampler_factory 225 | .create_sampler(BenchmarkParams { seed }) 226 | } 227 | 228 | /// Name of the benchmark 229 | pub fn name(&self) -> &str { 230 | self.name.as_str() 231 | } 232 | } 233 | 234 | /// Converts the implementing type into a vector of [`Benchmark`]. 235 | pub trait IntoBenchmarks { 236 | fn into_benchmarks(self) -> Vec; 237 | } 238 | 239 | impl IntoBenchmarks for [Benchmark; N] { 240 | fn into_benchmarks(self) -> Vec { 241 | self.into_iter().collect() 242 | } 243 | } 244 | 245 | impl IntoBenchmarks for Vec { 246 | fn into_benchmarks(self) -> Vec { 247 | self 248 | } 249 | } 250 | 251 | /// Describes basic settings for the benchmarking process 252 | /// 253 | /// This structure is passed to [`cli::run()`]. 254 | /// 255 | /// Should be created only with overriding needed properties, like so: 256 | /// ```rust 257 | /// use tango_bench::MeasurementSettings; 258 | /// 259 | /// let settings = MeasurementSettings { 260 | /// min_iterations_per_sample: 1000, 261 | /// ..Default::default() 262 | /// }; 263 | /// ``` 264 | #[derive(Clone, Copy, Debug)] 265 | pub struct MeasurementSettings { 266 | pub filter_outliers: bool, 267 | 268 | /// The number of samples per one generated haystack 269 | pub samples_per_haystack: usize, 270 | 271 | /// Minimum number of iterations in a sample for each of 2 tested functions 272 | pub min_iterations_per_sample: usize, 273 | 274 | /// The number of iterations in a sample for each of 2 tested functions 275 | pub max_iterations_per_sample: usize, 276 | 277 | pub sampler_type: SampleLengthKind, 278 | 279 | /// If true scheduler performs warmup iterations before measuring function 280 | pub warmup_enabled: bool, 281 | 282 | /// Size of a CPU cache firewall in KBytes 283 | /// 284 
| /// If set, the scheduler will perform a dummy data read between samples generation to spoil the CPU cache 285 | /// 286 | /// Cache firewall is a way to reduce the impact of the CPU cache on the benchmarking process. It tries 287 | /// to minimize discrepancies in performance between two algorithms due to the CPU cache state. 288 | pub cache_firewall: Option, 289 | 290 | /// If true, scheduler will perform a yield of control back to the OS before taking each sample 291 | /// 292 | /// Yielding control to the OS is a way to reduce the impact of OS scheduler on the benchmarking process. 293 | pub yield_before_sample: bool, 294 | 295 | /// If set, use alloca to allocate a random offset for the stack each sample. 296 | /// This to reduce memory alignment effects on the benchmarking process. 297 | /// 298 | /// May cause UB if the allocation is larger then the thread stack size. 299 | pub randomize_stack: Option, 300 | } 301 | 302 | #[derive(Clone, Copy, Debug)] 303 | pub enum SampleLengthKind { 304 | Flat, 305 | Linear, 306 | Random, 307 | } 308 | 309 | /// Performs a dummy reads from memory to spoil given amount of CPU cache 310 | /// 311 | /// Uses cache aligned data arrays to perform minimum amount of reads possible to spoil the cache 312 | struct CacheFirewall { 313 | cache_lines: Vec, 314 | } 315 | 316 | impl CacheFirewall { 317 | fn new(bytes: usize) -> Self { 318 | let n = bytes / mem::size_of::(); 319 | let cache_lines = vec![CacheLine::default(); n]; 320 | Self { cache_lines } 321 | } 322 | 323 | fn issue_read(&self) { 324 | for line in &self.cache_lines { 325 | // Because CacheLine is aligned on 64 bytes it is enough to read single element from the array 326 | // to spoil the whole cache line 327 | unsafe { ptr::read_volatile(&line.0[0]) }; 328 | } 329 | } 330 | } 331 | 332 | #[repr(C)] 333 | #[repr(align(64))] 334 | #[derive(Default, Clone, Copy)] 335 | struct CacheLine([u16; 32]); 336 | 337 | pub const DEFAULT_SETTINGS: MeasurementSettings = 
MeasurementSettings { 338 | filter_outliers: false, 339 | samples_per_haystack: 1, 340 | min_iterations_per_sample: 1, 341 | max_iterations_per_sample: 5000, 342 | sampler_type: SampleLengthKind::Random, 343 | cache_firewall: None, 344 | yield_before_sample: false, 345 | warmup_enabled: true, 346 | randomize_stack: None, 347 | }; 348 | 349 | impl Default for MeasurementSettings { 350 | fn default() -> Self { 351 | DEFAULT_SETTINGS 352 | } 353 | } 354 | 355 | /// Responsible for determining the number of iterations to run for each sample 356 | /// 357 | /// Different sampler strategies can influence the results heavily. For example, if function is dependent heavily 358 | /// on a memory subsystem, then it should be tested with different number of iterations to be representative 359 | /// for different memory access patterns and cache states. 360 | trait SampleLength { 361 | /// Returns the number of iterations to run for the next sample 362 | /// 363 | /// Accepts the number of iteration being run starting from 0 and cumulative time spent by both functions 364 | fn next_sample_iterations(&mut self, iteration_no: usize, estimate: usize) -> usize; 365 | } 366 | 367 | /// Runs the same number of iterations for each sample 368 | /// 369 | /// Estimates the number of iterations based on the number of iterations achieved in 10 ms and uses 370 | /// this number as a base for the number of iterations for each sample. 
371 | struct FlatSampleLength { 372 | min: usize, 373 | max: usize, 374 | } 375 | 376 | impl FlatSampleLength { 377 | fn new(settings: &MeasurementSettings) -> Self { 378 | FlatSampleLength { 379 | min: settings.min_iterations_per_sample.max(1), 380 | max: settings.max_iterations_per_sample, 381 | } 382 | } 383 | } 384 | 385 | impl SampleLength for FlatSampleLength { 386 | fn next_sample_iterations(&mut self, _iteration_no: usize, estimate: usize) -> usize { 387 | estimate.clamp(self.min, self.max) 388 | } 389 | } 390 | 391 | struct LinearSampleLength { 392 | min: usize, 393 | max: usize, 394 | } 395 | 396 | impl LinearSampleLength { 397 | fn new(settings: &MeasurementSettings) -> Self { 398 | Self { 399 | min: settings.min_iterations_per_sample.max(1), 400 | max: settings.max_iterations_per_sample, 401 | } 402 | } 403 | } 404 | 405 | impl SampleLength for LinearSampleLength { 406 | fn next_sample_iterations(&mut self, iteration_no: usize, estimate: usize) -> usize { 407 | let estimate = estimate.clamp(self.min, self.max); 408 | (iteration_no % estimate) + 1 409 | } 410 | } 411 | 412 | /// Sampler that randomly determines the number of iterations to run for each sample 413 | /// 414 | /// This sampler uses a random number generator to decide the number of iterations for each sample. 
415 | struct RandomSampleLength { 416 | rng: SmallRng, 417 | min: usize, 418 | max: usize, 419 | } 420 | 421 | impl RandomSampleLength { 422 | pub fn new(settings: &MeasurementSettings, seed: u64) -> Self { 423 | Self { 424 | rng: SmallRng::seed_from_u64(seed), 425 | min: settings.min_iterations_per_sample.max(1), 426 | max: settings.max_iterations_per_sample, 427 | } 428 | } 429 | } 430 | 431 | impl SampleLength for RandomSampleLength { 432 | fn next_sample_iterations(&mut self, _iteration_no: usize, estimate: usize) -> usize { 433 | let estimate = estimate.clamp(self.min, self.max); 434 | self.rng.gen_range(1..=estimate) 435 | } 436 | } 437 | 438 | /// Calculates the result of the benchmarking run 439 | /// 440 | /// Return None if no measurements were made 441 | pub(crate) fn calculate_run_result>( 442 | name: N, 443 | baseline: &[u64], 444 | candidate: &[u64], 445 | iterations_per_sample: &[usize], 446 | filter_outliers: bool, 447 | ) -> Option { 448 | assert!(baseline.len() == candidate.len()); 449 | assert!(baseline.len() == iterations_per_sample.len()); 450 | 451 | let mut iterations_per_sample = iterations_per_sample.to_vec(); 452 | 453 | let mut diff = candidate 454 | .iter() 455 | .zip(baseline.iter()) 456 | // Calculating difference between candidate and baseline 457 | .map(|(&c, &b)| (c as f64 - b as f64)) 458 | .zip(iterations_per_sample.iter()) 459 | // Normalizing difference to iterations count 460 | .map(|(diff, &iters)| diff / iters as f64) 461 | .collect::>(); 462 | 463 | // need to save number of original samples to calculate number of outliers correctly 464 | let n = diff.len(); 465 | 466 | // Normalizing measurements to iterations count 467 | let mut baseline = baseline 468 | .iter() 469 | .zip(iterations_per_sample.iter()) 470 | .map(|(&v, &iters)| (v as f64) / (iters as f64)) 471 | .collect::>(); 472 | let mut candidate = candidate 473 | .iter() 474 | .zip(iterations_per_sample.iter()) 475 | .map(|(&v, &iters)| (v as f64) / (iters as f64)) 
476 | .collect::>(); 477 | 478 | // Calculating measurements range. All measurements outside this interval considered outliers 479 | let range = if filter_outliers { 480 | iqr_variance_thresholds(diff.to_vec()) 481 | } else { 482 | None 483 | }; 484 | 485 | // Cleaning measurements from outliers if needed 486 | if let Some(range) = range { 487 | // We filtering outliers to build statistical Summary and the order of elements in arrays 488 | // doesn't matter, therefore swap_remove() is used. But we need to make sure that all arrays 489 | // has the same length 490 | assert_eq!(diff.len(), baseline.len()); 491 | assert_eq!(diff.len(), candidate.len()); 492 | 493 | let mut i = 0; 494 | while i < diff.len() { 495 | if range.contains(&diff[i]) { 496 | i += 1; 497 | } else { 498 | diff.swap_remove(i); 499 | iterations_per_sample.swap_remove(i); 500 | baseline.swap_remove(i); 501 | candidate.swap_remove(i); 502 | } 503 | } 504 | }; 505 | 506 | let diff_summary = Summary::from(&diff)?; 507 | let baseline_summary = Summary::from(&baseline)?; 508 | let candidate_summary = Summary::from(&candidate)?; 509 | 510 | let diff_estimate = DiffEstimate::build(&baseline_summary, &diff_summary); 511 | 512 | Some(RunResult { 513 | baseline: baseline_summary, 514 | candidate: candidate_summary, 515 | diff: diff_summary, 516 | name: name.into(), 517 | diff_estimate, 518 | outliers: n - diff_summary.n, 519 | }) 520 | } 521 | 522 | /// Contains the estimation of how much faster or slower is candidate function compared to baseline 523 | pub(crate) struct DiffEstimate { 524 | // Percentage of difference between candidate and baseline 525 | // 526 | // Negative value means that candidate is faster than baseline, positive - slower. 
527 | pct: f64, 528 | 529 | // Is the difference statistically significant 530 | significant: bool, 531 | } 532 | 533 | impl DiffEstimate { 534 | /// Builds [`DiffEstimate`] from flat sampling 535 | /// 536 | /// Flat sampling is a sampling where each measurement is normalized by the number of iterations. 537 | /// This is needed to make measurements comparable between each other. Linear sampling is more 538 | /// robust to outliers, but it is requiring more iterations. 539 | /// 540 | /// It is assumed that baseline and candidate are already normalized by iterations count. 541 | fn build(baseline: &Summary, diff: &Summary) -> Self { 542 | let std_dev = diff.variance.sqrt(); 543 | let std_err = std_dev / (diff.n as f64).sqrt(); 544 | let z_score = diff.mean / std_err; 545 | 546 | // significant result is far away from 0 and have more than 0.5% base/candidate difference 547 | // z_score = 2.6 corresponds to 99% significance level 548 | let significant = z_score.abs() >= 2.6 549 | && (diff.mean / baseline.mean).abs() > 0.005 550 | && diff.mean.abs() >= ActiveTimer::precision() as f64; 551 | let pct = diff.mean / baseline.mean * 100.0; 552 | 553 | Self { pct, significant } 554 | } 555 | } 556 | 557 | /// Describes the results of a single benchmark run 558 | pub(crate) struct RunResult { 559 | /// name of a test 560 | name: String, 561 | 562 | /// statistical summary of baseline function measurements 563 | baseline: Summary, 564 | 565 | /// statistical summary of candidate function measurements 566 | candidate: Summary, 567 | 568 | /// individual measurements of a benchmark (candidate - baseline) 569 | diff: Summary, 570 | 571 | diff_estimate: DiffEstimate, 572 | 573 | /// Numbers of detected and filtered outliers 574 | outliers: usize, 575 | } 576 | 577 | /// Statistical summary for a given iterator of numbers. 578 | /// 579 | /// Calculates all the information using single pass over the data. 
Mean and variance are calculated using 580 | /// streaming algorithm described in _Art of Computer Programming, Vol 2, page 232_. 581 | #[derive(Clone, Copy)] 582 | pub struct Summary { 583 | pub n: usize, 584 | pub min: T, 585 | pub max: T, 586 | pub mean: f64, 587 | pub variance: f64, 588 | } 589 | 590 | impl Summary { 591 | pub fn from<'a, C>(values: C) -> Option 592 | where 593 | C: IntoIterator, 594 | T: ToPrimitive + Copy + Default + 'a, 595 | { 596 | Self::running(values.into_iter().copied()).last() 597 | } 598 | 599 | pub fn running(iter: I) -> impl Iterator> 600 | where 601 | T: ToPrimitive + Copy + Default, 602 | I: Iterator, 603 | { 604 | RunningSummary { 605 | iter, 606 | n: 0, 607 | min: T::default(), 608 | max: T::default(), 609 | mean: 0., 610 | s: 0., 611 | } 612 | } 613 | } 614 | 615 | struct RunningSummary { 616 | iter: I, 617 | n: usize, 618 | min: T, 619 | max: T, 620 | mean: f64, 621 | s: f64, 622 | } 623 | 624 | impl Iterator for RunningSummary 625 | where 626 | T: Copy + PartialOrd, 627 | I: Iterator, 628 | T: ToPrimitive, 629 | { 630 | type Item = Summary; 631 | 632 | fn next(&mut self) -> Option { 633 | let value = self.iter.next()?; 634 | let fvalue = value.to_f64().expect("f64 overflow detected"); 635 | 636 | if self.n == 0 { 637 | self.min = value; 638 | self.max = value; 639 | } 640 | 641 | if let Some(Ordering::Less) = value.partial_cmp(&self.min) { 642 | self.min = value; 643 | } 644 | if let Some(Ordering::Greater) = value.partial_cmp(&self.max) { 645 | self.max = value; 646 | } 647 | 648 | self.n += 1; 649 | let mean_p = self.mean; 650 | self.mean += (fvalue - self.mean) / self.n as f64; 651 | self.s += (fvalue - mean_p) * (fvalue - self.mean); 652 | let variance = if self.n > 1 { 653 | self.s / (self.n - 1) as f64 654 | } else { 655 | 0. 
/// Outlier detection algorithm based on interquartile range
///
/// Observations that are 1.5 IQR away from the corresponding quartile are considered
/// outliers, as described in the original Tukey's paper.
///
/// Returns the range of values to keep, or `None` when no symmetric outlier filtering
/// should be applied (including degenerate inputs with fewer than 4 observations).
pub fn iqr_variance_thresholds(mut input: Vec<f64>) -> Option<RangeInclusive<f64>> {
    const MINIMUM_IQR: f64 = 1.;

    // Quartiles are meaningless for tiny inputs; this also prevents the
    // `input.len() * 3 / 4 - 1` expression below from underflowing on empty input.
    if input.len() < 4 {
        return None;
    }

    input.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    let (q1, q3) = (input.len() / 4, input.len() * 3 / 4 - 1);
    if q1 >= q3 || q3 >= input.len() {
        return None;
    }
    // In case q1 and q3 are equal, we need to make sure that IQR is not 0
    // In the future it would be nice to measure system timer precision empirically.
    let iqr = (input[q3] - input[q1]).max(MINIMUM_IQR);

    let low_threshold = input[q1] - iqr * 1.5;
    let high_threshold = input[q3] + iqr * 1.5;

    // Index of the first observation at or above the low threshold; the threshold cannot
    // fall above q1, so only the `[0, q1)` prefix needs to be searched.
    let low_threshold_idx =
        match input[0..q1].binary_search_by(|probe| probe.total_cmp(&low_threshold)) {
            Ok(idx) | Err(idx) => idx,
        };

    // Index of the first observation above the high threshold. `binary_search_by` on the
    // `input[q3..]` subslice returns an index relative to the subslice, so `q3` must be
    // added back to obtain an index into `input` (otherwise the high-side outlier count
    // below is inflated by `q3` and the `>= input.len()` guard can never fire).
    let high_threshold_idx =
        match input[q3..].binary_search_by(|probe| probe.total_cmp(&high_threshold)) {
            Ok(idx) | Err(idx) => q3 + idx,
        };

    // If either side has no outliers, the symmetric trim below would remove nothing
    if low_threshold_idx == 0 || high_threshold_idx >= input.len() {
        return None;
    }

    // Calculating the equal number of observations which should be removed from each "side"
    let outliers_cnt = low_threshold_idx.min(input.len() - high_threshold_idx);

    Some(input[outliers_cnt]..=(input[input.len() - outliers_cnt - 1]))
}
mod timer {
    use std::time::Instant;

    // The timer used for all measurements: the hardware TSC timer when the `hw-timer`
    // feature is enabled on x86-64, the OS monotonic clock otherwise.
    #[cfg(all(feature = "hw-timer", target_arch = "x86_64"))]
    pub(super) type ActiveTimer = x86::RdtscpTimer;

    #[cfg(not(all(feature = "hw-timer", target_arch = "x86_64")))]
    pub(super) type ActiveTimer = PlatformTimer;

    /// Abstraction over a monotonic timer; `T` is the timer-specific start token
    pub(super) trait Timer<T> {
        fn start() -> T;
        fn stop(start_time: T) -> u64;

        /// Timer precision in nanoseconds
        ///
        /// The results less than the precision of a timer are considered not significant
        fn precision() -> u64 {
            1
        }
    }

    /// Timer backed by [`std::time::Instant`] (OS monotonic clock)
    pub(super) struct PlatformTimer;

    impl Timer<Instant> for PlatformTimer {
        #[inline]
        fn start() -> Instant {
            Instant::now()
        }

        #[inline]
        fn stop(start_time: Instant) -> u64 {
            start_time.elapsed().as_nanos() as u64
        }
    }

    #[cfg(all(feature = "hw-timer", target_arch = "x86_64"))]
    pub(super) mod x86 {
        use super::Timer;
        use std::arch::x86_64::{__rdtscp, _mm_mfence};

        /// Timer based on the `RDTSCP` instruction.
        ///
        /// NOTE(review): reports elapsed TSC ticks, not nanoseconds — presumably acceptable
        /// because both baseline and candidate are measured in the same units; confirm.
        pub struct RdtscpTimer;

        impl Timer<u64> for RdtscpTimer {
            #[inline]
            fn start() -> u64 {
                // SAFETY: RDTSCP is available on the x86_64 targets this cfg selects;
                // the fence orders preceding memory operations before the counter read.
                unsafe {
                    _mm_mfence();
                    __rdtscp(&mut 0)
                }
            }

            #[inline]
            fn stop(start: u64) -> u64 {
                // SAFETY: same as `start`; the fence keeps subsequent operations from
                // being reordered before the counter read.
                unsafe {
                    let end = __rdtscp(&mut 0);
                    _mm_mfence();
                    end - start
                }
            }
        }
    }
}
SamplerFactory 798 | for AsyncSampleFactory 799 | { 800 | fn create_sampler(&mut self, params: BenchmarkParams) -> Box { 801 | (self.0)(AsyncBencher { 802 | params, 803 | runtime: self.1, 804 | }) 805 | } 806 | } 807 | 808 | pub struct AsyncBencher { 809 | params: BenchmarkParams, 810 | runtime: R, 811 | } 812 | 813 | impl AsyncBencher { 814 | pub fn iter(self, func: F) -> Box 815 | where 816 | O: 'static, 817 | Fut: Future, 818 | F: FnMut() -> Fut + Copy + 'static, 819 | { 820 | Box::new(Sampler(move || self.runtime.block_on(func))) 821 | } 822 | } 823 | 824 | impl Deref for AsyncBencher { 825 | type Target = BenchmarkParams; 826 | 827 | fn deref(&self) -> &Self::Target { 828 | &self.params 829 | } 830 | } 831 | 832 | pub trait AsyncRuntime: Copy { 833 | fn block_on, F: FnMut() -> Fut>(&self, f: F) -> O; 834 | } 835 | 836 | #[cfg(feature = "async-tokio")] 837 | pub mod tokio { 838 | use super::*; 839 | use ::tokio::runtime::Builder; 840 | 841 | #[derive(Copy, Clone)] 842 | pub struct TokioRuntime; 843 | 844 | impl AsyncRuntime for TokioRuntime { 845 | fn block_on, F: FnMut() -> Fut>(&self, mut f: F) -> O { 846 | let runtime = Builder::new_current_thread().build().unwrap(); 847 | runtime.block_on(f()) 848 | } 849 | } 850 | } 851 | } 852 | 853 | #[cfg(test)] 854 | mod tests { 855 | use super::*; 856 | use rand::{rngs::SmallRng, Rng, RngCore, SeedableRng}; 857 | use std::{ 858 | iter::Sum, 859 | ops::{Add, Div}, 860 | thread, 861 | time::Duration, 862 | }; 863 | 864 | #[test] 865 | fn check_iqr_variance_thresholds() { 866 | let mut rng = SmallRng::from_entropy(); 867 | 868 | // Generate 20 random values in range [-50, 50] 869 | // and add 10 outliers in each of two ranges [-1000, -200] and [200, 1000] 870 | // This way IQR is no more than 100 and thresholds should be within [-50, 50] range 871 | let mut values = vec![]; 872 | values.extend((0..20).map(|_| rng.gen_range(-50.0..=50.))); 873 | values.extend((0..10).map(|_| rng.gen_range(-1000.0..=-200.0))); 874 | 
/// This test checks that the algorithm is stable in case of zero difference between
/// the 25th and 75th percentiles
#[test]
fn check_outliers_zero_iqr() {
    let mut rng = SmallRng::from_entropy();

    // 20 identical central observations plus 10 outliers on each side
    let mut values = vec![];
    values.extend(std::iter::repeat(0.).take(20));
    values.extend((0..10).map(|_| rng.gen_range(-1000.0..=-200.0)));
    values.extend((0..10).map(|_| rng.gen_range(200.0..=1000.0)));

    let thresholds = iqr_variance_thresholds(values).unwrap();

    assert!(
        0. <= *thresholds.start() && *thresholds.end() <= 0.,
        "Invalid range: {:?}",
        thresholds
    );
}

#[test]
fn check_summary_statistics() {
    for i in 2u32..100 {
        let values = (1..=i).collect::<Vec<_>>();
        let stat = Summary::from(&values).unwrap();

        // Closed-form mean of 1..=i; variance cross-checked against the naive
        // two-pass implementation below
        let sum = (i * (i + 1)) as f64 / 2.;
        let expected_mean = sum / i as f64;
        let expected_variance = naive_variance(values.as_slice());

        assert_eq!(stat.min, 1);
        assert_eq!(stat.n, i as usize);
        assert_eq!(stat.max, i);
        assert!(
            (stat.mean - expected_mean).abs() < 1e-5,
            "Expected close to: {}, given: {}",
            expected_mean,
            stat.mean
        );
        assert!(
            (stat.variance - expected_variance).abs() < 1e-5,
            "Expected close to: {}, given: {}",
            expected_variance,
            stat.variance
        );
    }
}

#[test]
fn check_summary_statistics_types() {
    // Ensure Summary::from is callable for all relevant element types
    Summary::from(<&[i64]>::default());
    Summary::from(<&[u32]>::default());
    Summary::from(&Vec::<f64>::default());
}
#[test]
fn check_running_variance() {
    let input = [1i64, 2, 3, 4, 5, 6, 7];
    let variances = Summary::running(input.into_iter())
        .map(|s| s.variance)
        .collect::<Vec<_>>();
    // Hand-computed running sample variances of the prefixes of `input`
    let expected = &[0., 0.5, 1., 1.6666, 2.5, 3.5, 4.6666];

    assert_eq!(variances.len(), expected.len());

    for (value, expected_value) in variances.iter().zip(expected) {
        assert!(
            (value - expected_value).abs() < 1e-3,
            "Expected close to: {}, given: {}",
            expected_value,
            value
        );
    }
}

#[test]
fn check_running_variance_stress_test() {
    // A long pseudo-random stream must keep the variance strictly positive
    let rng = RngIterator(SmallRng::seed_from_u64(0)).map(|i| i as i64);
    let mut variances = Summary::running(rng).map(|s| s.variance);

    assert!(variances.nth(1_000_000).unwrap() > 0.)
}
using more liberal checking condition (allowing 1 order of magnitude error in measurement) 980 | #[test] 981 | fn check_measure_time() { 982 | let expected_delay = 1; 983 | let mut target = benchmark_fn("foo", move |b| { 984 | b.iter(move || thread::sleep(Duration::from_millis(expected_delay))) 985 | }); 986 | target.prepare_state(0); 987 | 988 | let median = median_execution_time(&mut target, 10).as_millis() as u64; 989 | assert!(median < expected_delay * 10); 990 | } 991 | 992 | struct RngIterator(T); 993 | 994 | impl Iterator for RngIterator { 995 | type Item = u32; 996 | 997 | fn next(&mut self) -> Option { 998 | Some(self.0.next_u32()) 999 | } 1000 | } 1001 | 1002 | fn naive_variance(values: &[T]) -> f64 1003 | where 1004 | T: Sum + Copy, 1005 | f64: From, 1006 | { 1007 | let n = values.len() as f64; 1008 | let mean = f64::from(values.iter().copied().sum::()) / n; 1009 | let mut sum_of_squares = 0.; 1010 | for value in values.iter().copied() { 1011 | sum_of_squares += (f64::from(value) - mean).powi(2); 1012 | } 1013 | sum_of_squares / (n - 1.) 
/// Returns the median of the given measurements (upper median for even-length input)
///
/// Panics on empty input.
fn median<T: Copy + Ord + Add<Output = T> + Div<Output = T>>(mut measures: Vec<T>) -> T {
    assert!(!measures.is_empty(), "Vec is empty");
    measures.sort_unstable();
    measures[measures.len() / 2]
}
The following error might appear: 41 | /// 42 | /// ```console 43 | /// dlopen error: cannot dynamically load position-independent executable 44 | /// ``` 45 | /// 46 | /// From 2.29 [dynamic loader throws an error](glibc) if `DF_1_PIE` flag is set in 47 | /// `DT_FLAG_1` tag on the `PT_DYNAMIC` section. Although the loading of executable 48 | /// files as a shared library was never an intended use case, through the years 49 | /// some applications adopted this technique and it is very convenient in the context 50 | /// of paired benchmarking. 51 | /// 52 | /// Following method check if this flag is set and patch binary at runtime 53 | /// (writing patched version in a different file). As far as I am aware 54 | /// this is safe modification because `DF_1_PIE` is purely informational and doesn't 55 | /// changle the dynamic linking process in any way. Theoretically in the future this modification 56 | /// could prevent ASLR ramndomization on the OS level which is irrelevant for benchmark 57 | /// executables. 
58 | /// 59 | /// [glibc]: https://github.com/bminor/glibc/blob/2e0c0ff95ca0e3122eb5b906ee26a31f284ce5ab/elf/dl-load.c#L1280-L1282 60 | pub fn patch_pie_binary_if_needed( 61 | #[allow(unused_variables)] path: impl AsRef, 62 | ) -> Result, Error> { 63 | let mut bytes = fs::read(path.as_ref())?; 64 | let elf = Elf::parse(&bytes).map_err(Error::UnableToParseElf)?; 65 | 66 | let Some(dynamic) = elf.dynamic else { 67 | return Ok(None); 68 | }; 69 | if dynamic.info.flags_1 & DF_1_PIE == 0 { 70 | return Ok(None); 71 | } 72 | 73 | let (dyn_idx, _) = dynamic 74 | .dyns 75 | .iter() 76 | .enumerate() 77 | .find(|(_, d)| d.d_tag == DT_FLAGS_1) 78 | .ok_or(Error::NoDTFlags1Found)?; 79 | 80 | // Finding PT_DYNAMIC section offset 81 | let header = elf 82 | .program_headers 83 | .iter() 84 | .find(|h| h.p_type == PT_DYNAMIC) 85 | .ok_or(Error::NoDynamicSectionFound)?; 86 | 87 | // Finding target Dyn item offset 88 | let dyn_offset = header.p_offset as usize + dyn_idx * mem::size_of::(); 89 | 90 | // Crosschecking we found right dyn tag 91 | let mut dyn_item = bytes 92 | .pread::(dyn_offset) 93 | .map_err(Error::UnableToSerializeElf)?; 94 | 95 | if dyn_item.d_tag != DT_FLAGS_1 || dyn_item.d_val != dynamic.info.flags_1 { 96 | return Err(Error::FlagCrosscheckFailed); 97 | } 98 | 99 | // clearing DF_1_PIE bit and writing patched binary 100 | dyn_item.d_val &= !DF_1_PIE; 101 | bytes 102 | .pwrite(dyn_item, dyn_offset) 103 | .map_err(Error::UnableToSerializeElf)?; 104 | 105 | let path = path.as_ref().with_extension("patched"); 106 | fs::write(&path, bytes)?; 107 | 108 | Ok(Some(path)) 109 | } 110 | -------------------------------------------------------------------------------- /tango-bench/src/plot.gnuplot: -------------------------------------------------------------------------------- 1 | set term svg enhanced size 1000,400 lw 1.5 background rgb 'white' 2 | set output ARG2 3 | set grid 4 | 5 | set datafile separator ',' 6 | 7 | set multiplot 8 | 9 | set ylabel "time (us.)" 10 | set 
#!/usr/bin/env bash
set -eo pipefail

# Build and export the ordsearch benchmark binary
cargo +nightly export target/benches -- bench --bench='search-ord'

pushd target/benches
# Paired benchmarking compares the binary against a copy of itself
cp search_ord search_ord2

# macOS refuses to load modified/copied binaries unless they are (re)signed
if [[ "$(uname)" == "Darwin" ]]; then
    codesign --force --deep --sign - search_ord*
fi

rm -rf ../data/*.{csv,svg}
# "$@" (quoted) forwards extra arguments intact even when they contain spaces
./search_ord compare -d ../data search_ord2 "$@"
popd