├── .gitignore ├── figures ├── request_rate_async_200ms.png ├── latency_histogram_async_200ms.png ├── latency_timeline_async_200ms.png └── latency_percentiles_async_200ms.png ├── diagrams ├── amplifying-failures.puml ├── amplifying-failures-big-time.puml ├── fine-grained-locking.puml └── coarse-grained-locking.puml ├── Cargo.toml ├── .github └── workflows │ └── clippy.yml ├── benches ├── benchmarks_sum.rs ├── README.md └── benchmarks.rs ├── README.md ├── LICENSE └── src └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.idea 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /figures/request_rate_async_200ms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xnuter/concurrency-demo-benchmarks/HEAD/figures/request_rate_async_200ms.png -------------------------------------------------------------------------------- /figures/latency_histogram_async_200ms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xnuter/concurrency-demo-benchmarks/HEAD/figures/latency_histogram_async_200ms.png -------------------------------------------------------------------------------- /figures/latency_timeline_async_200ms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xnuter/concurrency-demo-benchmarks/HEAD/figures/latency_timeline_async_200ms.png -------------------------------------------------------------------------------- /figures/latency_percentiles_async_200ms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xnuter/concurrency-demo-benchmarks/HEAD/figures/latency_percentiles_async_200ms.png -------------------------------------------------------------------------------- 
/diagrams/amplifying-failures.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | [Service] -down-> node_1 3 | [Service] -down-> node_2 4 | [Service] -down-> node_... 5 | [Service] -down-> node_9 6 | () node_10 #red 7 | [Service] .down. node_10 #Red 8 | @enduml -------------------------------------------------------------------------------- /diagrams/amplifying-failures-big-time.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | [Service] -down-> node_1 3 | [Service] -down-> node_2 4 | [Service] -down-> node_... 5 | [Service] -down-> node_9 6 | () node_10 #red 7 | [Service] .down. node_10 #Red 8 | @enduml -------------------------------------------------------------------------------- /diagrams/fine-grained-locking.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | participant Thread1 order 1 3 | participant Thread2 order 2 4 | collections EventQueue order 3 5 | participant StreamOwner order 4 6 | participant OutputStream order 5 7 | 8 | Thread1 -> EventQueue: offer 9 | Thread2 -> EventQueue: offer 10 | group while (!done) 11 | StreamOwner -> EventQueue: poll 12 | StreamOwner -> OutputStream: write 13 | activate OutputStream 14 | StreamOwner <- OutputStream: ack 15 | deactivate OutputStream 16 | end 17 | @enduml -------------------------------------------------------------------------------- /diagrams/coarse-grained-locking.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | participant Thread1 order 1 3 | participant Thread2 order 2 4 | participant OutputStream order 3 5 | 6 | group synchronized2 7 | group synchronized1 8 | Thread1 -> OutputStream: write 9 | hnote over Thread2: idle 10 | activate OutputStream 11 | OutputStream -> Thread1: ack 12 | deactivate OutputStream 13 | end synchronized2 14 | Thread2 -> OutputStream: write 15 | activate 
OutputStream 16 | OutputStream -> Thread2: ack 17 | deactivate OutputStream 18 | end synchronized1 19 | 20 | @enduml -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "concurrency-demo-benchmarks" 3 | version = "0.0.8" 4 | authors = ["xnuter"] 5 | edition = "2018" 6 | license = "MIT OR Apache-2.0" 7 | publish = true 8 | readme = "README.md" 9 | repository = "https://github.com/xnuter/concurrency-demo-benchmarks" 10 | homepage = "https://github.com/xnuter/concurrency-demo-benchmarks" 11 | description = "A small utility to benchmark different approaches for building concurrent applications." 12 | 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | tokio = { version = "0.2", features = ["full"] } 17 | matplotrust = "0.1" 18 | leaky-bucket = "0.8.2" 19 | clap = "3.0.0-beta.1" 20 | crossbeam = "0.8" 21 | humantime = "2.0" 22 | 23 | [dev-dependencies] 24 | criterion = "0.3" 25 | 26 | [[bench]] 27 | name = "benchmarks" 28 | harness = false 29 | 30 | [[bench]] 31 | name = "benchmarks_sum" 32 | harness = false 33 | -------------------------------------------------------------------------------- /.github/workflows/clippy.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Clippy/Fmt 3 | jobs: 4 | clippy: 5 | name: Clippy 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Checkout sources 9 | uses: actions/checkout@v2 10 | 11 | - name: Install nightly toolchain with clippy available 12 | uses: actions-rs/toolchain@v1 13 | with: 14 | profile: minimal 15 | toolchain: nightly 16 | override: true 17 | components: clippy 18 | 19 | - name: Run cargo clippy 20 | uses: actions-rs/cargo@v1 21 | with: 22 | command: clippy 23 | args: -- -D warnings 24 | 25 | rustfmt: 26 | name: Format 27 | 
/// Sums `observations` into a thread-local accumulator, then publishes the
/// total with a single atomic add — one synchronized operation per batch.
fn sum_batched(observations: &[usize], counter: &AtomicUsize) {
    let mut batch = 0;
    for i in observations {
        batch += i;
    }
    counter.fetch_add(batch, Ordering::Relaxed);
}

/// Sums `observations` by issuing one atomic add per element —
/// one synchronized operation per observation.
fn sum_naive_atomic(observations: &[usize], counter: &AtomicUsize) {
    for i in observations {
        counter.fetch_add(*i, Ordering::Relaxed);
    }
}

/// Sums `observations` by taking the mutex once per element —
/// one lock/unlock pair per observation.
/// NOTE: the listing had lost the generic parameter (`&Mutex`);
/// restored to `&Mutex<usize>` to match the `*lock += *i` usage.
fn sum_naive_mutex(observations: &[usize], counter_mutex: &Mutex<usize>) {
    for i in observations {
        let mut lock = counter_mutex.lock().expect("Never fails in this bench");
        *lock += *i;
    }
}
&counter_atomic))) 41 | }); 42 | c.bench_function("Sum Naive Mutex", |b| { 43 | b.iter(|| black_box(sum_naive_mutex(increment, &counter_mutex))) 44 | }); 45 | 46 | let batched = counter_batched.load(Ordering::Relaxed); 47 | let atomic = counter_atomic.load(Ordering::Relaxed); 48 | let mutex = counter_mutex.lock().unwrap(); 49 | println!( 50 | "Sum Batched {:12} operations, {:.6}", 51 | batched / repetitions, 52 | batched as f64 / batched as f64 53 | ); 54 | println!( 55 | "Sum Atomic {:12} operations, {:.6}", 56 | atomic / repetitions, 57 | atomic as f64 / batched as f64 58 | ); 59 | println!( 60 | "Sum Mutex {:12} operations, {:.6}", 61 | *mutex / repetitions, 62 | *mutex as f64 / batched as f64 63 | ); 64 | } 65 | 66 | criterion_group!(benches, benchmark_increment,); 67 | 68 | criterion_main!(benches); 69 | -------------------------------------------------------------------------------- /benches/README.md: -------------------------------------------------------------------------------- 1 | ### Benchmarks 2 | 3 | These benchmarks compare the difference between manipulating data in the following modes: 4 | 5 | * Multi-thread batched atomic 6 | * Multi-thread naïve atomic 7 | * Multi-thread naïve mutex 8 | 9 | The benchmarks are artificial, but the goal is to give a general idea about the cost of consistency guarantees. 10 | 11 | All benchmarks are performed on `Intel(R) Xeon(R) CPU @ 2.30GHz`. 12 | 13 | ### Benchmark 1. Incrementing data 14 | 15 | ```rust 16 | fn benchmark_batched_increment(repetitions: usize, mut increment: usize, counter: &AtomicUsize) { 17 | let mut batch = 0; 18 | for _ in 0..repetitions { 19 | // avoiding compiler optimizations 20 | // E.g. go to https://rust.godbolt.org/z/7he65h 21 | // and try to comment the line #4 22 | increment ^= 1; 23 | batch += increment; 24 | } 25 | counter.fetch_add(batch, Ordering::Relaxed); 26 | } 27 | ``` 28 | 29 | The `increment ^= 1;` statement was introduced to avoid optimizations. 
30 | However, it turned out that some optimizations still have place: 31 | https://stackoverflow.com/questions/65010708/why-is-xor-much-faster-than-or 32 | 33 | That's why another scenario was benchmarked to sum an array. 34 | 35 | ### Benchmark 2. Summing an array 36 | 37 | Semantically, this is the same operation - as for the benchmark 1, 38 | but designed to hide from the compiler the fact that it's all `1`s to add and make it more _realistic_: 39 | 40 | ```rust 41 | fn sum_batched(observations: &[usize], counter: &AtomicUsize) { 42 | let mut batch = 0; 43 | for i in observations { 44 | batch += i; 45 | } 46 | counter.fetch_add(batch, Ordering::Relaxed); 47 | } 48 | 49 | fn sum_naive_atomic(observations: &[usize], counter: &AtomicUsize) { 50 | for i in observations { 51 | counter.fetch_add(*i, Ordering::Relaxed); 52 | } 53 | } 54 | 55 | fn sum_naive_mutex(observations: &[usize], counter_mutex: &Mutex) { 56 | for i in observations { 57 | let mut lock = counter_mutex.lock().expect("Never fails in this bench"); 58 | *lock += *i; 59 | } 60 | } 61 | ``` 62 | 63 | The difference is not as prominent, but of the same order: 64 | 65 | ``` 66 | Sum Batched time: 0.1149 us 67 | Sum Naive Atomic time: 6.7829 us 68 | Sum Naive Mutex time: 21.455 us 69 | 70 | Sum Batched 76848081 operations, 1.000000 71 | Sum Atomic 1261587 operations, 0.016417 72 | Sum Mutex 494443 operations, 0.006434 73 | ``` 74 | 75 | `Batched` is faster than `Atomic` ~61 times, and faster than `Mutex` ~155 times. 
/// Flips `increment` between 0 and 1 for `repetitions` iterations,
/// accumulating locally and publishing the total with a single atomic add.
///
/// The `increment ^= 1;` statement defeats constant-folding so the loop
/// body is not optimized away (see https://rust.godbolt.org/z/7he65h).
/// Compound assignment used instead of `increment = increment ^ 1` to
/// satisfy clippy's `assign_op_pattern` (CI runs `clippy -- -D warnings`)
/// and to match the snippet shown in benches/README.md.
fn benchmark_batched_increment(repetitions: usize, mut increment: usize, counter: &AtomicUsize) {
    let mut batch = 0;
    for _ in 0..repetitions {
        increment ^= 1;
        batch += increment;
    }
    counter.fetch_add(batch, Ordering::Relaxed);
}

/// Same accumulation, but with one atomic add per iteration.
fn benchmark_atomic_increment(repetitions: usize, mut increment: usize, counter: &AtomicUsize) {
    for _ in 0..repetitions {
        increment ^= 1;
        counter.fetch_add(increment, Ordering::Relaxed);
    }
}

/// Same accumulation, but taking the mutex once per iteration.
/// NOTE: the listing had lost the generic parameter (`&Mutex`);
/// restored to `&Mutex<usize>` to match the `*lock += increment` usage.
fn benchmark_mutex_increment(
    repetitions: usize,
    mut increment: usize,
    counter_mutex: &Mutex<usize>,
) {
    for _ in 0..repetitions {
        increment ^= 1;
        let mut lock = counter_mutex.lock().expect("Never fails in this bench");
        *lock += increment;
    }
}
c.bench_function("Increment Mutex", |b| { 63 | b.iter(|| { 64 | black_box(benchmark_mutex_increment( 65 | repetitions, 66 | increment, 67 | &counter_mutex, 68 | )) 69 | }) 70 | }); 71 | 72 | let batched = counter_batched.load(Ordering::Relaxed); 73 | let atomic = counter_atomic.load(Ordering::Relaxed); 74 | let mutex = counter_mutex.lock().unwrap(); 75 | println!( 76 | "Batched {:12} operations, {:.6}", 77 | batched / repetitions, 78 | batched as f64 / batched as f64 79 | ); 80 | println!( 81 | "Atomic {:12} operations, {:.6}", 82 | atomic / repetitions, 83 | atomic as f64 / batched as f64 84 | ); 85 | println!( 86 | "Mutex {:12} operations, {:.6}", 87 | *mutex / repetitions, 88 | *mutex as f64 / batched as f64 89 | ); 90 | } 91 | 92 | criterion_group!(benches, benchmark_increment,); 93 | 94 | criterion_main!(benches); 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Crate](https://img.shields.io/crates/v/concurrency-demo-benchmarks.svg)](https://crates.io/crates/concurrency-demo-benchmarks) 2 | ![Clippy/Fmt](https://github.com/xnuter/concurrency-demo-benchmarks/workflows/Clippy/Fmt/badge.svg) 3 | 4 | ### Overview 5 | 6 | A small utility that models blocking and non-blocking forms of handling I/O. You can read more [here](https://medium.com/swlh/distributed-systems-and-asynchronous-i-o-ef0f27655ce5). 7 | 8 | #### Pre-requisites 9 | 10 | 1. `cargo` - https://www.rust-lang.org/tools/install 11 | 1. `python3.6+` with `matplotlib` 12 | 13 | It generates the following files in the current directory: 14 | 15 | * `latency_histogram_{name}.png` - X-axis latency in ms, Y-axis - counts for buckets 16 | ![LatencyHistogram](./figures/latency_histogram_async_200ms.png) 17 | * `latency_percentiles_{name}.png` - X-axis - 0..100. 
Y-axis - latency percentile in ms 18 | ![LatencyPercentiles](./figures/latency_percentiles_async_200ms.png) 19 | * `latency_timeline_{name}.png` - X-axis - a timeline in seconds, Y-axis - latency in ms, p50, p90 and p99 20 | ![LatencyTimeline](./figures/latency_timeline_async_200ms.png) 21 | * `request_rate_{name}.png` - X-axis - a timeline in seconds, Y-axis - effective RPS (successes only) 22 | ![RequestRate](./figures/request_rate_async_200ms.png) 23 | 24 | where `{name}` is the `--name` (or `-N`) parameter value. 25 | 26 | You may need to use `--python`/`-p` parameter to specify `python3` binary, if it's not in `/usr/bin/python3`. E.g. 27 | 28 | ``` 29 | concurrency-demo-benchmarks --name async_30s \ 30 | --rate 1000 \ 31 | --num_req 100000 \ 32 | --latency "20ms*9,30s" \ 33 | --python /somewhere/else/python3 \ 34 | async 35 | ``` 36 | 37 | #### Installation 38 | 39 | ``` 40 | cargo install concurrency-demo-benchmarks 41 | ``` 42 | 43 | 44 | #### Run batched/atomic/mutex increments benchmark 45 | 46 | ``` 47 | git clone https://github.com/xnuter/concurrency-demo-benchmarks.git 48 | cargo bench 49 | ``` 50 | 51 | See [benchmark comments here](./benches). 52 | 53 | #### Command line options 54 | 55 | ``` 56 | A tool to model sync vs async processing for a network service 57 | 58 | USAGE: 59 | concurrency-demo-benchmarks [OPTIONS] --name --rate --num_req --latency [SUBCOMMAND] 60 | 61 | FLAGS: 62 | -h, --help Prints help information 63 | -V, --version Prints version information 64 | 65 | OPTIONS: 66 | -l, --latency Comma separated latency values. E.g. 20ms*9,30s or 10ms,20ms,30ms 67 | -N, --name Name of the test-case 68 | -n, --num_req Number of requests. E.g. 1000 69 | -p, --python_path Optional path to python3, e.g. /usr/bin/python3 70 | -r, --rate Request rate per second. E.g. 
100 or 1000 71 | 72 | SUBCOMMANDS: 73 | async Model a service with Async I/O 74 | help Prints this message or the help of the given subcommand(s) 75 | sync Model a service with Blocking I/O 76 | 77 | ``` 78 | 79 | Output example: 80 | ``` 81 | Latencies: 82 | p0.000 - 0.477 ms 83 | p50.000 - 0.968 ms 84 | p90.000 - 1.115 ms 85 | p95.000 - 1.169 ms 86 | p99.000 - 1.237 ms 87 | p99.900 - 1.295 ms 88 | p99.990 - 1.432 ms 89 | p100.000 - 1.469 ms 90 | Avg rate: 1000.000, StdDev: 0.000 91 | ``` 92 | 93 | #### Run sync demo 94 | * 1000 rps 95 | * 20ms latency, 10 endpoints 96 | * 50 threads 97 | ``` 98 | concurrency-demo-benchmarks --name sync_20ms \ 99 | --rate 1000 \ 100 | --num_req 10000 \ 101 | --latency "20ms*10" \ 102 | sync --threads 50 103 | ``` 104 | 105 | * 1000 rps 106 | * 60ms latency for 10 targets 107 | * 50 threads 108 | ``` 109 | concurrency-demo-benchmarks --name sync_60ms \ 110 | --rate 1000 \ 111 | --num_req 10000 \ 112 | --latency "60ms*10" \ 113 | sync --threads 50 114 | ``` 115 | 116 | * 1000 rps 117 | * 20ms latency for 9 targets, but 30s for the other one 118 | * 50 threads 119 | ``` 120 | concurrency-demo-benchmarks --name sync_30s \ 121 | --rate 1000 \ 122 | --num_req 100000 \ 123 | --latency "20ms*9,30s" \ 124 | sync --threads 50 125 | ``` 126 | 127 | #### Run async demo 128 | * 1000 rps 129 | * 20ms latency, 10 targets 130 | ``` 131 | concurrency-demo-benchmarks --name async_20ms \ 132 | --rate 1000 \ 133 | --num_req 10000 \ 134 | --latency "20ms*10" \ 135 | async 136 | ``` 137 | 138 | * 1000 rps 139 | * 60ms latency , 10 targets 140 | ``` 141 | concurrency-demo-benchmarks --name async_60ms \ 142 | --rate 1000 \ 143 | --num_req 100000 \ 144 | --latency "60ms*10" \ 145 | async 146 | ``` 147 | 148 | * 1000 rps 149 | * 20ms latency but 30s for 10% 150 | ``` 151 | concurrency-demo-benchmarks --name async_30s \ 152 | --rate 1000 \ 153 | --num_req 100000 \ 154 | --latency "20ms*9,30s" \ 155 | async 156 | ``` 157 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::clap_app; 2 | use humantime::parse_duration; 3 | use leaky_bucket::LeakyBucket; 4 | use matplotrust::{histogram, line_plot, Figure}; 5 | use std::collections::HashMap; 6 | use std::ops::AddAssign; 7 | use std::sync::atomic::{AtomicUsize, Ordering}; 8 | use std::thread; 9 | use std::thread::sleep; 10 | use std::time::{Duration, Instant}; 11 | use tokio::time::delay_for; 12 | 13 | const TIMEOUT: Duration = Duration::from_secs(1); 14 | 15 | #[derive(Clone)] 16 | struct Task { 17 | start: Instant, 18 | cost: u64, 19 | } 20 | 21 | #[derive(Clone)] 22 | struct TaskStats { 23 | success: bool, 24 | start_time: Instant, 25 | completion_time: Instant, 26 | overhead: f64, 27 | } 28 | 29 | #[derive(Debug)] 30 | enum Mode { 31 | Sync(usize), 32 | Async, 33 | } 34 | 35 | #[derive(Debug)] 36 | struct ModelConfig { 37 | name: String, 38 | n_jobs: usize, 39 | rps: usize, 40 | latency_distribution: Vec, 41 | python_path: Option, 42 | mode: Mode, 43 | } 44 
| 45 | #[tokio::main] 46 | async fn main() { 47 | let config = ModelConfig::from_cli(); 48 | println!("Config: {:#?}", config); 49 | 50 | let mut duration_ms = 1000; 51 | let mut refill = config.rps; 52 | while duration_ms > 10 && refill % 10 == 0 { 53 | duration_ms /= 10; 54 | refill /= 10; 55 | } 56 | println!("Rate limit refill {} per {} ms", refill, duration_ms); 57 | let rate_limiter = LeakyBucket::builder() 58 | .refill_amount(refill) 59 | .refill_interval(Duration::from_millis(duration_ms as u64)) 60 | .build() 61 | .expect("LeakyBucket builder failed"); 62 | 63 | let start_time = Instant::now(); 64 | 65 | let stats = match config.mode { 66 | Mode::Sync(n_workers) => { 67 | sync_execution( 68 | n_workers, 69 | &config.latency_distribution, 70 | config.n_jobs, 71 | rate_limiter, 72 | ) 73 | .await 74 | } 75 | Mode::Async => { 76 | async_execution(&config.latency_distribution, config.n_jobs, rate_limiter).await 77 | } 78 | }; 79 | 80 | let (latencies, rps_buckets) = process_stats(start_time, stats); 81 | 82 | build_latency_timeline(&config, latencies.clone()); 83 | build_latency_histogram(&config, latencies); 84 | build_rps_graph(&config, rps_buckets); 85 | } 86 | 87 | /// Model multi-thread environment, where each threads can handle 88 | /// a single connection at a time. 
89 | async fn sync_execution( 90 | n_workers: usize, 91 | latency_distribution: &[u64], 92 | n_jobs: usize, 93 | rate_limiter: LeakyBucket, 94 | ) -> Vec { 95 | let mut threads = Vec::with_capacity(n_workers); 96 | let (send, recv) = crossbeam::channel::bounded::(n_jobs); 97 | static TASK_COUNTER: AtomicUsize = AtomicUsize::new(0); 98 | 99 | for _ in 0..n_workers { 100 | let receiver = recv.clone(); 101 | 102 | threads.push(thread::spawn(move || { 103 | let mut thread_stats = vec![]; 104 | for val in receiver { 105 | sleep(Duration::from_millis(val.cost)); 106 | // report metrics 107 | let now = Instant::now(); 108 | let stats = TaskStats { 109 | start_time: val.start, 110 | success: val.cost < TIMEOUT.as_millis() as u64, 111 | completion_time: now, 112 | overhead: now.duration_since(val.start).as_secs_f64() - val.cost as f64 / 1000., 113 | }; 114 | thread_stats.push(stats); 115 | TASK_COUNTER.fetch_add(1, Ordering::Relaxed); 116 | } 117 | thread_stats 118 | })); 119 | } 120 | 121 | println!("Starting sending tasks..."); 122 | 123 | for i in 0..n_jobs { 124 | rate_limiter.acquire_one().await.unwrap_or_default(); 125 | let cost = latency_distribution[i % latency_distribution.len()]; 126 | let now = Instant::now(); 127 | send.send(Task { start: now, cost }).unwrap(); 128 | } 129 | 130 | println!("Waiting for completion..."); 131 | 132 | while TASK_COUNTER.load(Ordering::Relaxed) < n_jobs { 133 | sleep(Duration::from_secs(1)); 134 | } 135 | 136 | drop(send); 137 | 138 | let mut combined_stats = vec![]; 139 | for t in threads { 140 | let thread_stats = t.join().unwrap(); 141 | combined_stats.extend(thread_stats); 142 | } 143 | 144 | combined_stats 145 | } 146 | 147 | /// Model an async environment, where there are several threads 148 | /// handling up to tens (or hundreds) of thousands of connections simultaneously. 
149 | async fn async_execution( 150 | latency_distribution: &[u64], 151 | n_jobs: usize, 152 | rate_limiter: LeakyBucket, 153 | ) -> Vec { 154 | let mut tasks = Vec::with_capacity(n_jobs); 155 | 156 | println!("Starting sending tasks..."); 157 | 158 | for i in 0..n_jobs { 159 | rate_limiter.acquire_one().await.unwrap_or_default(); 160 | let cost = latency_distribution[i % latency_distribution.len()]; 161 | let start = Instant::now(); 162 | tasks.push(tokio::spawn(async move { 163 | delay_for(Duration::from_millis(cost)).await; 164 | 165 | let now = Instant::now(); 166 | TaskStats { 167 | start_time: start, 168 | success: cost < TIMEOUT.as_millis() as u64, 169 | completion_time: now, 170 | overhead: now.duration_since(start).as_secs_f64() - cost as f64 / 1000., 171 | } 172 | })); 173 | } 174 | 175 | println!("Waiting for completion..."); 176 | 177 | let mut combined_stats = vec![]; 178 | for t in tasks { 179 | combined_stats.push(t.await.expect("Task failed")); 180 | } 181 | 182 | combined_stats 183 | } 184 | 185 | fn process_stats( 186 | start_time: Instant, 187 | stats_collection: Vec, 188 | ) -> (Vec, HashMap) { 189 | let mut latencies = vec![]; 190 | let mut rps_buckets = HashMap::new(); 191 | for stats in stats_collection { 192 | if stats.success { 193 | latencies.push(stats.clone()); 194 | rps_buckets 195 | .entry(stats.completion_time.duration_since(start_time).as_secs()) 196 | .or_insert(0) 197 | .add_assign(1); 198 | } 199 | } 200 | (latencies, rps_buckets) 201 | } 202 | 203 | impl ModelConfig { 204 | fn from_cli() -> Self { 205 | let matches = clap_app!(myapp => 206 | (name: "Model Sync/Async execution") 207 | (version: "0.0.1") 208 | (author: "Eugene Retunsky") 209 | (about: "A tool to model sync vs async processing for a network service") 210 | (@arg NAME: --name -N +takes_value +required "Name of the test-case") 211 | (@arg RATE: --rate -r +takes_value +required "Request rate per second. E.g. 
100 or 1000") 212 | (@arg NUM_REQUESTS: --num_req -n +takes_value +required "Number of requests. E.g. 1000") 213 | (@arg LATENCY_DISTRIBUTION: --latency -l +takes_value +required "Comma separated latency values. E.g. 20ms*9,30s or 10ms,20ms,30ms") 214 | (@arg PYTHON_PATH: --python_path -p +takes_value "Optional path to python3, e.g. /usr/bin/python3") 215 | (@subcommand async => 216 | (about: "Model a service with Async I/O") 217 | (version: "0.0.1") 218 | ) 219 | (@subcommand sync => 220 | (about: "Model a service with Blocking I/O") 221 | (version: "0.0.1") 222 | (@arg THREADS: --threads -t +takes_value +required "The number of worker threads") 223 | ) 224 | ).get_matches(); 225 | 226 | Self { 227 | name: matches 228 | .value_of("NAME") 229 | .expect("Name is required") 230 | .to_string(), 231 | n_jobs: matches 232 | .value_of("NUM_REQUESTS") 233 | .expect("Rate is required") 234 | .parse() 235 | .expect("NUM_REQUESTS must be a positive integer"), 236 | rps: matches 237 | .value_of("RATE") 238 | .expect("Rate is required") 239 | .parse() 240 | .expect("RATE must be a positive integer"), 241 | latency_distribution: matches 242 | .value_of("LATENCY_DISTRIBUTION") 243 | .expect("Rate is required") 244 | .split(',') 245 | .map(|s| ModelConfig::parse_latency_item(s)) 246 | .flatten() 247 | .collect(), 248 | python_path: matches.value_of("PYTHON_PATH").map(|s| s.to_string()), 249 | mode: if let Some(config) = matches.subcommand_matches("sync") { 250 | Mode::Sync( 251 | config 252 | .value_of("THREADS") 253 | .expect("Rate is required") 254 | .parse() 255 | .expect("THREADS must be a positive integer"), 256 | ) 257 | } else { 258 | Mode::Async 259 | }, 260 | } 261 | } 262 | 263 | fn parse_latency_item(s: &str) -> Vec { 264 | if !s.contains('*') { 265 | vec![ModelConfig::parse_latency(s)] 266 | } else { 267 | let mut split = s.split('*'); 268 | let value = split.next().expect("Must be in format `value*count`"); 269 | let count: usize = split 270 | .next() 271 | 
.expect("Must be in format `value*count`") 272 | .parse() 273 | .expect("Illegal numeric value"); 274 | (0..count) 275 | .map(|_| ModelConfig::parse_latency(value)) 276 | .collect() 277 | } 278 | } 279 | 280 | fn parse_latency(value: &str) -> u64 { 281 | match parse_duration(value) { 282 | Ok(d) => d.as_millis() as u64, 283 | Err(_) => value.parse().expect("Illegal numeric value"), 284 | } 285 | } 286 | 287 | fn get_python_path(&self) -> Option<&str> { 288 | let python_path = match self.python_path.as_ref() { 289 | None => Some("/usr/bin/python3"), 290 | Some(s) => Some(s.as_str()), 291 | }; 292 | python_path 293 | } 294 | } 295 | 296 | fn build_rps_graph(config: &ModelConfig, rps_buckets: HashMap) { 297 | // ignore the first and the last second as they may be incomplete 298 | let start = 1 299 | + rps_buckets 300 | .iter() 301 | .map(|(k, _)| k) 302 | .min() 303 | .expect("At least single data point must be here") 304 | + 1; 305 | let end = rps_buckets 306 | .iter() 307 | .map(|(k, _)| k) 308 | .max() 309 | .expect("At least single data point must be here") 310 | - 1; 311 | let mut x = vec![0]; 312 | let mut y = vec![0]; 313 | let mut total = 0.; 314 | for i in start..end { 315 | let value = *rps_buckets.get(&i).unwrap_or(&0); 316 | let time_since_start = i - start; 317 | x.push(time_since_start); 318 | y.push(value); 319 | total += value as f64; 320 | } 321 | 322 | let data_points_count = (end - start) as f64; 323 | let avg = total / data_points_count; 324 | let mut deviation = 0.; 325 | for i in start..end { 326 | let value = *rps_buckets.get(&i).unwrap_or(&0); 327 | deviation += (avg - value as f64) * (avg - value as f64); 328 | } 329 | 330 | println!( 331 | "Avg rate: {:.3}, StdDev: {:.3}", 332 | avg, 333 | (deviation / data_points_count).sqrt() 334 | ); 335 | 336 | let line_plot = line_plot::(x, y, None); 337 | let mut figure = Figure::new(); 338 | figure.add_plot(line_plot.clone()); 339 | figure.add_plot(line_plot); 340 | figure.save( 341 | 
format!("./request_rate_{}.png", config.name).as_str(), 342 | config.get_python_path(), 343 | ); 344 | } 345 | 346 | fn build_latency_histogram(config: &ModelConfig, mut latencies: Vec) { 347 | println!("Latencies:"); 348 | 349 | latencies.sort_by(|a, b| a.overhead.partial_cmp(&b.overhead).unwrap()); 350 | let mut percentiles_x = vec![]; 351 | let mut percentiles_y = vec![]; 352 | let printed_percentiles = vec![0, 5000, 9000, 9500, 9900, 9990, 9999, 10000]; 353 | 354 | for p in 0..=10000 { 355 | let stats = 356 | &latencies[((p as f64 / 10000. * latencies.len() as f64) as i32 - 1).max(0) as usize]; 357 | let value = stats.overhead; 358 | if printed_percentiles.contains(&p) { 359 | println!( 360 | "{} - {}", 361 | format!("p{:.3}", p as f64 / 100.), 362 | format!("{:.3} ms", value * 1000.), 363 | ); 364 | } 365 | percentiles_x.push(p as f64 / 100.); 366 | percentiles_y.push(value * 1000.); 367 | } 368 | 369 | let mut figure = Figure::new(); 370 | let x = latencies.iter().map(|v| v.overhead * 1000.).collect(); 371 | let plot = histogram::(x, None); 372 | figure.add_plot(plot); 373 | 374 | figure.save( 375 | format!("./latency_histogram_{}.png", config.name).as_str(), 376 | config.get_python_path(), 377 | ); 378 | 379 | let line_plot = line_plot::(percentiles_x, percentiles_y, None); 380 | let mut figure = Figure::new(); 381 | figure.add_plot(line_plot.clone()); 382 | figure.add_plot(line_plot); 383 | figure.save( 384 | format!("./latency_percentiles_{}.png", config.name).as_str(), 385 | config.get_python_path(), 386 | ); 387 | } 388 | 389 | fn build_latency_timeline(config: &ModelConfig, mut latencies: Vec) { 390 | latencies.sort_by(|a, b| a.start_time.partial_cmp(&b.start_time).unwrap()); 391 | 392 | let mut timeline_x = vec![]; 393 | let mut p50_y = vec![]; 394 | let mut p90_y = vec![]; 395 | let mut p99_y = vec![]; 396 | 397 | let mut start = latencies[0].start_time; 398 | let mut current_x = 0; 399 | let mut next_second_latency_batch: Vec = vec![]; 400 | 401 | 
for (i, task) in latencies.iter().enumerate() { 402 | let moment = task.start_time; 403 | if moment.duration_since(start).as_secs_f64() >= 1. || i == latencies.len() - 1 { 404 | timeline_x.push(current_x); 405 | current_x += 1; 406 | 407 | next_second_latency_batch.sort_by(|a, b| a.partial_cmp(&b).unwrap()); 408 | let batch_size = next_second_latency_batch.len(); 409 | p50_y.push(next_second_latency_batch[batch_size / 2 - 1] * 1000.); 410 | p90_y.push(next_second_latency_batch[batch_size * 9 / 10 - 1] * 1000.); 411 | p99_y.push(next_second_latency_batch[batch_size * 99 / 100 - 1] * 1000.); 412 | 413 | start = moment; 414 | } else { 415 | next_second_latency_batch.push(task.overhead); 416 | } 417 | } 418 | 419 | let mut figure = Figure::new(); 420 | let p50_plot = line_plot::(timeline_x.clone(), p50_y, None); 421 | let p90_plot = line_plot::(timeline_x.clone(), p90_y, None); 422 | let p99_plot = line_plot::(timeline_x, p99_y, None); 423 | figure.add_plot(p50_plot); 424 | figure.add_plot(p90_plot); 425 | figure.add_plot(p99_plot); 426 | figure.save( 427 | format!("./latency_timeline_{}.png", config.name).as_str(), 428 | config.get_python_path(), 429 | ); 430 | } 431 | --------------------------------------------------------------------------------