├── .github └── workflows │ └── erlang.yml ├── .gitignore ├── .gitlab-ci.yml ├── CHANGELOG.md ├── CLI.md ├── DETAILS.md ├── LICENSE.md ├── README.md ├── rebar.config ├── rebar.lock ├── src ├── erlperf.app.src ├── erlperf.erl ├── erlperf_app.erl ├── erlperf_cli.erl ├── erlperf_cluster_monitor.erl ├── erlperf_file_log.erl ├── erlperf_history.erl ├── erlperf_job.erl ├── erlperf_job_sup.erl ├── erlperf_monitor.erl └── erlperf_sup.erl └── test ├── erlperf_SUITE.erl ├── erlperf_cli_SUITE.erl ├── erlperf_cluster_monitor_SUITE.erl ├── erlperf_file_log_SUITE.erl ├── erlperf_history_SUITE.erl ├── erlperf_job_SUITE.erl └── erlperf_monitor_SUITE.erl /.github/workflows/erlang.yml: -------------------------------------------------------------------------------- 1 | name: Build, Test, Dialyze 2 | 3 | on: 4 | pull_request: 5 | types: [ opened, reopened, synchronize ] 6 | push: 7 | branches: 8 | - 'master' 9 | 10 | jobs: 11 | linux: 12 | name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | 15 | strategy: 16 | matrix: 17 | otp_version: [23, 24, 25, 26, 27] 18 | os: [ubuntu-latest] 19 | 20 | container: 21 | image: erlang:${{ matrix.otp_version }} 22 | 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Run tests 26 | run: rebar3 ct 27 | - name: Documentation 28 | run: rebar3 edoc 29 | - name: ExDoc Documentation 30 | run: if [ $(rebar3 version | awk '{print $5}') -gt 23 ]; then rebar3 ex_doc; fi; 31 | - shell: bash 32 | name: Dialyzer 33 | run: rebar3 dialyzer 34 | - shell: bash 35 | name: Escriptize 36 | run: rebar3 as prod escriptize 37 | - shell: bash 38 | name: Smoke test 39 | run: ./erlperf 'timer:sleep(1).' 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _* 2 | .idea 3 | *.iml 4 | *~ 5 | erlperf 6 | doc -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - test 3 | - deploy 4 | 5 | test-default-docker: 6 | tags: 7 | - linux 8 | - x86_64 9 | image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/erlang:latest 10 | stage: test 11 | script: 12 | - rebar3 compile 13 | - rebar3 edoc 14 | - rebar3 dialyzer 15 | - rebar3 ct 16 | - rebar3 as prod escriptize 17 | - ./erlperf 'timer:sleep(1).' 
18 | after_script: 19 | - mv "_build/test/logs" ./public 20 | artifacts: 21 | when: always 22 | paths: 23 | - "./public" 24 | expire_in: 3 days 25 | reports: 26 | junit: 27 | - "./public/last/junit_report.xml" 28 | 29 | # Pages: publishing Common Test results 30 | pages: 31 | stage: deploy 32 | needs: 33 | - test-default-docker 34 | script: 35 | - echo "Uploading to pages" 36 | artifacts: 37 | paths: 38 | - public 39 | rules: 40 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.3.0 4 | - added warning for non-optimised ERTS build running the benchmark 5 | - fixed output for continuous mode when samples are zero 6 | - added `step` for quicker concurrency estimation mode (@mkuratczyk) 7 | 8 | ## 2.2.2 9 | - added generated source code output in verbose mode 10 | 11 | ## 2.2.1 12 | - tested with OTP 26 and 27 13 | - updated to argparse 2.0.0 14 | 15 | ## 2.2.0 16 | - added extended and full reporting capabilities 17 | - implemented additional statistics (standard deviation, median, p99) 18 | - exported formatting APIs to allow escript-based benchmarks 19 | - improved documentation, switched from edoc to ex_doc 20 | - added convenience functions and defaults to monitor, file_log, cluster_monitor and history 21 | - fixed cluster monitor output for multi-node configurations 22 | - breaking change: consolidated monitor sample structure for cluster and local process groups 23 | - fixed history store 24 | - refined types for better Dialyzer analysis 25 | 26 | ## 2.1.0 27 | - fixed -w (--warmup) argument missing from command line 28 | - synchronised worker startup when adding concurrency 29 | - concurrent worker shutdown when reducing concurrency 30 | - elevated job & benchmark process priority to avoid result skew 31 | - implemented scheduling problem detection (e.g. lock contention), 32 | added a busy loop method workaround 33 | 34 | ## 2.0.2 35 | - added convenience command line options: init_all, done_all, init_runner_all 36 | 37 | ## 2.0.1 38 | - minor bugfixes (friendlier error reporting) 39 | 40 | ## 2.0 41 | - incompatible change: `erlperf` requires runner arity to be defined explicitly. 42 | Code example: `erlperf:run(#{runner => {timer, sleep, []}, init_runner => "1."})`, 43 | with `erlperf` making a guess that `init_runner` is defined, therefore its return 44 | value can be passed as the argument to `timer:sleep/1`. This behaviour was confusing 45 | and is no longer supported. 
46 | - incompatible change: crashed runner causes entire job to stop (error contains the
47 |   reason and stack trace)
48 | - incompatible change: removed fprof/profiling support in favour of JIT + `perf`
49 | - `erlperf` application is no longer required to be started for one-off benchmark runs
50 | 
51 | ## 1.1.5:
52 | - support for OTP 25 (peer replacing slave)
53 | 
54 | ## 1.1.4:
55 | - fixed an issue with pg already started
56 | - moved profiling to spawned process
57 | 
58 | ## 1.1.3:
59 | - addressed deprecation, updated to argparse 1.1.4
60 | 
61 | ## 1.1.2:
62 | - updated command line parser to new argparse
63 | 
64 | ## 1.1.1:
65 | - added support for OTP 24
66 | - added edoc documentation
67 | 
68 | ## 1.0.0:
69 | - initial release
70 | 
--------------------------------------------------------------------------------
/CLI.md:
--------------------------------------------------------------------------------
 1 | # Command Line
 2 | Run `erlperf` with no arguments to get command line usage.
 3 | 
 4 | ## Synopsis
 5 | 
 6 | ```bash
 7 | erlperf [FLAG] runner [INIT] [INIT_RUNNER] [DONE] [runner...]
 8 | ```
 9 | 
10 | ## Flags
11 | 
12 | | Short | Long              | Description |
13 | |-------|-------------------|-------------|
14 | | -c    | --concurrency     | Specifies the number of workers per job. Allowed only in continuous mode |
15 | |       | --cv              | Coefficient of variation. Accepted in continuous and concurrency estimation modes. Benchmark keeps running until the standard deviation is below the specified number |
16 | | -i    | --isolation       | Requests to run every benchmark in a separate Erlang VM for isolation purposes |
17 | | -s    | --samples         | Number of samples to take. Defaults to 1 for timed mode, 3 for continuous and concurrency estimation |
18 | | -d    | --sample_duration | Sample duration, in milliseconds, for continuous and concurrency estimation modes |
19 | | -l    | --loop            | Sample duration (iterations) for the timed mode. Engages timed mode when specified |
20 | |       | --max             | Maximum number of workers allowed in the concurrency estimation mode |
21 | |       | --min             | Starting number of workers in concurrency estimation mode |
22 | | -pa   |                   | Adds an extra code path to the Erlang VM. Useful for benchmarking *.beam files on your filesystem |
23 | | -r    | --report          | Requests `basic`, `extended` or `full` report. Defaults to `basic` when less than 10 samples are requested, and `extended` for 10 and more |
24 | | -q    | --squeeze         | Engages concurrency estimation mode |
25 | | -t    | --threshold       | Sets the number of extra workers to try in concurrency estimation mode before concluding the test |
26 | | -v    | --verbose         | Turns on verbose mode, including generated source code, VM statistics and performance of continuous jobs |
27 | | -w    | --warmup          | Number of samples to run and discard before starting the measurement |
28 | 
29 | ## Benchmark code
30 | At least one runner code is required. Specify multiple runner codes to perform
31 | a comparison run.
32 | 
33 | Initialisation and cleanup definitions are read in the same order as runner codes. Example:
34 | ```bash
35 | # the first runner receives 1 as input, the second receives 2
36 | erlperf --init_runner '1.' 'run(1) -> ok.' 'run(2) -> ok.' --init_runner '2.'
37 | # this run fails with function_clause, because the first runner receives 2, and the second receives 1
38 | erlperf --init_runner '2.' 'run(1) -> ok.' 'run(2) -> ok.' --init_runner '1.'
39 | ```
40 | 
41 | |                   | Description |
42 | |-------------------|-------------|
43 | | --init            | Job initialisation code, see accepted callable formats below |
44 | | --init_runner     | Worker initialisation code |
45 | | --done            | Job cleanup code |
46 | | --label           | Runner label |
47 | |                   | |
48 | | --init_all        | Default init code for all runners that do not have a specific code |
49 | | --init_runner_all | Default init_runner code |
50 | | --done_all        | Default done code |
51 | 
52 | Accepted callable formats:
53 | * valid Erlang code: `timer:sleep(1).`
54 | * valid Erlang function: `run() -> timer:sleep(1).`
55 | * function with arguments: `run(X) -> timer:sleep(X).` or `run(X, Y) -> timer:sleep(X), Y.`
56 | * tuple with module, function name and arguments: `{timer, sleep, [1]}`
57 | * file name with call chain recording: `record.trace`. **deprecated**, do not use
--------------------------------------------------------------------------------
/DETAILS.md:
--------------------------------------------------------------------------------
 1 | # Implementation details
 2 | 
 3 | Starting with 2.0, `erlperf` uses call counting for continuous benchmarking purposes. This allows
 4 | the tightest possible loop without extra runtime calls. Running
 5 | `erlperf 'rand:uniform().' --init '1.' --done '2.' --init_runner '3.'` results in creating,
 6 | compiling and loading a module with this source code:
 7 | 
 8 | ```erlang
 9 | -module(unique_name).
10 | -export([init/0, init_runner/0, done/0, run/0]).
11 | 
12 | init() ->
13 |     1.
14 | 
15 | init_runner() ->
16 |     3.
17 | 
18 | done() ->
19 |     2.
20 | 
21 | run() ->
22 |     runner(),
23 |     run().
24 | 
25 | runner() ->
26 |     rand:uniform().
27 | ```
28 | 
29 | Number of `run/0` calls per second is reported as throughput. Before 2.0, `erlperf`
30 | used `atomics` to maintain a counter shared between all runner processes, introducing
31 | unnecessary BIF call overhead.
32 | 
33 | Timed (low-overhead) mode tightens it even further, turning the runner into this function:
34 | ```erlang
35 | runner(0) ->
36 |     ok;
37 | runner(Count) ->
38 |     rand:uniform(),
39 |     runner(Count - 1).
40 | ```
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019-2021 Maxim Fedorov
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of the copyright holder nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
19 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 | PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 27 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # erlperf 2 | 3 | [![Build Status](https://github.com/max-au/erlperf/actions/workflows/erlang.yml/badge.svg?branch=master)](https://github.com/max-au/erlperf/actions) [![Hex.pm](https://img.shields.io/hexpm/v/erlperf.svg)](https://hex.pm/packages/erlperf) [![Hex Docs](https://img.shields.io/badge/hex-docs-blue.svg)](https://hexdocs.pm/erlperf) 4 | 5 | Erlang Performance & Benchmarking Suite. 6 | Simple way to say "this code is faster than that one". See [CLI reference](CLI.md) 7 | and detailed API reference for `erlperf` and `erlperf_job` modules. 8 | 9 | Build (tested with OTP 23-27): 10 | 11 | ```bash 12 | $ rebar3 as prod escriptize 13 | ``` 14 | 15 | ## Quick start: command line 16 | Beware of the shell escaping your code in an unpredictable way! 17 | 18 | 1. Run a single process iterating `rand:uniform()` in a tight loop for 3 seconds, 19 | printing **average iterations per second** (~17 millions) and an average time 20 | to run a single iteration (57 ns). 21 | 22 | ```bash 23 | $ ./erlperf 'rand:uniform().' 24 | Code || QPS Time 25 | rand:uniform(). 1 17266 Ki 57 ns 26 | ``` 27 | 28 | 2. Run four processes doing this same concurrently. 29 | 30 | ```bash 31 | $ ./erlperf 'rand:uniform().' -c 4 32 | Code || QPS Time 33 | rand:uniform(). 4 53893 Ki 74 ns 34 | ``` 35 | 36 | 3. Benchmark `rand:uniform()` vs `crypto:strong_rand_bytes/1` for 10 seconds, adding 37 | an extra second to warm up the algorithms. 38 | 39 | ```bash 40 | $ ./erlperf 'rand:uniform().' 'crypto:strong_rand_bytes(2).' --samples 10 --warmup 1 41 | Code || Samples Avg StdDev Median P99 Iteration Rel 42 | rand:uniform(). 1 10 16611 Ki 0.20% 16614 Ki 16664 Ki 60 ns 100% 43 | crypto:strong_rand_bytes(2). 1 10 1804 Ki 0.79% 1797 Ki 1829 Ki 554 ns 11% 44 | ``` 45 | 46 | 4. Run a function passing the state into the next iteration. This code demonstrates performance difference 47 | between `rand:uniform_s/1` with state passed explicitly, and `rand:uniform/1` reading state from the process 48 | dictionary. 49 | 50 | ```bash 51 | $ ./erlperf 'r(_, S) -> {_, N} = rand:uniform_s(S), N.' --init_runner 'rand:seed(exsss).' \ 52 | 'r() -> rand:uniform().' 53 | Code || QPS Time Rel 54 | r(_, S) -> {_, N} = rand:uniform_s(S), N. 1 26180 Ki 38 ns 100% 55 | r() -> rand:uniform(). 1 16958 Ki 58 ns 65% 56 | ``` 57 | 58 | 5. Estimate `./erlperf 'application_controller:is_running(kernel).` concurrency characteristics. This function 59 | is implemented as `gen_server:call`, and all calculations are done in a single process. It is still 60 | possible to squeeze a bit more from a single process by putting work into the queue from multiple runners. 61 | 62 | ```bash 63 | $ ./erlperf 'application_controller:is_running(kernel).' --squeeze 64 | Code || QPS Time 65 | application_controller:is_running(kernel). 
3 1189 Ki 2524 ns 66 | 67 | 68 | 69 | $ ./erlperf 'persistent_term:put(atom, "string").' -q 70 | Code || QPS Time 71 | persistent_term:put(atom, "string"). 1 8882 Ki 112 ns 72 | ``` 73 | 74 | 6. Start a server (`pg` scope in this example), use it in benchmark, and shut down after. 75 | 76 | ```bash 77 | $ ./erlperf 'pg:join(scope, group, self()), pg:leave(scope, group, self()).' \ 78 | --init 'pg:start_link(scope).' --done 'gen_server:stop(scope).' 79 | Code || QPS Time 80 | pg:join(scope, group, self()), pg:leave(scope, group, self()). 1 336 Ki 2976 ns 81 | ``` 82 | 83 | 7. Run the same code with different arguments, returned from `init_runner` function. Note the trick 84 | of adding extra spaces in the source code to know which code is where. 85 | 86 | ```bash 87 | $ ./erlperf 'runner(X) -> timer:sleep(X).' --init_runner '1.' \ 88 | ' runner(X) -> timer:sleep(X).' --init_runner '2.' 89 | Code || QPS Time Rel 90 | runner(X) -> timer:sleep(X). 1 500 2001 us 100% 91 | runner(X) -> timer:sleep(X). 1 333 3001 us 67% 92 | ``` 93 | 94 | 8. Determine how many times a process can join/leave pg2 group on a single node (requires OTP 23 95 | or older, as pg2 is removed in later versions). 96 | 97 | ```bash 98 | $ ./erlperf 'ok = pg2:join(g, self()), ok = pg2:leave(g, self()).' --init 'pg2:create(g).' 99 | Code || QPS Time 100 | ok = pg2:join(g, self()), ok = pg2:leave(g, self()). 1 64021 15619 ns 101 | ``` 102 | 103 | 9. Compare `pg` with `pg2` running in a 3-node cluster. Note the `-i` argument spawning an isolated 104 | extra Erlang VM for each benchmark. 105 | 106 | ```bash 107 | ./erlperf 'ok = pg2:join(g, self()), ok = pg2:leave(g, self()).' --init 'pg2:create(g).' \ 108 | 'ok = pg:join(g, self()), ok = pg:leave(g, self()).' --init 'pg:start(pg).' -i 109 | Code || QPS Time Rel 110 | ok = pg:join(g, self()), ok = pg:leave(g, self()). 1 241 Ki 4147 ns 100% 111 | ok = pg2:join(g, self()), ok = pg2:leave(g, self()). 1 1415 707 us 0% 112 | ``` 113 | 114 | 10. Watch the progress of your test running (`-v` option) with extra information: scheduler utilisation, dirty CPU & IO 115 | schedulers, number of running processes, ports, ETS tables, and memory consumption. Last column is the job throughput. 116 | When there are multiple jobs, multiple columns are printed. Test will continue until adding 8 more workers (`-t 8`) 117 | does not increase total throughput. 118 | 119 | ```bash 120 | $ ./erlperf 'rand:uniform().' -q -v -t 8 121 | 122 | YYYY-MM-DDTHH:MM:SS-oo:oo Sched DCPU DIO Procs Ports ETS Mem Total Mem Proc Mem Bin Mem ETS <0.84.0> 123 | 2023-01-22T11:02:51-08:00 6.12 0.00 0.20 46 2 21 24737 Kb 4703 Kb 191 Kb 471 Kb 14798 Ki 124 | 2023-01-22T11:02:52-08:00 6.31 0.00 0.00 46 2 21 25105 Kb 5565 Kb 218 Kb 472 Kb 16720 Ki 125 | 2023-01-22T11:02:53-08:00 6.26 0.00 0.00 46 2 21 25501 Kb 5427 Kb 218 Kb 472 Kb 16715 Ki 126 | <...> 127 | 2023-01-22T11:03:37-08:00 100.00 0.00 0.00 61 2 21 25874 Kb 5696 Kb 221 Kb 472 Kb 55235 Ki 128 | 2023-01-22T11:03:38-08:00 100.00 0.00 0.00 61 2 21 25955 Kb 5565 Kb 218 Kb 472 Kb 55139 Ki 129 | Code || QPS Time 130 | rand:uniform(). 8 61547 Ki 130 ns 131 | 132 | ``` 133 | 134 | ## Benchmark 135 | Running benchmark is called a **job**, see `erlperf_job` for detailed description. 136 | Every job has a controller process, responsible for starting and stopping worker 137 | processes, or **workers**. Worker processes execute **runner** function in a tight 138 | loop, incrementing **iteration** counter. 
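The following sketch shows the same job/worker/iteration flow from the Erlang shell. It is hedged to use only `erlperf_job` calls that appear elsewhere in this repository (`start/1`, `handle/1`, `set_concurrency/2`, `sample/1`, `request_stop/1`):

```erlang
%% start a job, attach two workers iterating rand:uniform() in a tight loop
{ok, Job} = erlperf_job:start(#{runner => {rand, uniform, []}}),
Handle = erlperf_job:handle(Job),
ok = erlperf_job:set_concurrency(Job, 2),
%% the iteration counter is cumulative: one sample is the difference
%% between two reads taken one sample_duration apart
Before = erlperf_job:sample(Handle),
timer:sleep(1000),
Sample = erlperf_job:sample(Handle) - Before,
erlperf_job:request_stop(Job).
```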
139 | 140 | Benchmark runs either for a specified amount of time (**sample duration** in 141 | continuous mode), or until requested number of iterations is made (timed mode). 142 | Resulting **sample** is total number of *iterations* for all workers, or elapsed time 143 | it took in timed mode. 144 | 145 | The process repeats until the specified amount of *samples* is collected, producing 146 | a **report** (see details below). 147 | 148 | For comparison convenience, basic reports contain **QPS** - historical metric 149 | from the original implementation (designed for network service throughput assessment). 150 | It is approximate amount of *runner iterations per sample_duration achieved by all workers 151 | of the job*. Given that default duration is 1 second, *QPS* is a good proxy for 152 | the total job throughput. 153 | 154 | Single worker performance can be estimated using **time** metric. It can also be 155 | considered as function latency - how long it takes on average to execute a 156 | single *iteration* of a *runner*. 157 | 158 | ### Benchmark definition 159 | A benchmark may define following functions: 160 | * **runner**: code that is executed in the tight loop 161 | * **init** (optional): executed once when the job starts 162 | * **done** (optional): executed once when the job is about to stop 163 | * **init_runner** (optional): executed on every worker process startup 164 | * **label** (optional): string that overrides the code value in reports 165 | 166 | See `erlperf_job` for the detailed reference and ways to define a function (**callable**). 167 | 168 | Note that different ways to call a function have different performance characteristics: 169 | 170 | ```bash 171 | $ ./erlperf '{rand, uniform, []}' 'rand:uniform().' -l 10M 172 | Code || QPS Time Rel 173 | rand:uniform(). 1 18519 Ki 54 ns 100% 174 | {rand,uniform,[]} 1 16667 Ki 60 ns 90% 175 | ``` 176 | 177 | This difference may get more pronounced depending on ERTS version and *runner* code: 178 | 179 | ```erlang 180 | (erlperf@max-au)7> erlperf:benchmark([ 181 | #{runner => "runner(X) -> is_float(X).", init_runner=>"2."}, 182 | #{runner => {erlang, is_float, [2]}}, 183 | #{runner => fun (X) -> is_float(X) end, init_runner => "2."}], 184 | #{}, undefined). 185 | [105824351,66424280,5057372] 186 | ``` 187 | 188 | It is caused by the ERTS: running compiled code (first variant) with OTP 25 is 189 | two times faster than applying a function, and 20 times faster than repeatedly 190 | calling anonymous `fun`. Use the same invocation method to get a relevant result. 191 | 192 | Absolute benchmarking overhead may be significant for very fast functions taking just a few nanoseconds. 193 | Use timed mode for such occasions. 194 | 195 | ### Run options 196 | See `erlperf` module documentation and [command line reference](CLI.md) for all available options. 197 | 198 | ## Benchmarking modes 199 | 200 | ### Continuous mode 201 | Benchmarking is done by counting number of *runner* iterations done over 202 | a specified period of time (**sample_duration**). 203 | 204 | Two examples below demonstrate the effect caused by changing *sample_duration*. 205 | First run takes 20 samples (`-s 20`) with 100 ms duration. Second invocation 206 | takes the same 20 sample, but with 200 ms duration (`-d 200`). Note that all metrics, 207 | except a single *iteration* time, doubled. 208 | 209 | ```bash 210 | $ ./erlperf 'rand:uniform().' -d 100 -s 20 211 | Code || Samples Avg StdDev Median P99 Iteration 212 | rand:uniform(). 
1 20 1647 Ki 0.39% 1648 Ki 1660 Ki 60 ns 213 | $ ./erlperf 'rand:uniform().' -d 200 -s 20 214 | Code || Samples Avg StdDev Median P99 Iteration 215 | rand:uniform(). 1 20 3354 Ki 0.16% 3354 Ki 3368 Ki 59 ns 216 | ``` 217 | 218 | ### Timed mode 219 | In this mode *runner* code is executed for *sample_duration* iterations for every *sample*. 220 | Report contains average/median/p99 *time* it takes to produce a single sample. In the 221 | example below, it takes an average of 554 ms to make 10 million calls to `rand:uniform()`. 222 | 223 | ```bash 224 | $ ./erlperf 'rand:uniform().' 'rand:uniform(1000).' -l 10M -s 20 225 | Code || Samples Avg StdDev Median P99 Iteration Rel 226 | rand:uniform(1000). 1 20 554 ms 0.37% 554 ms 563 ms 55 ns 100% 227 | rand:uniform(). 1 20 560 ms 0.60% 560 ms 564 ms 55 ns 99% 228 | ``` 229 | 230 | Effectively, this example runs following code: `loop(0) -> ok; loop(Count) -> rand:uniform(), loop(Count - 1).` 231 | Timed mode has slightly less overhead compared to continuous mode. 232 | 233 | Timed mode does not support `--concurrency` setting, using only 234 | one process. However, it does support comparison run with multiple concurrent jobs. 235 | 236 | ### Concurrency estimation mode 237 | In this mode `erlperf` performs multiple continuous benchmarks with 238 | increasing concurrency. The test concludes when increasing worker 239 | count does not result in increase of the total throughput. Report 240 | contains statistics of the most successful run. 241 | 242 | This mode can also be used to detect bottlenecks, e.g. lock contention, single 243 | `gen_server` processes, or VM-wide shared resources (`persistent_term`s). 244 | Example (with maximum concurrency limited to 50): 245 | 246 | ```bash 247 | $ ./erlperf '{code, is_loaded, [local_udp]}' -w 1 --max 50 -q 248 | Code || QPS Time 249 | {code,is_loaded,[local_udp]} 6 1665 Ki 3604 ns 250 | ``` 251 | 252 | Same in the Erlang shell: 253 | 254 | ```erlang 255 | > erlperf:run({code, is_loaded, [local_udp]}, #{warmup => 1}, #{max => 50}). 256 | {1676758,6} 257 | ``` 258 | 259 | In this example, 6 concurrent processes were able to squeeze 1676758 calls per second 260 | for `code:is_loaded(local_udp)`. In current OTP version `code:is_loaded` is implemented 261 | as a `gen_server:call` to a single process (`code_server`), that limits potential 262 | performance. 263 | 264 | See `erlperf_job` for the detailed description of different benchmarking modes. 265 | 266 | ## Reports 267 | Historically `erlperf` had only the basic reporting available for command line 268 | usage. Since 2.2 it is possible to request additional information. 269 | 270 | ### Basic report 271 | This is the default report form when less than 10 samples were collected. 272 | Use `-r basic` to force basic reports with 10 and more samples. 273 | 274 | Basic report contains following columns: 275 | * **Code**: Erlang code or label supplied to the benchmark 276 | * **||**: how many concurrent processes were running. In the timed mode, it is always 1. In the concurrency 277 | estimation mode, the number that achieved the highest total throughput (QPS) 278 | * **QPS**: average number of runner code *iterations* (throughput). Measure per single *sample_duration* 279 | in the continuous mode. In the timed mode, calculated with the assumption that *sample_duration* is 1 second 280 | * **Time**: single runner iteration time 281 | * **Rel**: relative performance of this code, compared to others. Printed only when more than 282 | one runner is specified. 
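Since *QPS* counts iterations across all workers per *sample_duration*, the *Time* column can be re-derived from it. A quick sanity check for the quick-start numbers (a sketch assuming the default 1-second sample duration):

```erlang
%% Time ≈ Concurrency * SampleDuration / QPS
%% quick start example 1: one worker, 17266 Ki iterations per second
(1 * 1.0e9) / (17266 * 1000).
%% ≈ 57.9 ns per iteration, matching the ~57 ns printed by erlperf
```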
283 | 284 | ### Extended report 285 | When 10 or more samples were collected, this mode is the default. Use `-r extended` 286 | to force printing this report for smaller sample sets. 287 | 288 | Note that average, deviation, median and 99th percentile are calculated for the *sample_duration*. 289 | If you requested 20 samples of 100 ms in the continuous mode, these fields will contain *iteration* 290 | count per 100 ms. If you requested 10 million iterations (`-l 10M`), extended report for timed mode 291 | displays average time it takes to do 10M iterations. Single iteration time is printed as *Iteration*. 292 | 293 | Code, concurrency, and relative performance fields have the same meaning as in basic report. In addition, 294 | following columns are printed: 295 | * **Samples**: how many samples were collected (useful when requesting continuous test with standard deviation requirement) 296 | * **Avg**: same as QPS for continuous mode, but in the timed mode, average sample time 297 | * **StdDev**: standard deviation from average 298 | * **Median**: median value, in the continuous mode, median estimated throughput, in the timed mode - time to 299 | complete the requested iterations 300 | * **Iteration**: single runner iteration time 301 | * **P99**: 99th percentile 302 | 303 | ### Full report 304 | This mode must be explicitly specified with `-r full`. 305 | 306 | Contains everything that extended report has. Includes extra information about the system 307 | used for benchmarking - OS type, CPU and Erlang VM characteristics. 308 | 309 | ## Benchmarking compiled code 310 | `erlperf` can be used to measure performance of your application running in production, or code that is stored 311 | on disk. 312 | 313 | Use `-pa` argument to add an extra code path. Example: 314 | ```bash 315 | $ ./erlperf 'args:parse([], #{}).' -pa _build/test/lib/argparse/ebin 316 | Code || QPS Time 317 | args:parse([], #{}). 1 955 Ki 1047 ns 318 | ``` 319 | 320 | If you need to add multiple released applications, supply `ERL_LIBS` environment variable instead: 321 | ```bash 322 | $ ERL_LIBS="_build/test/lib" erlperf 'args:parse([], #{}).' 323 | Code || QPS Time 324 | args:parse([], #{}). 1 735 Ki 1361 ns 325 | ``` 326 | 327 | ### Usage in production 328 | It is possible to use `erlperf` to benchmark an application running in production. 329 | Add `erlperf` as a dependency, and use remote shell: 330 | 331 | ```bash 332 | # connect a remote shell to the production node 333 | erl -remsh production@max-au 334 | (production@max-au)3> erlperf:run(timer, sleep, [1]). 335 | 488 336 | ``` 337 | 338 | ### Permanent continuous benchmarking 339 | You can run a job continuously, to examine performance gains or losses while doing 340 | hot code reload. This process is designed to help during development and testing stages, 341 | allowing to quickly notice performance regressions. 342 | 343 | Example source code: 344 | ```erlang 345 | -module(mymod). 346 | -export([do/1]). 347 | do(Arg) -> timer:sleep(Arg). 348 | ``` 349 | 350 | Example below assumes you have `erlperf` application started (e.g. in a `rebar3 shell`) 351 | 352 | ```erlang 353 | % start a logger that prints VM monitoring information 354 | > {ok, Logger} = erlperf_file_log:start_link(). 355 | {ok,<0.235.0>} 356 | 357 | % start a job that will continuously benchmark mymod:do(), 358 | % with initial concurrency 2. 359 | > JobPid = erlperf:start(#{init_runner => "rand:uniform(10).", 360 | runner => "runner(Arg) -> mymod:do(Arg)."}, 2). 
361 | {ok,<0.291.0>} 362 | 363 | % increase concurrency to 4 364 | > erlperf_job:set_concurrency(JobPid, 4). 365 | ok. 366 | 367 | % watch your job performance 368 | 369 | % modify your application code, 370 | % set do(Arg) -> timer:sleep(2*Arg), do hot code reload 371 | > c(mymod). 372 | {module, mymod}. 373 | 374 | % see that after hot code reload throughput halved! 375 | ``` 376 | 377 | ## Timer precision 378 | ERTS cannot guarantee precise timing when there is severe lock contention happening, 379 | and scheduler utilisation is 100%. This often happens with ETS: 380 | 381 | ```bash 382 | $ ./erlperf -c 50 'ets:insert(ac_tab, {1, 2}).' -d 100 -s 50 383 | Timer accuracy problem detected, results may be inaccurate 384 | 385 | Code || Samples Avg StdDev Median P99 Iteration 386 | ets:insert(ac_tab, {1, 2}). 50 50 6079 82.27% 5497 40313 823 us 387 | ``` 388 | 389 | Running 50 concurrent processes trying to overwrite the very same key of an ETS 390 | table leads to lock contention on a shared resource (ETS table/bucket lock). `erlperf` 391 | may detect this issue and switch to a busy wait loop for precise timing. This may 392 | result in lowered throughput and other metrics skew. `erlperf` does not attempt to 393 | pinpoint the source of contention, it is up to user to figure that out. It's recommended 394 | to use lock-counting emulator, or Linux `perf` utility to troubleshoot VM-level issues. 395 | 396 | 397 | ## Experimental features 398 | These features are not fully supported. APIs may change in the future `erlperf` 399 | releases. 400 | 401 | ### Benchmarking in a cluster 402 | It's possible to run a job on a separate node in the cluster. See 403 | `erlperf_cluster_monitor` for additional details. 404 | 405 | ```erlang 406 | % watch the entire cluster (printed to console) 407 | (node1@host)> {ok, _} = erlperf_history:start_link(). 408 | {ok,<0.213.0>} 409 | (node1@host)> {ok, ClusterLogger} = erlperf_cluster_monitor:start_link(group_leader(), 1000, [node, sched_util, jobs]). 410 | {ok, <0.216.0>} 411 | 412 | % also log cluster-wide reports to file (jobs & sched_util) 413 | (node1@host)> {ok, FileLogger} = erlperf_cluster_monitor:start_link("/tmp/cluster", 1000, [time, node, sched_util, jobs]). 414 | {ok, <0.223.0>} 415 | 416 | % run the benchmarking process in a different node of your cluster 417 | (node1@host)> rpc:call('node2@host', erlperf, run, [#{runner => {rand, uniform, []}}]). 418 | ``` 419 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [debug_info]}. 2 | {deps, [argparse]}. 3 | 4 | {shell, [ 5 | {apps, [erlperf]} 6 | ]}. 7 | 8 | {dist_node, [ 9 | {sname, erlperf} 10 | ]}. 11 | 12 | {escript_incl_apps, [argparse]}. 13 | {escript_emu_args, "%%! +pc unicode -escript main erlperf_cli\n"}. 14 | 15 | {cover_enabled, true}. 16 | {cover_opts, [verbose]}. 17 | 18 | {ct_opts, [ 19 | %% {ct_hooks, [cth_surefire]}, 20 | {keep_logs, 1} 21 | ]}. 22 | 23 | {post_hooks, [{"(linux|darwin|solaris|freebsd|netbsd|openbsd)", 24 | escriptize, 25 | "cp \"$REBAR_BUILD_DIR/bin/erlperf\" ./erlperf"}, 26 | {"win32", 27 | escriptize, 28 | "robocopy \"%REBAR_BUILD_DIR%/bin/\" ./ erlperf* " 29 | "/njs /njh /nfl /ndl & exit /b 0"} % silence things 30 | ]}. 31 | 32 | {hex, [ 33 | {doc, #{provider => ex_doc}} 34 | ]}. 35 | 36 | {project_plugins, [rebar3_ex_doc]}. 
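%% Note: the post_hooks above copy the escript produced by `rebar3 as prod escriptize'
%% into the repository root, which is where the README examples run ./erlperf from.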
37 | 38 | {ex_doc, [ 39 | {extras, [ 40 | {"README.md", #{title => "Overview"}}, 41 | {"CLI.md", #{title => "Command Line"}}, 42 | {"CHANGELOG.md", #{title => "Changelog"}}, 43 | {"LICENSE.md", #{title => "License"}} 44 | ]}, 45 | {main, "README.md"}, 46 | {source_url, "https://github.com/max-au/erlperf"}, 47 | {source_ref, <<"master">>} 48 | ]}. 49 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | {"1.2.0", 2 | [{<<"argparse">>,{pkg,<<"argparse">>,<<"2.0.0">>},0}]}. 3 | [ 4 | {pkg_hash,[ 5 | {<<"argparse">>, <<"3EDF299FB5BC089E6AF2F1A7C6532104B4CD6136E0147C6CF9622E6E4A741434">>}]}, 6 | {pkg_hash_ext,[ 7 | {<<"argparse">>, <<"525979122BEA3641A1DD3ABC53F2ADD19F7F427D507018F8C7CAF0693A6E78C8">>}]} 8 | ]. 9 | -------------------------------------------------------------------------------- /src/erlperf.app.src: -------------------------------------------------------------------------------- 1 | {application, erlperf, 2 | [{description, "Erlang Performance & Benchmarking Suite"}, 3 | {vsn, "2.3.0"}, 4 | {registered, [ 5 | erlperf_sup, erlperf_job_sup, erlperf_monitor, 6 | erlperf_history, erlperf_file_log, erlperf_cluster_monitor 7 | ]}, 8 | {mod, {erlperf_app, []}}, 9 | {applications, 10 | [kernel, 11 | stdlib, 12 | compiler, 13 | argparse 14 | ]}, 15 | {env,[]}, 16 | {modules, []}, 17 | 18 | {licenses, ["BSD-3-Clause-Clear"]}, 19 | {links, [{"Github", "https://github.com/max-au/erlperf"}]}, 20 | {include_paths, ["CLI.md", "DETAILS.md"]} 21 | ]}. 22 | -------------------------------------------------------------------------------- /src/erlperf.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Convenience APIs for benchmarking. 4 | %%% 5 | %%% This module implements following benchmarking modes: 6 | %%% 11 | %%% 12 | %%%

%%% == Continuous mode ==

13 | %%% This is the default mode. Separate {@link erlperf_job} is started for 14 | %%% each benchmark, iterating supplied runner in a tight loop, 15 | %%% bumping a counter for each iteration of each worker. `erlperf' reads 16 | %%% this counter every second (or `sample_duration'), calculating the 17 | %%% difference between current and previous value. This difference is 18 | %%% called a sample. 19 | %%% 20 | %%% By default, `erlperf' collects 3 samples and stops, reporting the average. 21 | %%% To give an example, if your function runs for 20 milliseconds, `erlperf' 22 | %%% may capture samples with 48, 52 and 50 iterations. The average would be 50. 23 | %%% 24 | %%% This approach works well for CPU-bound calculations, but may produce 25 | %%% unexpected results for slow functions taking longer than sample duration. 26 | %%% For example, timer:sleep(2000) with default settings yields zero throughput. 27 | %%% You can change the sample duration and the number of samples to take to 28 | %%% avoid that. 29 | %%% 30 | %%%
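%%% For example, to benchmark a 2-second call without observing zero throughput,
%%% stretch the sample (a sketch using only options from {@link run_options()}):
%%% ```
%%% erlperf:run({timer, sleep, [2000]}, #{sample_duration => 5000, samples => 2}).
%%% '''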

%%% == Timed mode ==

31 | %%% In this mode `erlperf' loops your code a specified amount of times, measuring 32 | %%% how long it took to complete. It is essentially what {@link timer:tc/3} does. This mode 33 | %%% has slightly less overhead compared to continuous mode. This difference may be 34 | %%% significant if you’re profiling low-level ERTS primitives. 35 | %%% 36 | %%% This mode does not support `concurrency' setting (concurrency locked to 1). 37 | %%% 38 | %%%
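%%% A sketch of the timed mode through the {@link time/2} wrapper defined below:
%%% ```
%%% ElapsedUs = erlperf:time({rand, uniform, []}, 10000000). %% time to run 10M iterations
%%% '''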

%%% == Concurrency estimation mode ==

39 | %%% In this mode `erlperf' attempts to estimate how concurrent the supplied 40 | %%% runner code is. The run consists of multiple passes, increasing concurrency 41 | %%% with each pass, and stopping when total throughput is no longer growing. 42 | %%% This mode proves useful to find concurrency bottlenecks. For example, some 43 | %%% functions may have limited throughput because they execute remote calls 44 | %%% served by a single process. See {@link benchmark/3} for the detailed 45 | %%% description. 46 | %%% 47 | %%% 48 | %%% @end 49 | -module(erlperf). 50 | -author("maximfca@gmail.com"). 51 | 52 | %% Public API for single-run simple benchmarking 53 | %% Programmatic access. 54 | -export([ 55 | benchmark/3, 56 | compare/2, 57 | record/4, 58 | run/1, 59 | run/2, 60 | run/3, 61 | start/2, 62 | time/2 63 | ]). 64 | 65 | %% Exported for testing purposes only. 66 | -export([report_stats/1]). 67 | 68 | %% compare/2 accepts code map, or just the runner code 69 | -type code() :: erlperf_job:code_map() | erlperf_job:callable(). 70 | %% Convenience type used in `run/1,2,3' and `compare/2'. 71 | 72 | %% node isolation options: 73 | -type isolation() :: #{ 74 | host => string() 75 | }. 76 | %% Node isolation settings. 77 | %% 78 | %% Currently, `host' selection is not supported. 79 | 80 | -type run_options() :: #{ 81 | concurrency => pos_integer(), 82 | sample_duration => pos_integer() | undefined | {timed, pos_integer()}, 83 | warmup => non_neg_integer(), 84 | samples => pos_integer(), 85 | cv => float() | undefined, 86 | priority => erlang:priority_level(), 87 | report => basic | extended | full, 88 | isolation => isolation() 89 | }. 90 | %% Benchmarking mode selection and parameters of the benchmark run. 91 | %% 92 | %% 122 | 123 | %% Concurrency test options 124 | -type concurrency_test() :: #{ 125 | threshold => pos_integer(), 126 | min => pos_integer(), 127 | step => pos_integer(), 128 | max => pos_integer() 129 | }. 130 | %% Concurrency estimation mode options. 131 | %% 132 | %% 139 | 140 | %% Single run result: one or multiple samples (depending on report verbosity) 141 | -type run_result() :: non_neg_integer() | [non_neg_integer()]. 142 | %% Benchmark results. 143 | %% 144 | %% For continuous mode, an average (arithmetic mean) of the collected samples, 145 | %% or a list of all samples collected. 146 | %% Timed mode returns elapsed time (microseconds). 147 | 148 | 149 | %% Concurrency test result (non-verbose) 150 | -type concurrency_result() :: {QPS :: non_neg_integer(), Concurrency :: non_neg_integer()}. 151 | %% Basic concurrency estimation report 152 | %% 153 | %% Only the highest throughput run is reported. `Concurrency' contains the number of 154 | %% concurrently running workers when the best result is achieved. 155 | 156 | %% Extended report returns all samples collected. 157 | -type concurrency_test_result() :: concurrency_result() | {Max :: concurrency_result(), [concurrency_result()]}. 158 | %% Concurrency estimation mode result 159 | %% 160 | %% Extended report contains results for all runs, starting from the minimum number 161 | %% of workers, to the highest throughput detected, plus up to `threshold' more. 
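%% A hedged usage sketch combining the two option maps above:
%%   erlperf:run({code, is_loaded, [local_udp]}, #{report => extended},
%%               #{min => 2, step => 2, max => 16, threshold => 2})
%% returns `{Best, History}' as described in concurrency_test_result().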
162 | 163 | -type system_information() :: #{ 164 | os := {unix | win32, atom()}, 165 | system_version := string(), 166 | debug => boolean(), %% true if the emulator has been debug-compiled, otherwise false 167 | emu_type => atom(), %% see system_info(emu_type), since OTP 24 168 | emu_flavor => atom(), %% see system_info(emu_flavor), since OTP 24 169 | dynamic_trace => atom(),%% see system_info(dynamic_trace), since OTP 24 170 | cpu => string() 171 | }. 172 | %% System information, as returned by {@link erlang:system_info/1} 173 | %% May also contain CPU model name on supported operating systems. 174 | 175 | -type run_statistics() :: #{ 176 | average => non_neg_integer(), 177 | variance => float(), 178 | stddev => float(), 179 | median => non_neg_integer(), 180 | p99 => non_neg_integer(), 181 | best => non_neg_integer(), 182 | worst => non_neg_integer(), 183 | samples => [non_neg_integer()], 184 | time => non_neg_integer(), 185 | iteration_time => non_neg_integer() 186 | }. 187 | %% Results reported by a single benchmark run. 188 | %% 189 | %% 203 | 204 | -type report() :: #{ 205 | mode := timed | continuous | concurrency, 206 | result := run_statistics(), 207 | history => [{Concurrency :: pos_integer(), Result :: run_statistics()}], 208 | code := erlperf_job:code_map(), 209 | run_options := run_options(), 210 | concurrency_options => concurrency_test(), 211 | system => system_information(), 212 | sleep => sleep | busy_wait 213 | }. 214 | %% Full benchmark report, containing all collected samples and statistics 215 | %% 216 | %% 232 | 233 | -export_type([code/0, isolation/0, run_options/0, concurrency_test/0, report/0, 234 | system_information/0, run_statistics/0]). 235 | 236 | %% Milliseconds, timeout for any remote node operation 237 | -define(REMOTE_NODE_TIMEOUT, 10000). 238 | 239 | 240 | %% @doc 241 | %% Generic benchmarking suite, accepting multiple code maps, modes and options. 242 | %% 243 | %% `Codes' contain a list of code versions. Every element is a separate job that runs 244 | %% in parallel with all other jobs. Same `RunOptions' are applied to all jobs. 245 | %% 246 | %% `ConcurrencyTestOpts' specifies options for concurrency estimation mode. Passing 247 | %% `undefined' results in a continuous or a timed run. It is not supported to 248 | %% run multiple jobs while doing a concurrency estimation run. 249 | %% 250 | %% Concurrency estimation run consists of multiple passes. First pass is done with 251 | %% a `min' number of workers, subsequent passes are increasing concurrency by 1, until 252 | %% `max' concurrency is reached, or total job iterations stop growing for `threshold' 253 | %% consecutive passes. To give an example, if your code is not concurrent at all, 254 | %% and you try to benchmark it with `threshold' set to 3, there will be 4 passes in 255 | %% total: first with a single worker, then 3 more, demonstrating no throughput growth. 256 | %% 257 | %% In this mode, job is started once before the first pass. Subsequent passes only 258 | %% change the concurrency. All other options passed in `RunOptions' are honoured. So, 259 | %% if you set `samples' to 30, keeping default duration of a second, every single 260 | %% pass will last for 30 seconds. 261 | %% @end 262 | -spec benchmark([erlperf_job:code_map()], RunOptions :: run_options(), undefined) -> run_result() | [run_result()] | [report()]; 263 | ([erlperf_job:code_map()], RunOptions :: run_options(), concurrency_test()) -> concurrency_test_result() | [report()]. 
264 | benchmark(Codes, #{isolation := Isolation} = RunOptions, ConcurrencyTestOpts) -> 265 | erlang:is_alive() orelse erlang:error(not_alive), 266 | %% isolation requested: need to rely on cluster_monitor and other distributed things. 267 | {Peers, Nodes} = prepare_nodes(length(Codes)), 268 | Opts = maps:remove(isolation, RunOptions), 269 | try 270 | %% no timeout here (except that rpc itself could time out) 271 | Promises = 272 | [erpc:send_request(Node, erlperf, run, [Code, Opts, ConcurrencyTestOpts]) 273 | || {Node, Code} <- lists:zip(Nodes, Codes)], 274 | %% now wait for everyone to respond 275 | Reports = [erpc:receive_response(Promise) || Promise <- Promises], 276 | %% if full reports were requested, restore isolation flag 277 | case maps:get(report, RunOptions, basic) of 278 | full -> 279 | [maps:update_with(run_options, fun(RO) -> RO#{isolation => Isolation} end, Report) 280 | || Report <- Reports]; 281 | _ -> 282 | Reports 283 | end 284 | catch 285 | error:{exception, Reason, Stack} -> 286 | erlang:raise(error, Reason, Stack) 287 | after 288 | stop_nodes(Peers, Nodes) 289 | end; 290 | 291 | %% foolproofing 292 | benchmark([_, _ | _], _RunOptions, #{}) -> 293 | erlang:error(not_supported); 294 | 295 | %% No isolation requested. 296 | %% This is the primary entry point for all benchmark jobs. 297 | benchmark(Codes, RunOptions0, ConOpts0) -> 298 | %% fill in all missing defaults 299 | ConOpts = concurrency_mode_defaults(ConOpts0), 300 | #{report := ReportType, priority := SetPrio} = RunOptions = run_options_defaults(RunOptions0), 301 | %% elevate priority to reduce timer skew 302 | PrevPriority = process_flag(priority, SetPrio), 303 | Jobs = start_jobs(Codes, []), 304 | {JobPids, Handles, _} = lists:unzip3(Jobs), 305 | Reports = 306 | try 307 | benchmark_impl(JobPids, RunOptions, ConOpts, Handles) 308 | after 309 | stop_jobs(Jobs), 310 | process_flag(priority, PrevPriority) 311 | end, 312 | %% generate statistical information from the samples returned 313 | report(ReportType, Codes, Reports). 314 | 315 | %% @doc 316 | %% Comparison run: benchmark multiple jobs at the same time. 317 | %% 318 | %% A job is defined by either {@link erlperf_job:code_map()}, 319 | %% or just the runner {@link erlperf_job:callable(). callable}. 320 | %% Example comparing {@link rand:uniform/0} %% performance 321 | %% to {@link rand:mwc59/1}: 322 | %% ``` 323 | %% (erlperf@ubuntu22)7> erlperf:compare([ 324 | %% {rand, uniform, []}, 325 | %% #{runner => "run(X) -> rand:mwc59(X).", init_runner => {rand, mwc59_seed, []}} 326 | %% ], #{}). 327 | %% [14823854,134121999] 328 | %% ''' 329 | %% 330 | %% See {@link benchmark/3} for `RunOptions' definition and return values. 331 | -spec compare(Codes :: [code()], RunOptions :: run_options()) -> [run_result()] | [report()]. 332 | compare(Codes, RunOptions) -> 333 | benchmark([code(Code) || Code <- Codes], RunOptions, undefined). 334 | 335 | %% @doc 336 | %% Runs a single benchmark for 3 seconds, returns average number of iterations per second. 337 | %% 338 | %% Accepts either a full {@link erlperf_job:code_map()}, or just the runner 339 | %% {@link erlperf_job:callable(). callable}. 340 | -spec run(code()) -> non_neg_integer(). 341 | run(Code) -> 342 | [Report] = benchmark([code(Code)], #{}, undefined), 343 | Report. 344 | 345 | %% @doc 346 | %% Runs a single benchmark job, returns average number of iterations per second, 347 | %% or a full report. 
348 | %%
349 | %% Accepts either a full {@link erlperf_job:code_map()}, or just the runner
350 | %% {@link erlperf_job:callable(). callable}.
351 | %% Equivalent of returning the first result of `compare([Code], RunOptions)'.
352 | -spec run(Code :: code(), RunOptions :: run_options()) -> run_result() | report().
353 | run(Code, RunOptions) ->
354 |     [Report] = benchmark([code(Code)], RunOptions, undefined),
355 |     Report.
356 | 
357 | %% @doc
358 | %% Concurrency estimation run, or an alias for quick benchmarking of an MFA tuple.
359 | %%
360 | %% Attempt to find concurrency characteristics of the runner code,
361 | %% see {@link benchmark/3} for a detailed description. Accepts either a full
362 | %% {@link erlperf_job:code_map()}, or just the runner
363 | %% {@link erlperf_job:callable(). callable}.
364 | %%
365 | %% When `Module' and `Function' are atoms, and `Args' is a list, this call is
366 | %% equivalent of `run({Module, Function, Args})'.
367 | -spec run(code(), run_options(), concurrency_test()) -> concurrency_test_result() | report();
368 |          (module(), atom(), [term()]) -> QPS :: non_neg_integer().
369 | run(Module, Function, Args) when is_atom(Module), is_atom(Function), is_list(Args) ->
370 |     %% this typo is so common that I decided to have this as an unofficial API
371 |     run({Module, Function, Args});
372 | run(Code, RunOptions, ConTestOpts) ->
373 |     [Report] = benchmark([code(Code)], RunOptions, ConTestOpts),
374 |     Report.
375 | 
376 | %% @doc
377 | %% Starts a new supervised job with the specified concurrency.
378 | %%
379 | %% Requires `erlperf' application to be running. Returns job
380 | %% controller process identifier.
381 | %% This function is designed for distributed benchmarking, when
382 | %% jobs are started in different nodes, and monitored via
383 | %% {@link erlperf_cluster_monitor}.
384 | -spec start(code(), Concurrency :: non_neg_integer()) -> pid().
385 | start(Code, Concurrency) ->
386 |     {ok, Job} = supervisor:start_child(erlperf_job_sup, [code(Code)]),
387 |     ok = erlperf_job:set_concurrency(Job, Concurrency),
388 |     Job.
389 | 
390 | %% @doc
391 | %% Timed benchmarking mode. Iterates the runner code `Count' times and returns
392 | %% elapsed time in microseconds.
393 | %%
394 | %% This method has lower overhead compared to continuous benchmarking. It is
395 | %% not supported to run multiple workers in this mode.
396 | -spec time(code(), Count :: non_neg_integer()) -> TimeUs :: non_neg_integer().
397 | time(Code, Count) ->
398 |     [Report] = benchmark([code(Code)], #{samples => Count, sample_duration => undefined}, undefined),
399 |     Report.
400 | 
401 | %% @private
402 | %% @doc
403 | %% Records call trace, so it could be used to benchmark later.
404 | %% Experimental, do not use.
405 | -spec record(module(), atom(), non_neg_integer(), pos_integer()) ->
406 |     [[{module(), atom(), [term()]}]].
407 | record(Module, Function, Arity, TimeMs) ->
408 |     TracerPid = spawn_link(fun rec_tracer/0),
409 |     TraceSpec = [{'_', [], []}],
410 |     MFA = {Module, Function, Arity},
411 |     erlang:trace_pattern(MFA, TraceSpec, [global]),
412 |     erlang:trace(all, true, [call, {tracer, TracerPid}]),
413 |     receive after TimeMs -> ok end,
414 |     erlang:trace(all, false, [call]),
415 |     erlang:trace_pattern(MFA, false, [global]),
416 |     TracerPid ! {stop, self()},
417 |     receive
418 |         {data, Samples} ->
419 |             Samples
420 |     end.
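%% A hedged example of an isolated comparison run (one peer node per job), using
%% only benchmark/3 options defined above; the calling node must be distributed,
%% as checked by erlang:is_alive() below:
%%
%%   erlperf:benchmark([#{runner => {rand, uniform, []}},
%%                      #{runner => "crypto:strong_rand_bytes(2)."}],
%%                     #{samples => 5, isolation => #{}}, undefined).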
421 | 422 | %% =================================================================== 423 | %% Implementation details 424 | concurrency_mode_defaults(undefined) -> 425 | undefined; 426 | concurrency_mode_defaults(ConOpts) -> 427 | maps:merge(#{min => 1, step => 1, max => erlang:system_info(process_limit) - 1000, threshold => 3}, ConOpts). 428 | 429 | run_options_defaults(RunOptions) -> 430 | maps:merge(#{ 431 | concurrency => 1, 432 | sample_duration => 1000, 433 | warmup => 0, 434 | samples => 3, 435 | cv => undefined, 436 | priority => high, 437 | report => basic}, 438 | RunOptions). 439 | 440 | %%=================================================================== 441 | %% Codification: translate from {M, F, A} to #{runner => ...} map 442 | code(#{runner := _Runner} = Code) -> 443 | Code; 444 | code({M, F, A}) when is_atom(M), is_atom(F), is_list(A) -> 445 | #{runner => {M, F, A}}; 446 | code(Fun) when is_function(Fun) -> 447 | #{runner => Fun}; 448 | code(Text) when is_list(Text) -> 449 | #{runner => Text}. 450 | 451 | %%=================================================================== 452 | %% Benchmarking itself 453 | 454 | %% OTP 25 support 455 | -dialyzer({no_missing_calls, start_node/1}). 456 | -compile({nowarn_deprecated_function, [{slave, start_link, 3}, {slave, stop, 1}]}). 457 | -compile({nowarn_removed, [{slave, start_link, 3}, {slave, stop, 1}]}). 458 | 459 | start_node({module, peer}) -> 460 | {ok, _Peer, _Node} = peer:start_link(#{name => peer:random_name()}); 461 | start_node({error, nofile}) -> 462 | OsPid = os:getpid(), 463 | [_, HostString] = string:split(atom_to_list(node()), "@"), 464 | Host = list_to_atom(HostString), 465 | Args = "-setcookie " ++ atom_to_list(erlang:get_cookie()), 466 | Uniq = erlang:unique_integer([positive]), 467 | NodeId = list_to_atom(lists:concat(["job-", Uniq, "-", OsPid])), 468 | {ok, Node} = slave:start_link(Host, NodeId, Args), 469 | {ok, undefined, Node}. 470 | 471 | prepare_nodes(HowMany) -> 472 | %% start 'erlperf' parts on all peers 473 | %% Cannot do this via "code:add_path" because actual *.beam files are 474 | %% parts of the binary escript. 475 | _ = application:load(erlperf), 476 | {ok, ModNames} = application:get_key(erlperf, modules), 477 | Modules = [{Mod, _Bin, _Path} = code:get_object_code(Mod) || Mod <- ModNames], 478 | PeerPresent = code:ensure_loaded(peer), 479 | %% start multiple nodes 480 | lists:unzip([begin 481 | {ok, Peer, Node} = start_node(PeerPresent), 482 | [{module, Mod} = erpc:call(Node, code, load_binary, [Mod, Path, Bin], ?REMOTE_NODE_TIMEOUT) 483 | || {Mod, Bin, Path} <- Modules], 484 | {ok, _PgPid} = erpc:call(Node, pg, start, [erlperf]), 485 | {ok, _MonPid} = erpc:call(Node, erlperf_monitor, start, []), 486 | {Peer, Node} 487 | end || _ <- lists:seq(1, HowMany)]). 488 | 489 | stop_nodes([undefined | _], Nodes) -> 490 | [slave:stop(Node) || Node <- Nodes]; 491 | stop_nodes(Peers, _Nodes) -> 492 | [peer:stop(Peer) || Peer <- Peers]. 493 | 494 | start_jobs([], Jobs) -> 495 | lists:reverse(Jobs); 496 | start_jobs([Code | Codes], Jobs) -> 497 | try 498 | {ok, Pid} = erlperf_job:start(Code), 499 | Handle = erlperf_job:handle(Pid), 500 | MonRef = monitor(process, Pid), 501 | start_jobs(Codes, [{Pid, Handle, MonRef} | Jobs]) 502 | catch Class:Reason:Stack -> 503 | %% stop jobs that were started 504 | stop_jobs(Jobs), 505 | erlang:raise(Class, Reason, Stack) 506 | end. 
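%% Note: code/1 above normalises every public callable shape into a code map,
%% e.g. code({timer, sleep, [1]}) =:= #{runner => {timer, sleep, [1]}} and
%% code("timer:sleep(1).") =:= #{runner => "timer:sleep(1)."}.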
507 | 508 | stop_jobs(Jobs) -> 509 | %% do not use gen:stop/1,2 or sys:terminate/2,3 here, as they spawn process running 510 | %% with normal priority, and they don't get scheduled fast enough when there is severe 511 | %% lock contention 512 | WaitFor = [begin erlperf_job:request_stop(Pid), {Pid, Mon} end || {Pid, _, Mon} <- Jobs, is_process_alive(Pid)], 513 | %% now wait for all monitors to fire 514 | [receive {'DOWN', Mon, process, Pid, _R} -> ok end || {Pid, Mon} <- WaitFor]. 515 | 516 | %% Benchmark implementation. Always returns a full report (post-processing will dumb it down if needed). 517 | 518 | %% Timed mode,backwards compatibility conversion 519 | benchmark_impl(Jobs, #{sample_duration := undefined, samples := Samples} = RunOptions, undefined, Handles) -> 520 | benchmark_impl(Jobs, RunOptions#{sample_duration => {timed, Samples}, samples => 1}, undefined, Handles); 521 | 522 | %% timed mode 523 | benchmark_impl(Jobs, #{sample_duration := {timed, Duration}, samples := Samples, warmup := Warmup} = RunOptions, undefined, _Handles) -> 524 | Proxies = [ 525 | spawn_monitor( 526 | fun () -> 527 | _Discarded = [erlperf_job:measure(Job, Duration) || _ <- lists:seq(1, Warmup)], 528 | Times = [erlperf_job:measure(Job, Duration) || _ <- lists:seq(1, Samples)], 529 | exit({success, Times}) 530 | end) 531 | || Job <- Jobs], 532 | [case Res of 533 | {success, TimesUs} -> 534 | #{mode => timed, result => #{samples => TimesUs}, run_options => RunOptions}; 535 | Error -> 536 | erlang:error(Error) 537 | end || Res <- multicall_result(Proxies, [])]; 538 | 539 | %% Continuous mode 540 | %% QPS considered stable when: 541 | %% * 'warmup' done 542 | %% * 'samples' received 543 | %% * (optional) for the last 'samples' standard deviation must not exceed 'cv' 544 | benchmark_impl(Jobs, #{sample_duration := Interval, cv := CV, samples := SampleCount, 545 | warmup := Warmup, concurrency := Concurrency} = RunOptions, undefined, Handles) -> 546 | %% TODO: turn the next sequential call into a multi-call, to make warmup time fair 547 | [ok = erlperf_job:set_concurrency(Job, Concurrency) || Job <- Jobs], 548 | %% warmup: intended to figure out sleep method (whether to apply busy_wait immediately) 549 | NowTime = os:system_time(millisecond), 550 | SleepMethod = warmup(Warmup, NowTime, NowTime + Interval, Interval, sleep), 551 | %% remember initial counters in Before 552 | Before = [[erlperf_job:sample(Handle)] || Handle <- Handles], 553 | StartedAt = os:system_time(millisecond), 554 | {Samples, TimerSkew, FinishedAt} = measure_impl(Before, Handles, StartedAt, StartedAt + Interval, Interval, 555 | SleepMethod, SampleCount, CV), 556 | Time = FinishedAt - StartedAt, 557 | [#{mode => continuous, result => #{samples => lists:reverse(S), time => Time * 1000}, 558 | run_options => RunOptions, sleep => TimerSkew} 559 | || S <- Samples]; 560 | 561 | %% squeeze test - concurrency benchmark 562 | benchmark_impl(Jobs, RunOptions, #{min := Min} = ConOpts, Handles) -> 563 | [estimate_concurrency(Jobs, RunOptions, ConOpts, Handles, Min, [], {0, 0})]. 
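%% Sampling bookkeeping sketch: with a single job whose counter reads 1000 at the
%% start and 18000 one second later, Before = [[1000]] becomes [[18000, 1000]]
%% via merge/2 (defined further below); reversing and applying difference/1
%% yields one sample of 17000 iterations for that second.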
564 | 565 | %% warmup procedure: figure out if sleep/4 can work without falling back to busy wait 566 | warmup(0, _LastSampleTime, _NextSampleTime, _Interval, Method) -> 567 | Method; 568 | warmup(Count, LastSampleTime, NextSampleTime, Interval, Method) -> 569 | SleepFor = NextSampleTime - LastSampleTime, 570 | NextMethod = sleep(Method, SleepFor, NextSampleTime), 571 | NowTime = os:system_time(millisecond), 572 | warmup(Count - 1, NowTime, NextSampleTime + Interval, Interval, NextMethod). 573 | 574 | %% collected all samples, CV is not defined 575 | measure_impl(Before, _Handles, LastSampleTime, _NextSampleTime, _Interval, SleepMethod, 0, undefined) -> 576 | {Before, SleepMethod, LastSampleTime}; 577 | 578 | %% collected all samples, but CV is defined - check whether to collect more samples 579 | measure_impl(Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, 0, CV) -> 580 | %% Complication: some jobs may need a long time to stabilise compared to others. 581 | %% Decision: wait for all jobs to stabilise. Stopping completed jobs skews the measurements. 582 | case 583 | lists:any( 584 | fun (Samples) -> 585 | Normal = difference(Samples), 586 | Len = length(Normal), 587 | Mean = lists:sum(Normal) / Len, 588 | StdDev = math:sqrt(lists:sum([(S - Mean) * (S - Mean) || S <- Normal]) / (Len - 1)), 589 | StdDev / Mean > CV 590 | end, Before) 591 | of 592 | false -> 593 | {Before, SleepMethod, LastSampleTime}; 594 | true -> 595 | %% imitate queue - drop last sample, push another in the head 596 | %% TODO: change the behaviour to return all samples in the full report 597 | TailLess = [lists:droplast(L) || L <- Before], 598 | measure_impl(TailLess, Handles, LastSampleTime, NextSampleTime + Interval, 599 | Interval, SleepMethod, 1, CV) 600 | end; 601 | 602 | %% LastSampleTime: system time of the last sample 603 | %% NextSampleTime: system time when to take the next sample 604 | %% Interval: to calculate the next NextSampleTime 605 | %% Count: how many more samples to take 606 | %% CV: acceptable standard deviation 607 | measure_impl(Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, Count, CV) -> 608 | SleepFor = NextSampleTime - LastSampleTime, 609 | NextSleepMethod = sleep(SleepMethod, SleepFor, NextSampleTime), 610 | Counts = [erlperf_job:sample(Handle) || Handle <- Handles], 611 | NowTime = os:system_time(millisecond), 612 | measure_impl(merge(Counts, Before), Handles, NowTime, NextSampleTime + Interval, Interval, 613 | NextSleepMethod, Count - 1, CV). 614 | 615 | %% ERTS real-time properties are easily broken by lock contention (e.g. ETS misuse) 616 | %% When it happens, even the 'max' priority process may not run for an extended 617 | %% period of time. 618 | sleep(sleep, SleepFor, _WaitUntil) when SleepFor > 0 -> 619 | receive 620 | {'DOWN', _Ref, process, Pid, Reason} -> 621 | erlang:error({benchmark, {'EXIT', Pid, Reason}}) 622 | after SleepFor -> 623 | sleep 624 | end; 625 | sleep(_Mode, _SleepFor, WaitUntil) -> 626 | busy_wait(WaitUntil). 627 | 628 | %% When sleep detects significant difference in the actual sleep time vs. expected, 629 | %% loop is switched to the busy wait. 630 | %% Once switched to busy wait, erlperf stays there until the end of the test. 
631 | busy_wait(WaitUntil) -> 632 | receive 633 | {'DOWN', _Ref, process, Pid, Reason} -> 634 | erlang:error({benchmark, {'EXIT', Pid, Reason}}) 635 | after 0 -> 636 | case os:system_time(millisecond) of 637 | Now when Now > WaitUntil -> 638 | busy_wait; 639 | _ -> 640 | busy_wait(WaitUntil) 641 | end 642 | end. 643 | 644 | merge([], []) -> 645 | []; 646 | merge([M | T], [H | T2]) -> 647 | [[M | H] | merge(T, T2)]. 648 | 649 | difference([_]) -> 650 | []; 651 | difference([S, F | Tail]) -> 652 | [F - S | difference([F | Tail])]. 653 | 654 | %% Determine maximum throughput by measuring multiple times with different concurrency. 655 | %% Test considered complete when either: 656 | %% * maximum number of workers reached 657 | %% * last 'threshold' added workers did not increase throughput 658 | estimate_concurrency(Jobs, Options, #{threshold := Threshold, step := Step, max := Max} = ConOpts, Handles, Current, History, QMax) -> 659 | RunOptions = Options#{concurrency => Current}, 660 | [Report] = benchmark_impl(Jobs, RunOptions, undefined, Handles), 661 | #{result := Result0} = Report, 662 | #{samples := Samples} = Result0, 663 | %% calculate average QPS 664 | QPS = lists:sum(difference(Samples)) div (length(Samples) - 1), 665 | Result = Result0#{average => QPS}, 666 | NewHistory = [{Current, Result} | History], 667 | %% this gives us nice round numbers (eg. with step of 10, we'll have [1, 10, 20...]) 668 | Next = (Current + Step) div Step * Step, 669 | %% test if we are at Max concurrency, or saturated the node 670 | case maxed(QPS, Current, QMax, Threshold) of 671 | true -> 672 | %% QPS are either stable or decreasing, get back to the best run 673 | #{sleep := SleepMethod} = Report, 674 | {_BestMax, BestConcurrency} = QMax, 675 | {BestConcurrency, BestResult} = lists:keyfind(BestConcurrency, 1, History), 676 | #{mode => concurrency, result => BestResult, history => NewHistory, sleep => SleepMethod, 677 | concurrency_options => ConOpts, run_options => Options#{concurrency => BestConcurrency}}; 678 | _NewQMax when Next > Max -> 679 | #{sleep := SleepMethod} = Report, 680 | #{mode => concurrency, result => Result, history => NewHistory, sleep => SleepMethod, 681 | concurrency_options => ConOpts, run_options => RunOptions}; 682 | NewQMax -> 683 | % need more workers 684 | estimate_concurrency(Jobs, RunOptions, ConOpts, Handles, Next, NewHistory, NewQMax) 685 | end. 686 | 687 | maxed(QPS, Current, {Q, _}, _) when QPS > Q -> 688 | {QPS, Current}; 689 | maxed(_, Current, {_, W}, Count) when Current - W >= Count -> 690 | true; 691 | maxed(_, _, QMax, _) -> 692 | QMax. 693 | 694 | multicall_result([], Acc) -> 695 | lists:reverse(Acc); 696 | multicall_result([{Pid, Ref} | Proxies], Acc) -> 697 | receive 698 | {'DOWN', Ref, process, Pid, Result} -> 699 | multicall_result(Proxies, [Result | Acc]) 700 | end. 701 | 702 | %%%=================================================================== 703 | %%% Tracer process, uses heap to store tracing information. 704 | rec_tracer() -> 705 | process_flag(message_queue_data, off_heap), 706 | tracer_loop([]). 707 | 708 | -spec tracer_loop([{module(), atom(), [term()]}]) -> ok. 709 | tracer_loop(Trace) -> 710 | receive 711 | {trace, _Pid, call, MFA} -> 712 | tracer_loop([MFA | Trace]); 713 | {stop, Control} -> 714 | Control ! {data, Trace}, 715 | ok 716 | end. 717 | 718 | %% Reporting: in full mode, add extra information (e.g. 
codes and statistics)
719 | %% full report for concurrency mode (history entries are rewritten as continuous-mode results)
720 | report(full, [Code], [#{mode := concurrency, history := History, result := Result, run_options := RunOpts} = Report]) ->
721 |     System = system_report(),
722 |     [Report#{system => System, code => Code,
723 |         result => process_result(Result, continuous, RunOpts, #{}),
724 |         history => [{C, process_result(R, continuous, #{report => full, concurrency => C}, #{})} || {C, R} <- History]}];
725 | %% full reports
726 | report(full, Codes, Reports) ->
727 |     System = system_report(),
728 |     [Report#{system => System, code => Code, result => process_result(Result, Mode, RunOptions, Report)}
729 |         || {Code, #{mode := Mode, result := Result, run_options := RunOptions} = Report} <- lists:zip(Codes, Reports)];
730 | report(_ReportType, _Codes, Reports) ->
731 |     [process_result(Result, Mode, RunOptions, Report)
732 |         || #{mode := Mode, result := Result, run_options := RunOptions} = Report <- Reports].
733 | 
734 | %% Transform raw samples into the requested report
735 | process_result(#{samples := Samples}, timed, #{report := full, samples := Count,
736 |     sample_duration := {timed, Loop}}, _Report) ->
737 |     Stat = report_stats(Samples),
738 |     TotalTime = lists:sum(Samples),
739 |     Stat#{time => TotalTime, iteration_time => TotalTime * 1000 div (Count * Loop), samples => Samples};
740 | process_result(#{samples := Samples}, timed, #{report := basic}, _Report) ->
741 |     %% timed mode, basic report
742 |     lists:sum(Samples) div length(Samples) div 1000;
743 | process_result(#{samples := Samples}, timed, #{report := extended}, _Report) ->
744 |     %% timed mode, extended report, convert to milliseconds for backwards compatibility
745 |     [S div 1000 || S <- Samples];
746 | process_result(#{samples := Samples, time := TimeUs}, continuous, #{report := full, concurrency := C}, _Report) ->
747 |     Stat = report_stats(difference(Samples)),
748 |     IterationTime = case lists:last(Samples) - hd(Samples) of
749 |                         0 ->
750 |                             infinity;
751 |                         Total ->
752 |                             erlang:round(TimeUs * C * 1000 div Total)
753 |                     end,
754 |     Stat#{samples => Samples, time => TimeUs, iteration_time => IterationTime};
755 | process_result(#{samples := Samples}, continuous, #{report := extended}, _Report) ->
756 |     difference(Samples);
757 | process_result(#{samples := Samples}, continuous, #{report := basic}, _Report) ->
758 |     Diffs = difference(Samples),
759 |     lists:sum(Diffs) div length(Diffs);
760 | process_result(#{average := Avg}, concurrency, #{report := basic, concurrency := C}, _Report) ->
761 |     {Avg, C};
762 | process_result(#{average := Avg}, concurrency, #{report := extended, concurrency := C}, #{history := H}) ->
763 |     %% return {Best, History}
764 |     {{Avg, C}, [{A, W} || {W, #{average := A}} <- H]}.
765 | 
766 | %% @private
767 | %% Calculates summary statistics over the passed samples.
768 | %% Exported for unit-testing purposes.
769 | report_stats(Samples) ->
770 |     Sum = lists:sum(Samples),
771 |     Len = length(Samples),
772 |     Avg = Sum / Len, %% arithmetic mean
773 |     Variance = if Len =:= 0 -> 0; true -> lists:sum([(S - Avg) * (S - Avg) || S <- Samples]) / (Len - 1) end,
774 |     Sorted = lists:sort(Samples),
775 |     #{
776 |         average => Avg,
777 |         min => hd(Sorted),
778 |         max => lists:last(Sorted),
779 |         stddev => math:sqrt(Variance),
780 |         median => lists:nth(erlang:round(0.50 * Len), Sorted),
781 |         p99 => lists:nth(erlang:round(0.99 * Len), Sorted)
782 |     }.
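For orientation, the shape of the map report_stats/1 produces, hand-checked against a small made-up sample list:

```erlang
%% report_stats([10, 12, 11, 13, 10]) returns
%%   #{average => 11.2, min => 10, max => 13,
%%     stddev => 1.3038,   %% ~ math:sqrt(6.8 / 4), sample standard deviation
%%     median => 11,       %% lists:nth(round(0.5 * 5), [10, 10, 11, 12, 13])
%%     p99 => 13}          %% lists:nth(round(0.99 * 5), ...) = 5th element
```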
783 | 
784 | system_report() ->
785 |     OSType = erlang:system_info(os_type),
786 |     Guaranteed = detect_feature([emu_type, emu_flavor, dynamic_trace], #{
787 |         os => OSType,
788 |         system_version => string:trim(erlang:system_info(system_version), trailing)
789 |     }),
790 |     try Guaranteed#{cpu => string:trim(detect_cpu(OSType), both)}
791 |     catch _:_ -> Guaranteed
792 |     end.
793 | 
794 | detect_feature([], System) ->
795 |     System;
796 | detect_feature([F | T], System) ->
797 |     try detect_feature(T, System#{F => erlang:system_info(F)})
798 |     catch error:badarg -> detect_feature(T, System)
799 |     end.
800 | 
801 | detect_cpu({unix, freebsd}) ->
802 |     os:cmd("sysctl -n hw.model");
803 | detect_cpu({unix, darwin}) ->
804 |     os:cmd("sysctl -n machdep.cpu.brand_string");
805 | detect_cpu({unix, linux}) ->
806 |     {ok, Bin} = file:read_file("/proc/cpuinfo"),
807 |     linux_cpu_model(binary:split(Bin, <<"\n">>));
808 | detect_cpu({win32, nt}) ->
809 |     [_, CPU] = string:split(os:cmd("WMIC CPU GET NAME"), "\n"),
810 |     CPU.
811 | 
812 | linux_cpu_model([<<"model name", Model/binary>>, _]) ->
813 |     [_, ModelName] = binary:split(Model, <<":">>),
814 |     binary_to_list(ModelName);
815 | linux_cpu_model([_Skip, Tail]) ->
816 |     linux_cpu_model(binary:split(Tail, <<"\n">>)).
--------------------------------------------------------------------------------
/src/erlperf_app.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @private
 3 | %%% Continuous benchmarking application behaviour.
 4 | -module(erlperf_app).
 5 | -author("maximfca@gmail.com").
 6 | 
 7 | -behaviour(application).
 8 | 
 9 | -export([start/2, stop/1]).
10 | 
11 | -spec start(application:start_type(), term()) -> {ok, pid()}.
12 | start(_StartType, _StartArgs) ->
13 |     {ok, Sup} = erlperf_sup:start_link(),
14 |     {ok, Sup}.
15 | 
16 | -spec stop(term()) -> ok.
17 | stop(_State) ->
18 |     ok.
19 | 
--------------------------------------------------------------------------------
/src/erlperf_cli.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Command line interface adapter.
 4 | %%%
 5 | %%% Exports functions to format {@link erlperf:benchmark/3} output
 6 | %%% in the same way as the command line interface does.
 7 | %%%
 8 | %%% Example:
 9 | %%% ```
10 | %%% #!/usr/bin/env escript
11 | %%% %%! +pc unicode -pa /home/max-au/git/max-au/erlperf/_build/default/lib/erlperf/ebin
12 | %%% -mode(compile).
13 | %%%
14 | %%% main(_) ->
15 | %%%     Report = erlperf:benchmark([
16 | %%%         #{runner => fun() -> rand:uniform(10) end},
17 | %%%         #{runner => {rand, mwc59, [1]}}
18 | %%%     ], #{report => full}, undefined),
19 | %%%     Out = erlperf_cli:format(Report, #{format => extended, viewport_width => 120}),
20 | %%%     io:format(Out),
21 | %%%     halt(0).
22 | %%% '''
23 | %%% Running the script produces the following output:
24 | %%% ```
25 | %%% $ ./bench
26 | %%% Code                  ||   Samples        Avg   StdDev     Median        P99  Iteration    Rel
27 | %%% {rand,mwc59,[1]}       1         3   80515 Ki    0.59%   80249 Ki   81067 Ki      12 ns   100%
28 | %%% #Fun<...>              1         3        ...      ...        ...        ...        ...    ...
29 | %%% '''
30 | %%% @end
31 | -module(erlperf_cli).
32 | -author("maximfca@gmail.com").
33 | 
34 | -export([format/2]).
35 | 
36 | %% escript entry point
37 | -export([main/1]).
38 | 
39 | -type format_options() :: #{
40 |     viewport_width => pos_integer(),
41 |     format => basic | extended | full
42 | }.
43 | %% Defines text report format.
44 | %%
45 | %% <ul>
46 | %%   <li>`viewport_width': width, in characters, to use when formatting
47 | %%      the report. By default, the current terminal width is detected,
48 | %%      falling back to 80 columns when the output device is not a
49 | %%      terminal</li>
50 | %%   <li>`format': report flavour. `basic' prints a single line per
51 | %%      benchmark; `extended' adds statistics (standard deviation,
52 | %%      median, p99) and is selected by default when 10 or more samples
53 | %%      were collected; `full' additionally prints system information
54 | %%      and warnings (non-optimised emulator, dynamic trace probes)</li>
55 | %% </ul>
56 | 
57 | 
58 | 
59 | -export_type([format_options/0]).
60 | 
61 | %% @doc
62 | %% Formats the result produced by {@link erlperf:benchmark/3}.
63 | %%
64 | %% Requires a full report. Does not accept basic or extended variants.
65 | -spec format(Reports, Options) -> iolist() when 66 | Reports :: [erlperf:report()], 67 | Options :: format_options(). 68 | format(Reports, Options) -> 69 | Format = 70 | case maps:find(format, Options) of 71 | {ok, F} -> 72 | F; 73 | error -> 74 | %% if format is not specified, choose between basic and extended 75 | %% based on amount of samples collected. Extended report does 76 | %% not make much sense for 3 samples. 77 | case maps:find(samples, maps:get(result, hd(Reports))) of 78 | {ok, Samples} when length(Samples) >= 10 -> 79 | extended; 80 | _ -> 81 | basic 82 | end 83 | end, 84 | Width = maps:get(viewport_width, Options, viewport_width()), 85 | %% if any of the reports has "sleep" set to busy_wait, write a warning 86 | Prefix = 87 | case lists:any(fun (#{sleep := busy_wait}) -> true; (_) -> false end, Reports) of 88 | true -> 89 | color(warning, io_lib:format("Timer accuracy problem detected, results may be inaccurate~n", [])); 90 | false -> 91 | "" 92 | end, 93 | %% 94 | Prefix ++ format_report(Format, Reports, Width). 95 | 96 | %%------------------------------------------------------------------- 97 | %% Internal implementation 98 | 99 | %% @private 100 | %% Used from escript invocation 101 | -spec main([string()]) -> no_return(). 102 | main(Args) -> 103 | Prog = #{progname => "erlperf"}, 104 | try 105 | ParsedOpts = args:parse(Args, arguments(), Prog), 106 | 107 | Verbose = maps:get(verbose, ParsedOpts, false), 108 | 109 | %% turn off logger unless verbose output is requested 110 | Verbose orelse 111 | logger:add_primary_filter(suppress_sasl, { 112 | fun(#{meta := #{error_logger := #{tag := Tag}}}, _) when Tag =:= error; Tag =:= error_report -> 113 | stop; 114 | (_, _) -> 115 | ignore 116 | end, ok}), 117 | 118 | %% timed benchmarking is not compatible with many options, and may have "loop" written as 100M, 100K 119 | {RunOpts0, ConcurrencyTestOpts} = determine_mode(ParsedOpts), 120 | 121 | %% add code paths 122 | [case code:add_path(P) of true -> ok; {error, Error} -> erlang:error({add_path, {P,Error}}) end 123 | || P <- maps:get(code_path, ParsedOpts, [])], 124 | 125 | %% find all runners 126 | Code0 = [parse_code(C) || C <- maps:get(code, ParsedOpts)], 127 | %% find associated init, init_runner, done, label 128 | {_, Codes} = lists:foldl(fun callable/2, {ParsedOpts, Code0}, 129 | [{init, init_all}, {init_runner, init_runner_all}, {done, done_all}, {label, undefined}]), 130 | 131 | %% when isolation is requested, the node must be distributed 132 | RunOpts = case is_map_key(isolation, ParsedOpts) of 133 | true -> 134 | erlang:is_alive() orelse start_distribution(), 135 | RunOpts0#{isolation => #{}}; 136 | false -> 137 | RunOpts0 138 | end, 139 | 140 | FormatOpts = case maps:find(report, ParsedOpts) of 141 | {ok, Fmt1} -> 142 | #{format => Fmt1}; 143 | error -> 144 | #{} 145 | end, 146 | %% do the actual run 147 | Results = benchmark(Codes, RunOpts#{report => full}, ConcurrencyTestOpts, Verbose), 148 | %% format results 149 | Formatted = format(Results, FormatOpts#{viewport_width => viewport_width()}), 150 | io:format(Formatted) 151 | catch 152 | error:{args, Reason} -> 153 | Fmt = args:format_error(Reason, arguments(), Prog), 154 | format(info, "Error: ~s", [Fmt]); 155 | throw:{parse, FunName, Other} -> 156 | format(error, "Unable to read file named '~s' (expected to contain call chain recording)~nReason: ~p\n" 157 | "Did you forget to end your function with period? 
(dot)~n", [FunName, Other]); 158 | error:{add_path, {Path, Error}} -> 159 | format(error, "Error adding code path ~s: ~p~n", [Path, Error]); 160 | error:{generic, Error} -> 161 | format(error, "Error: ~s~n", [Error]); 162 | error:{loop, Option} -> 163 | format(error, "Timed benchmarking is not compatible with --~s option~n", [Option]); 164 | error:{concurrency, Option} -> 165 | format(error, "Concurrency estimation is not compatible with --~s option~n", [Option]); 166 | error:{generate, {parse, FunName, Error}} -> 167 | format(error, "Parse error for ~s: ~s~n", [FunName, lists:flatten(Error)]); 168 | error:{generate, {What, WhatArity, requires, Dep}} -> 169 | format(error, "~s/~b requires ~s function defined~n", [What, WhatArity, Dep]); 170 | error:{compile, Errors, Warnings} -> 171 | Errors =/= [] andalso format(error, "Compile error: ~s~n", [compile_errors(Errors)]), 172 | Warnings =/= [] andalso format(warning, "Warning: ~s~n", [compile_errors(Warnings)]); 173 | error:{benchmark, {'EXIT', Job, Error}} -> 174 | node(Job) =/= node() andalso format(error, "~s reported an error:~n", [node(Job)]), 175 | format(error, "~p~n", [Error]); 176 | Cls:Rsn:Stack -> 177 | format(error, "Unhandled exception: ~ts:~p~n~p~n", [Cls, Rsn, Stack]) 178 | after 179 | logger:remove_primary_filter(suppress_sasl) 180 | end. 181 | 182 | %% timed mode 183 | determine_mode(#{loop := Loop} = ParsedOpts) -> 184 | [erlang:error({loop, Option}) || Option <- 185 | [concurrency, sample_duration, warmup, cv, concurrency_estimation], is_map_key(Option, ParsedOpts)], 186 | RunOpts = maps:with([samples], ParsedOpts), 187 | {RunOpts#{sample_duration => {timed, parse_loop(Loop)}}, undefined}; 188 | 189 | %% concurrency estimation mode 190 | determine_mode(#{concurrency_estimation := true} = ParsedOpts) -> 191 | [erlang:error({concurrency, Option}) || Option <- 192 | [concurrency], is_map_key(Option, ParsedOpts)], 193 | length(maps:get(code, ParsedOpts)) > 1 andalso 194 | erlang:error({generic, "Parallel concurrency estimation runs are not supported~n"}), 195 | RunOpts = maps:with([sample_duration, samples, warmup, cv], ParsedOpts), 196 | {RunOpts, maps:with([min, step, max, threshold], ParsedOpts)}; 197 | 198 | %% continuous mode 199 | determine_mode(ParsedOpts) -> 200 | RunOpts = maps:with([concurrency, sample_duration, samples, warmup, cv], ParsedOpts), 201 | {RunOpts, undefined}. 202 | 203 | %% wrapper to ensure verbose output 204 | benchmark(Codes, RunOpts, ConcurrencyTestOpts, false) -> 205 | erlperf:benchmark(Codes, RunOpts, ConcurrencyTestOpts); 206 | benchmark(Codes, RunOpts, ConcurrencyTestOpts, true) -> 207 | [begin 208 | io:format(">>>>>>>>>>>>>>> ~-32ts ~n", [format_code(C)]), 209 | [io:format("~ts~n", [L]) || L <- erlperf_job:source(C)], 210 | io:format("<<<<<<<<<<<<<<< ~n") 211 | end|| C <- Codes], 212 | {ok, Pg} = pg:start_link(erlperf), 213 | {ok, Monitor} = erlperf_monitor:start_link(), 214 | {ok, Logger} = erlperf_file_log:start_link(), 215 | try 216 | erlperf:benchmark(Codes, RunOpts, ConcurrencyTestOpts) 217 | after 218 | gen:stop(Logger), 219 | gen:stop(Monitor), 220 | gen:stop(Pg) 221 | end. 222 | 223 | start_distribution() -> 224 | Node = list_to_atom(lists:concat(["erlperf-", erlang:unique_integer([positive]), "-", os:getpid()])), 225 | {ok, _} = net_kernel:start([Node, shortnames]). 
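To make the dispatch above concrete, this is roughly how parsed command line options map to the three modes (a sketch; maps abbreviated, values illustrative):

```erlang
%% determine_mode/1, illustrated:
%% #{loop => "100K", ...}                 -> {#{sample_duration => {timed, 100000}, ...}, undefined}
%% #{concurrency_estimation => true, ...} -> {RunOpts, #{min => ..., step => ..., max => ..., threshold => ...}}
%% anything else                          -> {RunOpts, undefined}    %% continuous mode
```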
226 | 
227 | %% formats compiler errors/warnings
228 | compile_errors([]) -> "";
229 | compile_errors([{_, []} | Tail]) ->
230 |     compile_errors(Tail);
231 | compile_errors([{L, [{_Anno, Mod, Err} | T1]} | Tail]) ->
232 |     lists:flatten(Mod:format_error(Err) ++ io_lib:format("~n", [])) ++ compile_errors([{L, T1} | Tail]).
233 | 
234 | callable({Type, Default}, {Args, Acc}) ->
235 |     case maps:find(Type, Args) of
236 |         error when is_map_key(Default, Args) ->
237 |             %% default is set, no overrides
238 |             {Args, merge_callable(Type, lists:duplicate(length(Acc), [maps:get(Default, Args)]), Acc, [])};
239 |         error ->
240 |             %% no overrides, no default - most common case
241 |             {Args, merge_callable(Type, [], Acc, [])};
242 |         {ok, Overrides} when is_map_key(Default, Args) ->
243 |             %% some overrides, and the default as well
244 |             %% extend the Overrides array to the expected size by adding the default value
245 |             Def = [maps:get(Default, Args)],
246 |             Complete = Overrides ++ [Def || _ <- lists:seq(1, length(Acc) - length(Overrides))],
247 |             {Args, merge_callable(Type, Complete, Acc, [])};
248 |         {ok, NoDefault} ->
249 |             %% no default, but some arguments are defined
250 |             {Args, merge_callable(Type, NoDefault, Acc, [])}
251 |     end.
252 | 
253 | merge_callable(_Type, [], Acc, Merged) ->
254 |     lists:reverse(Merged) ++ Acc;
255 | merge_callable(_Type, _, [], Merged) ->
256 |     lists:reverse(Merged);
257 | merge_callable(Type, [[H] | T], [HA | Acc], Merged) ->
258 |     merge_callable(Type, T, Acc, [HA#{Type => H} | Merged]).
259 | 
260 | parse_code(Code) ->
261 |     case lists:last(Code) of
262 |         $. ->
263 |             #{runner => Code};
264 |         $} when hd(Code) =:= ${ ->
265 |             % parse an MFA tuple with an added "."
266 |             #{runner => parse_mfa_tuple(Code)};
267 |         _ ->
268 |             case file:read_file(Code) of
269 |                 {ok, Bin} ->
270 |                     #{runner => parse_call_record(Bin)};
271 |                 Other ->
272 |                     erlang:throw({parse, Code, Other})
273 |             end
274 |     end.
275 | 
276 | parse_mfa_tuple(Code) ->
277 |     {ok, Scan, _} = erl_scan:string(Code ++ "."),
278 |     {ok, Term} = erl_parse:parse_term(Scan),
279 |     Term.
280 | 
281 | parse_call_record(Bin) ->
282 |     binary_to_term(Bin).
283 | 
284 | parse_loop(Loop) ->
285 |     case string:to_integer(Loop) of
286 |         {Int, "M"} -> Int * 1000000;
287 |         {Int, "K"} -> Int * 1000;
288 |         {Int, []} -> Int;
289 |         {Int, "G"} -> Int * 1000000000;
290 |         _Other -> erlang:error({generic, "unsupported syntax for timed iteration count: " ++ Loop})
291 |     end.
292 | 
293 | arguments() ->
294 |     #{help =>
295 |         "\nFull documentation available at: https://hexdocs.pm/erlperf/\n"
296 |         "\nBenchmark timer:sleep(1):\n  erlperf 'timer:sleep(1).'\n"
297 |         "Benchmark rand:uniform() vs crypto:strong_rand_bytes(2):\n  erlperf 'rand:uniform().' 'crypto:strong_rand_bytes(2).' --samples 10 --warmup 1\n"
298 |         "Figure out concurrency limits:\n  erlperf 'application_controller:is_running(kernel).' -q\n"
299 |         "Benchmark pg join/leave operations:\n  erlperf 'pg:join(s, foo, self()), pg:leave(s, foo, self()).' --init 'pg:start_link(s).'\n"
300 |         "Timed benchmark for a single BIF:\n  erlperf 'erlang:unique_integer().' -l 1000000\n",
301 |     arguments => [
302 |         #{name => concurrency, short => $c, long => "-concurrency",
303 |             help => "number of concurrently executed runner processes",
304 |             type => {int, [{min, 1}, {max, 1024 * 1024 * 1024}]}},
305 |         #{name => sample_duration, short => $d, long => "-duration",
306 |             help => "single sample duration, milliseconds (1000)",
307 |             type => {int, [{min, 1}]}},
308 |         #{name => samples, short => $s, long => "-samples",
309 |             help => "minimum number of samples to collect (3)",
310 |             type => {int, [{min, 1}]}},
311 |         #{name => loop, short => $l, long => "-loop",
312 |             help => "timed mode (lower overhead) iteration count: 50, 100K, 200M, 3G"},
313 |         #{name => warmup, short => $w, long => "-warmup",
314 |             help => "number of samples to skip (0)",
315 |             type => {int, [{min, 0}]}},
316 |         #{name => report, short => $r, long => "-report",
317 |             help => "report verbosity, full adds system information",
318 |             type => {atom, [basic, extended, full]}},
319 |         #{name => cv, long => "-cv",
320 |             help => "coefficient of variation",
321 |             type => {float, [{min, 0.0}]}},
322 |         #{name => verbose, short => $v, long => "-verbose",
323 |             type => boolean, help => "print monitoring statistics"},
324 |         #{name => code_path, long => "pa", type => string,
325 |             action => append, help => "extra code path, see -pa erl documentation"},
326 |         #{name => isolation, short => $i, long => "-isolated", type => boolean,
327 |             help => "run benchmarks in an isolated environment (peer node)"},
328 |         #{name => concurrency_estimation, short => $q, long => "-squeeze", type => boolean,
329 |             help => "run concurrency estimation test"},
330 |         #{name => min, long => "-min",
331 |             help => "start with this number of processes (1)",
332 |             type => {int, [{min, 1}]}},
333 |         #{name => step, long => "-step",
334 |             help => "increase the number of processes by this value on each iteration (1)",
335 |             type => {int, [{min, 1}]}},
336 |         #{name => max, long => "-max",
337 |             help => "do not exceed this number of processes",
338 |             type => {int, [{max, erlang:system_info(process_limit) - 1000}]}},
339 |         #{name => threshold, short => $t, long => "-threshold",
340 |             help => "stop concurrency estimation when this many added processes no longer improve throughput (3)", default => 3,
341 |             type => {int, [{min, 1}]}},
342 |         #{name => init, long => "-init",
343 |             help => "init code, see erlperf_job documentation for details", nargs => 1, action => append},
344 |         #{name => done, long => "-done",
345 |             help => "done code", nargs => 1, action => append},
346 |         #{name => init_runner, long => "-init_runner",
347 |             help => "init_runner code", nargs => 1, action => append},
348 |         #{name => label, long => "-label", type => string,
349 |             help => "runner label", nargs => 1, action => append},
350 |         #{name => init_all, long => "-init_all",
351 |             help => "default init code for all runners"},
352 |         #{name => done_all, long => "-done_all",
353 |             help => "default done code for all runners"},
354 |         #{name => init_runner_all, long => "-init_runner_all",
355 |             help => "default init_runner code for all runners"},
356 |         #{name => code,
357 |             help => "code to test", nargs => nonempty_list, action => extend}
358 |     ]}.
359 | 
360 | %%-------------------------------------------------------------------
361 | %% Color output
362 | 
363 | -spec format(error | warning | info, string(), [term()]) -> ok.
364 | format(Level, Format, Terms) ->
365 |     io:format(color(Level, Format), Terms).
366 | 
367 | -define(RED, "\e[31m").
368 | -define(MAGENTA, "\e[35m").
369 | -define(END, "\e[0m~n").
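Combining several of the options above into a single run (an illustrative invocation; note that timed `-l' mode and `-q' estimation reject most of these flags, as enforced by determine_mode/1 earlier):

```bash
$ erlperf 'rand:uniform().' 'rand:mwc59(1).' --samples 10 --warmup 1 --report full
```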
370 | 371 | color(error, Text) -> ?RED ++ Text ++ ?END; 372 | color(warning, Text) -> ?MAGENTA ++ Text ++ ?END; 373 | color(info, Text) -> Text. 374 | 375 | %% Report formatter 376 | format_report(full, [#{system := System} | _] = Reports, Width) -> 377 | warn_system(System) ++ [format_system(System), format_report(extended, Reports, Width)]; 378 | 379 | format_report(extended, [#{system := System} | _] = Reports, Width) -> 380 | Sorted = sort_by(Reports), 381 | #{result := #{average := MaxAvg}} = hd(Sorted), 382 | Header = ["Code", " ||", " Samples", " Avg", " StdDev", " Median", " P99", " Iteration", " Rel"], 383 | Data = [format_report_line(MaxAvg, ReportLine, extended) || ReportLine <- Sorted], 384 | warn_system(System) ++ format_table(remove_relative_column([Header | Data]), Width); 385 | 386 | format_report(basic, [#{system := System} | _] = Reports, Width) -> 387 | Sorted = sort_by(Reports), 388 | #{result := #{average := MaxAvg}} = hd(Sorted), 389 | Header = ["Code", " ||", " QPS", " Time", " Rel"], 390 | Data0 = [format_report_line(MaxAvg, ReportLine, basic) || ReportLine <- Sorted], 391 | %% remove columns that should not be displayed in basic mode 392 | Data = [[C1, C2, C3, C4, C5] || [C1, C2, _, C3, _, _, _, C4, C5] <- Data0], 393 | warn_system(System) ++ format_table(remove_relative_column([Header | Data]), Width). 394 | 395 | sort_by([#{mode := timed} | _] = Reports) -> 396 | lists:sort(fun (#{result := #{average := L}}, #{result := #{average := R}}) -> L < R end, Reports); 397 | sort_by([#{mode := _} | _] = Reports) -> 398 | lists:sort(fun (#{result := #{average := L}}, #{result := #{average := R}}) -> L > R end, Reports). 399 | 400 | remove_relative_column([H, D]) -> 401 | [lists:droplast(H), lists:droplast(D)]; 402 | remove_relative_column(HasRelative) -> 403 | HasRelative. 
404 | 
405 | format_report_line(MaxAvg, #{mode := timed, code := Code, result := #{average := Avg, stddev := StdDev,
406 |     iteration_time := IterationTime, p99 := P99, median := Median, samples := Samples},
407 |     run_options := #{concurrency := Concurrency}}, ReportFormat) ->
408 |     [
409 |         format_code(Code),
410 |         integer_to_list(Concurrency),
411 |         integer_to_list(length(Samples)),
412 |         if ReportFormat =:= basic -> erlperf_file_log:format_number(erlang:round(1000000000 / IterationTime));
413 |             true -> erlperf_file_log:format_duration(erlang:round(Avg * 1000)) end,
414 |         io_lib:format("~.2f%", [StdDev * 100 / Avg]),
415 |         erlperf_file_log:format_duration(Median * 1000), %% convert from us to ns
416 |         erlperf_file_log:format_duration(P99 * 1000), %% convert from us to ns
417 |         erlperf_file_log:format_duration(IterationTime), %% already in ns
418 |         integer_to_list(erlang:round(MaxAvg * 100 / Avg)) ++ "%"
419 |     ];
420 | 
421 | format_report_line(MaxAvg, #{code := Code, result := #{average := Avg, stddev := StdDev,
422 |     iteration_time := IterationTime, p99 := P99, median := Median, samples := Samples},
423 |     run_options := #{concurrency := Concurrency}}, _ReportFormat) when Avg > 0.5 ->
424 |     [
425 |         format_code(Code),
426 |         integer_to_list(Concurrency),
427 |         integer_to_list(length(Samples) - 1),
428 |         erlperf_file_log:format_number(erlang:round(Avg)),
429 |         io_lib:format("~.2f%", [StdDev * 100 / Avg]),
430 |         erlperf_file_log:format_number(Median),
431 |         erlperf_file_log:format_number(P99),
432 |         erlperf_file_log:format_duration(IterationTime),
433 |         integer_to_list(erlang:round(Avg * 100 / MaxAvg)) ++ "%"
434 |     ];
435 | 
436 | format_report_line(_MaxAvg, #{code := Code, result := #{samples := Samples},
437 |     run_options := #{concurrency := Concurrency}}, _ReportFormat) ->
438 |     [
439 |         format_code(Code),
440 |         integer_to_list(Concurrency),
441 |         integer_to_list(length(Samples) - 1),
442 |         "0",
443 |         "inf",
444 |         "0",
445 |         "0",
446 |         "inf",
447 |         "0%"
448 |     ].
449 | 
450 | %% generic table formatter routine, accepting a list of lists
451 | format_table([Header | Data] = Rows, Width) ->
452 |     %% find the longest string in each column
453 |     HdrWidths = [string:length(H) + 1 || H <- Header],
454 |     ColWidths = lists:foldl(
455 |         fun (Row, Acc) ->
456 |             [max(string:length(D) + 1, Old) || {D, Old} <- lists:zip(Row, Acc)]
457 |         end, HdrWidths, Data),
458 |     %% reserved (non-adjustable) columns
459 |     Reserved = lists:sum(tl(ColWidths)),
460 |     FirstColWidth = min(hd(ColWidths), Width - Reserved),
461 |     Format = "~*s" ++ lists:concat([io_lib:format("~~~bs", [W]) || W <- tl(ColWidths)]) ++ "~n",
462 |     %% just format the table
463 |     [io_lib:format(Format, [-FirstColWidth | Row]) || Row <- Rows].
464 | 
465 | %% detects terminal width (in characters) to shorten long output lines
466 | viewport_width() ->
467 |     case io:columns() of {ok, C} -> C; _ -> 80 end.
468 | 
469 | format_code(#{label := Label}) when is_list(Label) ->
470 |     Label;
471 | format_code(#{label := Label}) when is_binary(Label) ->
472 |     binary_to_list(Label);
473 | format_code(#{label := undefined, runner := Runner}) ->
474 |     format_code_1(Runner);
475 | format_code(#{runner := Runner}) ->
476 |     format_code_1(Runner).
477 | 
478 | format_code_1(Code) when is_tuple(Code) ->
479 |     lists:flatten(io_lib:format("~tp", [Code]));
480 | format_code_1(Code) when is_tuple(hd(Code)) ->
481 |     lists:flatten(io_lib:format("[~tp, ...]", [hd(Code)]));
482 | format_code_1(Code) when is_function(Code) ->
483 |     lists:flatten(io_lib:format("~tp", [Code]));
484 | format_code_1(Code) when is_list(Code) ->
485 |     Code;
486 | format_code_1(Code) when is_binary(Code) ->
487 |     binary_to_list(Code).
488 | 
489 | warn_system(#{dynamic_trace := Trace} = System) when Trace =/= none ->
490 |     [io_lib:format("WARNING: Dynamic Trace Probes enabled (~s detected)~n", [Trace]) | warn_system(maps:remove(dynamic_trace, System))];
491 | warn_system(#{emu_type := Type} = System) when Type =/= opt ->
492 |     [io_lib:format("WARNING: Emulator is not optimised (~s detected)~n", [Type]) | warn_system(maps:remove(emu_type, System))];
493 | warn_system(#{emu_flavor := Flavor} = System) when Flavor =/= jit ->
494 |     [io_lib:format("WARNING: Emulator is not JIT (~s detected)~n", [Flavor]) | warn_system(maps:remove(emu_flavor, System))];
495 | warn_system(_) ->
496 |     [].
497 | 
498 | format_system(#{os := OSType, system_version := SystemVsn} = System) ->
499 |     OS = io_lib:format("OS : ~s~n", [format_os(OSType)]),
500 |     CPU = if is_map_key(cpu, System) -> io_lib:format("CPU: ~s~n", [maps:get(cpu, System)]); true -> "" end,
501 |     VM = io_lib:format("VM : ~s~n~n", [SystemVsn]),
502 |     [OS, CPU, VM].
503 | 
504 | format_os({unix, freebsd}) -> "FreeBSD";
505 | format_os({unix, darwin}) -> "MacOS";
506 | format_os({unix, linux}) -> "Linux";
507 | format_os({win32, nt}) -> "Windows";
508 | format_os({Family, OS}) -> lists:flatten(io_lib:format("~s/~s", [Family, OS])).
--------------------------------------------------------------------------------
/src/erlperf_cluster_monitor.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Logs monitoring events for the entire cluster, to file or device.
 4 | %%% Requires the {@link erlperf_history} service running, fails otherwise.
 5 | %%% Uses a completely different approach to {@link erlperf_monitor}: instead of
 6 | %%% waiting for new samples to come, the cluster monitor just outputs existing
 7 | %%% samples periodically.
 8 | %%%
 9 | %%% Example primary node:
10 | %%% ```
11 | %%% rebar3 shell --sname primary
12 | %%% (primary@ubuntu22)1> erlperf_history:start_link().
13 | %%% {ok,<0.211.0>}
14 | %%% (primary@ubuntu22)2> erlperf_cluster_monitor:start_link().
15 | %%% {ok,<0.216.0>}
16 | %%% '''
17 | %%%
18 | %%% Example benchmarking node:
19 | %%% ```
20 | %%% rebar3 shell --sname bench1
21 | %%% (bench1@ubuntu22)1> net_kernel:connect_node('primary@ubuntu22').
22 | %%% true
23 | %%% (bench1@ubuntu22)2> erlperf:run(rand, uniform, []).
24 | %%% '''
25 | %%%
26 | %%% As soon as a new benchmarking job on the node `bench1' is started, it is
27 | %%% reported in the cluster monitoring output.
28 | %%% @end
29 | -module(erlperf_cluster_monitor).
30 | -author("maximfca@gmail.com").
31 | 
32 | -behaviour(gen_server).
33 | 
34 | %% API
35 | -export([
36 |     start_link/0,
37 |     start_link/3
38 | ]).
39 | 
40 | %% gen_server callbacks
41 | -export([
42 |     init/1,
43 |     handle_call/3,
44 |     handle_cast/2,
45 |     handle_info/2
46 | ]).
47 | 
48 | %% Handler: just like a gen_event handler.
49 | %% If you do need a gen_event handler, make a fun out of it.
50 | -type handler() :: {module(), atom(), term()} | file:filename_all() | {fd, io:device()} | io:device().
51 | %% Specifies monitoring output device. 52 | %% 53 | %% It could be an output {@link io:device()} (such as 54 | %% {@link erlang:group_leader/0}, `user' or `standard_error'), a file name, or a 55 | %% `{Module, Function, UserState}' tuple. In the latter case, instead of printing, cluster monitor 56 | %% calls the specified function, which must have arity of 2, accepting filtered 57 | %% {@link erlperf_monitor:monitor_sample()} as the first argument, and `Userstate' as the second, 58 | %% returning next `UserState'. 59 | 60 | 61 | %% Take a sample every second 62 | -define(DEFAULT_INTERVAL, 1000). 63 | 64 | -define(KNOWN_FIELDS, [time, node, sched_util, dcpu, dio, processes, ports, ets, memory_total, 65 | memory_processes, memory_binary, memory_ets, jobs]). 66 | 67 | %% @equiv start_link(erlang:group_leader(), 1000, undefined) 68 | -spec start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}. 69 | start_link() -> 70 | start_link(erlang:group_leader(), ?DEFAULT_INTERVAL, undefined). 71 | 72 | %% @doc 73 | %% Starts cluster-wide monitor process, and links it to the caller. 74 | %% 75 | %% Intended to be used in a supervisor `ChildSpec', making the process a part of the supervision tree. 76 | %% 77 | %% `IntervalMs' specifies time, in milliseconds, between output handler invocations. 78 | %% 79 | %% Fields specifies the list of field names to report, and the order in which columns are printed. 80 | %% see {@link erlperf_monitor:monitor_sample()} for options. Passing `undefined' prints all columns 81 | %% known by this version of `erlperf'. 82 | %% @end 83 | -spec start_link(Handler :: handler(), IntervalMs :: pos_integer(), Fields :: [atom()] | undefined) -> 84 | {ok, Pid :: pid()} | {error, Reason :: term()}. 85 | start_link(Handler, Interval, Fields) -> 86 | gen_server:start_link(?MODULE, [Handler, Interval, Fields], []). 87 | 88 | %%%=================================================================== 89 | %%% gen_server callbacks 90 | 91 | %% System monitor state 92 | -record(state, { 93 | next :: integer(), %% absolute timer for the next tick 94 | interval :: pos_integer(), 95 | handler :: handler(), 96 | fields :: [atom()] | undefined, 97 | %% previously printed header 98 | %% if the new header is different from the previous one, it gets printed 99 | header = [] :: [atom() | [pid()]] 100 | }). 101 | 102 | %% @private 103 | init([Handler, Interval, Fields0]) -> 104 | Fields = if Fields0 =:= undefined -> ?KNOWN_FIELDS; true -> Fields0 end, 105 | %% use absolute timer to avoid skipping ticks 106 | Now = erlang:monotonic_time(millisecond), 107 | {ok, handle_tick(#state{next = Now, interval = Interval, handler = make_handler(Handler), fields = Fields})}. 108 | 109 | %% @private 110 | handle_call(_Request, _From, _State) -> 111 | erlang:error(notsup). 112 | 113 | %% @private 114 | handle_cast(_Request, _State) -> 115 | erlang:error(notsup). 116 | 117 | %% @private 118 | handle_info({timeout, _, tick}, State) -> 119 | {noreply, handle_tick(State)}. 
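A sketch of the `{Module, Function, UserState}' handler form described above (the module and function names here are hypothetical, not part of erlperf):

```erlang
-module(sample_sink).
-export([collect/2]).

%% Called by erlperf_cluster_monitor every interval with the filtered
%% [{Time, Sample}] list; the return value becomes the next user state.
collect(Samples, Acc) ->
    [Samples | Acc].
```

It would be installed with `erlperf_cluster_monitor:start_link({sample_sink, collect, []}, 1000, [time, node, sched_util])'.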
120 | 121 | %%%=================================================================== 122 | %%% Internal functions 123 | 124 | handle_tick(#state{next = Now, interval = Interval, fields = Fields, handler = Handler, header = Header} = State) -> 125 | Next = Now + Interval, 126 | %% 127 | erlang:start_timer(Next, self(), tick, [{abs, true}]), 128 | %% last interval updates 129 | GetHistoryTo = Now + erlang:time_offset(millisecond), 130 | %% be careful not to overlap the timings (history:get is inclusive) 131 | Samples = erlperf_history:get(GetHistoryTo - Interval + 1, GetHistoryTo), 132 | %% now invoke the handler 133 | {NewHandler, NewHeader} = run_handler(Handler, Fields, Header, Samples), 134 | State#state{next = Next, handler = NewHandler, header = NewHeader}. 135 | 136 | make_handler({_M, _F, _A} = MFA) -> 137 | MFA; 138 | make_handler(IoDevice) when is_pid(IoDevice); is_atom(IoDevice) -> 139 | {fd, IoDevice}; 140 | make_handler({fd, IoDevice}) when is_pid(IoDevice); is_atom(IoDevice) -> 141 | {fd, IoDevice}; 142 | make_handler(Filename) when is_list(Filename); is_binary(Filename) -> 143 | {ok, Fd} = file:open(Filename, [raw, append]), 144 | {fd, Fd}. 145 | 146 | run_handler(Handler, _Fields, Header, []) -> 147 | {Handler, Header}; 148 | 149 | %% handler: MFA callback 150 | run_handler({M, F, A}, Fields, Header, Samples) -> 151 | Filtered = [{Time, maps:with(Fields, Sample)} || {Time, Sample} <- Samples], 152 | {{M, F, M:F(Filtered, A)}, Header}; 153 | 154 | %% built-in handler: file/console output 155 | run_handler({fd, IoDevice}, Fields, Header, Samples) -> 156 | %% the idea of the formatter below is to print lines like this: 157 | %% Dane Time node sched ets memory <123.456.1> <0.123.0> 158 | %% 2022-11-12 08:35:16 node1@host 33.5% 16 128111 12345 159 | %% 2022-11-12 08:35:16 node1@host 33.5% 16 128111 9111 160 | 161 | %% collect all jobs from all samples 162 | Jobs = lists:usort(lists:foldl( 163 | fun ({_Time, #{jobs := Jobs}}, Acc) -> {Pids, _} = lists:unzip(Jobs), Pids ++ Acc end, 164 | [], Samples)), 165 | 166 | %% replace atom 'jobs' with list of Jobs. This is effectively lists:keyreplace, but with no key 167 | NewHeader = [if F =:= jobs -> Jobs; true -> F end || F <- Fields], 168 | 169 | %% format specific fields of samples 170 | Formatted = [ 171 | [formatter(F, if is_list(F) -> maps:get(jobs, Sample); true -> maps:get(F, Sample) end) || F <- NewHeader] 172 | || {_Time, Sample} <- Samples], 173 | 174 | NewLine = io_lib:nl(), 175 | BinNl = list_to_binary(NewLine), 176 | 177 | %% check if header has changed and print if it has 178 | NewHeader =/= Header andalso 179 | begin 180 | FmtHdr = [header(S) || S <- NewHeader] ++ [BinNl], 181 | ok = file:write(IoDevice, FmtHdr) 182 | end, 183 | 184 | %% print the actual line 185 | Data = [F ++ NewLine || F <- Formatted], 186 | ok = file:write(IoDevice, Data), 187 | {{fd, IoDevice}, NewHeader}. 188 | 189 | header(time) -> <<" date time TZ ">>; 190 | header(sched_util) -> <<" %sched">>; 191 | header(dcpu) -> <<" %dcpu">>; 192 | header(dio) -> <<" %dio">>; 193 | header(processes) -> <<" procs">>; 194 | header(ports) -> <<" ports">>; 195 | header(ets) -> <<" ets">>; 196 | header(memory_total) -> <<" mem_total">>; 197 | header(memory_processes) -> <<" mem_proc">>; 198 | header(memory_binary) -> <<" mem_bin">>; 199 | header(memory_ets) -> <<" mem_ets">>; 200 | header(Jobs) when is_list(Jobs) -> 201 | iolist_to_binary([io_lib:format("~16s", [pid_to_list(Pid)]) || Pid <- Jobs]); 202 | header(node) -> <<"node ">>. 
203 | 
204 | formatter(time, Time) ->
205 |     calendar:system_time_to_rfc3339(Time div 1000) ++ " ";
206 | formatter(Percent, Num) when Percent =:= sched_util; Percent =:= dcpu; Percent =:= dio ->
207 |     io_lib:format("~7.2f", [Num]);
208 | formatter(Number, Num) when Number =:= processes; Number =:= ports ->
209 |     io_lib:format("~8b", [Num]);
210 | formatter(ets, Num) ->
211 |     io_lib:format("~6b", [Num]);
212 | formatter(Size, Num) when Size =:= memory_total; Size =:= memory_processes; Size =:= memory_binary; Size =:= memory_ets ->
213 |     io_lib:format("~10s", [erlperf_file_log:format_size(Num)]);
214 | formatter(Jobs, JobsInSample) when is_list(Jobs) ->
215 |     %% here, all Jobs must be formatted, potentially as blanks (if they are not in JobsInSample)
216 |     [case lists:keyfind(Job, 1, JobsInSample) of
217 |          {Job, Num} -> io_lib:format("~16s", [erlperf_file_log:format_number(Num)]);
218 |          false -> "                " end
219 |     || Job <- Jobs];
220 | formatter(node, Node) ->
221 |     io_lib:format("~*s", [-22, Node]).
222 | 
--------------------------------------------------------------------------------
/src/erlperf_file_log.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Prints monitoring reports produced by {@link erlperf_monitor} to a file
 4 | %%% or an output device.
 5 | %%%
 6 | %%% When the server starts up, it joins the `{erlperf_monitor, Node}' process group
 7 | %%% in the `erlperf' scope. If {@link erlperf_monitor} is also running on
 8 | %%% the same node, reports are printed to the specified device or file.
 9 | %%%
10 | %%% See {@link erlperf_monitor} for a description of the monitoring report.
11 | %%%
12 | %%% `erlperf' leverages this service for verbose output during benchmarking.
13 | %%% @end
14 | -module(erlperf_file_log).
15 | -author("maximfca@gmail.com").
16 | 
17 | -behaviour(gen_server).
18 | 
19 | %% API
20 | -export([
21 |     start_link/0,
22 |     start_link/1,
23 |     %% leaky API...
24 |     format_number/1,
25 |     format_size/1,
26 |     format_duration/1
27 | ]).
28 | 
29 | %% gen_server callbacks
30 | -export([
31 |     init/1,
32 |     handle_call/3,
33 |     handle_cast/2,
34 |     handle_info/2
35 | ]).
36 | 
37 | %% @equiv start_link(erlang:group_leader())
38 | -spec start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}.
39 | start_link() ->
40 |     start_link(erlang:group_leader()).
41 | 
42 | %% @doc
43 | %% Starts the file log process.
44 | -spec start_link(Filename :: string() | file:io_device()) -> {ok, Pid :: pid()} | {error, Reason :: term()}.
45 | start_link(Filename) ->
46 |     gen_server:start_link(?MODULE, [Filename], []).
47 | 
48 | %%%===================================================================
49 | %%% gen_server callbacks
50 | 
51 | %% Repeat the header every 30 lines (by default)
52 | -define(LOG_REPEAT_HEADER, 30).
53 | 
54 | %% System monitor state
55 | -record(state, {
56 |     % file logger counter
57 |     log_counter = ?LOG_REPEAT_HEADER :: non_neg_integer(),
58 |     % when to print the header once again
59 |     log_limit = ?LOG_REPEAT_HEADER :: pos_integer(),
60 |     % file descriptor
61 |     log_file :: file:io_device(),
62 |     % current format line
63 |     format = "" :: string(),
64 |     % saved list of job IDs executed previously
65 |     jobs = [] :: [pid()]
66 | }).
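A minimal usage sketch (assuming the `erlperf' pg scope and {@link erlperf_monitor} are already running, as the verbose-mode wrapper in erlperf_cli arranges; the file name is arbitrary):

```erlang
{ok, _Pg} = pg:start_link(erlperf),
{ok, _Mon} = erlperf_monitor:start_link(),
{ok, _Log} = erlperf_file_log:start_link("/tmp/erlperf.log"),
%% from here on, roughly one monitoring line per second is appended to the file
```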
67 | 68 | %% @private 69 | init([Target]) -> 70 | % subscribe to monitor events 71 | ok = pg:join(erlperf, {erlperf_monitor, node()}, self()), 72 | WriteTo = if is_list(Target) -> {ok, LogFile} = file:open(Target, [raw, append]), LogFile; true -> Target end, 73 | {ok, #state{log_file = WriteTo}}. 74 | 75 | %% @private 76 | handle_call(_Request, _From, _State) -> 77 | erlang:error(notsup). 78 | 79 | %% @private 80 | handle_cast(_Request, _State) -> 81 | erlang:error(notsup). 82 | 83 | %% @private 84 | handle_info(#{jobs := Jobs, time := Time, sched_util := SchedUtil, dcpu := DCPU, dio := DIO, processes := Processes, 85 | ports := Ports, ets := Ets, memory_total := MemoryTotal, memory_processes := MemoryProcesses, 86 | memory_binary := MemoryBinary, memory_ets := MemoryEts}, #state{log_file = File} = State) -> 87 | {JobIds, Ts} = lists:unzip(Jobs), 88 | State1 = maybe_write_header(JobIds, State), 89 | % actual line 90 | TimeFormat = calendar:system_time_to_rfc3339(Time div 1000), 91 | Formatted = iolist_to_binary(io_lib:format(State1#state.format, [ 92 | TimeFormat, SchedUtil, DCPU, DIO, Processes, 93 | Ports, Ets, 94 | format_size(MemoryTotal), 95 | format_size(MemoryProcesses), 96 | format_size(MemoryBinary), 97 | format_size(MemoryEts) 98 | ] ++ [format_number(T) || T <- Ts])), 99 | ok = file:write(File, Formatted), 100 | {noreply, State1}. 101 | 102 | %%%=================================================================== 103 | %%% Internal functions 104 | 105 | maybe_write_header(Jobs, #state{log_counter = LC, log_limit = LL, jobs = Prev} = State) when LC >= LL; Jobs =/= Prev -> 106 | State#state{format = write_header(State#state.log_file, Jobs), log_counter = 0, jobs = Jobs}; 107 | maybe_write_header(_, State) -> 108 | State#state{log_counter = State#state.log_counter + 1}. 109 | 110 | write_header(File, Jobs) -> 111 | JobCount = length(Jobs), 112 | Format = "~s ~6.2f ~6.2f ~6.2f ~8b ~8b ~7b ~9s ~9s ~9s ~9s" ++ 113 | lists:concat(lists:duplicate(JobCount, "~13s")) ++ "~n", 114 | JobIds = list_to_binary(lists:flatten([io_lib:format(" ~12s", [pid_to_list(J)]) || J <- Jobs])), 115 | Header = <<"\nYYYY-MM-DDTHH:MM:SS-oo:oo Sched DCPU DIO Procs Ports ETS Mem Total Mem Proc Mem Bin Mem ETS", JobIds/binary, "\n">>, 116 | ok = file:write(File, Header), 117 | Format. 118 | 119 | %% @private 120 | %% @doc Formats size (bytes) rounded to 3 digits. 121 | %% Unlike @see format_number, used 1024 as a base, 122 | %% so 200 * 1024 is 200 Kb. 123 | -spec format_size(non_neg_integer()) -> string(). 124 | format_size(Num) when Num > 1024*1024*1024*1024 * 100 -> 125 | integer_to_list(round(Num / (1024*1024*1024*1024))) ++ " Tb"; 126 | format_size(Num) when Num > 1024*1024*1024 * 100 -> 127 | integer_to_list(round(Num / (1024*1024*1024))) ++ " Gb"; 128 | format_size(Num) when Num > 1024*1024*100 -> 129 | integer_to_list(round(Num / (1024 * 1024))) ++ " Mb"; 130 | format_size(Num) when Num > 1024 * 100 -> 131 | integer_to_list(round(Num / 1024)) ++ " Kb"; 132 | format_size(Num) -> 133 | integer_to_list(Num). 134 | 135 | %% @private 136 | %% @doc Formats number rounded to 3 digits. 137 | %% Example: 88 -> 88, 880000 -> 880 Ki, 100501 -> 101 Ki 138 | -spec format_number(non_neg_integer()) -> string(). 
139 | format_number(Num) when Num > 100000000000000 -> 140 | integer_to_list(round(Num / 1000000000000)) ++ " Ti"; 141 | format_number(Num) when Num > 100000000000 -> 142 | integer_to_list(round(Num / 1000000000)) ++ " Gi"; 143 | format_number(Num) when Num > 100000000 -> 144 | integer_to_list(round(Num / 1000000)) ++ " Mi"; 145 | format_number(Num) when Num > 100000 -> 146 | integer_to_list(round(Num / 1000)) ++ " Ki"; 147 | format_number(Num) -> 148 | integer_to_list(Num). 149 | 150 | %% @private 151 | %% @doc Formats time duration, from nanoseconds to seconds 152 | %% Example: 88 -> 88 ns, 88000 -> 88 us, 10000000 -> 10 ms 153 | -spec format_duration(non_neg_integer() | infinity) -> string(). 154 | format_duration(infinity) -> 155 | "inf"; 156 | format_duration(Num) when Num > 6000000000000 -> 157 | integer_to_list(round(Num / 60000000000)) ++ " m"; 158 | format_duration(Num) when Num > 100000000000 -> 159 | integer_to_list(round(Num / 1000000000)) ++ " s"; 160 | format_duration(Num) when Num > 100000000 -> 161 | integer_to_list(round(Num / 1000000)) ++ " ms"; 162 | format_duration(Num) when Num > 100000 -> 163 | integer_to_list(round(Num / 1000)) ++ " us"; 164 | format_duration(Num) -> 165 | integer_to_list(Num) ++ " ns". -------------------------------------------------------------------------------- /src/erlperf_history.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Collects, accumulates & filters cluster-wide monitoring events. 4 | %%% Essentially a simple in-memory database for quick cluster overview. 5 | %%% 6 | %%% History server helps to collect monitoring reports from multiple 7 | %%% nodes of a single Erlang cluster. Example setup: single primary 8 | %%% node running `erlperf_history' and {@link erlperf_cluster_monitor} 9 | %%% listens to reports sent by several more nodes in a cluster, running 10 | %%% continuous benchmarking jobs. Nodes may run the same Erlang code, 11 | %%% but using different hardware or OS version. Or, conversely, same 12 | %%% hardware and OS, but variants of Erlang code. See {@link erlperf_cluster_monitor} 13 | %%% for a code sample. 14 | %%% 15 | %%% 16 | %%% @end 17 | -module(erlperf_history). 18 | -author("maximfca@gmail.com"). 19 | 20 | -behaviour(gen_server). 21 | 22 | %% API 23 | -export([ 24 | start_link/0, 25 | start_link/1, 26 | get/1, 27 | get/2 28 | ]). 29 | 30 | %% gen_server callbacks 31 | -export([ 32 | init/1, 33 | handle_call/3, 34 | handle_cast/2, 35 | handle_info/2 36 | ]). 37 | 38 | -define(TABLE, ?MODULE). 39 | 40 | %% default: keep history for 120 seconds 41 | -define(DEFAULT_HISTORY_DURATION, 120000). 42 | 43 | %% @equiv start_link(120_000) 44 | -spec(start_link() -> 45 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 46 | start_link() -> 47 | start_link(?DEFAULT_HISTORY_DURATION). 48 | 49 | %% @doc 50 | %% Starts the history server and links it to the calling process. 51 | %% 52 | %% Designed for use as a part of a supervision tree. 53 | %% `Duration' is time (in milliseconds), how long to keep the 54 | %% reports for. Older reports are discarded. 55 | -spec(start_link(Duration :: pos_integer()) -> 56 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 57 | start_link(Duration) -> 58 | gen_server:start_link({local, ?MODULE}, ?MODULE, [Duration], []). 59 | 60 | %% @doc 61 | %% Returns cluster history. 62 | %% 63 | %% Returns all reports since `From' timestamp to now, sorted by timestamp. 
64 | %% `From' is wall clock time, in milliseconds (e.g. `os:system_time(millisecond)'). 65 | -spec get(From :: integer()) -> [{Time :: non_neg_integer(), erlperf_monitor:monitor_sample()}]. 66 | get(From) -> 67 | get(From, os:system_time(millisecond)). 68 | 69 | %% @doc 70 | %% Returns cluster history reports between From and To (inclusive). 71 | %% 72 | %% `From' and `To' are wall clock time, in milliseconds (e.g. `os:system_time(millisecond)'). 73 | -spec get(From :: integer(), To :: integer()) -> [{Time :: non_neg_integer(), erlperf_monitor:monitor_sample()}]. 74 | get(From, To) -> 75 | % ets:fun2ms(fun ({{T, _}, _} = R) when T =< To, T >= From -> {T, R} end). 76 | ets:select(?TABLE, [{{{'$1', '_'}, '$2'},[{'=<', '$1', To}, {'>=', '$1', From}], [{{'$1', '$2'}}]}]). 77 | 78 | %%=================================================================== 79 | %% gen_server implementation 80 | 81 | %% Keep an ordered set of samples (node, sample) ordered by time. 82 | -record(state, { 83 | duration :: pos_integer() 84 | }). 85 | 86 | %% @private 87 | init([Duration]) -> 88 | ok = pg:join(erlperf, cluster_monitor, self()), 89 | ?TABLE = ets:new(?TABLE, [protected, ordered_set, named_table, {write_concurrency, true}]), 90 | {ok, #state{duration = Duration}}. 91 | 92 | %% @private 93 | handle_call(_Request, _From, _State) -> 94 | erlang:error(notsup). 95 | 96 | %% @private 97 | handle_cast(_Request, _State) -> 98 | erlang:error(notsup). 99 | 100 | %% @private 101 | handle_info(#{time := Time, node := Node} = Sample, State) -> 102 | ets:insert(?TABLE, {{Time, Node}, Sample}), 103 | {noreply, maybe_clean(State)}. 104 | 105 | %% =================================================================== 106 | %% Internal functions 107 | 108 | maybe_clean(#state{duration = Duration} =State) -> 109 | Expired = os:system_time(millisecond) - Duration, 110 | %% ets:fun2ms(fun ({{T, _}, _}) -> T =< Expired end). 111 | ets:select_delete(?TABLE, [{{{'$1', '_'},'_'},[{'=<','$1', Expired}],[true]}]), 112 | State. -------------------------------------------------------------------------------- /src/erlperf_job.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Job is an instance of a benchmark. 4 | %%% 5 | %%% Every job has a corresponding temporary Erlang module generated. Use 6 | %%% {@link source/1} to get the source code of the generated module. 7 | %%% The structure of this code is an implementation detail and may change 8 | %%% between releases. 9 | %%% 10 | %%% Job controls how many workers are executing runner code in 11 | %%% a tight loop. It does not restart a failing worker, user must ensure 12 | %%% proper error handing and reporting. If a worker process crashes, 13 | %%% standard CRASH REPORT message is printed to the log (console). 14 | %%% 15 | %%% Job accepts a {@link code_map()} containing at least a runner 16 | %%% function definition. 17 | %%% 18 | %%% See {@link callable()} for accepted function definitions. 19 | %%% 20 | %%% Different callable forms have different performance overhead. Overhead can be measured 21 | %%% with {@link erlperf:compare/2}: 22 | %%% ```erlang 23 | %%% erlperf:compare([ 24 | %%% #{runner => fun (V) -> rand:mwc59(V) end, init_runner => {rand, mwc59_seed, []}}, 25 | %%% #{runner => "run(V) -> rand:mwc59(V).", init_runner => {rand, mwc59_seed, []}} 26 | %%% ], #{}). 
27 | %%% [4371541,131460130]
28 | %%% '''
29 | %%% In the example above, the callable defined as a `fun' is 30 times slower than the code compiled
30 | %%% from the source. The difference is caused by the Erlang Runtime implementation, where
31 | %%% indirect calls via `fun' are considerably more expensive. As a rule of thumb, source
32 | %%% code provides the smallest overhead, followed by MFA tuples.
33 | %%%
34 | %%% You can mix & match various definition styles. In the example below, `init/0'
35 | %%% starts an extra {@link pg} scope, `done/0' stops it, and `init_runner/1' takes
36 | %%% the total heap size of the `pg' scope controller to pass it to the `runner/1'.
37 | %%% ```erlang
38 | %%% erlperf_job:start_link(
39 | %%%     #{
40 | %%%         runner => "run(Max) -> rand:uniform(Max).",
41 | %%%         init => {pg, start_link, [scope]},
42 | %%%         init_runner =>
43 | %%%             fun ({ok, Pid}) ->
44 | %%%                 {total_heap_size, THS} = erlang:process_info(Pid, total_heap_size),
45 | %%%                 THS
46 | %%%             end,
47 | %%%         done => fun ({ok, Pid}) -> gen_server:stop(Pid) end
48 | %%%     }
49 | %%% ).
50 | %%% '''
51 | %%% Same example defined with just the source code:
52 | %%% ```erlang
53 | %%% erlperf_job:start_link(
54 | %%%     #{
55 | %%%         runner => "runner(Max) -> rand:uniform(Max).",
56 | %%%         init => "init() -> pg:start_link().",
57 | %%%         init_runner => "init_runner({ok, Pid}) ->
58 | %%%             {total_heap_size, THS} = erlang:process_info(Pid, total_heap_size),
59 | %%%             THS.",
60 | %%%         done => "done({ok, Pid}) -> gen_server:stop(Pid)."
61 | %%%     }
62 | %%% ).
63 | %%% '''
64 | %%%
65 | %%% <h2>Runner function</h2>
66 | %%% Runner function represents code that is run in the tight loop, counting iterations
67 | %%% aggregated across all workers. To give an example, benchmarking a function that takes
68 | %%% exactly a millisecond to execute, with 2 workers, for 2 seconds, will result in
69 | %%% 4000 iterations in total. This would be the value returned by {@link sample/1}.
70 | %%%
71 | %%%
72 | %%% Runner definition can accept zero, one or two arguments.
73 | %%%
74 | %%% `runner/0' ignores the value returned by init_runner/0,1.
75 | %%%
76 | %%% `runner/1' accepts the value returned by init_runner/0,1. It is an error to define
77 | %%% `runner/1' without `init_runner/0,1' defined. This example prints "0" in a
78 | %%% tight loop, measuring {@link io:format/2} performance:
79 | %%% ```erlang
80 | %%% #{
81 | %%%     runner => "run(Init) -> io:format(\"~b~n\", [Init]).",
82 | %%%     init_runner => "0."
83 | %%% }
84 | %%% '''
85 | %%%
86 | %%% `runner/2' adds a second argument, the accumulator, initially set to the
87 | %%% value returned by init_runner/0,1. Subsequent invocations receive the
88 | %%% value returned by the previous runner invocation. Example:
89 | %%% ```erlang
90 | %%% #{
91 | %%%     runner => "run(Init, Acc) -> io:format(\"~b~n\", [Init + Acc]), Acc + 1.",
92 | %%%     init_runner => "0."
93 | %%% }
94 | %%% '''
95 | %%% Running this benchmark prints monotonically increasing numbers. This
96 | %%% may be useful to test stateful functions, for example, fast Random Number
97 | %%% Generators introduced in OTP 25:
98 | %%% ```bash
99 | %%% ./erlperf --init_runner 'rand:mwc59_seed().' 'run(_, Cur) -> rand:mwc59(Cur).'
100 | %%% Code                                    ||        QPS       Time
101 | %%% run(_, Cur) -> rand:mwc59(Cur).          1     123 Mi       8 ns
102 | %%% '''
103 | %%%
104 | %%%
105 | %%% <h2>Common Test usage</h2>
106 | %%%
107 | %%% Example using `erlperf_job' directly, as part of a Common Test test case:
108 | %%% ```erlang
109 | %%% benchmark_rand(Config) when is_list(Config) ->
110 | %%%     %% run timer:sleep(1000) for 5 seconds with 4 runners
111 | %%%     {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [1000]}}),
112 | %%%     Handle = erlperf_job:handle(Job),
113 | %%%     ok = erlperf_job:set_concurrency(Job, 4), %% 4 runner instances
114 | %%%     InitialIterations = erlperf_job:sample(Handle),
115 | %%%     timer:sleep(5000),
116 | %%%     IterationsIn5Sec = erlperf_job:sample(Handle) - InitialIterations,
117 | %%%     erlperf_job:request_stop(Job), %% use gen:stop(Job) for a synchronous call
118 | %%%     %% expect at least 16 iterations (and up to 20)
119 | %%%     ?assert(IterationsIn5Sec >= 16, {too_slow, IterationsIn5Sec}),
120 | %%%     ?assert(IterationsIn5Sec =< 20, {too_fast, IterationsIn5Sec}).
121 | %%% '''
122 | %%%
123 | %%% @end
124 | -module(erlperf_job).
125 | -author("maximfca@gmail.com").
126 | 
127 | -behaviour(gen_server).
128 | 
129 | %% Job API
130 | -export([
131 |     start/1,
132 |     start_link/1,
133 |     request_stop/1,
134 |     concurrency/1,
135 |     set_concurrency/2,
136 |     measure/2,
137 |     sample/1,
138 |     handle/1,
139 |     source/1,
140 |     set_priority/2
141 | ]).
142 | 
143 | %% gen_server callbacks
144 | -export([
145 |     init/1,
146 |     handle_call/3,
147 |     handle_cast/2,
148 |     handle_info/2,
149 |     terminate/2
150 | ]).
151 | 
152 | %% MFArgs: module, function, arguments.
153 | -type mfargs() :: {Module :: module(), Function :: atom(), Args :: [term()]}.
154 | %% `Module', `Function', `Args' accepted by {@link erlang:apply/3}.
155 | 
156 | %% Callable: one or more MFArgs, or a function object, or source code
157 | -type callable() ::
158 |     string() |
159 |     fun() |
160 |     fun((term()) -> term()) |
161 |     fun((term(), term()) -> term()) |
162 |     mfargs() |
163 |     [mfargs()].
164 | %% Function definition to use as a runner, init, done or init_runner.
165 | %%
166 | %% <ul>
167 | %% <ul><li>`string().' Erlang code ending with `.' (period). Example, zero arity:
168 | %% `"runner() -> timer:sleep(1)."', arity one: `"runner(T) -> timer:sleep(T)."',
169 | %% arity two: `"runner(Init, Acc) -> Acc + Init."'. It is allowed to omit the header
170 | %% for a zero arity function, so it becomes `"timer:sleep(1)."'</li>
171 | %% <li>`fun()' function accepting no arguments, example: `fun() -> timer:sleep(1000) end'</li>
172 | %% <li>`fun(term()) -> term()' function accepting one argument, example: `fun(Time) -> timer:sleep(Time) end'</li>
173 | %% <li>`fun(term(), term()) -> term()' function accepting two arguments, example: `fun(Init, Acc) -> Init + Acc end'</li>
174 | %% <li>`mfargs()' tuple accepted by {@link erlang:apply/3}.
175 | %% Example: `{rand, uniform, [10]}'</li>
176 | %% <li>`[mfargs()]' list of MFA tuples, example: `[{rand, uniform, [10]}]'.
177 | %% This functionality is experimental, and only used to replay a recorded call
178 | %% list. May not be supported in future releases.</li>
179 | %% </ul>
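%% As an illustration (not part of the original documentation), the same
%% benchmark can be expressed with several of the callable forms above;
%% a sketch using {@link start_link/1}:
%% ```erlang
%% erlperf_job:start_link(#{runner => "timer:sleep(1)."}),            %% source code string
%% erlperf_job:start_link(#{runner => fun() -> timer:sleep(1) end}),  %% fun object
%% erlperf_job:start_link(#{runner => {timer, sleep, [1]}}).          %% mfargs() tuple
%% '''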
180 | 181 | 182 | %% Benchmark code: init, init_runner, runner, done. 183 | -type code_map() :: #{ 184 | runner := callable(), 185 | init => callable(), 186 | init_runner => callable(), 187 | done => callable(), 188 | label => iodata() 189 | }. 190 | %% Code map contains definitions for: 191 | %% 192 | %%
193 | %% <ul><li>`init/0' - called once when starting the job for the first time.
194 | %% The call is made in the context of the job controller. It is
195 | %% guaranteed to run through the entire benchmark job. So if your
196 | %% benchmark needs to create additional resources - ETS tables, or
197 | %% linked processes, like extra {@link pg} scopes, - init/0 is a
198 | %% good choice. If init/0 fails, the entire job startup fails</li>
199 | %% <li>`init_runner/0,1' - called when the job starts a new worker. init_runner/1
200 | %% accepts the value returned by init/0. It is an error to omit init/0
201 | %% if init_runner/1 is defined. It is allowed to have init_runner/0
202 | %% when init/0 exists. The call to init_runner is made in the context of the
203 | %% worker process, so you can initialise process-local values (e.g. the
204 | %% process dictionary)</li>
205 | %% <li>`runner/0,1,2' defines the function that will be called in a tight loop.
206 | %% See the Runner Function section above for
207 | %% an overview of the runner function variants</li>
208 | %% <li>`done/0,1' - called when the job terminates, to clean up any resources
209 | %% that are not destroyed automatically. done/1 accepts the return value of init/0.
210 | %% The call is made in the context of the job controller</li>
211 | %% <li>`label' - runner label displayed in reports.
212 | %% By default, the runner code is converted to a string</li>
213 | %% </ul>
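%% A sketch (illustration only) of a code map exercising every field;
%% `start_my_server/0' is a hypothetical helper that returns a pid:
%% ```erlang
%% #{
%%     init => fun() -> start_my_server() end,               %% once per job
%%     init_runner => fun(Pid) -> Pid end,                   %% once per worker
%%     runner => fun(Pid) -> gen_server:call(Pid, ping) end, %% tight loop
%%     done => fun(Pid) -> gen_server:stop(Pid) end,         %% cleanup
%%     label => "server ping"
%% }
%% '''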
214 | 215 | %% Internal (opaque) type, please do not use 216 | -opaque handle() :: {module(), non_neg_integer()}. 217 | 218 | %% Temporary type until OTP25+ is everywhere, and OTP <25 support is no longer needed 219 | -type server_ref() :: gen_server:server_ref(). 220 | 221 | -export_type([mfargs/0, handle/0, callable/0, code_map/0]). 222 | 223 | %% @doc 224 | %% Starts the benchmark job. 225 | %% 226 | %% Job starts with no workers, use {@link set_concurrency/2} to start workers. 227 | -spec start(code_map()) -> {ok, pid()} | {error, term()}. 228 | start(#{runner := _MustHave} = Code) -> 229 | gen_server:start(?MODULE, generate(Code), []). 230 | 231 | %% @doc 232 | %% Starts the job and links it to the caller. 233 | %% 234 | %% Job starts with no workers, use {@link set_concurrency/2} to start workers. 235 | -spec start_link(code_map()) -> {ok, pid()} | {error, term()}. 236 | start_link(#{runner := _MustHave} = Code) -> 237 | gen_server:start_link(?MODULE, generate(Code), []). 238 | 239 | %% @doc 240 | %% Requests this job to stop. 241 | %% 242 | %% The job is stopped asynchronously. The caller should monitor the job process 243 | %% to find out when the job has actually stopped. 244 | -spec request_stop(server_ref()) -> ok. 245 | request_stop(JobId) -> 246 | gen_server:cast(JobId, stop). 247 | 248 | %% @doc 249 | %% Returns the number of concurrently running workers for this job. 250 | %% 251 | %% This number may be lower than the amount requested by {@link set_concurrency/2} 252 | %% if workers crash. 253 | -spec concurrency(server_ref()) -> Concurrency :: non_neg_integer(). 254 | concurrency(JobId) -> 255 | gen_server:call(JobId, concurrency). 256 | 257 | %% @doc 258 | %% Sets the number of concurrently running workers for this job. 259 | %% 260 | %% Does not reset counting. May never return if init_runner 261 | %% hangs and does not return control to the job. 262 | %% `Concurrency': number of processes to run. It can be higher than 263 | %% the current count (making the job start more workers), or 264 | %% lower, making the job stop some. 265 | %% 266 | %% Workers that crash are not restarted automatically. 267 | -spec set_concurrency(server_ref(), non_neg_integer()) -> ok. 268 | set_concurrency(JobId, Concurrency) -> 269 | gen_server:call(JobId, {set_concurrency, Concurrency}, infinity). 270 | 271 | %% @doc 272 | %% Runs the timed mode benchmark for a job, similar to {@link timer:tc/3}. 273 | %% 274 | %% Executes the runner `SampleCount' times. Returns time in microseconds. 275 | %% Has less overhead compared to continuous benchmarking, therefore can 276 | %% be used even for very fast functions. 277 | -spec measure(server_ref(), SampleCount :: non_neg_integer()) -> 278 | TimeUs :: non_neg_integer() | already_started. 279 | measure(JobId, SampleCount) -> 280 | gen_server:call(JobId, {measure, SampleCount}, infinity). 281 | 282 | %% @doc 283 | %% Returns the sampling handle for the job. 284 | %% 285 | %% The returned value is opaque, and is an implementation detail, 286 | %% do not use it in any capacity other than passing to {@link sample/1}. 287 | -spec handle(server_ref()) -> handle(). 288 | handle(JobId) -> 289 | gen_server:call(JobId, handle). 290 | 291 | %% @doc 292 | %% Returns the current iteration counter. 293 | %% 294 | %% The iteration counter (sample) monotonically grows by 1 295 | %% every time the runner function is called (without waiting 296 | %% for it to return, so a function that unconditionally crashes 297 | %% still generates a counter of 1). 
298 | -spec sample(Handle :: handle()) -> non_neg_integer() | undefined. 299 | sample({Module, Arity}) -> 300 | {call_count, Count} = erlang:trace_info({Module, Module, Arity}, call_count), 301 | Count. 302 | 303 | %%% Internal, not exported, record. 304 | -record(exec, { 305 | name :: atom(), %% generated module name (must be generated for tracing to work) 306 | source :: [string()], %% module source code 307 | binary :: binary(), %% generated bytecode 308 | init :: fun(() -> term()), %% init function 309 | init_runner :: fun((term()) -> term()), %% must accept 1 argument 310 | runner :: {fun((term()) -> term()), non_neg_integer()}, 311 | sample_runner :: {fun((non_neg_integer(), term()) -> term()), non_neg_integer()}, 312 | done :: fun((term()) -> term()) %% must accept 1 argument 313 | }). 314 | 315 | -type exec() :: #exec{}. 316 | 317 | %% @doc 318 | %% Returns the source code generated from the code map, or for a running job. 319 | -spec source(server_ref() | code_map()) -> [string()]. 320 | source(Code) when is_map(Code) -> 321 | #exec{source = Src} = generate(benchmark, Code), 322 | Src; 323 | source(JobId) -> 324 | gen_server:call(JobId, source). 325 | 326 | 327 | %% @doc 328 | %% Sets job process priority when there are workers running. 329 | %% 330 | %% Worker processes may utilise all schedulers, making the job 331 | %% process lose control over starting and stopping workers. 332 | %% By default, the job process sets 'high' priority when there are 333 | %% any workers running. 334 | %% Returns the previous setting. 335 | %% 336 | %% This function must be called before {@link set_concurrency/2}, 337 | %% otherwise it has no effect until all workers are stopped, and 338 | %% then restarted. 339 | -spec set_priority(server_ref(), erlang:priority_level()) -> erlang:priority_level(). 340 | set_priority(JobId, Priority) -> 341 | gen_server:call(JobId, {priority, Priority}). 342 | 343 | %%-------------------------------------------------------------------- 344 | %% Internal definitions 345 | 346 | -include_lib("kernel/include/logger.hrl"). 347 | 348 | -record(erlperf_job_state, { 349 | %% original spec 350 | exec :: exec(), 351 | %% return value of init/0 352 | init_result :: term(), 353 | %% continuous workers 354 | workers = [] :: [pid()], 355 | %% temporary workers (for sample_count call) 356 | sample_workers = #{} :: #{pid() => {pid(), reference()}}, 357 | %% priority to return to when no workers left 358 | initial_priority :: erlang:priority_level(), 359 | %% priority to set when workers are running 360 | priority = high :: erlang:priority_level() 361 | }). 362 | 363 | -type state() :: #erlperf_job_state{}. 
364 | 365 | %%%=================================================================== 366 | %%% gen_server callbacks 367 | 368 | %% @private 369 | init(#exec{name = Mod, binary = Bin, init = Init, runner = {_Fun, Arity}} = Exec) -> 370 | %% need to trap exits to avoid crashing and not cleaning up the loaded module 371 | erlang:process_flag(trap_exit, true), 372 | {module, Mod} = code:load_binary(Mod, Mod, Bin), 373 | %% run the init/0 if defined 374 | InitRet = 375 | try Init() 376 | catch 377 | Class:Reason:Stack -> 378 | %% clean up loaded module before crashing 379 | code:purge(Mod), 380 | code:delete(Mod), 381 | erlang:raise(Class, Reason, Stack) 382 | end, 383 | %% register in the monitor 384 | ok = erlperf_monitor:register(self(), {Mod, Arity}, 0), 385 | %% start tracing this module runner function 386 | 1 = erlang:trace_pattern({Mod, Mod, Arity}, true, [local, call_count]), 387 | {priority, Prio} = erlang:process_info(self(), priority), 388 | {ok, #erlperf_job_state{exec = Exec, init_result = InitRet, initial_priority = Prio}}. 389 | 390 | %% @private 391 | -spec handle_call(term(), {pid(), reference()}, state()) -> {reply, term(), state()}. 392 | handle_call(handle, _From, #erlperf_job_state{exec = #exec{name = Name, runner = {_Fun, Arity}}} = State) -> 393 | {reply, {Name, Arity}, State}; 394 | 395 | handle_call(concurrency, _From, #erlperf_job_state{workers = Workers} = State) -> 396 | {reply, length(Workers), State}; 397 | 398 | handle_call({measure, SampleCount}, From, #erlperf_job_state{sample_workers = SampleWorkers, 399 | exec = #exec{init_runner = InitRunner, sample_runner = SampleRunner}, 400 | init_result = IR} = State) when SampleWorkers =:= #{} -> 401 | {noreply, State#erlperf_job_state{sample_workers = 402 | start_sample_count(SampleCount, From, InitRunner, IR, SampleRunner)}}; 403 | 404 | handle_call({measure, _SampleCount}, _From, #erlperf_job_state{} = State) -> 405 | {reply, already_started, State}; 406 | 407 | handle_call(source, _From, #erlperf_job_state{exec = #exec{source = Source}} = State) -> 408 | {reply, Source, State}; 409 | 410 | handle_call({priority, Prio}, _From, #erlperf_job_state{priority = Old} = State) -> 411 | {reply, Old, State#erlperf_job_state{priority = Prio}}; 412 | 413 | handle_call({set_concurrency, Concurrency}, _From, #erlperf_job_state{workers = Workers} = State) -> 414 | {reply, ok, State#erlperf_job_state{workers = set_concurrency_impl(length(Workers), Concurrency, State)}}. 415 | 416 | %% @private 417 | handle_cast(stop, State) -> 418 | {stop, normal, State}. 419 | 420 | %% @private 421 | -spec handle_info(term(), state()) -> {noreply, state()}. 422 | handle_info({'EXIT', SampleWorker, Reason}, 423 | #erlperf_job_state{sample_workers = SampleWorkers} = State) when is_map_key(SampleWorker, SampleWorkers) -> 424 | {ReplyTo, MoreSW} = maps:take(SampleWorker, SampleWorkers), 425 | gen:reply(ReplyTo, Reason), 426 | {noreply, State#erlperf_job_state{sample_workers = MoreSW}}; 427 | 428 | handle_info({'EXIT', Worker, Reason}, #erlperf_job_state{workers = Workers} = State) when Reason =:= shutdown -> 429 | {noreply, State#erlperf_job_state{workers = lists:delete(Worker, Workers)}}; 430 | handle_info({'EXIT', Worker, Reason}, #erlperf_job_state{workers = Workers} = State) -> 431 | {stop, Reason, State#erlperf_job_state{workers = lists:delete(Worker, Workers)}}. 432 | 433 | %% @private 434 | -spec terminate(term(), state()) -> ok. 
435 | terminate(_Reason, #erlperf_job_state{init_result = IR, workers = Workers, exec = #exec{name = Mod, done = Done}} = State) -> 436 | %% terminate all workers first 437 | set_concurrency_impl(length(Workers), 0, State), 438 | %% call "done" for cleanup 439 | try Done(IR) 440 | catch 441 | Class:Reason:Stack -> 442 | %% duly note, but do not crash, it is pointless at this moment 443 | ?LOG_ERROR("Exception while executing 'done': ~s:~0p~n~0p", [Class, Reason, Stack]) 444 | after 445 | _ = code:purge(Mod), 446 | true = code:delete(Mod) 447 | end. 448 | 449 | %%%=================================================================== 450 | %%% Internal: runner implementation 451 | 452 | %% Single run 453 | start_sample_count(SampleCount, ReplyTo, InitRunner, InitRet, {SampleRunner, _}) -> 454 | Child = erlang:spawn_link( 455 | fun() -> 456 | %% no extra messaging needed here: the job traps exits, so the 'EXIT' reason delivers either the measured time or the init_runner/runner failure 457 | IRR = InitRunner(InitRet), 458 | T1 = erlang:monotonic_time(), 459 | SampleRunner(SampleCount, IRR), 460 | T2 = erlang:monotonic_time(), 461 | Time = erlang:convert_time_unit(T2 - T1, native, microsecond), 462 | exit(Time) 463 | end 464 | ), 465 | #{Child => ReplyTo}. 466 | 467 | set_concurrency_impl(OldConcurrency, Concurrency, #erlperf_job_state{workers = Workers, init_result = IR, exec = Exec, 468 | priority = Prio, initial_priority = InitialPrio}) -> 469 | case Concurrency - OldConcurrency of 470 | 0 -> 471 | Workers; 472 | NeedMore when NeedMore > 0 -> 473 | %% this process must run with higher priority to avoid being de-scheduled by runners 474 | OldConcurrency =:= 0 andalso erlang:process_flag(priority, Prio), 475 | Workers ++ add_workers(NeedMore, Exec, IR, []); 476 | NeedLess -> 477 | {Fire, Keep} = lists:split(-NeedLess, Workers), 478 | stop_workers(Fire), 479 | Keep =:= [] andalso erlang:process_flag(priority, InitialPrio), 480 | Keep 481 | end. 482 | 483 | add_workers(0, _ExecMap, _InitRet, NewWorkers) -> 484 | %% ensure all new workers completed their InitRunner routine 485 | [receive {Worker, init_runner} -> ok end || Worker <- NewWorkers], 486 | [Worker ! go || Worker <- NewWorkers], 487 | NewWorkers; 488 | add_workers(More, #exec{init_runner = InitRunner, runner = {Runner, _RunnerArity}} = Exec, InitRet, NewWorkers) -> 489 | Control = self(), 490 | %% spawn all processes, and then wait until they complete InitRunner 491 | Worker = erlang:spawn_link( 492 | fun () -> 493 | %% need to send a message even if init_runner fails, hence 'after' 494 | IRR = try InitRunner(InitRet) after Control ! {self(), init_runner} end, 495 | receive go -> ok end, 496 | Runner(IRR) 497 | end), 498 | add_workers(More - 1, Exec, InitRet, [Worker | NewWorkers]). 499 | 500 | stop_workers(Workers) -> 501 | %% try to stop concurrently 502 | [exit(Worker, kill) || Worker <- Workers], 503 | [receive {'EXIT', Worker, _Reason} -> ok end || Worker <- Workers]. 504 | 505 | %%%=================================================================== 506 | %%% Internal: code generation 507 | 508 | %% @doc Creates an Erlang module (text) based on the code map passed. 509 | %% Returns module name (may be generated), runner arity (for tracing purposes), 510 | %% and module source code (text). 511 | %% Exception: raises error with Reason = {generate, {FunName, Arity, ...}} 512 | %% 513 | %% Important: early erlperf versions were generating AST (forms) instead 514 | %% of source code, which isn't exactly supported - the AST is an internal 515 | %% representation that can change over time. 516 | -spec generate(code_map()) -> exec(). 
517 | generate(Code) -> 518 | Name = list_to_atom(lists:concat(["job_", os:getpid(), "_", erlang:unique_integer([positive])])), 519 | generate(Name, Code). 520 | 521 | generate(Name, #{runner := Runner} = Code) -> 522 | {InitFun, InitArity, InitExport, InitText} = generate_init(Name, maps:get(init, Code, error)), 523 | {IRFun, IRArity, IRExport, IRText} = generate_one(Name, init_runner, maps:get(init_runner, Code, error)), 524 | {DoneFun, DoneArity, DoneExport, DoneText} = generate_one(Name, done, maps:get(done, Code, error)), 525 | 526 | %% Separator: newline (as produced by ~n) 527 | Sep = io_lib:format("~n", []), 528 | 529 | %% RunnerArity: how many arguments the _original_ runner wants to accept. 530 | %% Example: run(State) is 1, and run() is 0. 531 | %% Pass two function names: one for sample_count mode, and one for continuous mode 532 | ContName = atom_to_list(Name), 533 | SampleCountName = list_to_atom(ContName ++ "_finite"), 534 | {RunnerFun, SampleRunnerFun, RunnerArity, RunArity, RunnerText} = generate_runner(Name, SampleCountName, Runner, Sep), 535 | RunnerExports = [{Name, RunArity}, {SampleCountName, RunArity + 1}], 536 | 537 | %% verify compatibility between the 4 pieces of code 538 | %% 1. done/1 requires init/0 return value 539 | DoneArity =:= 1 andalso InitArity =:= undefined andalso erlang:error({generate, {done, 1, requires, init}}), 540 | %% 2. init_runner/1 requires init/0 541 | IRArity =:= 1 andalso InitArity =:= undefined andalso erlang:error({generate, {init_runner, 1, requires, init}}), 542 | %% 3. runner/1,2 requires init_runner/0,1 543 | RunnerArity > 0 andalso IRArity =:= undefined andalso erlang:error({generate, {runner, RunnerArity, requires, init_runner}}), 544 | %% 4. runner/[3+] is not allowed 545 | RunnerArity > 2 andalso erlang:error({generate, {runner, RunnerArity, not_supported}}), 546 | %% 5. TODO: Verify there are no name clashes 547 | 548 | %% 549 | Exports = lists:concat(lists:join(", ", [io_lib:format("~s/~b", [F, Arity]) || {F, Arity} <- 550 | [InitExport, IRExport, DoneExport | RunnerExports], Arity =/= undefined])), 551 | 552 | Texts = [Text ++ Sep || Text <- [InitText, IRText, DoneText | RunnerText], Text =/= ""], 553 | 554 | Source = ["-module(" ++ atom_to_list(Name) ++ ")." ++ Sep, "-export([" ++ Exports ++ "])." ++ Sep | Texts], 555 | #exec{name = Name, binary = compile(Name, Source), init = InitFun, init_runner = IRFun, source = Source, 556 | runner = {RunnerFun, RunArity}, sample_runner = {SampleRunnerFun, RunArity}, done = DoneFun}. 557 | 558 | %% generates init/0 code 559 | generate_init(_Mod, Fun) when is_function(Fun, 0) -> 560 | {Fun, 0, {[], undefined}, ""}; 561 | generate_init(_Mod, {M, F, Args}) when is_atom(M), is_atom(F), is_list(Args) -> 562 | {fun () -> erlang:apply(M, F, Args) end, 0, {[], undefined}, ""}; 563 | generate_init(_Mod, [{M, F, Args} | _Tail] = MFAList) when is_atom(M), is_atom(F), is_list(Args) -> 564 | [erlang:error({generate, {init, 0, invalid}}) || 565 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 566 | {fun () -> [erlang:apply(M1, F1, A) || {M1, F1, A} <- MFAList] end, 0, {[], undefined}, ""}; 567 | generate_init(Mod, Text) when is_list(Text) -> 568 | case generate_text(init, Text, false) of 569 | {0, NewName, FullText} -> 570 | {fun () -> Mod:NewName() end, 0, {NewName, 0}, FullText}; 571 | {WrongArity, NewName, _} -> 572 | erlang:error({generate, {init, NewName, WrongArity}}) 573 | end; 574 | generate_init(_Mod, error) -> 575 | {fun () -> undefined end, undefined, undefined, ""}. 
576 | 577 | %% generates init_runner/1 or done/1 578 | generate_one(_Mod, _FunName, error) -> 579 | {fun (_Ignore) -> undefined end, undefined, {[], undefined}, ""}; 580 | generate_one(_Mod, _FunName, Fun) when is_function(Fun, 1) -> 581 | {Fun, 1, {[], undefined}, ""}; 582 | generate_one(_Mod, _FunName, Fun) when is_function(Fun, 0) -> 583 | {fun (_Ignore) -> Fun() end, 0, {[], undefined}, ""}; 584 | generate_one(_Mod, _FunName, {M, F, Args}) when is_atom(M), is_atom(F), is_list(Args) -> 585 | {fun (_Ignore) -> erlang:apply(M, F, Args) end, 0, {[], undefined}, ""}; 586 | generate_one(_Mod, FunName, [{M, F, Args} | _Tail] = MFAList) when is_atom(M), is_atom(F), is_list(Args) -> 587 | [erlang:error({generate, {FunName, 1, invalid, {M1, F1, A}}}) || 588 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 589 | {fun (_Ignore) -> [erlang:apply(M1, F1, A) || {M1, F1, A} <- MFAList] end, 0, {[], undefined}, ""}; 590 | generate_one(Mod, FunName, Text) when is_list(Text) -> 591 | case generate_text(FunName, Text, false) of 592 | {0, NewName, FullText} -> 593 | {fun (_Ignore) -> Mod:NewName() end, 0, {NewName, 0}, FullText}; 594 | {1, NewName, FullText} -> 595 | {fun (Arg) -> Mod:NewName(Arg) end, 1, {NewName, 1}, FullText}; 596 | {WrongArity, NewName, _} -> 597 | erlang:error({generate, {FunName, WrongArity, NewName}}) 598 | end. 599 | 600 | %% runner wrapper: 601 | %% Generates at least 2 functions, one for continuous, and one for 602 | %% sample-count benchmarking. 603 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 0) -> 604 | { 605 | fun (_Ignore) -> Mod:Mod(Fun) end, 606 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, Fun) end, 607 | 0, 1, 608 | [lists:concat([Mod, "(Fun) -> Fun(),", Sep, " ", Mod, "(Fun)."]), 609 | lists:concat([SampleCountName, "(0, _Fun) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Fun) ->", Sep, " Fun(),", 610 | Sep, " ", SampleCountName, "(Count - 1, Fun)."])] 611 | }; 612 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 1) -> 613 | { 614 | fun (Init) -> Mod:Mod(Init, Fun) end, 615 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Fun) end, 616 | 1, 2, 617 | [lists:concat([Mod, "(Init, Fun) ->", Sep, " Fun(Init),", Sep, " ", Mod, "(Init, Fun)."]), 618 | lists:concat([SampleCountName, "(0, _Init, _Fun) ->", Sep, " ok;", Sep, SampleCountName, 619 | "(Count, Init, Fun) ->", Sep, " Fun(Init),", Sep, " ", SampleCountName, "(Count - 1, Init, Fun)."])] 620 | }; 621 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 2) -> 622 | { 623 | fun (Init) -> Mod:Mod(Init, Init, Fun) end, 624 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Init, Fun) end, 625 | 2, 3, 626 | [lists:concat([Mod, "(Init, State, Fun) ->", Sep, " ", Mod, "(Init, Fun(Init, State), Fun)."]), 627 | lists:concat([SampleCountName, "(0, _Init, _State, _Fun) ->", Sep, " ok; ", SampleCountName, "(Count, Init, State, Fun) ->", 628 | Sep, " ", SampleCountName, "(Count - 1, Init, Fun(Init, State), Fun)."])] 629 | }; 630 | 631 | %% runner wrapper: MFA 632 | generate_runner(Mod, SampleCountName, {M, F, Args}, Sep) when is_atom(M), is_atom(F), is_list(Args) -> 633 | { 634 | fun (_Ignore) -> Mod:Mod(M, F, Args) end, 635 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, M, F, Args) end, 636 | 0, 3, 637 | [lists:concat([Mod, "(M, F, A) ->", Sep, " erlang:apply(M, F, A), ", Mod, "(M, F, A)."]), 638 | lists:concat([SampleCountName, "(0, _M, _F, _A) ->", 
Sep, " ok;", Sep, SampleCountName, 639 | "(Count, M, F, A) ->", Sep, " erlang:apply(M, F, A), ", Sep, " ", SampleCountName, "(Count - 1, M, F, A)."])] 640 | }; 641 | 642 | %% runner wrapper: MFAList 643 | generate_runner(Mod, SampleCountName, [{M, F, Args} | _Tail] = MFAList, Sep) when is_atom(M), is_atom(F), is_list(Args) -> 644 | [erlang:error({generate, {runner, 0, invalid, {M1, F1, A}}}) || 645 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 646 | { 647 | fun (_Ignore) -> Mod:Mod(MFAList) end, 648 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, MFAList) end, 649 | 0, 1, 650 | [lists:concat([Mod, "(MFAList) -> [erlang:apply(M, F, A) || {M, F, A} <- MFAList], ", Mod, "(MFAList)."]), 651 | lists:concat([SampleCountName, "(0, _MFAList) -> ", Sep, " ok;", SampleCountName, 652 | "(Count, MFAList) ->", Sep, " [erlang:apply(M, F, A) || {M, F, A} <- MFAList], ", 653 | SampleCountName, "(Count - 1, MFAList)."])] 654 | }; 655 | 656 | generate_runner(Mod, SampleCountName, Text, Sep) when is_list(Text) -> 657 | case generate_text(runner, Text, true) of 658 | {0, NoDotText} -> 659 | %% very special case: embedding the text directly, without creating a new function 660 | %% at all. 661 | { 662 | fun (_Ignore) -> Mod:Mod() end, 663 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount) end, 664 | 0, 0, 665 | [lists:concat([Mod, "() ->", Sep, " ", NoDotText, ",", Sep, " ", Mod, "()."]), 666 | lists:concat([SampleCountName, "(0) ->", Sep, " ok;", Sep, SampleCountName, "(Count) ->", 667 | Sep, " ", NoDotText, ",", Sep, " ", SampleCountName, "(Count - 1)."]), 668 | ""] 669 | }; 670 | {0, NewName, FullText} -> 671 | { 672 | fun (_Ignore) -> Mod:Mod() end, 673 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount) end, 674 | 0, 0, 675 | [lists:concat([Mod, "() ->", Sep, " ", NewName, "(),", Sep, " ", Mod, "()."]), 676 | lists:concat([SampleCountName, "(0) ->", Sep, " ok;", Sep, SampleCountName, "(Count) ->", 677 | Sep, " ", NewName, "(),", Sep, " ", SampleCountName, "(Count - 1)."]), 678 | FullText] 679 | }; 680 | {1, NewName, FullText} -> 681 | { 682 | fun (Init) -> Mod:Mod(Init) end, 683 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init) end, 684 | 1, 1, 685 | [lists:concat([Mod, "(Init) ->", Sep, " ", NewName, "(Init),", Sep, " ", Mod, "(Init)."]), 686 | lists:concat([SampleCountName, "(0, _Init) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Init) ->", 687 | Sep, " ", NewName, "(Init),", Sep, " ", SampleCountName, "(Count - 1, Init).", Sep]), 688 | FullText] 689 | }; 690 | {2, NewName, FullText} -> 691 | { 692 | fun (Init) -> Mod:Mod(Init, Init) end, 693 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Init) end, 694 | 2, 2, 695 | [lists:concat([Mod, "(Init, State) ->", Sep, " ", Mod, "(Init, ", NewName, "(Init, State))."]), 696 | lists:concat([SampleCountName, "(0, _Init, _State) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Init, State) -> ", 697 | SampleCountName, "(Count - 1, Init, ", NewName, "(Init, State))."]), 698 | FullText] 699 | } 700 | end; 701 | 702 | generate_runner(_Mod, _SampleCountName, Any, _Sep) -> 703 | erlang:error({generate, {parse, runner, Any}}). 
704 | 705 | %% generates function text 706 | generate_text(Name, Text, AllowRaw) when is_list(Text) -> 707 | case erl_scan:string(Text) of 708 | {ok, Scan, _} -> 709 | case erl_parse:parse_form(Scan) of 710 | {ok, {function, _, AnyName, Arity, _}} -> 711 | {Arity, AnyName, Text}; 712 | {error, _} -> 713 | % try if it's an expr 714 | case erl_parse:parse_exprs(Scan) of 715 | {ok, _Clauses} when AllowRaw -> 716 | {0, lists:droplast(Text)}; 717 | {ok, _Clauses} -> 718 | %% just wrap it in fun_name/0 719 | {0, Name, lists:concat([Name, "() -> ", Text])}; 720 | {error, {_Line, ParseMod, Es}} -> 721 | Errors = ParseMod:format_error(Es), 722 | erlang:error({generate, {parse, Name, Errors}}) 723 | end 724 | end; 725 | {error, ErrorInfo, ErrorLocation} -> 726 | error({generate, {scan, Name, ErrorInfo, ErrorLocation}}) 727 | end. 728 | 729 | %% @doc Compiles text string into a binary module ready for code loading. 730 | compile(Name, Lines) -> 731 | %% might not be the best way, but OTP simply does not have file:compile(Source, ...) 732 | %% Original design was to write the actual source file to temporary disk location, 733 | %% but for diskless or write-protected hosts it was less convenient. 734 | Tokens = [begin {ok, T, _} = erl_scan:string(Line), T end || Line <- Lines], 735 | Forms = [begin {ok, F} = erl_parse:parse_form(T), F end || T <- Tokens], 736 | 737 | case compile:forms(Forms, [no_spawn_compiler_process, binary, return]) of 738 | {ok, Name, Bin} -> 739 | Bin; 740 | {ok, Name, Bin, _Warnings} -> 741 | Bin; 742 | {error, Errors, Warnings} -> 743 | erlang:error({compile, Errors, Warnings}) 744 | end. 745 | -------------------------------------------------------------------------------- /src/erlperf_job_sup.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @private 3 | %%% Supervises statically started jobs. 4 | -module(erlperf_job_sup). 5 | -author("maximfca@gmail.com"). 6 | 7 | -behaviour(supervisor). 8 | 9 | -export([ 10 | start_link/0, 11 | init/1 12 | ]). 13 | 14 | -spec start_link() -> supervisor:startlink_ret(). 15 | start_link() -> 16 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 17 | 18 | -spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. 19 | init([]) -> 20 | {ok, { 21 | #{strategy => simple_one_for_one, 22 | intensity => 30, 23 | period => 60}, 24 | [ 25 | #{ 26 | id => erlperf_job, 27 | start => {erlperf_job, start_link, []}, 28 | modules => [erlperf_job] 29 | } 30 | ]}}. 31 | -------------------------------------------------------------------------------- /src/erlperf_monitor.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2019-2023, Maxim Fedorov 3 | %%% @doc 4 | %%% System monitor: reports scheduler, RAM, and benchmarks. 5 | %%% 6 | %%% Monitor is started by default when {@link erlperf} starts 7 | %%% as an application. Monitor is not started for ad-hoc 8 | %%% benchmarking (e.g. command-line, unless verbose logging 9 | %%% is requested). 10 | %%% 11 | %%% When started, the monitor provides periodic reports 12 | %%% about Erlang VM state, and registered jobs performance. 13 | %%% The reports are sent to all processes that joined 14 | %%% `{erlperf_monitor, Node}' or `cluster_monitor' process 15 | %%% group in `erlperf' scope. 16 | %%% 17 | %%% Reports can be received by any process, even the shell. 
Run 18 | %%% the following example in `rebar3 shell' of `erlperf': 19 | %%% ``` 20 | %%% (erlperf@ubuntu22)1> ok = pg:join(erlperf, cluster_monitor, self()). 21 | %%% ok 22 | %%% (erlperf@ubuntu22)2> erlperf:run(rand, uniform, []). 23 | %%% 14976933 24 | %%% (erlperf@ubuntu22)4> flush(). 25 | %%% Shell got {erlperf@ubuntu22,#{dcpu => 0.0,dio => 6.42619095979426e-4, 26 | %%% ets => 44,jobs => [],memory_binary => 928408, 27 | %%% memory_ets => 978056, 28 | %%% memory_processes => 8603392, 29 | %%% memory_total => 34952096,ports => 5, 30 | %%% processes => 95, 31 | %%% sched_util => 0.013187335960637163, 32 | %%% ''' 33 | %%% 34 | %%% Note that the monitor may report differently from the benchmark 35 | %%% run results. It is running with lower priority and may be significantly 36 | %%% affected by scheduler starvation, timing issues etc.. 37 | %%% 38 | %%% 39 | %%% 40 | %%% @end 41 | -module(erlperf_monitor). 42 | -author("maximfca@gmail.com"). 43 | 44 | -behaviour(gen_server). 45 | 46 | %% API 47 | -export([ 48 | start/0, 49 | start/1, 50 | start_link/0, 51 | start_link/1, 52 | register/3, 53 | unregister/1 54 | ]). 55 | 56 | %% gen_server callbacks 57 | -export([ 58 | init/1, 59 | handle_call/3, 60 | handle_cast/2, 61 | handle_info/2 62 | ]). 63 | 64 | 65 | -include_lib("kernel/include/logger.hrl"). 66 | 67 | -define(DEFAULT_TICK_INTERVAL_MS, 1000). 68 | 69 | 70 | -type monitor_sample() :: #{ 71 | time := integer(), 72 | node := node(), 73 | sched_util := float(), 74 | dcpu := float(), 75 | dio := float(), 76 | processes := integer(), 77 | ports := integer(), 78 | ets := integer(), 79 | memory_total := non_neg_integer(), 80 | memory_processes := non_neg_integer(), 81 | memory_binary := non_neg_integer(), 82 | memory_ets := non_neg_integer(), 83 | jobs => [{Job :: pid(), Cycles :: non_neg_integer()}] 84 | }. 85 | %% Monitoring report 86 | %% 87 | %%
88 | %% <ul><li>`time': timestamp when the report is generated, wall clock, milliseconds</li>
89 | %% <li>`node': originating Erlang node name</li>
90 | %% <li>`sched_util': normal scheduler utilisation, percentage. See {@link scheduler:utilization/1}</li>
91 | %% <li>`dcpu': dirty CPU scheduler utilisation, percentage</li>
92 | %% <li>`dio': dirty IO scheduler utilisation, percentage</li>
93 | %% <li>`processes': number of processes in the VM</li>
94 | %% <li>`ports': number of ports in the VM</li>
95 | %% <li>`ets': number of ETS tables created in the VM</li>
96 | %% <li>`memory_total': total VM memory usage, see {@link erlang:memory/1}</li>
97 | %% <li>`memory_processes': processes memory usage, see {@link erlang:memory/1}</li>
98 | %% <li>`memory_binary': binary memory usage</li>
99 | %% <li>`memory_ets': ETS memory usage</li>
100 | %% <li>`jobs': pairs of job process identifier and the iterations surplus
101 | %% since the last sample. With the default sampling interval of 1 second, this
102 | %% value is "requests/queries per second" (RPS/QPS)</li>
103 | %% </ul>
104 | 105 | -type start_options() :: #{ 106 | interval => pos_integer() 107 | }. 108 | %% Monitor startup options 109 | %% 110 | %%
111 | %% <ul><li>`interval': monitoring interval, 1000 ms by default</li>
112 | %% </ul>
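%% For example (illustration only), a monitor sampling twice per second could
%% be started with:
%% ```erlang
%% {ok, _Pid} = erlperf_monitor:start_link(#{interval => 500}).
%% '''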
113 | 114 | -export_type([monitor_sample/0, start_options/0]). 115 | 116 | %% @equiv start(#{interval => 1000}) 117 | -spec start() -> {ok, Pid :: pid()} | {error, Reason :: term()}. 118 | start() -> 119 | start(#{interval => ?DEFAULT_TICK_INTERVAL_MS}). 120 | 121 | %% @doc 122 | %% Starts the monitor. 123 | %% 124 | %% `Options' are used to change the monitor behaviour. 125 | %%
126 | %% <ul><li>`interval': time, in milliseconds, to wait between sample collection</li>
127 | %% </ul>
128 | -spec start(Options :: start_options()) -> {ok, Pid :: pid()} | {error, Reason :: term()}. 129 | start(Options) -> 130 | gen_server:start({local, ?MODULE}, ?MODULE, Options, []). 131 | 132 | %% @equiv start_link(#{interval => 1000}) 133 | -spec(start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}). 134 | start_link() -> 135 | start_link(#{interval => ?DEFAULT_TICK_INTERVAL_MS}). 136 | 137 | %% @doc 138 | %% Starts the monitor and links it to the current process. See {@link start/1} 139 | %% for options description. 140 | start_link(Options) -> 141 | gen_server:start_link({local, ?MODULE}, ?MODULE, Options, []). 142 | 143 | %% @doc 144 | %% Registers an {@link erlperf_job} to monitor. 145 | %% 146 | %% The running monitor queries every registered job, adding 147 | %% the number of iterations performed by all workers of 148 | %% that job to the report. 149 | %% This API is intended to be used by {@link erlperf_job} 150 | %% to enable VM monitoring while benchmarking. 151 | %% 152 | %% `Job' specifies the job process identifier; it is only 153 | %% used to detect when the job is stopped, to stop reporting 154 | %% counters for that job. 155 | %% 156 | %% `Handle' is the sampling handle, see {@link erlperf_job:handle/1}. 157 | %% 158 | %% `Initial' value should be provided when an existing job 159 | %% is registered, to avoid reporting accumulated counter value 160 | %% in the first report for that job. 161 | %% 162 | %% Always returns `ok', even when the monitor is not running. 163 | -spec register(pid(), term(), non_neg_integer()) -> ok. 164 | register(Job, Handle, Initial) -> 165 | gen_server:cast(?MODULE, {register, Job, Handle, Initial}). 166 | 167 | %% @doc 168 | %% Removes the job from monitoring. 169 | %% 170 | %% Stops reporting this job's performance. 171 | %% 172 | %% `Job' is the process identifier of the job. 173 | -spec unregister(pid()) -> ok. 174 | unregister(Job) -> 175 | gen_server:cast(?MODULE, {unregister, Job}). 176 | 177 | %%%=================================================================== 178 | %%% gen_server callbacks 179 | 180 | %% System monitor state 181 | -record(state, { 182 | % bi-map of job processes to counters 183 | jobs :: [{pid(), reference(), Handle :: erlperf_job:handle(), Prev :: integer()}], 184 | % scheduler data saved from last call 185 | sched_data :: [{pos_integer(), integer(), integer()}], 186 | % number of normal schedulers 187 | normal :: pos_integer(), 188 | % number of dirty schedulers 189 | dcpu :: pos_integer(), 190 | % 191 | tick = ?DEFAULT_TICK_INTERVAL_MS :: pos_integer(), 192 | next_tick :: integer() 193 | }). 194 | 195 | %% @private 196 | init(#{interval := Tick}) -> 197 | %% TODO: figure out if there is a way to find jobs after restart. 198 | %% ask a supervisor? but not all jobs are supervised... 199 | %% Jobs = [{Pid, erlperf_job:handle(Pid), 0} || 200 | %% {_, Pid, _, _} <- try supervisor:which_children(erlperf_job_sup) catch exit:{noproc, _} -> [] end], 201 | %% [monitor(process, Pid) || {Pid, _, _} <- Jobs], 202 | Jobs = [], 203 | %% enable scheduler utilisation calculation 204 | erlang:system_flag(scheduler_wall_time, true), 205 | Next = erlang:monotonic_time(millisecond) + Tick, 206 | erlang:start_timer(Next, self(), tick, [{abs, true}]), 207 | {ok, #state{ 208 | tick = Tick, 209 | jobs = Jobs, 210 | next_tick = Next, 211 | sched_data = lists:sort(erlang:statistics(scheduler_wall_time_all)), 212 | normal = erlang:system_info(schedulers), 213 | dcpu = erlang:system_info(dirty_cpu_schedulers)} 214 | }. 
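%% Illustration (not part of the original module): the tick handler below derives
%% utilisation as the ratio of active-time delta to total-time delta between two
%% scheduler_wall_time samples. A minimal standalone sketch of the same idea,
%% collapsed over all schedulers (assumes `scheduler_wall_time' statistics are
%% enabled, as done in init/1 above):
%%
%%   util(OldSample, NewSample) ->
%%       {Active, Total} = lists:foldl(
%%           fun({{I, A0, T0}, {I, A1, T1}}, {AccA, AccT}) ->
%%               {AccA + (A1 - A0), AccT + (T1 - T0)}
%%           end, {0, 0},
%%           lists:zip(lists:sort(OldSample), lists:sort(NewSample))),
%%       Active / Total.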
215 | 216 | %% @private 217 | handle_call(_Request, _From, _State) -> 218 | erlang:error(notsup). 219 | 220 | %% @private 221 | handle_cast({register, Job, Handle, Initial}, #state{jobs = Jobs} = State) -> 222 | MRef = monitor(process, Job), 223 | {noreply, State#state{jobs = [{Job, MRef, Handle, Initial} | Jobs]}}; 224 | handle_cast({unregister, Job}, #state{jobs = Jobs} = State) -> 225 | case lists:keyfind(Job, 1, Jobs) of 226 | {Job, MRef, _, _} -> 227 | demonitor(MRef, [flush]), 228 | {noreply, State#state{jobs = lists:keydelete(Job, 1, Jobs)}}; 229 | false -> 230 | {noreply, State} 231 | end. 232 | 233 | %% @private 234 | handle_info({'DOWN', _MRef, process, Pid, _Reason}, #state{jobs = Jobs} = State) -> 235 | {noreply, State#state{jobs = lists:keydelete(Pid, 1, Jobs)}}; 236 | 237 | %% @private 238 | handle_info({timeout, _, tick}, State) -> 239 | {noreply, handle_tick(State)}. 240 | 241 | %%%=================================================================== 242 | %%% Internal functions 243 | 244 | handle_tick(#state{sched_data = Data, normal = Normal, dcpu = Dcpu} = State) -> 245 | NewSched = lists:sort(erlang:statistics(scheduler_wall_time_all)), 246 | {NU, DU, DioU} = fold_normal(Data, NewSched, Normal, Dcpu, 0, 0), 247 | % add benchmarking info 248 | {Jobs, UpdatedJobs} = lists:foldl( 249 | fun ({Pid, MRef, Handle, Prev}, {J, Save}) -> 250 | Cycles = 251 | case erlperf_job:sample(Handle) of 252 | C when is_integer(C) -> C; 253 | undefined -> Prev %% job is stopped, race condition here 254 | end, 255 | {[{Pid, Cycles - Prev} | J], [{Pid, MRef, Handle, Cycles} | Save]} 256 | end, {[], []}, State#state.jobs), 257 | % 258 | Sample = #{ 259 | time => erlang:system_time(millisecond), 260 | node => node(), 261 | memory_total => erlang:memory(total), 262 | memory_processes => erlang:memory(processes), 263 | memory_binary => erlang:memory(binary), 264 | memory_ets => erlang:memory(ets), 265 | sched_util => NU * 100, 266 | dcpu => DU * 100, 267 | dio => DioU * 100, 268 | processes => erlang:system_info(process_count), 269 | ports => erlang:system_info(port_count), 270 | ets => erlang:system_info(ets_count), 271 | jobs => Jobs}, 272 | % notify local & global subscribers 273 | Subscribers = pg:get_members(erlperf, {erlperf_monitor, node()}) ++ pg:get_members(erlperf, cluster_monitor), 274 | [Pid ! Sample || Pid <- Subscribers], 275 | %% 276 | NextTick = State#state.next_tick + State#state.tick, 277 | erlang:start_timer(NextTick, self(), tick, [{abs, true}]), 278 | State#state{sched_data = NewSched, next_tick = NextTick, jobs = lists:reverse(UpdatedJobs)}. 279 | 280 | %% Iterates over normal scheduler 281 | fold_normal(Old, New, 0, Dcpu, AccActive, AccTotal) -> 282 | fold_dirty_cpu(Old, New, Dcpu, AccActive / AccTotal, 0, 0); 283 | fold_normal([{N, OldActive, OldTotal} | Old], 284 | [{N, NewActive, NewTotal} | New], Normal, Dcpu, AccActive, AccTotal) -> 285 | fold_normal(Old, New, Normal - 1, Dcpu, AccActive + (NewActive - OldActive), 286 | AccTotal + (NewTotal - OldTotal)). 287 | 288 | %% Iterates over DCPU 289 | fold_dirty_cpu(Old, New, 0, NormalPct, AccActive, AccTotal) -> 290 | fold_dirty_io(Old, New, NormalPct, AccActive / AccTotal, 0, 0); 291 | fold_dirty_cpu([{N, OldActive, OldTotal} | Old], 292 | [{N, NewActive, NewTotal} | New], Dcpu, NormalPct, AccActive, AccTotal) -> 293 | fold_dirty_cpu(Old, New, Dcpu - 1, NormalPct, AccActive + (NewActive - OldActive), 294 | AccTotal + (NewTotal - OldTotal)). 
295 | 296 | %% Remaining are dirty IO 297 | fold_dirty_io([], [], NormalPct, DcpuPct, AccActive, AccTotal) -> 298 | {NormalPct, DcpuPct, AccActive / AccTotal}; 299 | fold_dirty_io([{N, OldActive, OldTotal} | Old], 300 | [{N, NewActive, NewTotal} | New], NormalPct, DcpuPct, AccActive, AccTotal) -> 301 | fold_dirty_io(Old, New, NormalPct, DcpuPct, AccActive + (NewActive - OldActive), 302 | AccTotal + (NewTotal - OldTotal)). 303 | -------------------------------------------------------------------------------- /src/erlperf_sup.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @private 3 | %%% Top-level supervisor. Always starts process group scope 4 | %%% for `erlperf'. Depending on the configuration starts 5 | %%% a number of jobs or a cluster-wide monitoring solution. 6 | -module(erlperf_sup). 7 | -author("maximfca@gmail.com"). 8 | 9 | -behaviour(supervisor). 10 | 11 | -export([ 12 | start_link/0, 13 | init/1 14 | ]). 15 | 16 | -spec start_link() -> supervisor:startlink_ret(). 17 | start_link() -> 18 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 19 | 20 | -spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. 21 | init([]) -> 22 | SupFlags = #{strategy => rest_for_one, intensity => 2, period => 60}, 23 | 24 | ChildSpecs = [ 25 | %% start own pg scope, needed for cluster-wide operations 26 | %% even if the node-wide monitoring is not running, the scope 27 | %% needs to be up to send "job started" events for the cluster 28 | #{ 29 | id => pg, 30 | start => {pg, start_link, [erlperf]}, 31 | modules => [pg] 32 | }, 33 | 34 | %% monitoring 35 | #{ 36 | id => erlperf_monitor, 37 | start => {erlperf_monitor, start_link, []}, 38 | modules => [erlperf_monitor] 39 | }, 40 | 41 | %% supervisor for statically started jobs 42 | #{ 43 | id => erlperf_job_sup, 44 | start => {erlperf_job_sup, start_link, []}, 45 | type => supervisor, 46 | modules => [erlperf_job_sup] 47 | }], 48 | 49 | {ok, {SupFlags, ChildSpecs}}. 50 | -------------------------------------------------------------------------------- /test/erlperf_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Maxim Fedorov 3 | %%% @copyright (c) 2019-2023 Maxim Fedorov 4 | %%% @doc 5 | %%% Tests benchmark module, machine-readable output for benchmarks. 6 | %%% @end 7 | %%% ------------------------------------------------------------------- 8 | -module(erlperf_SUITE). 9 | -author("maximfca@gmail.com"). 10 | 11 | -include_lib("stdlib/include/assert.hrl"). 12 | 13 | -export([suite/0, all/0, groups/0, init_per_group/2, end_per_group/2]). 14 | 15 | -export([ 16 | start_link/0, 17 | init/1, 18 | handle_call/3, 19 | handle_cast/2 20 | ]). 21 | 22 | %% Continuous mode benchmarks 23 | -export([mfa/1, mfa_with_cv/1, 24 | mfa_with_tiny_cv/0, mfa_with_tiny_cv/1, 25 | mfa_concurrency/1, mfa_no_concurrency/1, 26 | code_extra_node/1, compare/1]). 27 | 28 | %% Timed mode 29 | -export([mfa_timed/1]). 30 | 31 | %% Concurrency estimation tests 32 | -export([mfa_squeeze/0, mfa_squeeze/1, 33 | squeeze_extended/0, squeeze_extended/1, 34 | squeeze_full/0, squeeze_full/1]). 35 | 36 | %% Tests for error handling 37 | -export([crasher/0, crasher/1, undefer/0, undefer/1, errors/0, errors/1]). 38 | 39 | -export([lock_contention/0, lock_contention/1]). 40 | 41 | -export([stat_calc/0, stat_calc/1, rand_stat/0, rand_stat/1]). 
42 | 43 | %% Record-replay tests 44 | -export([replay/1, do_anything/1]). 45 | 46 | -behaviour(gen_server). 47 | 48 | %%-------------------------------------------------------------------- 49 | %% COMMON TEST CALLBACK FUNCTIONS 50 | 51 | suite() -> 52 | [{timetrap, {seconds, 10}}]. 53 | 54 | groups() -> 55 | [ 56 | {continuous, [parallel], 57 | [mfa, mfa_with_cv, mfa_with_tiny_cv, mfa_concurrency, mfa_no_concurrency, code_extra_node, compare]}, 58 | {timed, [parallel], 59 | [mfa_timed]}, 60 | {concurrency, [], [mfa_squeeze, squeeze_extended, squeeze_full]}, 61 | {errors, [parallel], [crasher, undefer, errors]}, 62 | {overhead, [], [lock_contention]}, 63 | {statistics, [parallel], [stat_calc, rand_stat]}, 64 | {replay, [], [replay]} 65 | ]. 66 | 67 | init_per_group(squeeze, Config) -> 68 | case erlang:system_info(schedulers_online) of 69 | LowCPU when LowCPU < 3 -> 70 | {skip, {slow_cpu, LowCPU}}; 71 | _ -> 72 | Config 73 | end; 74 | init_per_group(_, Config) -> 75 | Config. 76 | 77 | end_per_group(_, Config) -> 78 | Config. 79 | 80 | all() -> 81 | [{group, continuous}, {group, concurrency}, {group, overhead}, 82 | {group, errors}, {group, statistics}, {group, replay}]. 83 | 84 | %%-------------------------------------------------------------------- 85 | %% Helpers: gen_server implementation 86 | init(Pid) -> 87 | {ok, Pid}. 88 | 89 | handle_call({sleep, Num}, _From, State) -> 90 | {reply, timer:sleep(Num), State}. 91 | 92 | handle_cast(_Req, _State) -> 93 | erlang:error(notsup). 94 | 95 | start_link() -> 96 | {ok, Pid} = gen_server:start_link(?MODULE, [], []), 97 | Pid. 98 | 99 | %%-------------------------------------------------------------------- 100 | %% TEST CASES 101 | 102 | mfa(Config) when is_list(Config) -> 103 | C = erlperf:run(timer, sleep, [1]), 104 | ?assert(C > 250 andalso C < 1101), 105 | %% extended report 106 | Extended = erlperf:run({timer, sleep, [1]}, #{report => extended, sample_duration => 100}), 107 | [?assert(Cs > 25 andalso Cs < 110) || Cs <- Extended], 108 | %% full report 109 | #{result := Result} = erlperf:run({timer, sleep, [1]}, #{report => full, sample_duration => 100}), 110 | #{average := Avg} = Result, 111 | ?assert(Avg > 25 andalso Avg < 110). 112 | 113 | mfa_with_cv(Config) when is_list(Config) -> 114 | %% basic report 115 | C = erlperf:run({timer, sleep, [1]}, #{cv => 0.05}), 116 | ?assert(C > 250 andalso C < 1101). 117 | 118 | mfa_with_tiny_cv() -> 119 | [{doc, "Tests benchmarking with very small coefficient of variation, potentially long"}, 120 | {timetrap, {seconds, 60}}]. 121 | 122 | mfa_with_tiny_cv(Config) when is_list(Config) -> 123 | C = erlperf:run({timer, sleep, [1]}, #{samples => 2, interval => 100, cv => 0.002}), 124 | ?assert(C > 250 andalso C < 1101). 125 | 126 | mfa_concurrency(Config) when is_list(Config) -> 127 | C = erlperf:run({timer, sleep, [1]}, #{concurrency => 2}), 128 | ?assert(C > 500 andalso C < 2202, {out_of_range, C, 500, 2202}). 
129 | 130 | compare(Config) when is_list(Config) -> 131 | [C1, C2] = erlperf:compare(["timer:sleep(1).", "timer:sleep(2)."], 132 | #{sample_duration => 100, report => extended}), 133 | ?assertEqual(3, length(C1), {not_extended, C1}), 134 | [?assert(L > R, {left, C1, right, C2}) || {L, R} <- lists:zip(C1, C2)], 135 | %% low-overhead comparison benchmark 136 | %% LEGACY/DEPRECATED: for timed mode, extended report has only 1 sample 137 | [[T1], [T2]] = erlperf:benchmark([#{runner => {timer, sleep, [1]}}, #{runner => "timer:sleep(2)."}], 138 | #{sample_duration => undefined, samples => 50, report => extended}, undefined), 139 | ?assert(is_integer(T1) andalso is_integer(T2)), 140 | ?assert(T1 < T2, {T1, T2}). 141 | 142 | mfa_no_concurrency(Config) when is_list(Config) -> 143 | C = erlperf:run( 144 | #{ 145 | runner => fun (Pid) -> gen_server:call(Pid, {sleep, 1}) end, 146 | init => {?MODULE, start_link, []}, 147 | init_runner => fun(Pid) -> Pid end, 148 | done => {gen_server, stop, []} 149 | }, 150 | #{concurrency => 4}), 151 | ?assert(C > 250 andalso C < 1101, {out_of_range, C, 250, 1101}). 152 | 153 | code_extra_node(Config) when is_list(Config) -> 154 | C = erlperf:run(#{ 155 | runner => "{ok, 1} = application:get_env(kernel, test), timer:sleep(1).", 156 | init => "application:set_env(kernel, test, 1)." 157 | }, 158 | #{concurrency => 2, sample_duration => 100, isolation => #{}}), 159 | ?assertEqual(undefined, application:get_env(kernel, test), {"isolation did not work"}), 160 | ?assert(C > 50 andalso C < 220, {out_of_range, C, 50, 220}). 161 | 162 | %%-------------------------------------------------------------------- 163 | %% timed mode 164 | 165 | mfa_timed(Config) when is_list(Config) -> 166 | %% basic report for 100 'timer:sleep(1) iterations' 167 | Time = erlperf:time({timer, sleep, [1]}, 100), 168 | ?assert(Time > 100 andalso Time < 300, {actual, Time}), %% between 100 and 300 ms 169 | %% extended report for 50 iterations 170 | Times = erlperf:run({timer, sleep, [1]}, #{samples => 5, report => extended, sample_duration => {timed, 50}}), 171 | ?assertEqual(5, length(Times), {times, Times}), 172 | [?assert(T > 50 andalso T < 150, {actual, T}) || T <- Times], %% every run between 50 and 150 ms. 173 | %% full report for 50 iterations 174 | Full = erlperf:run({timer, sleep, [1]}, #{samples => 5, report => full, sample_duration => {timed, 50}}), 175 | #{result := #{average := Avg, samples := FullSsamples}} = Full, 176 | ?assertEqual(5, length(FullSsamples)), 177 | ?assert(Avg > 50000.0 andalso Avg < 150000.0, {actual, Avg}), %% average run between 50 and 150 us (!us!) 178 | %% ensure 'warmup' is supported for timed runs 179 | Now = os:system_time(millisecond), 180 | Warmup = erlperf:run({timer, sleep, [1]}, #{samples => 5, warmup => 10, sample_duration => {timed, 50}}), 181 | ?assert(Warmup > 50 andalso Warmup < 150, {actual, Warmup}), %% between 50 and 150 ms 182 | Elapsed = os:system_time(millisecond) - Now, 183 | ?assert(Elapsed > 750, {warmup_missing, Elapsed}), 184 | ?assert(Elapsed < 3000, {warmup_slow, Elapsed}). 185 | 186 | %%-------------------------------------------------------------------- 187 | %% concurrency estimation test cases 188 | 189 | mfa_squeeze() -> 190 | [{doc, "Tests concurrency estimation mode with basic report"}]. 
191 | 192 | mfa_squeeze(Config) when is_list(Config) -> 193 | Scheds = erlang:system_info(schedulers_online), 194 | {QPS, CPU} = erlperf:run({rand, uniform, [1]}, #{sample_duration => 50}, #{}), 195 | ?assert(QPS > 0), 196 | ?assert(CPU > 1, {schedulers, Scheds, detected, CPU}). 197 | 198 | squeeze_extended() -> 199 | [{doc, "Tests concurrency estimation mode with extended report"}]. 200 | 201 | squeeze_extended(Config) when is_list(Config) -> 202 | {{QPS, CPU}, History} = erlperf:run({rand, uniform, [1]}, 203 | #{sample_duration => 50, warmup => 1, report => extended}, #{}), 204 | %% find the best historical result, and ensure it's 3 steps away from the last 205 | [Best | _] = lists:reverse(lists:keysort(1, History)), 206 | ?assertEqual({QPS, CPU}, Best), 207 | ?assertEqual(Best, lists:nth(4, History), History). 208 | 209 | squeeze_full() -> 210 | [{doc, "Tests concurrency estimation mode with full report"}]. 211 | 212 | squeeze_full(Config) when is_list(Config) -> 213 | Report = erlperf:run({rand, uniform, [1]}, #{sample_duration => 50, warmup => 1, report => full}, #{}), 214 | #{mode := concurrency, result := Best, history := History, sleep := sleep, 215 | run_options := #{concurrency := Concurrency}} = Report, 216 | #{time := Time} = Best, 217 | ct:pal("Best run took ~b ms,~n~p", [Time div 1000, Best]), 218 | %% taking 3 samples 219 | ?assert(Time >= 3 * 50000, {too_fast, Time}), 220 | ?assert(Time < 3 * 100000, {too_slow, Time}), 221 | ?assertEqual({Concurrency, Best}, lists:nth(4, History)). 222 | 223 | %%-------------------------------------------------------------------- 224 | %% error handling test cases 225 | 226 | crasher() -> 227 | [{doc, "Tests job that crashes"}]. 228 | 229 | crasher(Config) when is_list(Config) -> 230 | ?assertException(error, {benchmark, {'EXIT', _, _}}, 231 | erlperf:run({erlang, throw, [ball]}, #{concurrency => 2})). 232 | 233 | undefer() -> 234 | [{doc, "Tests job undefs - e.g. wrong module name"}]. 235 | 236 | undefer(Config) when is_list(Config) -> 237 | ?assertException(error, {benchmark, {'EXIT', _, {undef, _}}}, 238 | erlperf:run({'$cannot_be_this', throw, []}, #{concurrency => 2})). 239 | 240 | errors() -> 241 | [{doc, "Tests various error conditions"}]. 242 | 243 | errors(Config) when is_list(Config) -> 244 | ?assertException(error, {generate, {parse, init, _}}, 245 | erlperf:run(#{runner => {erlang, node, []}, init => []})), 246 | ?assertException(error, {generate, {parse, runner, _}}, 247 | erlperf:run(#{runner => []})), 248 | ?assertException(error, {generate, {parse, runner, _}}, 249 | erlperf:run(#{runner => {[]}})). 250 | 251 | %%-------------------------------------------------------------------- 252 | %% timer skew detection 253 | 254 | lock_contention() -> 255 | [{doc, "Ensures that benchmarking overhead when running multiple concurrent processes is not too high"}, 256 | {timetrap, {seconds, 20}}]. 
257 | 258 | lock_contention(Config) when is_list(Config) -> 259 | %% need at the very least 4 schedulers to create enough contention 260 | case erlang:system_info(schedulers_online) of 261 | Enough when Enough >= 4 -> 262 | Tuple = {lists:seq(1, 5000), list_to_tuple(lists:seq(1, 10000))}, 263 | Init = fun() -> ets:new(tab, [public, named_table]) end, 264 | Done = fun(Tab) -> ets:delete(Tab) end, 265 | Runner = fun() -> true = ets:insert(tab, Tuple) end, %% this inevitably causes lock contention 266 | %% take 50 samples of 10 ms, which should complete in about a second, and 10 extra warmup samples 267 | %% hoping that lock contention is detected at warmup 268 | Before = os:system_time(millisecond), 269 | Report = erlperf:run(#{runner => Runner, init => Init, done => Done}, 270 | #{concurrency => Enough * 4, samples => 50, sample_duration => 10, warmup => 10, report => full}), 271 | TimeSpent = os:system_time(millisecond) - Before, 272 | #{result := #{average := QPS}, sleep := DetectedSleepType} = Report, 273 | ?assertEqual(busy_wait, DetectedSleepType, {"Lock contention was not detected", Report}), 274 | ?assert(QPS > 0, {qps, QPS}), 275 | ?assert(TimeSpent > 500, {too_quick, TimeSpent, expected, 1000}), 276 | ?assert(TimeSpent < 3000, {too_slow, TimeSpent, expected, 1000}); 277 | NotEnough -> 278 | {skip, {not_enough_schedulers_online, NotEnough}} 279 | end. 280 | 281 | %%-------------------------------------------------------------------- 282 | %% statistics 283 | 284 | %% simplified delta-comparison 285 | -define(assertApprox(Expect, Expr), 286 | begin 287 | ((fun () -> 288 | X__X = (Expect), 289 | X__Y = (Expr), 290 | case (erlang:abs(X__Y - X__X) < 0.0001) of 291 | true -> ok; 292 | false -> erlang:error({assertEqual, 293 | [{module, ?MODULE}, 294 | {line, ?LINE}, 295 | {expression, (??Expr)}, 296 | {expected, X__X}, 297 | {value, X__Y}]}) 298 | end 299 | end)()) 300 | end). 301 | 302 | stat_calc() -> 303 | [{doc, "Tests correctness of statistical calculations over samples"}]. 304 | 305 | stat_calc(Config) when is_list(Config) -> 306 | %% generate with: [erlang:round(rand:normal(40, 100)) || _ <- lists:seq(1, 30)]. 307 | Sample = [36,42,42,47,51,39,37,32,41,32,15,44,41,46,50,36,48,33,35, 308 | 35,25,21,47,40,33,57,55,64,40,30], 309 | 310 | Stats = erlperf:report_stats(Sample), 311 | 312 | ?assertApprox(39.8, maps:get(average, Stats)), 313 | %% ?assertApprox(109.0620, maps:get(variance, Stats)), 314 | ?assertApprox(10.4432, maps:get(stddev, Stats)), 315 | ?assertEqual(40, maps:get(median, Stats)), 316 | ?assertEqual(15, maps:get(min, Stats)), 317 | ?assertEqual(64, maps:get(max, Stats)), 318 | %% ?assertApprox(47, maps:get({percentile, 0.75}, Stats)), 319 | ?assertApprox(64, maps:get(p99, Stats)). 320 | 321 | rand_stat() -> 322 | [{doc, "Use rand module to generate some wildly random results"}]. 
323 | 324 | rand_stat(Config) when is_list(Config) -> 325 | Report = erlperf:run({rand, uniform, []}, #{report => full, samples => 100, sample_duration => 5}), 326 | #{result := Result, mode := continuous, system := System} = Report, 327 | #{min := Min, max := Max, average := Avg, median := Mid, p99 := P99} = Result, 328 | %% just run some sanity checks assertions 329 | ?assertEqual(erlang:system_info(os_type), maps:get(os, System)), 330 | ?assert(is_map_key(cpu, System), {cpu_missing, System}), 331 | ?assert(Min < Max, {min, Min, max, Max}), 332 | ?assert(Avg > Min andalso Avg < Max, {avg, Avg, min, Min, max, Max}), 333 | ?assert(Mid > Min andalso Mid < Max, {median, Mid, min, Min, max, Max}), 334 | ?assert(P99 =< Max, {p99, P99, max, Max}). 335 | 336 | %%-------------------------------------------------------------------- 337 | %% record-replay 338 | 339 | replay(Config) when is_list(Config) -> 340 | spawn(fun () -> timer:sleep(10), do_anything(10) end), 341 | Trace = erlperf:record(?MODULE, '_', '_', 100), 342 | QPS = erlperf:run(Trace), 343 | ?assert(QPS > 10). 344 | 345 | do_anything(0) -> 346 | timer:sleep(1); 347 | do_anything(N) -> 348 | ?MODULE:do_anything(N - 1). -------------------------------------------------------------------------------- /test/erlperf_cli_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Maxim Fedorov 3 | %%% @copyright (c) 2019-2023 Maxim Fedorov 4 | %%% ------------------------------------------------------------------- 5 | -module(erlperf_cli_SUITE). 6 | -author("maximfca@gmail.com"). 7 | 8 | -include_lib("stdlib/include/assert.hrl"). 9 | 10 | -export([suite/0, all/0]). 11 | 12 | -export([ 13 | simple/1, concurrent/1, verbose/1, zero/1, compare/1, 14 | usage/1, init/1, 15 | double/1, triple/1, pg/1, mfa/1, 16 | full_report/1, basic_timed_report/1, full_timed_report/1, 17 | recorded/1, 18 | squeeze/0, squeeze/1, step/1, 19 | init_all/0, init_all/1, 20 | label/1 21 | ]). 22 | 23 | %%-------------------------------------------------------------------- 24 | %% COMMON TEST CALLBACK FUNCTIONS 25 | 26 | suite() -> 27 | [{timetrap, {seconds, 20}}]. 28 | 29 | all() -> 30 | [simple, concurrent, verbose, zero, compare, squeeze, step, usage, init, double, 31 | triple, pg, mfa, full_report, basic_timed_report, full_timed_report, recorded, init_all, 32 | label]. 33 | 34 | %%-------------------------------------------------------------------- 35 | %% helper functions 36 | capture_io(Fun) -> 37 | ok = ct:capture_start(), 38 | Fun(), 39 | ok = ct:capture_stop(), 40 | lists:flatten(ct:capture_get()). 41 | 42 | %%-------------------------------------------------------------------- 43 | %% command-line testing 44 | 45 | parse_qps(QPST, "") -> list_to_integer(QPST); 46 | parse_qps(QPST, "Ki") -> list_to_integer(QPST) * 1000; 47 | parse_qps(QPST, "Mi") -> list_to_integer(QPST) * 1000000; 48 | parse_qps(QPST, "Gi") -> list_to_integer(QPST) * 1000000000; 49 | parse_qps(QPST, "Ti") -> list_to_integer(QPST) * 1000000000000. 50 | 51 | parse_duration(TT, "ns") -> list_to_integer(TT); 52 | parse_duration(TT, "us") -> list_to_integer(TT) * 1000; 53 | parse_duration(TT, "ms") -> list_to_integer(TT) * 1000000; 54 | parse_duration(TT, "s") -> list_to_integer(TT) * 1000000000; 55 | parse_duration(TT, "m") -> list_to_integer(TT) * 60 * 1000000000. 56 | 57 | filtersplit(Str, Sep) -> 58 | [L || L <- string:split(Str, Sep, all), L =/= ""]. 

parse_out(Out) ->
    [Header | Lines] = filtersplit(Out, "\n"),
    case filtersplit(Header, " ") of
        ["Code", "||", "QPS", "Time"] ->
            [begin
                case filtersplit(Ln, " ") of
                    [Code, ConcT, QPST, TT, TTU] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, ""), parse_duration(TT, TTU)};
                    [Code, ConcT, QPST, QU, TT, TTU] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, QU), parse_duration(TT, TTU)}
                end
            end || Ln <- Lines];
        ["Code", "||", "QPS", "Time", "Rel"] ->
            [begin
                case filtersplit(Ln, " ") of
                    [Code, ConcT, "0", "inf", Rel] ->
                        {Code, list_to_integer(ConcT), 0, infinity,
                            list_to_integer(lists:droplast(Rel))};
                    [Code, ConcT, QPST, TT, TTU, Rel] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, ""), parse_duration(TT, TTU),
                            list_to_integer(lists:droplast(Rel))};
                    [Code, ConcT, QPST, QU, TT, TTU, Rel] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, QU), parse_duration(TT, TTU),
                            list_to_integer(lists:droplast(Rel))}
                end
            end || Ln <- Lines];
        ["Code", "||", "Samples", "Avg", "StdDev", "Median", "P99", "Iteration" | Rel] ->
            [begin
                [Code, ConcT, Samples, Avg0 | T1] = filtersplit(Ln, " "),
                {Avg, T2} = maybe_unit(Avg0, T1),
                [StdDevPercent, Median0 | T3] = T2,
                {Median, T4} = maybe_unit(Median0, T3),
                [P990 | T5] = T4,
                {P99, [TT, TU | T6]} = maybe_unit(P990, T5),
                ?assertEqual($%, lists:last(StdDevPercent)),
                StdDev = list_to_float(lists:droplast(StdDevPercent)),
                Returned = [Code, list_to_integer(ConcT), list_to_integer(Samples), Avg,
                    StdDev, Median, P99, parse_duration(TT, TU)],
                case Rel of
                    [] ->
                        list_to_tuple(Returned);
                    ["Rel"] ->
                        ?assertEqual($%, lists:last(T6)),
                        Relative = list_to_integer(lists:droplast(T6)),
                        list_to_tuple(Returned ++ [Relative])
                end
            end || Ln <- Lines];
        Unparsed ->
            ct:pal("Unknown header: ~p", [Unparsed]),
            ?assert(false)
    end.

maybe_unit(Num, [[U, $i] | Tail]) ->
    {parse_qps(Num, [U, $i]), Tail};
maybe_unit(Num, [TimeUnit | Tail]) when TimeUnit =:= "m"; TimeUnit =:= "s"; TimeUnit =:= "ms"; TimeUnit =:= "ns"; TimeUnit =:= "us" ->
    {parse_duration(Num, TimeUnit), Tail};
maybe_unit(Num, Rem) ->
    {list_to_integer(Num), Rem}.

%%--------------------------------------------------------------------
%% TEST CASES

% erlperf 'timer:sleep(1).' -d 100
simple(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun() -> erlperf_cli:main([Code, "-d", "100"]) end),
    [{Code, 1, C, T}] = parse_out(Out),
    ?assert(C > 25 andalso C < 110, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

concurrent(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun() -> erlperf_cli:main([Code, "-d", "100", "-c", "8"]) end),
    [{Code, 8, C, T}] = parse_out(Out),
    ?assert(C > 8 * 25 andalso C < 8 * 110, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(1).' -v
verbose(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "-v"]) end),
    Lines = filtersplit(Out, "\n"),
    %% TODO: actually verify that stuff printed is monitoring stuff
    ?assert(length(Lines) > 3),
    %% expect the first 12 lines to contain the generated source code
    Generated = lists:sublist(Lines, 1, 12),
    ?assertEqual(Generated, [
        ">>>>>>>>>>>>>>> timer:sleep(1). ",
        "-module(benchmark).",
        "-export([benchmark/0, benchmark_finite/1]).", "benchmark() ->",
        " timer:sleep(1),", " benchmark().",
        "benchmark_finite(0) ->", " ok;", "benchmark_finite(Count) ->",
        " timer:sleep(1),", " benchmark_finite(Count - 1).",
        "<<<<<<<<<<<<<<< "]),
    %% parse the last 2 lines
    [{Code, 1, C, T}] = parse_out(lists:join("\n", lists:sublist(Lines, length(Lines) - 1, 2))),
    ?assert(C > 250 andalso C < 1101, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(100).' 'timer:sleep(200).' -d 10
zero(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["timer:sleep(100).", "timer:sleep(200).", "-d", "10"]) end),
    % Code Concurrency Throughput Time Rel
    % timer:sleep(200). 1 0 inf 0%
    % timer:sleep(100). 1 0 inf 0%
    [{_Code, 1, 0, infinity, 0}, {_Code2, 1, 0, infinity, 0}] = parse_out(Out).

% erlperf 'timer:sleep(1).' 'timer:sleep(2).' -d 100 -s 5 -w 1 -c 2
compare(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "timer:sleep(2).", "-s", "5", "-d", "100", "-w", "1", "-c", "2"]) end),
    % Code Concurrency Throughput Time Rel
    % timer:sleep(1). 2 950 100 ns 100%
    % timer:sleep(2). 2 475 200 ns 50%
    [{_Code, 2, C, T, R}, {_Code2, 2, C2, T2, R2}] = parse_out(Out),
    ?assert(C > 66 andalso C < 220, {qps, C}),
    ?assert(C2 > 50 andalso C2 < 110, {qps, C2}),
    ?assert(T < T2),
    ?assert(R > R2).

squeeze() ->
    [{doc, "Tests concurrency estimation (squeeze) mode via command line"}, {timetrap, {seconds, 30}}].

% erlperf 'timer:sleep(1).' --duration 50 --squeeze --min 2 --max 4 --threshold 2
squeeze(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "--duration", "50", "--squeeze", "--min", "2", "--max", "4", "--threshold", "2"]) end),
    [{_Code, 4, C, T}] = parse_out(Out),
    ?assert(C > 50 andalso C < 220, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(1).' --duration 50 --squeeze --min 1 --max 25 --step 10
step(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "--duration", "50", "--squeeze", "--min", "1", "--max", "25", "--step", "10"]) end),
    [{_Code, 20, C, T}] = parse_out(Out),
    ?assert(C > 400 andalso C < 600, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf -q
usage(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["-q"]) end),
    Line1 = "Error: erlperf: required argument missing: code",
    ?assertEqual(Line1, lists:sublist(Out, length(Line1))),
    Out2 = capture_io(fun () -> erlperf_cli:main(["--un code"]) end),
    ?assertEqual("Error: erlperf: unrecognised argument: --un code", lists:sublist(Out2, 48)),
    ok.

% erlperf '{file,_}=code:is_loaded(pool).' --init 'code:ensure_loaded(pool).' --done 'code:purge(pool), code:delete(pool).'
init(Config) when is_list(Config) ->
    Code = "{file,_}=code:is_loaded(pool).",
    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, "--init", "code:ensure_loaded(pool).", "--done", "code:purge(pool), code:delete(pool)."])
    end),
    % verify 'done' was done
    ?assertEqual(false, code:is_loaded(pool)),
    % verify output
    [{_Code, 1, C, T}] = parse_out(Out),
    ?assert(C > 50, {qps, C}),
    ?assert(T > 0, {time, T}).

% erlperf 'runner(X) -> timer:sleep(X).' --init_runner '1.' 'runner(Y) -> timer:sleep(Y).' --init_runner '2.' -s 2 --duration 100
double(Config) when is_list(Config) ->
    Code = "runner(X)->timer:sleep(X).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "--init_runner", "1.", Code, "--init_runner", "2.", "-s", "2",
        "--duration", "100"]) end),
    [{Code, 1, C, T, R}, {Code, 1, C2, T2, R2}] = parse_out(Out),
    ?assert(C > 25 andalso C < 110, {qps, C}),
    ?assert(C2 > 25 andalso C2 < 55, {qps, C2}),
    ?assert(T < T2),
    ?assert(R > R2).

triple(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["timer:sleep(1).", "-s", "2", "--duration", "100",
        "timer:sleep(2).", "timer:sleep(3)."]) end),
    [_, _, {_, 1, C3, _T3, R3}] = parse_out(Out),
    ?assert(C3 >= 20 andalso C3 =< 30, {"expected between 20 and 30, got", C3}),
    ?assert(R3 >= 40 andalso R3 =< 60, {"expected between 40 and 60, got", R3}),
    ok.

% erlperf 'runner(S)->pg:join(S,g,self()),pg:leave(S,g,self()).' --init_runner '{ok,Scope}=pg:start_link(scope),Scope.'
pg(Config) when is_list(Config) ->
    ?assertEqual(undefined, whereis(scope)), %% ensure scope is not left over
    Code = "runner(S)->pg:join(S,g,self()),pg:leave(S,g,self()).",
    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, "--init_runner", "{ok,Scope}=pg:start_link(scope),Scope."])
    end),
    ?assertEqual(undefined, whereis(scope)), %% ensure runner exited
    [{_Code, 1, C, _T}] = parse_out(Out),
    ?assert(C > 100, {qps, C}).

% erlperf '{rand, uniform, [4]}'
mfa(Config) when is_list(Config) ->
    Code = "{rand,uniform,[4]}",
    Out = capture_io(fun () -> erlperf_cli:main([Code]) end),
    [{Code, 1, _C, _T}] = parse_out(Out).

% erlperf 'timer:sleep(1).' -d 100 -r full
full_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    AllOut = capture_io(fun () -> erlperf_cli:main([Code, "-d", "100", "-r", "full"]) end),
    [[$O, $S | _], A1] = string:split(AllOut, "\n"), %% test that the first line is the OS
    [[$C, $P, $U | _], A2] = string:split(A1, "\n"),
    [[$V, $M, $ , $:, $ | VM], A3] = string:split(A2, "\n"), %% extract VM
    [_, Out] = string:split(A3, "\n"),
    ?assertEqual(string:trim(erlang:system_info(system_version)), string:trim(VM, both)),
    [{Code, 1, Samples, Avg, Dev, Med, P99, Time}] = parse_out(Out),
    ?assertEqual(3, Samples),
    ?assert(Med =< P99),
    ?assert(Dev < 50, {deviation, Dev}),
    ?assert(Avg > 25 andalso Avg < 110, {avg, Avg}),
    ?assert(Time > 1000000 andalso Time < 3000000, {time, Time}).

% erlperf 'timer:sleep(1).' -r basic -s 3 -l 50
basic_timed_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "-r", "basic", "-s", "3", "-l", "50"]) end),
    [{_Code, 1, QPS, IterTime}] = parse_out(Out),
    ct:pal("Basic Timed Report:~n~p", [Out]),
    ?assert(QPS > 250 andalso QPS < 1100, {qps, QPS}), %% QPS of 'timer:sleep(1)' is ~500
    ?assert(IterTime >= 1000000 andalso IterTime < 3000000, {time, IterTime}). %% single iteration of timer:sleep(1)

% erlperf 'timer:sleep(1).' -r full -l 100 -s 5
full_timed_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    AllOut = capture_io(fun () -> erlperf_cli:main([Code, "-r", "full", "-l", "100", "-s", "5"]) end),
    ct:pal("Full Timed Report:~n~p", [AllOut]),
    [[$O, $S | _], A1] = string:split(AllOut, "\n"), %% test that the first line is the OS
    [[$C, $P, $U | _], A2] = string:split(A1, "\n"),
    [[$V, $M, $ , $:, $ | VM], A3] = string:split(A2, "\n"), %% extract VM
    [_, Out] = string:split(A3, "\n"),
    ?assertEqual(string:trim(erlang:system_info(system_version)), string:trim(VM, both)),
    [{Code, 1, Samples, Avg, Dev, Med, P99, Time}] = parse_out(Out),
    ?assertEqual(5, Samples),
    ?assert(Med =< P99),
    ?assert(Dev < 50, {deviation, Dev}),
    ?assert(Avg >= 200000000 andalso Avg < 400000000, {avg, Avg}), %% average time to complete 100 iterations of sleep(1)
    ?assert(Time >= 1000000 andalso Time < 3000000, {time, Time}). %% single timer:sleep(1) time, in ns


% erlperf ./recorded.list --init 'ets:file2tab("ets.tab").'
recorded(Config) ->
    % write the ETS table down to a file
    Priv = proplists:get_value(priv_dir, Config),
    EtsFile = filename:join(Priv, "ets.tab"),
    RecFile = filename:join(Priv, "recorded.list"),
    test_ets_tab = ets:new(test_ets_tab, [named_table, public, ordered_set]),
    [true = ets:insert(test_ets_tab, {N, rand:uniform(100)}) || N <- lists:seq(1, 100)],
    ok = ets:tab2file(test_ets_tab, EtsFile),
    true = ets:delete(test_ets_tab),
    %
    ok = file:write_file(RecFile, term_to_binary(
        [
            {ets, insert, [test_ets_tab, {100, 40}]},
            {ets, delete, [test_ets_tab, 100]}
        ])),
    %
    Out = capture_io(fun () -> erlperf_cli:main(
        [RecFile, "--init", "ets:file2tab(\"" ++ EtsFile ++ "\")."])
    end),
    [LN1, LN2] = string:split(Out, "\n"),
    ?assertEqual(["Code", "||", "QPS", "Time"], string:lexemes(LN1, " ")),
    ?assertMatch(["[{ets,insert,[test_ets_tab,{100,40}]},", "...]", "1" | _], string:lexemes(LN2, " ")),
    ok.

init_all() ->
    [{doc, "Test init_all, done_all, init_runner_all options"}].

%% ./erlperf 'runner(X)->timer:sleep(X).' 'runner(X)->timer:sleep(X).' 'runner(X)->timer:sleep(X).'
%%   --init_all '5.' --init '1.' --init_runner_all 'ir(Z) -> Z * 2.' --init_runner '5.' --init_runner '2.' --done_all '2.'
init_all(Config) when is_list(Config) ->
    Code = "runner(X)->timer:sleep(X).",
    Code2 = "runner(Y)->timer:sleep(Y).",
    %% how this test works:
    %% --init_all returns 5 for all 3 jobs; for code #1, --init is overridden to return 1.
    %% --init_runner_all returns 2x of the init result, but overrides for #1 and #2 return 5 and 2,
    %% so the resulting delays are 5, 2 and 10 ms.
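    %% Worked out per job (matches the assertions below):
    %%   job #1 (Code):  --init "1." (override), --init_runner "5." (override) -> sleeps 5 ms
    %%   job #2 (Code2): --init_all "5.",        --init_runner "2." (override) -> sleeps 2 ms
    %%   job #3 (Code):  --init_all "5.",        --init_runner_all: ir(5) = 10 -> sleeps 10 ms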

    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, Code2, Code, "--init_all", "5.", "--init", "1.", "--init_runner_all", "ir(Z) -> Z * 2.",
            "--init_runner", "5.", "--init_runner", "2.",
            "--done_all", "2.", "-s", "2", "--duration", "100"]) end), %% unrelated parts to make the test quicker
    [{Code2, 1, C1, _, R}, {Code, 1, C2, _, R2}, {Code, 1, C3, _, R3}] = parse_out(Out),
    %% tests sorting as well
    ?assert(C1 > 25 andalso C1 < 55, {qps, C1}), %% 2 ms delay
    ?assert(C2 > 10 andalso C2 < 25, {qps, C2}), %% 5 ms delay
    ?assert(C3 > 5 andalso C3 < 11, {qps, C3}), %% 10 ms delay
    ?assert(R > R2), %% the 2 ms delay job has a higher relative rate than the 5 ms one
    ?assert(R2 > R3). %% the 5 ms delay job has a higher relative rate than the 10 ms one

% erlperf 'foo.' --label bar
label(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["foo.", "--label", "bar"]) end),
    [{Label, _, _, _}] = parse_out(Out),
    ?assertEqual("bar", Label).
--------------------------------------------------------------------------------
/test/erlperf_cluster_monitor_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests the combination of erlperf_monitor, erlperf_cluster_monitor,
%%% erlperf_history and erlperf_job. This is an integration test
%%% for the entire cluster monitoring subsystem.
%%% @end
-module(erlperf_cluster_monitor_SUITE).
-author("maximfca@gmail.com").

%% Common Test headers
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([suite/0, all/0]).

%% Test cases
-export([monitor_cluster/0, monitor_cluster/1]).

-export([handle_update/2]).

suite() ->
    [{timetrap, {seconds, 20}}].

all() ->
    [monitor_cluster].

%%--------------------------------------------------------------------
%% TEST CASES

monitor_cluster() ->
    [{doc, "Tests 3 separate cluster monitors watching the same data"}].

monitor_cluster(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, HistPid} = erlperf_history:start_link(),

    Control = self(),
    LogFile = filename:join(proplists:get_value(priv_dir, Config), "cluster_log.txt"),
    ok = ct:capture_start(),

    %% TODO: deliberately omit memory fields?
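    %% Field names mirror erlperf_monitor sample maps: sched_util is normal
    %% scheduler utilisation, dcpu/dio cover dirty CPU/IO schedulers, and the
    %% memory_* fields correspond to erlang:memory/1 categories (a reading of
    %% the field names documented here for convenience, not asserted by this test).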
    AllFields = [time, node, sched_util, dcpu, dio, processes, ports, ets,
        memory_total, memory_processes, memory_binary, memory_ets, jobs],

    %% start the cluster monitor
    {ok, ClusterHandlePid} = erlperf_cluster_monitor:start_link({?MODULE, handle_update, [Control]}, 1000, AllFields),
    %% start another cluster monitor (this one printing to the console)
    {ok, ClusterMonPid} = erlperf_cluster_monitor:start_link(),
    %% start a 3rd cluster monitor printing to a file
    {ok, ClusterFilePid} = erlperf_cluster_monitor:start_link(LogFile, 1000, AllFields),

    Started = os:system_time(millisecond),
    %% simulate jobs on 3 nodes, each node sending 3 data samples (the monitors are expected to eventually catch those)
    LocalJobs = [{self(), 100}, {Pg, 200}],
    Node2Jobs = [{HistPid, 500}],
    Nodes = [{node(), LocalJobs}, {'node2@localhost', Node2Jobs}, {'node3@localhost', []}],
    Times = [Started + Seq * 1000 || Seq <- lists:seq(1, 3)],
    %% common message template
    Template = #{sched_util => 0.1, dcpu => 0.1, dio => 0.1, processes => 10, ports => 20, ets => 30,
        memory_total => 100, memory_processes => 10, memory_binary => 20, memory_ets => 30},
    Samples = [HistPid ! Template#{node => Node, jobs => Jobs, time => Time} || {Node, Jobs} <- Nodes, Time <- Times],

    %% wait for 3 monitoring handler calls from the cluster monitor (3 seconds)
    RawHandlerHistory = poll_history([], 3),
    ClusterHandlerHistory = [S || {_T, S} <- RawHandlerHistory],
    RawHistory = erlperf_history:get(Started),
    History = [S || {_T, S} <- RawHistory],

    %% capture text output
    ct:capture_stop(),
    Console = ct:capture_get(),

    {ok, FileBin} = file:read_file(LogFile),
    [ok = gen:stop(Pid) || Pid <- [ClusterFilePid, ClusterMonPid, ClusterHandlePid, HistPid, Pg]],

    %% all 5 sources should be identical: sent samples, handler history, raw history,
    %% console output and the parsed log file

    ct:pal("File:~n~s", [FileBin]),

    %% compare Samples to ClusterHandlerHistory
    ?assertEqual([], ClusterHandlerHistory -- Samples, {extra_events, ClusterHandlerHistory, expected, Samples}),
    ?assertEqual([], Samples -- ClusterHandlerHistory, {missing_events, Samples, expected, ClusterHandlerHistory}),

    %% Samples to History
    ?assertEqual([], History -- Samples, {extra_events, History, expected, Samples}),
    ?assertEqual([], Samples -- History, {missing_events, Samples, expected, History}),

    %% flatten + split lines of console output
    NewLine = io_lib:nl(),
    [ConsoleHdr | ConsoleData] = string:split(lists:flatten(Console), NewLine, all),
    [FileHdr | FileData] = string:split(binary_to_list(FileBin), NewLine, all),

    %% compare headers and the first 3 lines of data
    ?assertEqual(ConsoleHdr, FileHdr),
    ?assertEqual(lists:sublist(ConsoleData, 1, 3), lists:sublist(FileData, 1, 3)),

    %% TODO: parse the first 3 lines of file/console output and find those samples
    %?assertEqual([ExpectedHeader | ExpectedData], lists:sublist(FileLines, 1, 4)),
    %?assertEqual([ExpectedHeader | ExpectedData], lists:sublist(ConsoleLines, 1, 4)),
    ok.

handle_update(Sample, [Control]) ->
    Control ! {monitor, Sample},
    [Control].

-define(INTERVAL, 1000).
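
%% Note: handle_update/2 above follows the handler shape this suite relies on:
%% it receives a list of samples plus the handler state, and returns the
%% (possibly updated) state. Here it simply forwards every sample to the test
%% process, where poll_history/2 below accumulates them.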

poll_history(Events, 0) ->
    lists:reverse(Events);
poll_history(Events, Count) ->
    % collect cluster_monitor events too
    receive
        {monitor, Sample} ->
            poll_history(Sample ++ Events, Count - 1)
    after 5000 ->
        erlang:error(timeout)
    end.
--------------------------------------------------------------------------------
/test/erlperf_file_log_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests erlperf_file_log
%%% @end
-module(erlperf_file_log_SUITE).
-author("maximfca@gmail.com").

%% Include stdlib header to enable ?assert() for readable output
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0
]).

%% Test cases
-export([
    file_log/0, file_log/1,
    formatters/0, formatters/1
]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [formatters, file_log].

formatters() ->
    [{doc, "Basic tests for formatters like Ki (Kilo-calls) and Kb (Kilo-bytes)"}].

formatters(Config) when is_list(Config) ->
    ?assertEqual("88", erlperf_file_log:format_size(88)),
    ?assertEqual("88000", erlperf_file_log:format_number(88000)),
    ?assertEqual("881 Mb", erlperf_file_log:format_size(881 * 1024 * 1024)),
    ?assertEqual("881 Mb", erlperf_file_log:format_size(881 * 1024 * 1024)),
    ?assertEqual("123 Gb", erlperf_file_log:format_size(123 * 1024 * 1024 * 1024)),
    % rounding
    ?assertEqual("42", erlperf_file_log:format_number(42)),
    ?assertEqual("432 Ki", erlperf_file_log:format_number(431992)),
    ?assertEqual("333 Mi", erlperf_file_log:format_number(333000000)),
    ?assertEqual("999 Gi", erlperf_file_log:format_number(998500431992)).

file_log() ->
    [{doc, "Tests console and file logging sanity and equality"}].

file_log(Config) when is_list(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, Mon} = erlperf_monitor:start_link(),
    Filename = filename:join(proplists:get_value(priv_dir, Config), "file_log_manual.txt"),
    ok = ct:capture_start(),
    {ok, FileLog} = erlperf_file_log:start_link(Filename),
    {ok, ConsoleLog} = erlperf_file_log:start_link(erlang:group_leader()),
    erlperf:run(timer, sleep, [1]),
    ok = ct:capture_stop(),
    [gen:stop(Srv) || Srv <- [ConsoleLog, FileLog, Mon, Pg]],
    ConsoleLines = ct:capture_get(),
    Console = list_to_binary(lists:concat(ConsoleLines)),
    {ok, Logs} = file:read_file(Filename),
    ?assertEqual(Logs, Console),
    ?assert(length(ConsoleLines) > 3, {"at least a header and 3 samples are expected to be printed", ConsoleLines}),
    %% the header must contain the job pid
    [Hdr, S1, S2, S3 | _] = [string:trim(lists:last(string:lexemes(Line, " "))) || Line <- ConsoleLines],
    ?assert(is_pid(list_to_pid(Hdr)), {not_a_pid, Hdr}),
    Samples = [list_to_integer(S) || S <- [S1, S2, S3]],
    [?assert(Sample > 10 andalso Sample < 1000) || Sample <- Samples].
--------------------------------------------------------------------------------
/test/erlperf_history_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% Smoke tests for erlperf_history.
-module(erlperf_history_SUITE).
-author("maximfca@gmail.com").
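
%% erlperf_history accumulates monitor samples published via the `erlperf` pg
%% scope and discards them after the configured retention time (1 second in
%% the test below).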

%% Common Test headers
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([suite/0, all/0]).

%% Test cases
-export([basic/1]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [basic].

%%--------------------------------------------------------------------
%% TEST CASES

basic(Config) when is_list(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, HistoryServer} = erlperf_history:start_link(1000), %% keep history for 1 second
    %% simulate a number of samples via pg
    Template = #{sched_util => 0.05},
    FutureTime = os:system_time(millisecond) + 500, %% half a second in the future, +100 ms for samples
    Nodes = [node(), 'second@anywhere'],
    Samples = [Template#{time => FutureTime + Seq * 10, node => Node} || Seq <- lists:seq(1, 10), Node <- Nodes],
    [HistoryServer ! S || S <- Samples],
    sys:get_state(HistoryServer),
    %% all of these samples must still be available
    {_Times, FullHistory} = lists:unzip(erlperf_history:get(FutureTime, FutureTime + 1000)),
    ?assertEqual(Samples, FullHistory),
    %% fire cleanup
    timer:sleep(2000),
    HistoryServer ! Template#{time => FutureTime - 1000, node => node()},
    sys:get_state(HistoryServer),
    %% ensure cleanup worked
    ?assertEqual([], erlperf_history:get(FutureTime, FutureTime + 1000)),
    %% ensure get/1,2 work
    gen:stop(HistoryServer),
    gen:stop(Pg).
--------------------------------------------------------------------------------
/test/erlperf_job_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests all combinations of code maps accepted by erlperf_job
%%% @end
-module(erlperf_job_SUITE).
-author("maximfca@gmail.com").

%% Include stdlib header to enable ?assert() for readable output
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0,
    groups/0
]).

%% Runner variants test cases
-export([
    runner_code/1, runner_mfa/1, runner_mfa_list/1, runner_fun/1, runner_mod_fun/1,
    runner_fun1/1, runner_fun2/1, runner_code_fun/1, runner_code_fun1/1,
    runner_code_fun2/1, runner_code_name2/1]).

%% Basic test cases
-export([priority/0, priority/1, overhead/0, overhead/1, module/0, module/1]).

%% internal exports
-export([recv/0, recv/1]).

suite() ->
    [{timetrap, {seconds, 120}}].

groups() ->
    [{variants, [parallel], [runner_code, runner_mfa, runner_mfa_list, runner_fun, runner_mod_fun,
        runner_fun1, runner_fun2, runner_code_fun, runner_code_fun1,
        runner_code_fun2, runner_code_name2]}].

all() ->
    [{group, variants}, priority, overhead, module].

%%--------------------------------------------------------------------
%% Convenience helpers

recv() ->
    recv(check).

recv(check) ->
    receive
        {Ref, ReplyTo} ->
            ReplyTo ! Ref
    end.

%%--------------------------------------------------------------------
%% Runner definitions


%%--------------------------------------------------------------------
%% TEST CASES

priority() ->
    [{doc, "Tests job controller priority setting"}].
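
%% Minimal usage sketch of the API exercised below (illustrative only):
%%   {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [1]}}),
%%   high = erlperf_job:set_priority(Job, max), %% returns the previous priority
%%   ok = erlperf_job:set_concurrency(Job, 1).  %% priority boost applies while workers run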

priority(Config) when is_list(Config) ->
    {ok, Job} = erlperf_job:start_link(#{runner => {?MODULE, recv, []}}),
    high = erlperf_job:set_priority(Job, max),
    ok = erlperf_job:set_concurrency(Job, 1),
    {priority, max} = erlang:process_info(Job, priority),
    ok = erlperf_job:set_concurrency(Job, 0),
    {priority, normal} = erlang:process_info(Job, priority),
    gen:stop(Job).

overhead() ->
    [{doc, "Compares timed and continuous mode, may be failing sporadically"}].

overhead(Config) when is_list(Config) ->
    SampleCount = 10000000,
    %% must use code (it's the fastest method); cannot use sleep (imprecise and slow),
    %% and cannot rely on message passing because its timing cannot be controlled
    {ok, Job} = erlperf_job:start_link(#{runner => "rand:uniform(1000)."}),
    Sampler = erlperf_job:handle(Job),
    TimeUs = erlperf_job:measure(Job, SampleCount),
    %% measure the same thing now with a continuous benchmark
    ok = erlperf_job:set_concurrency(Job, 1),
    %% fetch a sample, sleeping ~ the same time the timed run took
    Start = erlperf_job:sample(Sampler),
    timer:sleep(TimeUs div 1000 + 1),
    Finish = erlperf_job:sample(Sampler),
    gen:stop(Job),
    ContinuousQPS = Finish - Start,
    Effy = ContinuousQPS * 100 div SampleCount,
    ct:pal("Continuous benchmarking efficiency: ~b% (~b time for ~b, ~b continuous)~n",
        [Effy, TimeUs, SampleCount, ContinuousQPS]),
    ?assert(Effy > 50, {efficiency, Effy}).

module() ->
    [{doc, "Tests that the generated module gets unloaded after the job stops"}].

module(Config) when is_list(Config) ->
    sys:module_info(), %% just in case it wasn't loaded yet
    PreJob = code:all_loaded(),
    {ok, Job} = erlperf_job:start_link(#{runner => "ok."}),
    InJob = code:all_loaded() -- PreJob,
    gen:stop(Job),
    PostJob = code:all_loaded(),
    ?assertEqual([], PostJob -- PreJob),
    ?assert(length(InJob) == 1, InJob).

%%--------------------------------------------------------------------
%% Code map variations

%% The code below is a simple hack to run the variants with some parallelism.
%% The original code just had RunnerVariants as a list comprehension.

runner_code(Config) when is_list(Config) ->
    variants("erlperf_job_SUITE:recv().", ?FUNCTION_NAME).

runner_mfa(Config) when is_list(Config) ->
    variants({?MODULE, recv, [check]}, ?FUNCTION_NAME).

runner_mfa_list(Config) when is_list(Config) ->
    variants([{?MODULE, recv, [check]}, {erlang, unique_integer, []}], ?FUNCTION_NAME).

runner_fun(Config) when is_list(Config) ->
    variants(fun () -> recv(check) end, ?FUNCTION_NAME).

runner_mod_fun(Config) when is_list(Config) ->
    variants(fun ?MODULE:recv/0, ?FUNCTION_NAME).

runner_fun1(Config) when is_list(Config) ->
    variants(fun (1) -> recv(check), 1 end, ?FUNCTION_NAME).

runner_fun2(Config) when is_list(Config) ->
    variants(fun (1, 1) -> recv(check), 1 end, ?FUNCTION_NAME).

runner_code_fun(Config) when is_list(Config) ->
    variants("runner() -> erlperf_job_SUITE:recv(check).", ?FUNCTION_NAME).

runner_code_fun1(Config) when is_list(Config) ->
    variants("runner(1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).

runner_code_fun2(Config) when is_list(Config) ->
    variants("runner(1, 1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).
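
%% Together with runner_code_name2/1 below, the cases above cover every runner
%% form accepted by erlperf_job: a code string, an MFA tuple, a list of MFA
%% tuples, funs of arity 0..2, and source code with an explicitly named head.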

runner_code_name2(Config) when is_list(Config) ->
    variants("baz(1, 1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).

variants(Runner, ProcName) ->
    ProcStr = atom_to_list(ProcName),
    Sep = io_lib:format("~n ", []),
    %% register this process so that init/init_runner/done can send messages to it
    register(ProcName, self()),
    %%
    InitVariants = [
        undefined,
        {erlang, send, [ProcName, init]},
        fun () -> erlang:send(ProcName, {init, self()}) end,
        lists:concat(["erlang:send(" ++ ProcStr ++ ", {init, self()})."]),
        lists:concat(["init() -> erlang:send(" ++ ProcStr ++ ", {init, self()})."]),
        lists:concat(["foo() -> erlang:send(" ++ ProcStr ++ ", {init, self()})."])
    ],
    InitRunnerVariants = [
        undefined,
        {erlang, send, [ProcName, 1]},
        fun () -> erlang:send(ProcName, 1) end,
        fun (_) -> erlang:send(ProcName, 1) end,
        "erlang:send(" ++ ProcStr ++ ", 1).",
        lists:concat(["init_runner() ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."]),
        lists:concat(["init_runner(_) ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."]),
        lists:concat(["bar(_) ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."])
    ],
    DoneVariants = [
        undefined,
        {erlang, send, [ProcName, done]},
        fun () -> erlang:send(ProcName, done) end,
        fun (_) -> erlang:send(ProcName, done) end,
        lists:concat(["erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["done() -> erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["done(_) -> erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["buzz(_) -> erlang:send(" ++ ProcStr ++ ", done)."])
    ],
    %% try all variants
    Variants = [#{init => Init, init_runner => InitRunner, runner => Runner, done => Done}
        || Init <- InitVariants, InitRunner <- InitRunnerVariants, Done <- DoneVariants],
    %% filter "undefined" entries from the map
    Maps = [maps:filter(fun (_Key, Value) -> Value =/= undefined end, Variant)
        || Variant <- Variants],

    %% generate code for each variant and measure performance
    [measure_variant(Variant) || Variant <- Maps].

measure_variant(Code) ->
    try
        {ok, Job} = erlperf_job:start_link(Code),
        Handle = erlperf_job:handle(Job),
        %% wait for the "init" function to complete; when possible, ensure the message was sent from the job process
        is_map_key(init, Code) andalso
            receive
                InitResult ->
                    ?assert((InitResult =:= init) orelse (InitResult =:= {init, Job}), {bad_init_result, InitResult})
            after 1000 -> throw({init, timeout})
            end,
        %% ensure it does not crash attempting to do a single measurement,
        %% basic sanity check that timed mode returns time > 0
        TimeUs = measure_timed(Job, is_map_key(init_runner, Code)),
        ?assert(TimeUs > 1, {timed_mode_too_fast, TimeUs}),
        %%
        ok = erlperf_job:set_concurrency(Job, 1),
        %% wait for 1 worker to get started
        is_map_key(init_runner, Code) andalso expect_message(1, init_runner),
        %% whitebox...
        {erlperf_job_state, _, _, [Worker], _, _, _} = sys:get_state(Job),
        %% by now, the function may have been _called_ once (but not yet returned)
        Before = erlperf_job:sample(Handle),
        ?assert(Before =:= 0 orelse Before =:= 1, {unexpected_sample, Before}),
        BumpCount = 50,
        %% do exactly BumpCount iterations
        [Worker ! {Seq, self()} || Seq <- lists:seq(1, BumpCount)],
        %% receive BumpCount replies
        [receive Seq -> ok end || Seq <- lists:seq(1, BumpCount)],
        %% by now, the extra 50 calls have happened
        After = erlperf_job:sample(Handle),
        ?assert(After >= BumpCount, {unexpected_after, After}),

        %% stop the job
        gen:stop(Job),
        is_map_key(done, Code) andalso expect_message(done, done),
        %% must not have anything left in the message queue
        receive
            Unexpected ->
                ?assert(false, {unexpected_message, Unexpected})
        after 0 -> ok
        end
    catch error:{generate, {What, Arity, requires, Dependency}} ->
        %% verify this combination is indeed invalid
        ?assertNot(is_map_key(Dependency, Code)),
        ?assert((What =:= init_runner andalso Arity =:= 1) orelse (What =:= runner andalso Arity > 0)
            orelse (What =:= done andalso Arity =:= 1))
        %% io:format(user, "Invalid combination: ~s/~b requires ~s~n~n", [What, Arity, Dependency])
    end.

expect_message(Expect, Operation) ->
    receive
        Message ->
            ?assertEqual(Expect, Message, {Operation, Message})
    after
        1000 ->
            throw({Operation, timeout})
    end.

measure_timed(Job, InitRunnerPresent) ->
    Iterations = 10,
    Control = self(),
    spawn_link(
        fun () ->
            TimeUs = erlperf_job:measure(Job, Iterations),
            Control ! {time, TimeUs}
        end),

    %% timed mode starts exactly 1 worker
    InitRunnerPresent andalso expect_message(1, init_runner),
    Worker = find_timed_worker(Job),

    %% send exactly Iterations messages
    [Worker ! {Seq, self()} || Seq <- lists:seq(1, Iterations)],
    [receive Seq -> ok end || Seq <- lists:seq(1, Iterations)],
    receive
        {time, Time} ->
            Time
    end.

find_timed_worker(Job) ->
    {erlperf_job_state, _, _, _, TimedWorkers, _, _} = sys:get_state(Job),
    case map_size(TimedWorkers) of
        1 -> hd(maps:keys(TimedWorkers));
        0 -> timer:sleep(1), find_timed_worker(Job)
    end.
--------------------------------------------------------------------------------
/test/erlperf_monitor_SUITE.erl:
--------------------------------------------------------------------------------
%%%-------------------------------------------------------------------
%%% @author Maxim Fedorov
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests monitor
%%% @end

-module(erlperf_monitor_SUITE).

-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0
]).

%% Test cases
-export([
    subscribe/0, subscribe/1
]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [subscribe].

%%--------------------------------------------------------------------
%% TEST CASES

subscribe() ->
    [{doc, "Tests monitoring subscription"}].
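
%% Subscription works through pg: joining the {erlperf_monitor, node()} group
%% in the erlperf scope delivers one sample map per interval, e.g. (shape
%% illustrative, values not from a real run):
%%   #{jobs => [{JobPid, IterationsDone}], sched_util => 0.1, ...}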

subscribe(_Config) ->
    {ok, Mon} = erlperf_sup:start_link(), %% instead of starting the app
    ok = pg:join(erlperf, {erlperf_monitor, node()}, self()),
    % start a benchmark job and watch it run with 4 concurrent workers
    {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [10]}}),
    ok = erlperf_job:set_concurrency(Job, 4),
    ?assertEqual(4, erlperf_job:concurrency(Job)),
    % collect 2 update samples (roughly 2 seconds)
    First = receive_updates(Job, 0, 2),
    ok = erlperf_job:set_concurrency(Job, 2),
    Second = receive_updates(Job, 0, 1),
    ok = gen_server:stop(Job),
    ?assert(First > 0),
    ?assert(Second > 0),
    ?assert(First > Second),
    pg:leave(erlperf, {erlperf_monitor, node()}, self()),
    gen:stop(Mon).

receive_updates(_, Total, 0) ->
    Total;
receive_updates(Job, Total, Count) ->
    receive
        #{jobs := [{Job, Cycles}]} ->
            receive_updates(Job, Total + Cycles, Count - 1);
        Other ->
            ?assertEqual([], Other)
    end.
--------------------------------------------------------------------------------