├── .github └── workflows │ └── erlang.yml ├── .gitignore ├── .gitlab-ci.yml ├── CHANGELOG.md ├── CLI.md ├── DETAILS.md ├── LICENSE.md ├── README.md ├── rebar.config ├── rebar.lock ├── src ├── erlperf.app.src ├── erlperf.erl ├── erlperf_app.erl ├── erlperf_cli.erl ├── erlperf_cluster_monitor.erl ├── erlperf_file_log.erl ├── erlperf_history.erl ├── erlperf_job.erl ├── erlperf_job_sup.erl ├── erlperf_monitor.erl └── erlperf_sup.erl └── test ├── erlperf_SUITE.erl ├── erlperf_cli_SUITE.erl ├── erlperf_cluster_monitor_SUITE.erl ├── erlperf_file_log_SUITE.erl ├── erlperf_history_SUITE.erl ├── erlperf_job_SUITE.erl └── erlperf_monitor_SUITE.erl /.github/workflows/erlang.yml: -------------------------------------------------------------------------------- 1 | name: Build, Test, Dialyze 2 | 3 | on: 4 | pull_request: 5 | types: [ opened, reopened, synchronize ] 6 | push: 7 | branches: 8 | - 'master' 9 | 10 | jobs: 11 | linux: 12 | name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | 15 | strategy: 16 | matrix: 17 | otp_version: [23, 24, 25, 26, 27] 18 | os: [ubuntu-latest] 19 | 20 | container: 21 | image: erlang:${{ matrix.otp_version }} 22 | 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Run tests 26 | run: rebar3 ct 27 | - name: Documentation 28 | run: rebar3 edoc 29 | - name: ExDoc Documentation 30 | run: if [ $(rebar3 version | awk '{print $5}') -gt 23 ]; then rebar3 ex_doc; fi; 31 | - shell: bash 32 | name: Dialyzer 33 | run: rebar3 dialyzer 34 | - shell: bash 35 | name: Escriptize 36 | run: rebar3 as prod escriptize 37 | - shell: bash 38 | name: Smoke test 39 | run: ./erlperf 'timer:sleep(1).' 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _* 2 | .idea 3 | *.iml 4 | *~ 5 | erlperf 6 | doc -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - test 3 | - deploy 4 | 5 | test-default-docker: 6 | tags: 7 | - linux 8 | - x86_64 9 | image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/erlang:latest 10 | stage: test 11 | script: 12 | - rebar3 compile 13 | - rebar3 edoc 14 | - rebar3 dialyzer 15 | - rebar3 ct 16 | - rebar3 as prod escriptize 17 | - ./erlperf 'timer:sleep(1).' 
18 | after_script: 19 | - mv "_build/test/logs" ./public 20 | artifacts: 21 | when: always 22 | paths: 23 | - "./public" 24 | expire_in: 3 days 25 | reports: 26 | junit: 27 | - "./public/last/junit_report.xml" 28 | 29 | # Pages: publishing Common Test results 30 | pages: 31 | stage: deploy 32 | needs: 33 | - test-default-docker 34 | script: 35 | - echo "Uploading to pages" 36 | artifacts: 37 | paths: 38 | - public 39 | rules: 40 | - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.3.0 4 | - added warning for non-optimised ERTS build running the benchmark 5 | - fixed output for continuous mode when samples are zero 6 | - added `step` for quicker concurrency estimation mode (@mkuratczyk) 7 | 8 | ## 2.2.2 9 | - added generated source code output in verbose mode 10 | 11 | ## 2.2.1 12 | - tested with OTP 26 and 27 13 | - updated to argparse 2.0.0 14 | 15 | ## 2.2.0 16 | - added extended and full reporting capabilities 17 | - implemented additional statistics (standard deviation, median, p99) 18 | - exported formatting APIs to allow escript-based benchmarks 19 | - improved documentation, switched from edoc to ex_doc 20 | - added convenience functions and defaults to monitor, file_log, cluster_monitor and history 21 | - fixed cluster monitor output for multi-node configurations 22 | - breaking change: consolidated monitor sample structure for cluster and local process groups 23 | - fixed history store 24 | - refined types for better Dialyzer analysis 25 | 26 | ## 2.1.0 27 | - fixed -w (--warmup) argument missing from command line 28 | - synchronised worker startup when adding concurrency 29 | - concurrent worker shutdown when reducing concurrency 30 | - elevated job & benchmark process priority to avoid result skew 31 | - implemented scheduling problem detection (e.g. lock contention), 32 | added a busy loop method workaround 33 | 34 | ## 2.0.2 35 | - added convenience command line options: init_all, done_all, init_runner_all 36 | 37 | ## 2.0.1 38 | - minor bugfixes (friendlier error reporting) 39 | 40 | ## 2.0 41 | - incompatible change: `erlperf` requires runner arity to be defined explicitly. 42 | Code example: `erlperf:run(#{runner => {timer, sleep, []}, init_runner => "1."})`, 43 | with `erlperf` making a guess that `init_runner` is defined, therefore its return 44 | value can be passed as the argument to `timer:sleep/1`. This behaviour was confusing 45 | and is no longer supported. 
46 | - incompatible change: crashed runner causes entire job to stop (error contains the
47 |   reason and stack trace)
48 | - incompatible change: removed fprof/profiling support in favour of JIT + `perf`
49 | - `erlperf` application is no longer required to be started for one-off benchmark runs
50 | 
51 | ## 1.1.5:
52 | - support for OTP 25 (peer replacing slave)
53 | 
54 | ## 1.1.4:
55 | - fixed an issue with pg already started
56 | - moved profiling to spawned process
57 | 
58 | ## 1.1.3:
59 | - addressed deprecation, updated to argparse 1.1.4
60 | 
61 | ## 1.1.2:
62 | - updated command line parser to new argparse
63 | 
64 | ## 1.1.1:
65 | - added support for OTP 24
66 | - added edoc documentation
67 | 
68 | ## 1.0.0:
69 | - initial release
70 | 
--------------------------------------------------------------------------------
/CLI.md:
--------------------------------------------------------------------------------
 1 | # Command Line
 2 | Run `erlperf` with no arguments to get command line usage.
 3 | 
 4 | ## Synopsis
 5 | 
 6 | ```bash
 7 | erlperf [FLAG] runner [INIT] [INIT_RUNNER] [DONE] [runner...]
 8 | ```
 9 | 
10 | ## Flags
11 | 
12 | | Short | Long              | Description |
13 | |-------|-------------------|-------------|
14 | | -c    | --concurrency     | Specifies the number of workers per job. Allowed only in continuous mode |
15 | |       | --cv              | Coefficient of variation. Accepted in continuous and concurrency estimation modes. Benchmark keeps running until the standard deviation is below the specified number |
16 | | -i    | --isolation       | Requests to run every benchmark in a separate Erlang VM for isolation purposes |
17 | | -s    | --samples         | Number of samples to take. Defaults to 1 for timed mode, 3 for continuous and concurrency estimation |
18 | | -d    | --sample_duration | Sample duration, in milliseconds, for continuous and concurrency estimation modes |
19 | | -l    | --loop            | Sample duration (iterations) for the timed mode. Engages timed mode when specified |
20 | |       | --max             | Maximum number of workers allowed in the concurrency estimation mode |
21 | |       | --min             | Starting number of workers in concurrency estimation mode |
22 | | -pa   |                   | Adds an extra code path to the Erlang VM. Useful for benchmarking *.beam files on your filesystem |
23 | | -r    | --report          | Requests `basic`, `extended` or `full` report. Defaults to `basic` when less than 10 samples are requested, and `extended` for 10 and more |
24 | | -q    | --squeeze         | Engages concurrency estimation mode |
25 | | -t    | --threshold       | Sets the number of extra workers to try in concurrency estimation mode before concluding the test |
26 | | -v    | --verbose         | Turns on verbose mode, including generated source code, VM statistics and performance of continuous jobs |
27 | | -w    | --warmup          | Number of samples to run and discard before starting the measurement |
28 | 
29 | ## Benchmark code
30 | At least one runner code is required. Specify multiple runner codes to perform
31 | a comparison run.
32 | 
33 | Initialisation and cleanup definitions are read in the same order as runner codes. Example:
34 | ```bash
35 | # the first runner receives 1 as input, the second receives 2
36 | erlperf --init_runner '1.' 'run(1) -> ok.' 'run(2) -> ok.' --init_runner '2.'
37 | # this run fails with function_clause, because the first runner receives 2, and the second receives 1
38 | erlperf --init_runner '2.' 'run(1) -> ok.' 'run(2) -> ok.' --init_runner '1.'
39 | ```
40 | 
41 | |                   | Description |
42 | |-------------------|-------------|
43 | | --init            | Job initialisation code, see accepted callable formats below |
44 | | --init_runner     | Worker initialisation code |
45 | | --done            | Job cleanup code |
46 | | --label           | Runner label |
47 | |                   | |
48 | | --init_all        | Default init code for all runners that do not have a specific code |
49 | | --init_runner_all | Default init_runner code |
50 | | --done_all        | Default done code |
51 | 
52 | Accepted callable formats:
53 | * valid Erlang code: `timer:sleep(1).`
54 | * valid Erlang function: `run() -> timer:sleep(1).`
55 | * function with arguments: `run(X) -> timer:sleep(X).` or `run(X, Y) -> timer:sleep(X), Y.`
56 | * tuple with module, function name and arguments: `{timer, sleep, [1]}`
57 | * file name with call chain recording: `record.trace`. **deprecated**, do not use
--------------------------------------------------------------------------------
/DETAILS.md:
--------------------------------------------------------------------------------
 1 | # Implementation details
 2 | 
 3 | Starting with 2.0, `erlperf` uses call counting for continuous benchmarking purposes. This allows
 4 | the tightest possible loop without extra runtime calls. Running
 5 | `erlperf 'rand:uniform().' --init '1.' --done '2.' --init_runner '3.'` results in creating,
 6 | compiling and loading a module with this source code:
 7 | 
 8 | ```erlang
 9 | -module(unique_name).
10 | -export([init/0, init_runner/0, done/0, run/0]).
11 | 
12 | init() ->
13 |     1.
14 | 
15 | init_runner() ->
16 |     3.
17 | 
18 | done() ->
19 |     2.
20 | 
21 | run() ->
22 |     runner(),
23 |     run().
24 | 
25 | runner() ->
26 |     rand:uniform().
27 | ```
28 | 
29 | Number of `run/0` calls per second is reported as throughput. Before 2.0, `erlperf`
30 | used `atomics` to maintain a counter shared between all runner processes, introducing
31 | unnecessary BIF call overhead.
32 | 
33 | Timed (low-overhead) mode tightens it even further, turning the runner into this function:
34 | ```erlang
35 | runner(0) ->
36 |     ok;
37 | runner(Count) ->
38 |     rand:uniform(),
39 |     runner(Count - 1).
40 | ```
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019-2021 Maxim Fedorov
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of the copyright holder nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
19 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 | PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 27 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # erlperf 2 | 3 | [![Build Status](https://github.com/max-au/erlperf/actions/workflows/erlang.yml/badge.svg?branch=master)](https://github.com/max-au/erlperf/actions) [![Hex.pm](https://img.shields.io/hexpm/v/erlperf.svg)](https://hex.pm/packages/erlperf) [![Hex Docs](https://img.shields.io/badge/hex-docs-blue.svg)](https://hexdocs.pm/erlperf) 4 | 5 | Erlang Performance & Benchmarking Suite. 6 | Simple way to say "this code is faster than that one". See [CLI reference](CLI.md) 7 | and detailed API reference for `erlperf` and `erlperf_job` modules. 8 | 9 | Build (tested with OTP 23-27): 10 | 11 | ```bash 12 | $ rebar3 as prod escriptize 13 | ``` 14 | 15 | ## Quick start: command line 16 | Beware of the shell escaping your code in an unpredictable way! 17 | 18 | 1. Run a single process iterating `rand:uniform()` in a tight loop for 3 seconds, 19 | printing **average iterations per second** (~17 millions) and an average time 20 | to run a single iteration (57 ns). 21 | 22 | ```bash 23 | $ ./erlperf 'rand:uniform().' 24 | Code || QPS Time 25 | rand:uniform(). 1 17266 Ki 57 ns 26 | ``` 27 | 28 | 2. Run four processes doing this same concurrently. 29 | 30 | ```bash 31 | $ ./erlperf 'rand:uniform().' -c 4 32 | Code || QPS Time 33 | rand:uniform(). 4 53893 Ki 74 ns 34 | ``` 35 | 36 | 3. Benchmark `rand:uniform()` vs `crypto:strong_rand_bytes/1` for 10 seconds, adding 37 | an extra second to warm up the algorithms. 38 | 39 | ```bash 40 | $ ./erlperf 'rand:uniform().' 'crypto:strong_rand_bytes(2).' --samples 10 --warmup 1 41 | Code || Samples Avg StdDev Median P99 Iteration Rel 42 | rand:uniform(). 1 10 16611 Ki 0.20% 16614 Ki 16664 Ki 60 ns 100% 43 | crypto:strong_rand_bytes(2). 1 10 1804 Ki 0.79% 1797 Ki 1829 Ki 554 ns 11% 44 | ``` 45 | 46 | 4. Run a function passing the state into the next iteration. This code demonstrates performance difference 47 | between `rand:uniform_s/1` with state passed explicitly, and `rand:uniform/1` reading state from the process 48 | dictionary. 49 | 50 | ```bash 51 | $ ./erlperf 'r(_, S) -> {_, N} = rand:uniform_s(S), N.' --init_runner 'rand:seed(exsss).' \ 52 | 'r() -> rand:uniform().' 53 | Code || QPS Time Rel 54 | r(_, S) -> {_, N} = rand:uniform_s(S), N. 1 26180 Ki 38 ns 100% 55 | r() -> rand:uniform(). 1 16958 Ki 58 ns 65% 56 | ``` 57 | 58 | 5. Estimate `./erlperf 'application_controller:is_running(kernel).` concurrency characteristics. This function 59 | is implemented as `gen_server:call`, and all calculations are done in a single process. It is still 60 | possible to squeeze a bit more from a single process by putting work into the queue from multiple runners. 61 | 62 | ```bash 63 | $ ./erlperf 'application_controller:is_running(kernel).' --squeeze 64 | Code || QPS Time 65 | application_controller:is_running(kernel). 
3 1189 Ki 2524 ns 66 | 67 | 68 | 69 | $ ./erlperf 'persistent_term:put(atom, "string").' -q 70 | Code || QPS Time 71 | persistent_term:put(atom, "string"). 1 8882 Ki 112 ns 72 | ``` 73 | 74 | 6. Start a server (`pg` scope in this example), use it in benchmark, and shut down after. 75 | 76 | ```bash 77 | $ ./erlperf 'pg:join(scope, group, self()), pg:leave(scope, group, self()).' \ 78 | --init 'pg:start_link(scope).' --done 'gen_server:stop(scope).' 79 | Code || QPS Time 80 | pg:join(scope, group, self()), pg:leave(scope, group, self()). 1 336 Ki 2976 ns 81 | ``` 82 | 83 | 7. Run the same code with different arguments, returned from `init_runner` function. Note the trick 84 | of adding extra spaces in the source code to know which code is where. 85 | 86 | ```bash 87 | $ ./erlperf 'runner(X) -> timer:sleep(X).' --init_runner '1.' \ 88 | ' runner(X) -> timer:sleep(X).' --init_runner '2.' 89 | Code || QPS Time Rel 90 | runner(X) -> timer:sleep(X). 1 500 2001 us 100% 91 | runner(X) -> timer:sleep(X). 1 333 3001 us 67% 92 | ``` 93 | 94 | 8. Determine how many times a process can join/leave pg2 group on a single node (requires OTP 23 95 | or older, as pg2 is removed in later versions). 96 | 97 | ```bash 98 | $ ./erlperf 'ok = pg2:join(g, self()), ok = pg2:leave(g, self()).' --init 'pg2:create(g).' 99 | Code || QPS Time 100 | ok = pg2:join(g, self()), ok = pg2:leave(g, self()). 1 64021 15619 ns 101 | ``` 102 | 103 | 9. Compare `pg` with `pg2` running in a 3-node cluster. Note the `-i` argument spawning an isolated 104 | extra Erlang VM for each benchmark. 105 | 106 | ```bash 107 | ./erlperf 'ok = pg2:join(g, self()), ok = pg2:leave(g, self()).' --init 'pg2:create(g).' \ 108 | 'ok = pg:join(g, self()), ok = pg:leave(g, self()).' --init 'pg:start(pg).' -i 109 | Code || QPS Time Rel 110 | ok = pg:join(g, self()), ok = pg:leave(g, self()). 1 241 Ki 4147 ns 100% 111 | ok = pg2:join(g, self()), ok = pg2:leave(g, self()). 1 1415 707 us 0% 112 | ``` 113 | 114 | 10. Watch the progress of your test running (`-v` option) with extra information: scheduler utilisation, dirty CPU & IO 115 | schedulers, number of running processes, ports, ETS tables, and memory consumption. Last column is the job throughput. 116 | When there are multiple jobs, multiple columns are printed. Test will continue until adding 8 more workers (`-t 8`) 117 | does not increase total throughput. 118 | 119 | ```bash 120 | $ ./erlperf 'rand:uniform().' -q -v -t 8 121 | 122 | YYYY-MM-DDTHH:MM:SS-oo:oo Sched DCPU DIO Procs Ports ETS Mem Total Mem Proc Mem Bin Mem ETS <0.84.0> 123 | 2023-01-22T11:02:51-08:00 6.12 0.00 0.20 46 2 21 24737 Kb 4703 Kb 191 Kb 471 Kb 14798 Ki 124 | 2023-01-22T11:02:52-08:00 6.31 0.00 0.00 46 2 21 25105 Kb 5565 Kb 218 Kb 472 Kb 16720 Ki 125 | 2023-01-22T11:02:53-08:00 6.26 0.00 0.00 46 2 21 25501 Kb 5427 Kb 218 Kb 472 Kb 16715 Ki 126 | <...> 127 | 2023-01-22T11:03:37-08:00 100.00 0.00 0.00 61 2 21 25874 Kb 5696 Kb 221 Kb 472 Kb 55235 Ki 128 | 2023-01-22T11:03:38-08:00 100.00 0.00 0.00 61 2 21 25955 Kb 5565 Kb 218 Kb 472 Kb 55139 Ki 129 | Code || QPS Time 130 | rand:uniform(). 8 61547 Ki 130 ns 131 | 132 | ``` 133 | 134 | ## Benchmark 135 | Running benchmark is called a **job**, see `erlperf_job` for detailed description. 136 | Every job has a controller process, responsible for starting and stopping worker 137 | processes, or **workers**. Worker processes execute **runner** function in a tight 138 | loop, incrementing **iteration** counter. 
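The following sketch shows the same job/worker/iteration flow from the Erlang shell. It is hedged to use only `erlperf_job` calls that appear elsewhere in this repository (`start/1`, `handle/1`, `set_concurrency/2`, `sample/1`, `request_stop/1`):

```erlang
%% start a job, attach two workers iterating rand:uniform() in a tight loop
{ok, Job} = erlperf_job:start(#{runner => {rand, uniform, []}}),
Handle = erlperf_job:handle(Job),
ok = erlperf_job:set_concurrency(Job, 2),
%% the iteration counter is cumulative: one sample is the difference
%% between two reads taken one sample_duration apart
Before = erlperf_job:sample(Handle),
timer:sleep(1000),
Sample = erlperf_job:sample(Handle) - Before,
erlperf_job:request_stop(Job).
```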
139 | 140 | Benchmark runs either for a specified amount of time (**sample duration** in 141 | continuous mode), or until requested number of iterations is made (timed mode). 142 | Resulting **sample** is total number of *iterations* for all workers, or elapsed time 143 | it took in timed mode. 144 | 145 | The process repeats until the specified amount of *samples* is collected, producing 146 | a **report** (see details below). 147 | 148 | For comparison convenience, basic reports contain **QPS** - historical metric 149 | from the original implementation (designed for network service throughput assessment). 150 | It is approximate amount of *runner iterations per sample_duration achieved by all workers 151 | of the job*. Given that default duration is 1 second, *QPS* is a good proxy for 152 | the total job throughput. 153 | 154 | Single worker performance can be estimated using **time** metric. It can also be 155 | considered as function latency - how long it takes on average to execute a 156 | single *iteration* of a *runner*. 157 | 158 | ### Benchmark definition 159 | A benchmark may define following functions: 160 | * **runner**: code that is executed in the tight loop 161 | * **init** (optional): executed once when the job starts 162 | * **done** (optional): executed once when the job is about to stop 163 | * **init_runner** (optional): executed on every worker process startup 164 | * **label** (optional): string that overrides the code value in reports 165 | 166 | See `erlperf_job` for the detailed reference and ways to define a function (**callable**). 167 | 168 | Note that different ways to call a function have different performance characteristics: 169 | 170 | ```bash 171 | $ ./erlperf '{rand, uniform, []}' 'rand:uniform().' -l 10M 172 | Code || QPS Time Rel 173 | rand:uniform(). 1 18519 Ki 54 ns 100% 174 | {rand,uniform,[]} 1 16667 Ki 60 ns 90% 175 | ``` 176 | 177 | This difference may get more pronounced depending on ERTS version and *runner* code: 178 | 179 | ```erlang 180 | (erlperf@max-au)7> erlperf:benchmark([ 181 | #{runner => "runner(X) -> is_float(X).", init_runner=>"2."}, 182 | #{runner => {erlang, is_float, [2]}}, 183 | #{runner => fun (X) -> is_float(X) end, init_runner => "2."}], 184 | #{}, undefined). 185 | [105824351,66424280,5057372] 186 | ``` 187 | 188 | It is caused by the ERTS: running compiled code (first variant) with OTP 25 is 189 | two times faster than applying a function, and 20 times faster than repeatedly 190 | calling anonymous `fun`. Use the same invocation method to get a relevant result. 191 | 192 | Absolute benchmarking overhead may be significant for very fast functions taking just a few nanoseconds. 193 | Use timed mode for such occasions. 194 | 195 | ### Run options 196 | See `erlperf` module documentation and [command line reference](CLI.md) for all available options. 197 | 198 | ## Benchmarking modes 199 | 200 | ### Continuous mode 201 | Benchmarking is done by counting number of *runner* iterations done over 202 | a specified period of time (**sample_duration**). 203 | 204 | Two examples below demonstrate the effect caused by changing *sample_duration*. 205 | First run takes 20 samples (`-s 20`) with 100 ms duration. Second invocation 206 | takes the same 20 sample, but with 200 ms duration (`-d 200`). Note that all metrics, 207 | except a single *iteration* time, doubled. 208 | 209 | ```bash 210 | $ ./erlperf 'rand:uniform().' -d 100 -s 20 211 | Code || Samples Avg StdDev Median P99 Iteration 212 | rand:uniform(). 
1 20 1647 Ki 0.39% 1648 Ki 1660 Ki 60 ns 213 | $ ./erlperf 'rand:uniform().' -d 200 -s 20 214 | Code || Samples Avg StdDev Median P99 Iteration 215 | rand:uniform(). 1 20 3354 Ki 0.16% 3354 Ki 3368 Ki 59 ns 216 | ``` 217 | 218 | ### Timed mode 219 | In this mode *runner* code is executed for *sample_duration* iterations for every *sample*. 220 | Report contains average/median/p99 *time* it takes to produce a single sample. In the 221 | example below, it takes an average of 554 ms to make 10 million calls to `rand:uniform()`. 222 | 223 | ```bash 224 | $ ./erlperf 'rand:uniform().' 'rand:uniform(1000).' -l 10M -s 20 225 | Code || Samples Avg StdDev Median P99 Iteration Rel 226 | rand:uniform(1000). 1 20 554 ms 0.37% 554 ms 563 ms 55 ns 100% 227 | rand:uniform(). 1 20 560 ms 0.60% 560 ms 564 ms 55 ns 99% 228 | ``` 229 | 230 | Effectively, this example runs following code: `loop(0) -> ok; loop(Count) -> rand:uniform(), loop(Count - 1).` 231 | Timed mode has slightly less overhead compared to continuous mode. 232 | 233 | Timed mode does not support `--concurrency` setting, using only 234 | one process. However, it does support comparison run with multiple concurrent jobs. 235 | 236 | ### Concurrency estimation mode 237 | In this mode `erlperf` performs multiple continuous benchmarks with 238 | increasing concurrency. The test concludes when increasing worker 239 | count does not result in increase of the total throughput. Report 240 | contains statistics of the most successful run. 241 | 242 | This mode can also be used to detect bottlenecks, e.g. lock contention, single 243 | `gen_server` processes, or VM-wide shared resources (`persistent_term`s). 244 | Example (with maximum concurrency limited to 50): 245 | 246 | ```bash 247 | $ ./erlperf '{code, is_loaded, [local_udp]}' -w 1 --max 50 -q 248 | Code || QPS Time 249 | {code,is_loaded,[local_udp]} 6 1665 Ki 3604 ns 250 | ``` 251 | 252 | Same in the Erlang shell: 253 | 254 | ```erlang 255 | > erlperf:run({code, is_loaded, [local_udp]}, #{warmup => 1}, #{max => 50}). 256 | {1676758,6} 257 | ``` 258 | 259 | In this example, 6 concurrent processes were able to squeeze 1676758 calls per second 260 | for `code:is_loaded(local_udp)`. In current OTP version `code:is_loaded` is implemented 261 | as a `gen_server:call` to a single process (`code_server`), that limits potential 262 | performance. 263 | 264 | See `erlperf_job` for the detailed description of different benchmarking modes. 265 | 266 | ## Reports 267 | Historically `erlperf` had only the basic reporting available for command line 268 | usage. Since 2.2 it is possible to request additional information. 269 | 270 | ### Basic report 271 | This is the default report form when less than 10 samples were collected. 272 | Use `-r basic` to force basic reports with 10 and more samples. 273 | 274 | Basic report contains following columns: 275 | * **Code**: Erlang code or label supplied to the benchmark 276 | * **||**: how many concurrent processes were running. In the timed mode, it is always 1. In the concurrency 277 | estimation mode, the number that achieved the highest total throughput (QPS) 278 | * **QPS**: average number of runner code *iterations* (throughput). Measure per single *sample_duration* 279 | in the continuous mode. In the timed mode, calculated with the assumption that *sample_duration* is 1 second 280 | * **Time**: single runner iteration time 281 | * **Rel**: relative performance of this code, compared to others. Printed only when more than 282 | one runner is specified. 
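Since *QPS* counts iterations across all workers per *sample_duration*, the *Time* column can be re-derived from it. A quick sanity check for the quick-start numbers (a sketch assuming the default 1-second sample duration):

```erlang
%% Time ≈ Concurrency * SampleDuration / QPS
%% quick start example 1: one worker, 17266 Ki iterations per second
(1 * 1.0e9) / (17266 * 1000).
%% ≈ 57.9 ns per iteration, matching the ~57 ns printed by erlperf
```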
283 | 284 | ### Extended report 285 | When 10 or more samples were collected, this mode is the default. Use `-r extended` 286 | to force printing this report for smaller sample sets. 287 | 288 | Note that average, deviation, median and 99th percentile are calculated for the *sample_duration*. 289 | If you requested 20 samples of 100 ms in the continuous mode, these fields will contain *iteration* 290 | count per 100 ms. If you requested 10 million iterations (`-l 10M`), extended report for timed mode 291 | displays average time it takes to do 10M iterations. Single iteration time is printed as *Iteration*. 292 | 293 | Code, concurrency, and relative performance fields have the same meaning as in basic report. In addition, 294 | following columns are printed: 295 | * **Samples**: how many samples were collected (useful when requesting continuous test with standard deviation requirement) 296 | * **Avg**: same as QPS for continuous mode, but in the timed mode, average sample time 297 | * **StdDev**: standard deviation from average 298 | * **Median**: median value, in the continuous mode, median estimated throughput, in the timed mode - time to 299 | complete the requested iterations 300 | * **Iteration**: single runner iteration time 301 | * **P99**: 99th percentile 302 | 303 | ### Full report 304 | This mode must be explicitly specified with `-r full`. 305 | 306 | Contains everything that extended report has. Includes extra information about the system 307 | used for benchmarking - OS type, CPU and Erlang VM characteristics. 308 | 309 | ## Benchmarking compiled code 310 | `erlperf` can be used to measure performance of your application running in production, or code that is stored 311 | on disk. 312 | 313 | Use `-pa` argument to add an extra code path. Example: 314 | ```bash 315 | $ ./erlperf 'args:parse([], #{}).' -pa _build/test/lib/argparse/ebin 316 | Code || QPS Time 317 | args:parse([], #{}). 1 955 Ki 1047 ns 318 | ``` 319 | 320 | If you need to add multiple released applications, supply `ERL_LIBS` environment variable instead: 321 | ```bash 322 | $ ERL_LIBS="_build/test/lib" erlperf 'args:parse([], #{}).' 323 | Code || QPS Time 324 | args:parse([], #{}). 1 735 Ki 1361 ns 325 | ``` 326 | 327 | ### Usage in production 328 | It is possible to use `erlperf` to benchmark an application running in production. 329 | Add `erlperf` as a dependency, and use remote shell: 330 | 331 | ```bash 332 | # connect a remote shell to the production node 333 | erl -remsh production@max-au 334 | (production@max-au)3> erlperf:run(timer, sleep, [1]). 335 | 488 336 | ``` 337 | 338 | ### Permanent continuous benchmarking 339 | You can run a job continuously, to examine performance gains or losses while doing 340 | hot code reload. This process is designed to help during development and testing stages, 341 | allowing to quickly notice performance regressions. 342 | 343 | Example source code: 344 | ```erlang 345 | -module(mymod). 346 | -export([do/1]). 347 | do(Arg) -> timer:sleep(Arg). 348 | ``` 349 | 350 | Example below assumes you have `erlperf` application started (e.g. in a `rebar3 shell`) 351 | 352 | ```erlang 353 | % start a logger that prints VM monitoring information 354 | > {ok, Logger} = erlperf_file_log:start_link(). 355 | {ok,<0.235.0>} 356 | 357 | % start a job that will continuously benchmark mymod:do(), 358 | % with initial concurrency 2. 359 | > JobPid = erlperf:start(#{init_runner => "rand:uniform(10).", 360 | runner => "runner(Arg) -> mymod:do(Arg)."}, 2). 
361 | {ok,<0.291.0>} 362 | 363 | % increase concurrency to 4 364 | > erlperf_job:set_concurrency(JobPid, 4). 365 | ok. 366 | 367 | % watch your job performance 368 | 369 | % modify your application code, 370 | % set do(Arg) -> timer:sleep(2*Arg), do hot code reload 371 | > c(mymod). 372 | {module, mymod}. 373 | 374 | % see that after hot code reload throughput halved! 375 | ``` 376 | 377 | ## Timer precision 378 | ERTS cannot guarantee precise timing when there is severe lock contention happening, 379 | and scheduler utilisation is 100%. This often happens with ETS: 380 | 381 | ```bash 382 | $ ./erlperf -c 50 'ets:insert(ac_tab, {1, 2}).' -d 100 -s 50 383 | Timer accuracy problem detected, results may be inaccurate 384 | 385 | Code || Samples Avg StdDev Median P99 Iteration 386 | ets:insert(ac_tab, {1, 2}). 50 50 6079 82.27% 5497 40313 823 us 387 | ``` 388 | 389 | Running 50 concurrent processes trying to overwrite the very same key of an ETS 390 | table leads to lock contention on a shared resource (ETS table/bucket lock). `erlperf` 391 | may detect this issue and switch to a busy wait loop for precise timing. This may 392 | result in lowered throughput and other metrics skew. `erlperf` does not attempt to 393 | pinpoint the source of contention, it is up to user to figure that out. It's recommended 394 | to use lock-counting emulator, or Linux `perf` utility to troubleshoot VM-level issues. 395 | 396 | 397 | ## Experimental features 398 | These features are not fully supported. APIs may change in the future `erlperf` 399 | releases. 400 | 401 | ### Benchmarking in a cluster 402 | It's possible to run a job on a separate node in the cluster. See 403 | `erlperf_cluster_monitor` for additional details. 404 | 405 | ```erlang 406 | % watch the entire cluster (printed to console) 407 | (node1@host)> {ok, _} = erlperf_history:start_link(). 408 | {ok,<0.213.0>} 409 | (node1@host)> {ok, ClusterLogger} = erlperf_cluster_monitor:start_link(group_leader(), 1000, [node, sched_util, jobs]). 410 | {ok, <0.216.0>} 411 | 412 | % also log cluster-wide reports to file (jobs & sched_util) 413 | (node1@host)> {ok, FileLogger} = erlperf_cluster_monitor:start_link("/tmp/cluster", 1000, [time, node, sched_util, jobs]). 414 | {ok, <0.223.0>} 415 | 416 | % run the benchmarking process in a different node of your cluster 417 | (node1@host)> rpc:call('node2@host', erlperf, run, [#{runner => {rand, uniform, []}}]). 418 | ``` 419 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [debug_info]}. 2 | {deps, [argparse]}. 3 | 4 | {shell, [ 5 | {apps, [erlperf]} 6 | ]}. 7 | 8 | {dist_node, [ 9 | {sname, erlperf} 10 | ]}. 11 | 12 | {escript_incl_apps, [argparse]}. 13 | {escript_emu_args, "%%! +pc unicode -escript main erlperf_cli\n"}. 14 | 15 | {cover_enabled, true}. 16 | {cover_opts, [verbose]}. 17 | 18 | {ct_opts, [ 19 | %% {ct_hooks, [cth_surefire]}, 20 | {keep_logs, 1} 21 | ]}. 22 | 23 | {post_hooks, [{"(linux|darwin|solaris|freebsd|netbsd|openbsd)", 24 | escriptize, 25 | "cp \"$REBAR_BUILD_DIR/bin/erlperf\" ./erlperf"}, 26 | {"win32", 27 | escriptize, 28 | "robocopy \"%REBAR_BUILD_DIR%/bin/\" ./ erlperf* " 29 | "/njs /njh /nfl /ndl & exit /b 0"} % silence things 30 | ]}. 31 | 32 | {hex, [ 33 | {doc, #{provider => ex_doc}} 34 | ]}. 35 | 36 | {project_plugins, [rebar3_ex_doc]}. 
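%% Note: the post_hooks above copy the escript produced by `rebar3 as prod escriptize'
%% into the repository root, which is where the README examples run ./erlperf from.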
37 | 38 | {ex_doc, [ 39 | {extras, [ 40 | {"README.md", #{title => "Overview"}}, 41 | {"CLI.md", #{title => "Command Line"}}, 42 | {"CHANGELOG.md", #{title => "Changelog"}}, 43 | {"LICENSE.md", #{title => "License"}} 44 | ]}, 45 | {main, "README.md"}, 46 | {source_url, "https://github.com/max-au/erlperf"}, 47 | {source_ref, <<"master">>} 48 | ]}. 49 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | {"1.2.0", 2 | [{<<"argparse">>,{pkg,<<"argparse">>,<<"2.0.0">>},0}]}. 3 | [ 4 | {pkg_hash,[ 5 | {<<"argparse">>, <<"3EDF299FB5BC089E6AF2F1A7C6532104B4CD6136E0147C6CF9622E6E4A741434">>}]}, 6 | {pkg_hash_ext,[ 7 | {<<"argparse">>, <<"525979122BEA3641A1DD3ABC53F2ADD19F7F427D507018F8C7CAF0693A6E78C8">>}]} 8 | ]. 9 | -------------------------------------------------------------------------------- /src/erlperf.app.src: -------------------------------------------------------------------------------- 1 | {application, erlperf, 2 | [{description, "Erlang Performance & Benchmarking Suite"}, 3 | {vsn, "2.3.0"}, 4 | {registered, [ 5 | erlperf_sup, erlperf_job_sup, erlperf_monitor, 6 | erlperf_history, erlperf_file_log, erlperf_cluster_monitor 7 | ]}, 8 | {mod, {erlperf_app, []}}, 9 | {applications, 10 | [kernel, 11 | stdlib, 12 | compiler, 13 | argparse 14 | ]}, 15 | {env,[]}, 16 | {modules, []}, 17 | 18 | {licenses, ["BSD-3-Clause-Clear"]}, 19 | {links, [{"Github", "https://github.com/max-au/erlperf"}]}, 20 | {include_paths, ["CLI.md", "DETAILS.md"]} 21 | ]}. 22 | -------------------------------------------------------------------------------- /src/erlperf.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Convenience APIs for benchmarking. 4 | %%% 5 | %%% This module implements following benchmarking modes: 6 | %%% 11 | %%% 12 | %%%

%%% == Continuous mode ==

13 | %%% This is the default mode. Separate {@link erlperf_job} is started for 14 | %%% each benchmark, iterating supplied runner in a tight loop, 15 | %%% bumping a counter for each iteration of each worker. `erlperf' reads 16 | %%% this counter every second (or `sample_duration'), calculating the 17 | %%% difference between current and previous value. This difference is 18 | %%% called a sample. 19 | %%% 20 | %%% By default, `erlperf' collects 3 samples and stops, reporting the average. 21 | %%% To give an example, if your function runs for 20 milliseconds, `erlperf' 22 | %%% may capture samples with 48, 52 and 50 iterations. The average would be 50. 23 | %%% 24 | %%% This approach works well for CPU-bound calculations, but may produce 25 | %%% unexpected results for slow functions taking longer than sample duration. 26 | %%% For example, timer:sleep(2000) with default settings yields zero throughput. 27 | %%% You can change the sample duration and the number of samples to take to 28 | %%% avoid that. 29 | %%% 30 | %%%
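%%% For example, to benchmark a 2-second call without observing zero throughput,
%%% stretch the sample (a sketch using only options from {@link run_options()}):
%%% ```
%%% erlperf:run({timer, sleep, [2000]}, #{sample_duration => 5000, samples => 2}).
%%% '''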

%%% == Timed mode ==

31 | %%% In this mode `erlperf' loops your code a specified amount of times, measuring 32 | %%% how long it took to complete. It is essentially what {@link timer:tc/3} does. This mode 33 | %%% has slightly less overhead compared to continuous mode. This difference may be 34 | %%% significant if you’re profiling low-level ERTS primitives. 35 | %%% 36 | %%% This mode does not support `concurrency' setting (concurrency locked to 1). 37 | %%% 38 | %%%
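%%% A sketch of the timed mode through the {@link time/2} wrapper defined below:
%%% ```
%%% ElapsedUs = erlperf:time({rand, uniform, []}, 10000000). %% time to run 10M iterations
%%% '''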

%%% == Concurrency estimation mode ==

39 | %%% In this mode `erlperf' attempts to estimate how concurrent the supplied 40 | %%% runner code is. The run consists of multiple passes, increasing concurrency 41 | %%% with each pass, and stopping when total throughput is no longer growing. 42 | %%% This mode proves useful to find concurrency bottlenecks. For example, some 43 | %%% functions may have limited throughput because they execute remote calls 44 | %%% served by a single process. See {@link benchmark/3} for the detailed 45 | %%% description. 46 | %%% 47 | %%% 48 | %%% @end 49 | -module(erlperf). 50 | -author("maximfca@gmail.com"). 51 | 52 | %% Public API for single-run simple benchmarking 53 | %% Programmatic access. 54 | -export([ 55 | benchmark/3, 56 | compare/2, 57 | record/4, 58 | run/1, 59 | run/2, 60 | run/3, 61 | start/2, 62 | time/2 63 | ]). 64 | 65 | %% Exported for testing purposes only. 66 | -export([report_stats/1]). 67 | 68 | %% compare/2 accepts code map, or just the runner code 69 | -type code() :: erlperf_job:code_map() | erlperf_job:callable(). 70 | %% Convenience type used in `run/1,2,3' and `compare/2'. 71 | 72 | %% node isolation options: 73 | -type isolation() :: #{ 74 | host => string() 75 | }. 76 | %% Node isolation settings. 77 | %% 78 | %% Currently, `host' selection is not supported. 79 | 80 | -type run_options() :: #{ 81 | concurrency => pos_integer(), 82 | sample_duration => pos_integer() | undefined | {timed, pos_integer()}, 83 | warmup => non_neg_integer(), 84 | samples => pos_integer(), 85 | cv => float() | undefined, 86 | priority => erlang:priority_level(), 87 | report => basic | extended | full, 88 | isolation => isolation() 89 | }. 90 | %% Benchmarking mode selection and parameters of the benchmark run. 91 | %% 92 | %% 122 | 123 | %% Concurrency test options 124 | -type concurrency_test() :: #{ 125 | threshold => pos_integer(), 126 | min => pos_integer(), 127 | step => pos_integer(), 128 | max => pos_integer() 129 | }. 130 | %% Concurrency estimation mode options. 131 | %% 132 | %% 139 | 140 | %% Single run result: one or multiple samples (depending on report verbosity) 141 | -type run_result() :: non_neg_integer() | [non_neg_integer()]. 142 | %% Benchmark results. 143 | %% 144 | %% For continuous mode, an average (arithmetic mean) of the collected samples, 145 | %% or a list of all samples collected. 146 | %% Timed mode returns elapsed time (microseconds). 147 | 148 | 149 | %% Concurrency test result (non-verbose) 150 | -type concurrency_result() :: {QPS :: non_neg_integer(), Concurrency :: non_neg_integer()}. 151 | %% Basic concurrency estimation report 152 | %% 153 | %% Only the highest throughput run is reported. `Concurrency' contains the number of 154 | %% concurrently running workers when the best result is achieved. 155 | 156 | %% Extended report returns all samples collected. 157 | -type concurrency_test_result() :: concurrency_result() | {Max :: concurrency_result(), [concurrency_result()]}. 158 | %% Concurrency estimation mode result 159 | %% 160 | %% Extended report contains results for all runs, starting from the minimum number 161 | %% of workers, to the highest throughput detected, plus up to `threshold' more. 
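%% A hedged usage sketch combining the two option maps above:
%%   erlperf:run({code, is_loaded, [local_udp]}, #{report => extended},
%%               #{min => 2, step => 2, max => 16, threshold => 2})
%% returns `{Best, History}' as described in concurrency_test_result().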
162 | 163 | -type system_information() :: #{ 164 | os := {unix | win32, atom()}, 165 | system_version := string(), 166 | debug => boolean(), %% true if the emulator has been debug-compiled, otherwise false 167 | emu_type => atom(), %% see system_info(emu_type), since OTP 24 168 | emu_flavor => atom(), %% see system_info(emu_flavor), since OTP 24 169 | dynamic_trace => atom(),%% see system_info(dynamic_trace), since OTP 24 170 | cpu => string() 171 | }. 172 | %% System information, as returned by {@link erlang:system_info/1} 173 | %% May also contain CPU model name on supported operating systems. 174 | 175 | -type run_statistics() :: #{ 176 | average => non_neg_integer(), 177 | variance => float(), 178 | stddev => float(), 179 | median => non_neg_integer(), 180 | p99 => non_neg_integer(), 181 | best => non_neg_integer(), 182 | worst => non_neg_integer(), 183 | samples => [non_neg_integer()], 184 | time => non_neg_integer(), 185 | iteration_time => non_neg_integer() 186 | }. 187 | %% Results reported by a single benchmark run. 188 | %% 189 | %% 203 | 204 | -type report() :: #{ 205 | mode := timed | continuous | concurrency, 206 | result := run_statistics(), 207 | history => [{Concurrency :: pos_integer(), Result :: run_statistics()}], 208 | code := erlperf_job:code_map(), 209 | run_options := run_options(), 210 | concurrency_options => concurrency_test(), 211 | system => system_information(), 212 | sleep => sleep | busy_wait 213 | }. 214 | %% Full benchmark report, containing all collected samples and statistics 215 | %% 216 | %% 232 | 233 | -export_type([code/0, isolation/0, run_options/0, concurrency_test/0, report/0, 234 | system_information/0, run_statistics/0]). 235 | 236 | %% Milliseconds, timeout for any remote node operation 237 | -define(REMOTE_NODE_TIMEOUT, 10000). 238 | 239 | 240 | %% @doc 241 | %% Generic benchmarking suite, accepting multiple code maps, modes and options. 242 | %% 243 | %% `Codes' contain a list of code versions. Every element is a separate job that runs 244 | %% in parallel with all other jobs. Same `RunOptions' are applied to all jobs. 245 | %% 246 | %% `ConcurrencyTestOpts' specifies options for concurrency estimation mode. Passing 247 | %% `undefined' results in a continuous or a timed run. It is not supported to 248 | %% run multiple jobs while doing a concurrency estimation run. 249 | %% 250 | %% Concurrency estimation run consists of multiple passes. First pass is done with 251 | %% a `min' number of workers, subsequent passes are increasing concurrency by 1, until 252 | %% `max' concurrency is reached, or total job iterations stop growing for `threshold' 253 | %% consecutive passes. To give an example, if your code is not concurrent at all, 254 | %% and you try to benchmark it with `threshold' set to 3, there will be 4 passes in 255 | %% total: first with a single worker, then 3 more, demonstrating no throughput growth. 256 | %% 257 | %% In this mode, job is started once before the first pass. Subsequent passes only 258 | %% change the concurrency. All other options passed in `RunOptions' are honoured. So, 259 | %% if you set `samples' to 30, keeping default duration of a second, every single 260 | %% pass will last for 30 seconds. 261 | %% @end 262 | -spec benchmark([erlperf_job:code_map()], RunOptions :: run_options(), undefined) -> run_result() | [run_result()] | [report()]; 263 | ([erlperf_job:code_map()], RunOptions :: run_options(), concurrency_test()) -> concurrency_test_result() | [report()]. 
264 | benchmark(Codes, #{isolation := Isolation} = RunOptions, ConcurrencyTestOpts) -> 265 | erlang:is_alive() orelse erlang:error(not_alive), 266 | %% isolation requested: need to rely on cluster_monitor and other distributed things. 267 | {Peers, Nodes} = prepare_nodes(length(Codes)), 268 | Opts = maps:remove(isolation, RunOptions), 269 | try 270 | %% no timeout here (except that rpc itself could time out) 271 | Promises = 272 | [erpc:send_request(Node, erlperf, run, [Code, Opts, ConcurrencyTestOpts]) 273 | || {Node, Code} <- lists:zip(Nodes, Codes)], 274 | %% now wait for everyone to respond 275 | Reports = [erpc:receive_response(Promise) || Promise <- Promises], 276 | %% if full reports were requested, restore isolation flag 277 | case maps:get(report, RunOptions, basic) of 278 | full -> 279 | [maps:update_with(run_options, fun(RO) -> RO#{isolation => Isolation} end, Report) 280 | || Report <- Reports]; 281 | _ -> 282 | Reports 283 | end 284 | catch 285 | error:{exception, Reason, Stack} -> 286 | erlang:raise(error, Reason, Stack) 287 | after 288 | stop_nodes(Peers, Nodes) 289 | end; 290 | 291 | %% foolproofing 292 | benchmark([_, _ | _], _RunOptions, #{}) -> 293 | erlang:error(not_supported); 294 | 295 | %% No isolation requested. 296 | %% This is the primary entry point for all benchmark jobs. 297 | benchmark(Codes, RunOptions0, ConOpts0) -> 298 | %% fill in all missing defaults 299 | ConOpts = concurrency_mode_defaults(ConOpts0), 300 | #{report := ReportType, priority := SetPrio} = RunOptions = run_options_defaults(RunOptions0), 301 | %% elevate priority to reduce timer skew 302 | PrevPriority = process_flag(priority, SetPrio), 303 | Jobs = start_jobs(Codes, []), 304 | {JobPids, Handles, _} = lists:unzip3(Jobs), 305 | Reports = 306 | try 307 | benchmark_impl(JobPids, RunOptions, ConOpts, Handles) 308 | after 309 | stop_jobs(Jobs), 310 | process_flag(priority, PrevPriority) 311 | end, 312 | %% generate statistical information from the samples returned 313 | report(ReportType, Codes, Reports). 314 | 315 | %% @doc 316 | %% Comparison run: benchmark multiple jobs at the same time. 317 | %% 318 | %% A job is defined by either {@link erlperf_job:code_map()}, 319 | %% or just the runner {@link erlperf_job:callable(). callable}. 320 | %% Example comparing {@link rand:uniform/0} %% performance 321 | %% to {@link rand:mwc59/1}: 322 | %% ``` 323 | %% (erlperf@ubuntu22)7> erlperf:compare([ 324 | %% {rand, uniform, []}, 325 | %% #{runner => "run(X) -> rand:mwc59(X).", init_runner => {rand, mwc59_seed, []}} 326 | %% ], #{}). 327 | %% [14823854,134121999] 328 | %% ''' 329 | %% 330 | %% See {@link benchmark/3} for `RunOptions' definition and return values. 331 | -spec compare(Codes :: [code()], RunOptions :: run_options()) -> [run_result()] | [report()]. 332 | compare(Codes, RunOptions) -> 333 | benchmark([code(Code) || Code <- Codes], RunOptions, undefined). 334 | 335 | %% @doc 336 | %% Runs a single benchmark for 3 seconds, returns average number of iterations per second. 337 | %% 338 | %% Accepts either a full {@link erlperf_job:code_map()}, or just the runner 339 | %% {@link erlperf_job:callable(). callable}. 340 | -spec run(code()) -> non_neg_integer(). 341 | run(Code) -> 342 | [Report] = benchmark([code(Code)], #{}, undefined), 343 | Report. 344 | 345 | %% @doc 346 | %% Runs a single benchmark job, returns average number of iterations per second, 347 | %% or a full report. 
348 | %%
349 | %% Accepts either a full {@link erlperf_job:code_map()}, or just the runner
350 | %% {@link erlperf_job:callable(). callable}.
351 | %% Equivalent of returning the first result of `compare([Code], RunOptions)'.
352 | -spec run(Code :: code(), RunOptions :: run_options()) -> run_result() | report().
353 | run(Code, RunOptions) ->
354 |     [Report] = benchmark([code(Code)], RunOptions, undefined),
355 |     Report.
356 | 
357 | %% @doc
358 | %% Concurrency estimation run, or an alias for quick benchmarking of an MFA tuple.
359 | %%
360 | %% Attempt to find concurrency characteristics of the runner code,
361 | %% see {@link benchmark/3} for a detailed description. Accepts either a full
362 | %% {@link erlperf_job:code_map()}, or just the runner
363 | %% {@link erlperf_job:callable(). callable}.
364 | %%
365 | %% When `Module' and `Function' are atoms, and `Args' is a list, this call is
366 | %% equivalent of `run({Module, Function, Args})'.
367 | -spec run(code(), run_options(), concurrency_test()) -> concurrency_test_result() | report();
368 |          (module(), atom(), [term()]) -> QPS :: non_neg_integer().
369 | run(Module, Function, Args) when is_atom(Module), is_atom(Function), is_list(Args) ->
370 |     %% this typo is so common that I decided to have this as an unofficial API
371 |     run({Module, Function, Args});
372 | run(Code, RunOptions, ConTestOpts) ->
373 |     [Report] = benchmark([code(Code)], RunOptions, ConTestOpts),
374 |     Report.
375 | 
376 | %% @doc
377 | %% Starts a new supervised job with the specified concurrency.
378 | %%
379 | %% Requires `erlperf' application to be running. Returns job
380 | %% controller process identifier.
381 | %% This function is designed for distributed benchmarking, when
382 | %% jobs are started in different nodes, and monitored via
383 | %% {@link erlperf_cluster_monitor}.
384 | -spec start(code(), Concurrency :: non_neg_integer()) -> pid().
385 | start(Code, Concurrency) ->
386 |     {ok, Job} = supervisor:start_child(erlperf_job_sup, [code(Code)]),
387 |     ok = erlperf_job:set_concurrency(Job, Concurrency),
388 |     Job.
389 | 
390 | %% @doc
391 | %% Timed benchmarking mode. Iterates the runner code `Count' times and returns
392 | %% elapsed time in microseconds.
393 | %%
394 | %% This method has lower overhead compared to continuous benchmarking. It is
395 | %% not supported to run multiple workers in this mode.
396 | -spec time(code(), Count :: non_neg_integer()) -> TimeUs :: non_neg_integer().
397 | time(Code, Count) ->
398 |     [Report] = benchmark([code(Code)], #{samples => Count, sample_duration => undefined}, undefined),
399 |     Report.
400 | 
401 | %% @private
402 | %% @doc
403 | %% Records call trace, so it could be used to benchmark later.
404 | %% Experimental, do not use.
405 | -spec record(module(), atom(), non_neg_integer(), pos_integer()) ->
406 |     [[{module(), atom(), [term()]}]].
407 | record(Module, Function, Arity, TimeMs) ->
408 |     TracerPid = spawn_link(fun rec_tracer/0),
409 |     TraceSpec = [{'_', [], []}],
410 |     MFA = {Module, Function, Arity},
411 |     erlang:trace_pattern(MFA, TraceSpec, [global]),
412 |     erlang:trace(all, true, [call, {tracer, TracerPid}]),
413 |     receive after TimeMs -> ok end,
414 |     erlang:trace(all, false, [call]),
415 |     erlang:trace_pattern(MFA, false, [global]),
416 |     TracerPid ! {stop, self()},
417 |     receive
418 |         {data, Samples} ->
419 |             Samples
420 |     end.
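%% A hedged example of an isolated comparison run (one peer node per job), using
%% only benchmark/3 options defined above; the calling node must be distributed,
%% as checked by erlang:is_alive() below:
%%
%%   erlperf:benchmark([#{runner => {rand, uniform, []}},
%%                      #{runner => "crypto:strong_rand_bytes(2)."}],
%%                     #{samples => 5, isolation => #{}}, undefined).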
421 | 422 | %% =================================================================== 423 | %% Implementation details 424 | concurrency_mode_defaults(undefined) -> 425 | undefined; 426 | concurrency_mode_defaults(ConOpts) -> 427 | maps:merge(#{min => 1, step => 1, max => erlang:system_info(process_limit) - 1000, threshold => 3}, ConOpts). 428 | 429 | run_options_defaults(RunOptions) -> 430 | maps:merge(#{ 431 | concurrency => 1, 432 | sample_duration => 1000, 433 | warmup => 0, 434 | samples => 3, 435 | cv => undefined, 436 | priority => high, 437 | report => basic}, 438 | RunOptions). 439 | 440 | %%=================================================================== 441 | %% Codification: translate from {M, F, A} to #{runner => ...} map 442 | code(#{runner := _Runner} = Code) -> 443 | Code; 444 | code({M, F, A}) when is_atom(M), is_atom(F), is_list(A) -> 445 | #{runner => {M, F, A}}; 446 | code(Fun) when is_function(Fun) -> 447 | #{runner => Fun}; 448 | code(Text) when is_list(Text) -> 449 | #{runner => Text}. 450 | 451 | %%=================================================================== 452 | %% Benchmarking itself 453 | 454 | %% OTP 25 support 455 | -dialyzer({no_missing_calls, start_node/1}). 456 | -compile({nowarn_deprecated_function, [{slave, start_link, 3}, {slave, stop, 1}]}). 457 | -compile({nowarn_removed, [{slave, start_link, 3}, {slave, stop, 1}]}). 458 | 459 | start_node({module, peer}) -> 460 | {ok, _Peer, _Node} = peer:start_link(#{name => peer:random_name()}); 461 | start_node({error, nofile}) -> 462 | OsPid = os:getpid(), 463 | [_, HostString] = string:split(atom_to_list(node()), "@"), 464 | Host = list_to_atom(HostString), 465 | Args = "-setcookie " ++ atom_to_list(erlang:get_cookie()), 466 | Uniq = erlang:unique_integer([positive]), 467 | NodeId = list_to_atom(lists:concat(["job-", Uniq, "-", OsPid])), 468 | {ok, Node} = slave:start_link(Host, NodeId, Args), 469 | {ok, undefined, Node}. 470 | 471 | prepare_nodes(HowMany) -> 472 | %% start 'erlperf' parts on all peers 473 | %% Cannot do this via "code:add_path" because actual *.beam files are 474 | %% parts of the binary escript. 475 | _ = application:load(erlperf), 476 | {ok, ModNames} = application:get_key(erlperf, modules), 477 | Modules = [{Mod, _Bin, _Path} = code:get_object_code(Mod) || Mod <- ModNames], 478 | PeerPresent = code:ensure_loaded(peer), 479 | %% start multiple nodes 480 | lists:unzip([begin 481 | {ok, Peer, Node} = start_node(PeerPresent), 482 | [{module, Mod} = erpc:call(Node, code, load_binary, [Mod, Path, Bin], ?REMOTE_NODE_TIMEOUT) 483 | || {Mod, Bin, Path} <- Modules], 484 | {ok, _PgPid} = erpc:call(Node, pg, start, [erlperf]), 485 | {ok, _MonPid} = erpc:call(Node, erlperf_monitor, start, []), 486 | {Peer, Node} 487 | end || _ <- lists:seq(1, HowMany)]). 488 | 489 | stop_nodes([undefined | _], Nodes) -> 490 | [slave:stop(Node) || Node <- Nodes]; 491 | stop_nodes(Peers, _Nodes) -> 492 | [peer:stop(Peer) || Peer <- Peers]. 493 | 494 | start_jobs([], Jobs) -> 495 | lists:reverse(Jobs); 496 | start_jobs([Code | Codes], Jobs) -> 497 | try 498 | {ok, Pid} = erlperf_job:start(Code), 499 | Handle = erlperf_job:handle(Pid), 500 | MonRef = monitor(process, Pid), 501 | start_jobs(Codes, [{Pid, Handle, MonRef} | Jobs]) 502 | catch Class:Reason:Stack -> 503 | %% stop jobs that were started 504 | stop_jobs(Jobs), 505 | erlang:raise(Class, Reason, Stack) 506 | end. 
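%% Note: code/1 above normalises every public callable shape into a code map,
%% e.g. code({timer, sleep, [1]}) =:= #{runner => {timer, sleep, [1]}} and
%% code("timer:sleep(1).") =:= #{runner => "timer:sleep(1)."}.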
507 | 508 | stop_jobs(Jobs) -> 509 | %% do not use gen:stop/1,2 or sys:terminate/2,3 here, as they spawn process running 510 | %% with normal priority, and they don't get scheduled fast enough when there is severe 511 | %% lock contention 512 | WaitFor = [begin erlperf_job:request_stop(Pid), {Pid, Mon} end || {Pid, _, Mon} <- Jobs, is_process_alive(Pid)], 513 | %% now wait for all monitors to fire 514 | [receive {'DOWN', Mon, process, Pid, _R} -> ok end || {Pid, Mon} <- WaitFor]. 515 | 516 | %% Benchmark implementation. Always returns a full report (post-processing will dumb it down if needed). 517 | 518 | %% Timed mode,backwards compatibility conversion 519 | benchmark_impl(Jobs, #{sample_duration := undefined, samples := Samples} = RunOptions, undefined, Handles) -> 520 | benchmark_impl(Jobs, RunOptions#{sample_duration => {timed, Samples}, samples => 1}, undefined, Handles); 521 | 522 | %% timed mode 523 | benchmark_impl(Jobs, #{sample_duration := {timed, Duration}, samples := Samples, warmup := Warmup} = RunOptions, undefined, _Handles) -> 524 | Proxies = [ 525 | spawn_monitor( 526 | fun () -> 527 | _Discarded = [erlperf_job:measure(Job, Duration) || _ <- lists:seq(1, Warmup)], 528 | Times = [erlperf_job:measure(Job, Duration) || _ <- lists:seq(1, Samples)], 529 | exit({success, Times}) 530 | end) 531 | || Job <- Jobs], 532 | [case Res of 533 | {success, TimesUs} -> 534 | #{mode => timed, result => #{samples => TimesUs}, run_options => RunOptions}; 535 | Error -> 536 | erlang:error(Error) 537 | end || Res <- multicall_result(Proxies, [])]; 538 | 539 | %% Continuous mode 540 | %% QPS considered stable when: 541 | %% * 'warmup' done 542 | %% * 'samples' received 543 | %% * (optional) for the last 'samples' standard deviation must not exceed 'cv' 544 | benchmark_impl(Jobs, #{sample_duration := Interval, cv := CV, samples := SampleCount, 545 | warmup := Warmup, concurrency := Concurrency} = RunOptions, undefined, Handles) -> 546 | %% TODO: turn the next sequential call into a multi-call, to make warmup time fair 547 | [ok = erlperf_job:set_concurrency(Job, Concurrency) || Job <- Jobs], 548 | %% warmup: intended to figure out sleep method (whether to apply busy_wait immediately) 549 | NowTime = os:system_time(millisecond), 550 | SleepMethod = warmup(Warmup, NowTime, NowTime + Interval, Interval, sleep), 551 | %% remember initial counters in Before 552 | Before = [[erlperf_job:sample(Handle)] || Handle <- Handles], 553 | StartedAt = os:system_time(millisecond), 554 | {Samples, TimerSkew, FinishedAt} = measure_impl(Before, Handles, StartedAt, StartedAt + Interval, Interval, 555 | SleepMethod, SampleCount, CV), 556 | Time = FinishedAt - StartedAt, 557 | [#{mode => continuous, result => #{samples => lists:reverse(S), time => Time * 1000}, 558 | run_options => RunOptions, sleep => TimerSkew} 559 | || S <- Samples]; 560 | 561 | %% squeeze test - concurrency benchmark 562 | benchmark_impl(Jobs, RunOptions, #{min := Min} = ConOpts, Handles) -> 563 | [estimate_concurrency(Jobs, RunOptions, ConOpts, Handles, Min, [], {0, 0})]. 
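%% Sampling bookkeeping sketch: with a single job whose counter reads 1000 at the
%% start and 18000 one second later, Before = [[1000]] becomes [[18000, 1000]]
%% via merge/2 (defined further below); reversing and applying difference/1
%% yields one sample of 17000 iterations for that second.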
564 | 565 | %% warmup procedure: figure out if sleep/4 can work without falling back to busy wait 566 | warmup(0, _LastSampleTime, _NextSampleTime, _Interval, Method) -> 567 | Method; 568 | warmup(Count, LastSampleTime, NextSampleTime, Interval, Method) -> 569 | SleepFor = NextSampleTime - LastSampleTime, 570 | NextMethod = sleep(Method, SleepFor, NextSampleTime), 571 | NowTime = os:system_time(millisecond), 572 | warmup(Count - 1, NowTime, NextSampleTime + Interval, Interval, NextMethod). 573 | 574 | %% collected all samples, CV is not defined 575 | measure_impl(Before, _Handles, LastSampleTime, _NextSampleTime, _Interval, SleepMethod, 0, undefined) -> 576 | {Before, SleepMethod, LastSampleTime}; 577 | 578 | %% collected all samples, but CV is defined - check whether to collect more samples 579 | measure_impl(Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, 0, CV) -> 580 | %% Complication: some jobs may need a long time to stabilise compared to others. 581 | %% Decision: wait for all jobs to stabilise. Stopping completed jobs skews the measurements. 582 | case 583 | lists:any( 584 | fun (Samples) -> 585 | Normal = difference(Samples), 586 | Len = length(Normal), 587 | Mean = lists:sum(Normal) / Len, 588 | StdDev = math:sqrt(lists:sum([(S - Mean) * (S - Mean) || S <- Normal]) / (Len - 1)), 589 | StdDev / Mean > CV 590 | end, Before) 591 | of 592 | false -> 593 | {Before, SleepMethod, LastSampleTime}; 594 | true -> 595 | %% imitate queue - drop last sample, push another in the head 596 | %% TODO: change the behaviour to return all samples in the full report 597 | TailLess = [lists:droplast(L) || L <- Before], 598 | measure_impl(TailLess, Handles, LastSampleTime, NextSampleTime + Interval, 599 | Interval, SleepMethod, 1, CV) 600 | end; 601 | 602 | %% LastSampleTime: system time of the last sample 603 | %% NextSampleTime: system time when to take the next sample 604 | %% Interval: to calculate the next NextSampleTime 605 | %% Count: how many more samples to take 606 | %% CV: acceptable standard deviation 607 | measure_impl(Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, Count, CV) -> 608 | SleepFor = NextSampleTime - LastSampleTime, 609 | NextSleepMethod = sleep(SleepMethod, SleepFor, NextSampleTime), 610 | Counts = [erlperf_job:sample(Handle) || Handle <- Handles], 611 | NowTime = os:system_time(millisecond), 612 | measure_impl(merge(Counts, Before), Handles, NowTime, NextSampleTime + Interval, Interval, 613 | NextSleepMethod, Count - 1, CV). 614 | 615 | %% ERTS real-time properties are easily broken by lock contention (e.g. ETS misuse) 616 | %% When it happens, even the 'max' priority process may not run for an extended 617 | %% period of time. 618 | sleep(sleep, SleepFor, _WaitUntil) when SleepFor > 0 -> 619 | receive 620 | {'DOWN', _Ref, process, Pid, Reason} -> 621 | erlang:error({benchmark, {'EXIT', Pid, Reason}}) 622 | after SleepFor -> 623 | sleep 624 | end; 625 | sleep(_Mode, _SleepFor, WaitUntil) -> 626 | busy_wait(WaitUntil). 627 | 628 | %% When sleep detects significant difference in the actual sleep time vs. expected, 629 | %% loop is switched to the busy wait. 630 | %% Once switched to busy wait, erlperf stays there until the end of the test. 
631 | busy_wait(WaitUntil) -> 632 | receive 633 | {'DOWN', _Ref, process, Pid, Reason} -> 634 | erlang:error({benchmark, {'EXIT', Pid, Reason}}) 635 | after 0 -> 636 | case os:system_time(millisecond) of 637 | Now when Now > WaitUntil -> 638 | busy_wait; 639 | _ -> 640 | busy_wait(WaitUntil) 641 | end 642 | end. 643 | 644 | merge([], []) -> 645 | []; 646 | merge([M | T], [H | T2]) -> 647 | [[M | H] | merge(T, T2)]. 648 | 649 | difference([_]) -> 650 | []; 651 | difference([S, F | Tail]) -> 652 | [F - S | difference([F | Tail])]. 653 | 654 | %% Determine maximum throughput by measuring multiple times with different concurrency. 655 | %% Test considered complete when either: 656 | %% * maximum number of workers reached 657 | %% * last 'threshold' added workers did not increase throughput 658 | estimate_concurrency(Jobs, Options, #{threshold := Threshold, step := Step, max := Max} = ConOpts, Handles, Current, History, QMax) -> 659 | RunOptions = Options#{concurrency => Current}, 660 | [Report] = benchmark_impl(Jobs, RunOptions, undefined, Handles), 661 | #{result := Result0} = Report, 662 | #{samples := Samples} = Result0, 663 | %% calculate average QPS 664 | QPS = lists:sum(difference(Samples)) div (length(Samples) - 1), 665 | Result = Result0#{average => QPS}, 666 | NewHistory = [{Current, Result} | History], 667 | %% this gives us nice round numbers (eg. with step of 10, we'll have [1, 10, 20...]) 668 | Next = (Current + Step) div Step * Step, 669 | %% test if we are at Max concurrency, or saturated the node 670 | case maxed(QPS, Current, QMax, Threshold) of 671 | true -> 672 | %% QPS are either stable or decreasing, get back to the best run 673 | #{sleep := SleepMethod} = Report, 674 | {_BestMax, BestConcurrency} = QMax, 675 | {BestConcurrency, BestResult} = lists:keyfind(BestConcurrency, 1, History), 676 | #{mode => concurrency, result => BestResult, history => NewHistory, sleep => SleepMethod, 677 | concurrency_options => ConOpts, run_options => Options#{concurrency => BestConcurrency}}; 678 | _NewQMax when Next > Max -> 679 | #{sleep := SleepMethod} = Report, 680 | #{mode => concurrency, result => Result, history => NewHistory, sleep => SleepMethod, 681 | concurrency_options => ConOpts, run_options => RunOptions}; 682 | NewQMax -> 683 | % need more workers 684 | estimate_concurrency(Jobs, RunOptions, ConOpts, Handles, Next, NewHistory, NewQMax) 685 | end. 686 | 687 | maxed(QPS, Current, {Q, _}, _) when QPS > Q -> 688 | {QPS, Current}; 689 | maxed(_, Current, {_, W}, Count) when Current - W >= Count -> 690 | true; 691 | maxed(_, _, QMax, _) -> 692 | QMax. 693 | 694 | multicall_result([], Acc) -> 695 | lists:reverse(Acc); 696 | multicall_result([{Pid, Ref} | Proxies], Acc) -> 697 | receive 698 | {'DOWN', Ref, process, Pid, Result} -> 699 | multicall_result(Proxies, [Result | Acc]) 700 | end. 701 | 702 | %%%=================================================================== 703 | %%% Tracer process, uses heap to store tracing information. 704 | rec_tracer() -> 705 | process_flag(message_queue_data, off_heap), 706 | tracer_loop([]). 707 | 708 | -spec tracer_loop([{module(), atom(), [term()]}]) -> ok. 709 | tracer_loop(Trace) -> 710 | receive 711 | {trace, _Pid, call, MFA} -> 712 | tracer_loop([MFA | Trace]); 713 | {stop, Control} -> 714 | Control ! {data, Trace}, 715 | ok 716 | end. 717 | 718 | %% Reporting: in full mode, add extra information (e.g. 
codes and statistics)
719 | %% full report for concurrency mode (history entries are rewritten as continuous-mode results)
720 | report(full, [Code], [#{mode := concurrency, history := History, result := Result, run_options := RunOpts} = Report]) ->
721 |     System = system_report(),
722 |     [Report#{system => System, code => Code,
723 |         result => process_result(Result, continuous, RunOpts, #{}),
724 |         history => [{C, process_result(R, continuous, #{report => full, concurrency => C}, #{})} || {C, R} <- History]}];
725 | %% full reports
726 | report(full, Codes, Reports) ->
727 |     System = system_report(),
728 |     [Report#{system => System, code => Code, result => process_result(Result, Mode, RunOptions, Report)}
729 |         || {Code, #{mode := Mode, result := Result, run_options := RunOptions} = Report} <- lists:zip(Codes, Reports)];
730 | report(_ReportType, _Codes, Reports) ->
731 |     [process_result(Result, Mode, RunOptions, Report)
732 |         || #{mode := Mode, result := Result, run_options := RunOptions} = Report <- Reports].
733 | 
734 | %% Transform raw samples into the requested report
735 | process_result(#{samples := Samples}, timed, #{report := full, samples := Count,
736 |     sample_duration := {timed, Loop}}, _Report) ->
737 |     Stat = report_stats(Samples),
738 |     TotalTime = lists:sum(Samples),
739 |     Stat#{time => TotalTime, iteration_time => TotalTime * 1000 div (Count * Loop), samples => Samples};
740 | process_result(#{samples := Samples}, timed, #{report := basic}, _Report) ->
741 |     %% timed mode, basic report
742 |     lists:sum(Samples) div length(Samples) div 1000;
743 | process_result(#{samples := Samples}, timed, #{report := extended}, _Report) ->
744 |     %% timed mode, extended report, convert to milliseconds for backwards compatibility
745 |     [S div 1000 || S <- Samples];
746 | process_result(#{samples := Samples, time := TimeUs}, continuous, #{report := full, concurrency := C}, _Report) ->
747 |     Stat = report_stats(difference(Samples)),
748 |     IterationTime = case lists:last(Samples) - hd(Samples) of
749 |                         0 ->
750 |                             infinity;
751 |                         Total ->
752 |                             erlang:round(TimeUs * C * 1000 div Total)
753 |                     end,
754 |     Stat#{samples => Samples, time => TimeUs, iteration_time => IterationTime};
755 | process_result(#{samples := Samples}, continuous, #{report := extended}, _Report) ->
756 |     difference(Samples);
757 | process_result(#{samples := Samples}, continuous, #{report := basic}, _Report) ->
758 |     Diffs = difference(Samples),
759 |     lists:sum(Diffs) div length(Diffs);
760 | process_result(#{average := Avg}, concurrency, #{report := basic, concurrency := C}, _Report) ->
761 |     {Avg, C};
762 | process_result(#{average := Avg}, concurrency, #{report := extended, concurrency := C}, #{history := H}) ->
763 |     %% return {Best, History}
764 |     {{Avg, C}, [{A, W} || {W, #{average := A}} <- H]}.
765 | 
766 | %% @private
767 | %% Calculates summary statistics over the passed samples.
768 | %% Exported for unit-testing purposes.
769 | report_stats(Samples) ->
770 |     Sum = lists:sum(Samples),
771 |     Len = length(Samples),
772 |     Avg = Sum / Len, %% arithmetic mean
773 |     Variance = if Len =:= 0 -> 0; true -> lists:sum([(S - Avg) * (S - Avg) || S <- Samples]) / (Len - 1) end,
774 |     Sorted = lists:sort(Samples),
775 |     #{
776 |         average => Avg,
777 |         min => hd(Sorted),
778 |         max => lists:last(Sorted),
779 |         stddev => math:sqrt(Variance),
780 |         median => lists:nth(erlang:round(0.50 * Len), Sorted),
781 |         p99 => lists:nth(erlang:round(0.99 * Len), Sorted)
782 |     }.
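For orientation, the shape of the map report_stats/1 produces, hand-checked against a small made-up sample list:

```erlang
%% report_stats([10, 12, 11, 13, 10]) returns
%%   #{average => 11.2, min => 10, max => 13,
%%     stddev => 1.3038,   %% ~ math:sqrt(6.8 / 4), sample standard deviation
%%     median => 11,       %% lists:nth(round(0.5 * 5), [10, 10, 11, 12, 13])
%%     p99 => 13}          %% lists:nth(round(0.99 * 5), ...) = 5th element
```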
783 | 
784 | system_report() ->
785 |     OSType = erlang:system_info(os_type),
786 |     Guaranteed = detect_feature([emu_type, emu_flavor, dynamic_trace], #{
787 |         os => OSType,
788 |         system_version => string:trim(erlang:system_info(system_version), trailing)
789 |     }),
790 |     try Guaranteed#{cpu => string:trim(detect_cpu(OSType), both)}
791 |     catch _:_ -> Guaranteed
792 |     end.
793 | 
794 | detect_feature([], System) ->
795 |     System;
796 | detect_feature([F | T], System) ->
797 |     try detect_feature(T, System#{F => erlang:system_info(F)})
798 |     catch error:badarg -> detect_feature(T, System)
799 |     end.
800 | 
801 | detect_cpu({unix, freebsd}) ->
802 |     os:cmd("sysctl -n hw.model");
803 | detect_cpu({unix, darwin}) ->
804 |     os:cmd("sysctl -n machdep.cpu.brand_string");
805 | detect_cpu({unix, linux}) ->
806 |     {ok, Bin} = file:read_file("/proc/cpuinfo"),
807 |     linux_cpu_model(binary:split(Bin, <<"\n">>));
808 | detect_cpu({win32, nt}) ->
809 |     [_, CPU] = string:split(os:cmd("WMIC CPU GET NAME"), "\n"),
810 |     CPU.
811 | 
812 | linux_cpu_model([<<"model name", Model/binary>>, _]) ->
813 |     [_, ModelName] = binary:split(Model, <<":">>),
814 |     binary_to_list(ModelName);
815 | linux_cpu_model([_Skip, Tail]) ->
816 |     linux_cpu_model(binary:split(Tail, <<"\n">>)).
--------------------------------------------------------------------------------
/src/erlperf_app.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @private
 3 | %%% Continuous benchmarking application behaviour.
 4 | -module(erlperf_app).
 5 | -author("maximfca@gmail.com").
 6 | 
 7 | -behaviour(application).
 8 | 
 9 | -export([start/2, stop/1]).
10 | 
11 | -spec start(application:start_type(), term()) -> {ok, pid()}.
12 | start(_StartType, _StartArgs) ->
13 |     {ok, Sup} = erlperf_sup:start_link(),
14 |     {ok, Sup}.
15 | 
16 | -spec stop(term()) -> ok.
17 | stop(_State) ->
18 |     ok.
19 | 
--------------------------------------------------------------------------------
/src/erlperf_cli.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Command line interface adapter.
 4 | %%%
 5 | %%% Exports functions to format {@link erlperf:benchmark/3} output
 6 | %%% in the same way as the command line interface does.
 7 | %%%
 8 | %%% Example:
 9 | %%% ```
10 | %%% #!/usr/bin/env escript
11 | %%% %%! +pc unicode -pa /home/max-au/git/max-au/erlperf/_build/default/lib/erlperf/ebin
12 | %%% -mode(compile).
13 | %%%
14 | %%% main(_) ->
15 | %%%     Report = erlperf:benchmark([
16 | %%%         #{runner => fun() -> rand:uniform(10) end},
17 | %%%         #{runner => {rand, mwc59, [1]}}
18 | %%%     ], #{report => full}, undefined),
19 | %%%     Out = erlperf_cli:format(Report, #{format => extended, viewport_width => 120}),
20 | %%%     io:format(Out),
21 | %%%     halt(0).
22 | %%% '''
23 | %%% Running the script produces the following output:
24 | %%% ```
25 | %%% $ ./bench
26 | %%% Code                  ||   Samples        Avg   StdDev     Median        P99  Iteration    Rel
27 | %%% {rand,mwc59,[1]}       1         3   80515 Ki    0.59%   80249 Ki   81067 Ki      12 ns   100%
28 | %%% #Fun<...>              1         3        ...      ...        ...        ...        ...    ...
29 | %%% '''
30 | %%% @end
31 | -module(erlperf_cli).
32 | -author("maximfca@gmail.com").
33 | 
34 | -export([format/2]).
35 | 
36 | %% escript entry point
37 | -export([main/1]).
38 | 
39 | -type format_options() :: #{
40 |     viewport_width => pos_integer(),
41 |     format => basic | extended | full
42 | }.
43 | %% Defines text report format.
44 | %%
45 | %% <ul>
46 | %%   <li>`viewport_width': width, in characters, to use when formatting
47 | %%      the report. By default, the current terminal width is detected,
48 | %%      falling back to 80 columns when the output device is not a
49 | %%      terminal</li>
50 | %%   <li>`format': report flavour. `basic' prints a single line per
51 | %%      benchmark; `extended' adds statistics (standard deviation,
52 | %%      median, p99) and is selected by default when 10 or more samples
53 | %%      were collected; `full' additionally prints system information
54 | %%      and warnings (non-optimised emulator, dynamic trace probes)</li>
55 | %% </ul>
56 | 
57 | 
58 | 
59 | -export_type([format_options/0]).
60 | 
61 | %% @doc
62 | %% Formats the result produced by {@link erlperf:benchmark/3}.
63 | %%
64 | %% Requires a full report. Does not accept basic or extended variants.
65 | -spec format(Reports, Options) -> iolist() when 66 | Reports :: [erlperf:report()], 67 | Options :: format_options(). 68 | format(Reports, Options) -> 69 | Format = 70 | case maps:find(format, Options) of 71 | {ok, F} -> 72 | F; 73 | error -> 74 | %% if format is not specified, choose between basic and extended 75 | %% based on amount of samples collected. Extended report does 76 | %% not make much sense for 3 samples. 77 | case maps:find(samples, maps:get(result, hd(Reports))) of 78 | {ok, Samples} when length(Samples) >= 10 -> 79 | extended; 80 | _ -> 81 | basic 82 | end 83 | end, 84 | Width = maps:get(viewport_width, Options, viewport_width()), 85 | %% if any of the reports has "sleep" set to busy_wait, write a warning 86 | Prefix = 87 | case lists:any(fun (#{sleep := busy_wait}) -> true; (_) -> false end, Reports) of 88 | true -> 89 | color(warning, io_lib:format("Timer accuracy problem detected, results may be inaccurate~n", [])); 90 | false -> 91 | "" 92 | end, 93 | %% 94 | Prefix ++ format_report(Format, Reports, Width). 95 | 96 | %%------------------------------------------------------------------- 97 | %% Internal implementation 98 | 99 | %% @private 100 | %% Used from escript invocation 101 | -spec main([string()]) -> no_return(). 102 | main(Args) -> 103 | Prog = #{progname => "erlperf"}, 104 | try 105 | ParsedOpts = args:parse(Args, arguments(), Prog), 106 | 107 | Verbose = maps:get(verbose, ParsedOpts, false), 108 | 109 | %% turn off logger unless verbose output is requested 110 | Verbose orelse 111 | logger:add_primary_filter(suppress_sasl, { 112 | fun(#{meta := #{error_logger := #{tag := Tag}}}, _) when Tag =:= error; Tag =:= error_report -> 113 | stop; 114 | (_, _) -> 115 | ignore 116 | end, ok}), 117 | 118 | %% timed benchmarking is not compatible with many options, and may have "loop" written as 100M, 100K 119 | {RunOpts0, ConcurrencyTestOpts} = determine_mode(ParsedOpts), 120 | 121 | %% add code paths 122 | [case code:add_path(P) of true -> ok; {error, Error} -> erlang:error({add_path, {P,Error}}) end 123 | || P <- maps:get(code_path, ParsedOpts, [])], 124 | 125 | %% find all runners 126 | Code0 = [parse_code(C) || C <- maps:get(code, ParsedOpts)], 127 | %% find associated init, init_runner, done, label 128 | {_, Codes} = lists:foldl(fun callable/2, {ParsedOpts, Code0}, 129 | [{init, init_all}, {init_runner, init_runner_all}, {done, done_all}, {label, undefined}]), 130 | 131 | %% when isolation is requested, the node must be distributed 132 | RunOpts = case is_map_key(isolation, ParsedOpts) of 133 | true -> 134 | erlang:is_alive() orelse start_distribution(), 135 | RunOpts0#{isolation => #{}}; 136 | false -> 137 | RunOpts0 138 | end, 139 | 140 | FormatOpts = case maps:find(report, ParsedOpts) of 141 | {ok, Fmt1} -> 142 | #{format => Fmt1}; 143 | error -> 144 | #{} 145 | end, 146 | %% do the actual run 147 | Results = benchmark(Codes, RunOpts#{report => full}, ConcurrencyTestOpts, Verbose), 148 | %% format results 149 | Formatted = format(Results, FormatOpts#{viewport_width => viewport_width()}), 150 | io:format(Formatted) 151 | catch 152 | error:{args, Reason} -> 153 | Fmt = args:format_error(Reason, arguments(), Prog), 154 | format(info, "Error: ~s", [Fmt]); 155 | throw:{parse, FunName, Other} -> 156 | format(error, "Unable to read file named '~s' (expected to contain call chain recording)~nReason: ~p\n" 157 | "Did you forget to end your function with period? 
(dot)~n", [FunName, Other]); 158 | error:{add_path, {Path, Error}} -> 159 | format(error, "Error adding code path ~s: ~p~n", [Path, Error]); 160 | error:{generic, Error} -> 161 | format(error, "Error: ~s~n", [Error]); 162 | error:{loop, Option} -> 163 | format(error, "Timed benchmarking is not compatible with --~s option~n", [Option]); 164 | error:{concurrency, Option} -> 165 | format(error, "Concurrency estimation is not compatible with --~s option~n", [Option]); 166 | error:{generate, {parse, FunName, Error}} -> 167 | format(error, "Parse error for ~s: ~s~n", [FunName, lists:flatten(Error)]); 168 | error:{generate, {What, WhatArity, requires, Dep}} -> 169 | format(error, "~s/~b requires ~s function defined~n", [What, WhatArity, Dep]); 170 | error:{compile, Errors, Warnings} -> 171 | Errors =/= [] andalso format(error, "Compile error: ~s~n", [compile_errors(Errors)]), 172 | Warnings =/= [] andalso format(warning, "Warning: ~s~n", [compile_errors(Warnings)]); 173 | error:{benchmark, {'EXIT', Job, Error}} -> 174 | node(Job) =/= node() andalso format(error, "~s reported an error:~n", [node(Job)]), 175 | format(error, "~p~n", [Error]); 176 | Cls:Rsn:Stack -> 177 | format(error, "Unhandled exception: ~ts:~p~n~p~n", [Cls, Rsn, Stack]) 178 | after 179 | logger:remove_primary_filter(suppress_sasl) 180 | end. 181 | 182 | %% timed mode 183 | determine_mode(#{loop := Loop} = ParsedOpts) -> 184 | [erlang:error({loop, Option}) || Option <- 185 | [concurrency, sample_duration, warmup, cv, concurrency_estimation], is_map_key(Option, ParsedOpts)], 186 | RunOpts = maps:with([samples], ParsedOpts), 187 | {RunOpts#{sample_duration => {timed, parse_loop(Loop)}}, undefined}; 188 | 189 | %% concurrency estimation mode 190 | determine_mode(#{concurrency_estimation := true} = ParsedOpts) -> 191 | [erlang:error({concurrency, Option}) || Option <- 192 | [concurrency], is_map_key(Option, ParsedOpts)], 193 | length(maps:get(code, ParsedOpts)) > 1 andalso 194 | erlang:error({generic, "Parallel concurrency estimation runs are not supported~n"}), 195 | RunOpts = maps:with([sample_duration, samples, warmup, cv], ParsedOpts), 196 | {RunOpts, maps:with([min, step, max, threshold], ParsedOpts)}; 197 | 198 | %% continuous mode 199 | determine_mode(ParsedOpts) -> 200 | RunOpts = maps:with([concurrency, sample_duration, samples, warmup, cv], ParsedOpts), 201 | {RunOpts, undefined}. 202 | 203 | %% wrapper to ensure verbose output 204 | benchmark(Codes, RunOpts, ConcurrencyTestOpts, false) -> 205 | erlperf:benchmark(Codes, RunOpts, ConcurrencyTestOpts); 206 | benchmark(Codes, RunOpts, ConcurrencyTestOpts, true) -> 207 | [begin 208 | io:format(">>>>>>>>>>>>>>> ~-32ts ~n", [format_code(C)]), 209 | [io:format("~ts~n", [L]) || L <- erlperf_job:source(C)], 210 | io:format("<<<<<<<<<<<<<<< ~n") 211 | end|| C <- Codes], 212 | {ok, Pg} = pg:start_link(erlperf), 213 | {ok, Monitor} = erlperf_monitor:start_link(), 214 | {ok, Logger} = erlperf_file_log:start_link(), 215 | try 216 | erlperf:benchmark(Codes, RunOpts, ConcurrencyTestOpts) 217 | after 218 | gen:stop(Logger), 219 | gen:stop(Monitor), 220 | gen:stop(Pg) 221 | end. 222 | 223 | start_distribution() -> 224 | Node = list_to_atom(lists:concat(["erlperf-", erlang:unique_integer([positive]), "-", os:getpid()])), 225 | {ok, _} = net_kernel:start([Node, shortnames]). 
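To make the dispatch above concrete, this is roughly how parsed command line options map to the three modes (a sketch; maps abbreviated, values illustrative):

```erlang
%% determine_mode/1, illustrated:
%% #{loop => "100K", ...}                 -> {#{sample_duration => {timed, 100000}, ...}, undefined}
%% #{concurrency_estimation => true, ...} -> {RunOpts, #{min => ..., step => ..., max => ..., threshold => ...}}
%% anything else                          -> {RunOpts, undefined}    %% continuous mode
```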
226 | 
227 | %% formats compiler errors/warnings
228 | compile_errors([]) -> "";
229 | compile_errors([{_, []} | Tail]) ->
230 |     compile_errors(Tail);
231 | compile_errors([{L, [{_Anno, Mod, Err} | T1]} | Tail]) ->
232 |     lists:flatten(Mod:format_error(Err) ++ io_lib:format("~n", [])) ++ compile_errors([{L, T1} | Tail]).
233 | 
234 | callable({Type, Default}, {Args, Acc}) ->
235 |     case maps:find(Type, Args) of
236 |         error when is_map_key(Default, Args) ->
237 |             %% default is set, no overrides
238 |             {Args, merge_callable(Type, lists:duplicate(length(Acc), [maps:get(Default, Args)]), Acc, [])};
239 |         error ->
240 |             %% no overrides, no default - most common case
241 |             {Args, merge_callable(Type, [], Acc, [])};
242 |         {ok, Overrides} when is_map_key(Default, Args) ->
243 |             %% some overrides, and the default as well
244 |             %% extend the Overrides array to the expected size by adding the default value
245 |             Def = [maps:get(Default, Args)],
246 |             Complete = Overrides ++ [Def || _ <- lists:seq(1, length(Acc) - length(Overrides))],
247 |             {Args, merge_callable(Type, Complete, Acc, [])};
248 |         {ok, NoDefault} ->
249 |             %% no default, but some arguments are defined
250 |             {Args, merge_callable(Type, NoDefault, Acc, [])}
251 |     end.
252 | 
253 | merge_callable(_Type, [], Acc, Merged) ->
254 |     lists:reverse(Merged) ++ Acc;
255 | merge_callable(_Type, _, [], Merged) ->
256 |     lists:reverse(Merged);
257 | merge_callable(Type, [[H] | T], [HA | Acc], Merged) ->
258 |     merge_callable(Type, T, Acc, [HA#{Type => H} | Merged]).
259 | 
260 | parse_code(Code) ->
261 |     case lists:last(Code) of
262 |         $. ->
263 |             #{runner => Code};
264 |         $} when hd(Code) =:= ${ ->
265 |             % parse an MFA tuple with an added "."
266 |             #{runner => parse_mfa_tuple(Code)};
267 |         _ ->
268 |             case file:read_file(Code) of
269 |                 {ok, Bin} ->
270 |                     #{runner => parse_call_record(Bin)};
271 |                 Other ->
272 |                     erlang:throw({parse, Code, Other})
273 |             end
274 |     end.
275 | 
276 | parse_mfa_tuple(Code) ->
277 |     {ok, Scan, _} = erl_scan:string(Code ++ "."),
278 |     {ok, Term} = erl_parse:parse_term(Scan),
279 |     Term.
280 | 
281 | parse_call_record(Bin) ->
282 |     binary_to_term(Bin).
283 | 
284 | parse_loop(Loop) ->
285 |     case string:to_integer(Loop) of
286 |         {Int, "M"} -> Int * 1000000;
287 |         {Int, "K"} -> Int * 1000;
288 |         {Int, []} -> Int;
289 |         {Int, "G"} -> Int * 1000000000;
290 |         _Other -> erlang:error({generic, "unsupported syntax for timed iteration count: " ++ Loop})
291 |     end.
292 | 
293 | arguments() ->
294 |     #{help =>
295 |         "\nFull documentation available at: https://hexdocs.pm/erlperf/\n"
296 |         "\nBenchmark timer:sleep(1):\n  erlperf 'timer:sleep(1).'\n"
297 |         "Benchmark rand:uniform() vs crypto:strong_rand_bytes(2):\n  erlperf 'rand:uniform().' 'crypto:strong_rand_bytes(2).' --samples 10 --warmup 1\n"
298 |         "Figure out concurrency limits:\n  erlperf 'application_controller:is_running(kernel).' -q\n"
299 |         "Benchmark pg join/leave operations:\n  erlperf 'pg:join(s, foo, self()), pg:leave(s, foo, self()).' --init 'pg:start_link(s).'\n"
300 |         "Timed benchmark for a single BIF:\n  erlperf 'erlang:unique_integer().' -l 1000000\n",
301 |     arguments => [
302 |         #{name => concurrency, short => $c, long => "-concurrency",
303 |             help => "number of concurrently executed runner processes",
304 |             type => {int, [{min, 1}, {max, 1024 * 1024 * 1024}]}},
305 |         #{name => sample_duration, short => $d, long => "-duration",
306 |             help => "single sample duration, milliseconds (1000)",
307 |             type => {int, [{min, 1}]}},
308 |         #{name => samples, short => $s, long => "-samples",
309 |             help => "minimum number of samples to collect (3)",
310 |             type => {int, [{min, 1}]}},
311 |         #{name => loop, short => $l, long => "-loop",
312 |             help => "timed mode (lower overhead) iteration count: 50, 100K, 200M, 3G"},
313 |         #{name => warmup, short => $w, long => "-warmup",
314 |             help => "number of samples to skip (0)",
315 |             type => {int, [{min, 0}]}},
316 |         #{name => report, short => $r, long => "-report",
317 |             help => "report verbosity, full adds system information",
318 |             type => {atom, [basic, extended, full]}},
319 |         #{name => cv, long => "-cv",
320 |             help => "coefficient of variation",
321 |             type => {float, [{min, 0.0}]}},
322 |         #{name => verbose, short => $v, long => "-verbose",
323 |             type => boolean, help => "print monitoring statistics"},
324 |         #{name => code_path, long => "pa", type => string,
325 |             action => append, help => "extra code path, see -pa erl documentation"},
326 |         #{name => isolation, short => $i, long => "-isolated", type => boolean,
327 |             help => "run benchmarks in an isolated environment (peer node)"},
328 |         #{name => concurrency_estimation, short => $q, long => "-squeeze", type => boolean,
329 |             help => "run concurrency estimation test"},
330 |         #{name => min, long => "-min",
331 |             help => "start with this number of processes (1)",
332 |             type => {int, [{min, 1}]}},
333 |         #{name => step, long => "-step",
334 |             help => "increase the number of processes by this value on each iteration (1)",
335 |             type => {int, [{min, 1}]}},
336 |         #{name => max, long => "-max",
337 |             help => "do not exceed this number of processes",
338 |             type => {int, [{max, erlang:system_info(process_limit) - 1000}]}},
339 |         #{name => threshold, short => $t, long => "-threshold",
340 |             help => "stop concurrency estimation when this many added processes no longer improve throughput (3)", default => 3,
341 |             type => {int, [{min, 1}]}},
342 |         #{name => init, long => "-init",
343 |             help => "init code, see erlperf_job documentation for details", nargs => 1, action => append},
344 |         #{name => done, long => "-done",
345 |             help => "done code", nargs => 1, action => append},
346 |         #{name => init_runner, long => "-init_runner",
347 |             help => "init_runner code", nargs => 1, action => append},
348 |         #{name => label, long => "-label", type => string,
349 |             help => "runner label", nargs => 1, action => append},
350 |         #{name => init_all, long => "-init_all",
351 |             help => "default init code for all runners"},
352 |         #{name => done_all, long => "-done_all",
353 |             help => "default done code for all runners"},
354 |         #{name => init_runner_all, long => "-init_runner_all",
355 |             help => "default init_runner code for all runners"},
356 |         #{name => code,
357 |             help => "code to test", nargs => nonempty_list, action => extend}
358 |     ]}.
359 | 
360 | %%-------------------------------------------------------------------
361 | %% Color output
362 | 
363 | -spec format(error | warning | info, string(), [term()]) -> ok.
364 | format(Level, Format, Terms) ->
365 |     io:format(color(Level, Format), Terms).
366 | 
367 | -define(RED, "\e[31m").
368 | -define(MAGENTA, "\e[35m").
369 | -define(END, "\e[0m~n").
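Combining several of the options above into a single run (an illustrative invocation; note that timed `-l' mode and `-q' estimation reject most of these flags, as enforced by determine_mode/1 earlier):

```bash
$ erlperf 'rand:uniform().' 'rand:mwc59(1).' --samples 10 --warmup 1 --report full
```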
370 | 371 | color(error, Text) -> ?RED ++ Text ++ ?END; 372 | color(warning, Text) -> ?MAGENTA ++ Text ++ ?END; 373 | color(info, Text) -> Text. 374 | 375 | %% Report formatter 376 | format_report(full, [#{system := System} | _] = Reports, Width) -> 377 | warn_system(System) ++ [format_system(System), format_report(extended, Reports, Width)]; 378 | 379 | format_report(extended, [#{system := System} | _] = Reports, Width) -> 380 | Sorted = sort_by(Reports), 381 | #{result := #{average := MaxAvg}} = hd(Sorted), 382 | Header = ["Code", " ||", " Samples", " Avg", " StdDev", " Median", " P99", " Iteration", " Rel"], 383 | Data = [format_report_line(MaxAvg, ReportLine, extended) || ReportLine <- Sorted], 384 | warn_system(System) ++ format_table(remove_relative_column([Header | Data]), Width); 385 | 386 | format_report(basic, [#{system := System} | _] = Reports, Width) -> 387 | Sorted = sort_by(Reports), 388 | #{result := #{average := MaxAvg}} = hd(Sorted), 389 | Header = ["Code", " ||", " QPS", " Time", " Rel"], 390 | Data0 = [format_report_line(MaxAvg, ReportLine, basic) || ReportLine <- Sorted], 391 | %% remove columns that should not be displayed in basic mode 392 | Data = [[C1, C2, C3, C4, C5] || [C1, C2, _, C3, _, _, _, C4, C5] <- Data0], 393 | warn_system(System) ++ format_table(remove_relative_column([Header | Data]), Width). 394 | 395 | sort_by([#{mode := timed} | _] = Reports) -> 396 | lists:sort(fun (#{result := #{average := L}}, #{result := #{average := R}}) -> L < R end, Reports); 397 | sort_by([#{mode := _} | _] = Reports) -> 398 | lists:sort(fun (#{result := #{average := L}}, #{result := #{average := R}}) -> L > R end, Reports). 399 | 400 | remove_relative_column([H, D]) -> 401 | [lists:droplast(H), lists:droplast(D)]; 402 | remove_relative_column(HasRelative) -> 403 | HasRelative. 
404 | 
405 | format_report_line(MaxAvg, #{mode := timed, code := Code, result := #{average := Avg, stddev := StdDev,
406 |     iteration_time := IterationTime, p99 := P99, median := Median, samples := Samples},
407 |     run_options := #{concurrency := Concurrency}}, ReportFormat) ->
408 |     [
409 |         format_code(Code),
410 |         integer_to_list(Concurrency),
411 |         integer_to_list(length(Samples)),
412 |         if ReportFormat =:= basic -> erlperf_file_log:format_number(erlang:round(1000000000 / IterationTime));
413 |             true -> erlperf_file_log:format_duration(erlang:round(Avg * 1000)) end,
414 |         io_lib:format("~.2f%", [StdDev * 100 / Avg]),
415 |         erlperf_file_log:format_duration(Median * 1000), %% convert from us to ns
416 |         erlperf_file_log:format_duration(P99 * 1000), %% convert from us to ns
417 |         erlperf_file_log:format_duration(IterationTime), %% already in ns
418 |         integer_to_list(erlang:round(MaxAvg * 100 / Avg)) ++ "%"
419 |     ];
420 | 
421 | format_report_line(MaxAvg, #{code := Code, result := #{average := Avg, stddev := StdDev,
422 |     iteration_time := IterationTime, p99 := P99, median := Median, samples := Samples},
423 |     run_options := #{concurrency := Concurrency}}, _ReportFormat) when Avg > 0.5 ->
424 |     [
425 |         format_code(Code),
426 |         integer_to_list(Concurrency),
427 |         integer_to_list(length(Samples) - 1),
428 |         erlperf_file_log:format_number(erlang:round(Avg)),
429 |         io_lib:format("~.2f%", [StdDev * 100 / Avg]),
430 |         erlperf_file_log:format_number(Median),
431 |         erlperf_file_log:format_number(P99),
432 |         erlperf_file_log:format_duration(IterationTime),
433 |         integer_to_list(erlang:round(Avg * 100 / MaxAvg)) ++ "%"
434 |     ];
435 | 
436 | format_report_line(_MaxAvg, #{code := Code, result := #{samples := Samples},
437 |     run_options := #{concurrency := Concurrency}}, _ReportFormat) ->
438 |     [
439 |         format_code(Code),
440 |         integer_to_list(Concurrency),
441 |         integer_to_list(length(Samples) - 1),
442 |         "0",
443 |         "inf",
444 |         "0",
445 |         "0",
446 |         "inf",
447 |         "0%"
448 |     ].
449 | 
450 | %% generic table formatter routine, accepting a list of lists
451 | format_table([Header | Data] = Rows, Width) ->
452 |     %% find the longest string in each column
453 |     HdrWidths = [string:length(H) + 1 || H <- Header],
454 |     ColWidths = lists:foldl(
455 |         fun (Row, Acc) ->
456 |             [max(string:length(D) + 1, Old) || {D, Old} <- lists:zip(Row, Acc)]
457 |         end, HdrWidths, Data),
458 |     %% reserved (non-adjustable) columns
459 |     Reserved = lists:sum(tl(ColWidths)),
460 |     FirstColWidth = min(hd(ColWidths), Width - Reserved),
461 |     Format = "~*s" ++ lists:concat([io_lib:format("~~~bs", [W]) || W <- tl(ColWidths)]) ++ "~n",
462 |     %% just format the table
463 |     [io_lib:format(Format, [-FirstColWidth | Row]) || Row <- Rows].
464 | 
465 | %% detects terminal width (in characters) to shorten long output lines
466 | viewport_width() ->
467 |     case io:columns() of {ok, C} -> C; _ -> 80 end.
468 | 
469 | format_code(#{label := Label}) when is_list(Label) ->
470 |     Label;
471 | format_code(#{label := Label}) when is_binary(Label) ->
472 |     binary_to_list(Label);
473 | format_code(#{label := undefined, runner := Runner}) ->
474 |     format_code_1(Runner);
475 | format_code(#{runner := Runner}) ->
476 |     format_code_1(Runner).
477 | 
478 | format_code_1(Code) when is_tuple(Code) ->
479 |     lists:flatten(io_lib:format("~tp", [Code]));
480 | format_code_1(Code) when is_tuple(hd(Code)) ->
481 |     lists:flatten(io_lib:format("[~tp, ...]", [hd(Code)]));
482 | format_code_1(Code) when is_function(Code) ->
483 |     lists:flatten(io_lib:format("~tp", [Code]));
484 | format_code_1(Code) when is_list(Code) ->
485 |     Code;
486 | format_code_1(Code) when is_binary(Code) ->
487 |     binary_to_list(Code).
488 | 
489 | warn_system(#{dynamic_trace := Trace} = System) when Trace =/= none ->
490 |     [io_lib:format("WARNING: Dynamic Trace Probes enabled (~s detected)~n", [Trace]) | warn_system(maps:remove(dynamic_trace, System))];
491 | warn_system(#{emu_type := Type} = System) when Type =/= opt ->
492 |     [io_lib:format("WARNING: Emulator is not optimised (~s detected)~n", [Type]) | warn_system(maps:remove(emu_type, System))];
493 | warn_system(#{emu_flavor := Flavor} = System) when Flavor =/= jit ->
494 |     [io_lib:format("WARNING: Emulator is not JIT (~s detected)~n", [Flavor]) | warn_system(maps:remove(emu_flavor, System))];
495 | warn_system(_) ->
496 |     [].
497 | 
498 | format_system(#{os := OSType, system_version := SystemVsn} = System) ->
499 |     OS = io_lib:format("OS : ~s~n", [format_os(OSType)]),
500 |     CPU = if is_map_key(cpu, System) -> io_lib:format("CPU: ~s~n", [maps:get(cpu, System)]); true -> "" end,
501 |     VM = io_lib:format("VM : ~s~n~n", [SystemVsn]),
502 |     [OS, CPU, VM].
503 | 
504 | format_os({unix, freebsd}) -> "FreeBSD";
505 | format_os({unix, darwin}) -> "MacOS";
506 | format_os({unix, linux}) -> "Linux";
507 | format_os({win32, nt}) -> "Windows";
508 | format_os({Family, OS}) -> lists:flatten(io_lib:format("~s/~s", [Family, OS])).
--------------------------------------------------------------------------------
/src/erlperf_cluster_monitor.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Logs monitoring events for the entire cluster, to file or device.
 4 | %%% Requires the {@link erlperf_history} service running, fails otherwise.
 5 | %%% Uses a completely different approach to {@link erlperf_monitor}: instead of
 6 | %%% waiting for new samples to come, the cluster monitor just outputs existing
 7 | %%% samples periodically.
 8 | %%%
 9 | %%% Example primary node:
10 | %%% ```
11 | %%% rebar3 shell --sname primary
12 | %%% (primary@ubuntu22)1> erlperf_history:start_link().
13 | %%% {ok,<0.211.0>}
14 | %%% (primary@ubuntu22)2> erlperf_cluster_monitor:start_link().
15 | %%% {ok,<0.216.0>}
16 | %%% '''
17 | %%%
18 | %%% Example benchmarking node:
19 | %%% ```
20 | %%% rebar3 shell --sname bench1
21 | %%% (bench1@ubuntu22)1> net_kernel:connect_node('primary@ubuntu22').
22 | %%% true
23 | %%% (bench1@ubuntu22)2> erlperf:run(rand, uniform, []).
24 | %%% '''
25 | %%%
26 | %%% As soon as a new benchmarking job on the node `bench1' is started, it is
27 | %%% reported in the cluster monitoring output.
28 | %%% @end
29 | -module(erlperf_cluster_monitor).
30 | -author("maximfca@gmail.com").
31 | 
32 | -behaviour(gen_server).
33 | 
34 | %% API
35 | -export([
36 |     start_link/0,
37 |     start_link/3
38 | ]).
39 | 
40 | %% gen_server callbacks
41 | -export([
42 |     init/1,
43 |     handle_call/3,
44 |     handle_cast/2,
45 |     handle_info/2
46 | ]).
47 | 
48 | %% Handler: just like a gen_event handler.
49 | %% If you do need a gen_event handler, make a fun out of it.
50 | -type handler() :: {module(), atom(), term()} | file:filename_all() | {fd, io:device()} | io:device().
51 | %% Specifies monitoring output device. 52 | %% 53 | %% It could be an output {@link io:device()} (such as 54 | %% {@link erlang:group_leader/0}, `user' or `standard_error'), a file name, or a 55 | %% `{Module, Function, UserState}' tuple. In the latter case, instead of printing, cluster monitor 56 | %% calls the specified function, which must have arity of 2, accepting filtered 57 | %% {@link erlperf_monitor:monitor_sample()} as the first argument, and `Userstate' as the second, 58 | %% returning next `UserState'. 59 | 60 | 61 | %% Take a sample every second 62 | -define(DEFAULT_INTERVAL, 1000). 63 | 64 | -define(KNOWN_FIELDS, [time, node, sched_util, dcpu, dio, processes, ports, ets, memory_total, 65 | memory_processes, memory_binary, memory_ets, jobs]). 66 | 67 | %% @equiv start_link(erlang:group_leader(), 1000, undefined) 68 | -spec start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}. 69 | start_link() -> 70 | start_link(erlang:group_leader(), ?DEFAULT_INTERVAL, undefined). 71 | 72 | %% @doc 73 | %% Starts cluster-wide monitor process, and links it to the caller. 74 | %% 75 | %% Intended to be used in a supervisor `ChildSpec', making the process a part of the supervision tree. 76 | %% 77 | %% `IntervalMs' specifies time, in milliseconds, between output handler invocations. 78 | %% 79 | %% Fields specifies the list of field names to report, and the order in which columns are printed. 80 | %% see {@link erlperf_monitor:monitor_sample()} for options. Passing `undefined' prints all columns 81 | %% known by this version of `erlperf'. 82 | %% @end 83 | -spec start_link(Handler :: handler(), IntervalMs :: pos_integer(), Fields :: [atom()] | undefined) -> 84 | {ok, Pid :: pid()} | {error, Reason :: term()}. 85 | start_link(Handler, Interval, Fields) -> 86 | gen_server:start_link(?MODULE, [Handler, Interval, Fields], []). 87 | 88 | %%%=================================================================== 89 | %%% gen_server callbacks 90 | 91 | %% System monitor state 92 | -record(state, { 93 | next :: integer(), %% absolute timer for the next tick 94 | interval :: pos_integer(), 95 | handler :: handler(), 96 | fields :: [atom()] | undefined, 97 | %% previously printed header 98 | %% if the new header is different from the previous one, it gets printed 99 | header = [] :: [atom() | [pid()]] 100 | }). 101 | 102 | %% @private 103 | init([Handler, Interval, Fields0]) -> 104 | Fields = if Fields0 =:= undefined -> ?KNOWN_FIELDS; true -> Fields0 end, 105 | %% use absolute timer to avoid skipping ticks 106 | Now = erlang:monotonic_time(millisecond), 107 | {ok, handle_tick(#state{next = Now, interval = Interval, handler = make_handler(Handler), fields = Fields})}. 108 | 109 | %% @private 110 | handle_call(_Request, _From, _State) -> 111 | erlang:error(notsup). 112 | 113 | %% @private 114 | handle_cast(_Request, _State) -> 115 | erlang:error(notsup). 116 | 117 | %% @private 118 | handle_info({timeout, _, tick}, State) -> 119 | {noreply, handle_tick(State)}. 
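A sketch of the `{Module, Function, UserState}' handler form described above (the module and function names here are hypothetical, not part of erlperf):

```erlang
-module(sample_sink).
-export([collect/2]).

%% Called by erlperf_cluster_monitor every interval with the filtered
%% [{Time, Sample}] list; the return value becomes the next user state.
collect(Samples, Acc) ->
    [Samples | Acc].
```

It would be installed with `erlperf_cluster_monitor:start_link({sample_sink, collect, []}, 1000, [time, node, sched_util])'.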
120 | 121 | %%%=================================================================== 122 | %%% Internal functions 123 | 124 | handle_tick(#state{next = Now, interval = Interval, fields = Fields, handler = Handler, header = Header} = State) -> 125 | Next = Now + Interval, 126 | %% 127 | erlang:start_timer(Next, self(), tick, [{abs, true}]), 128 | %% last interval updates 129 | GetHistoryTo = Now + erlang:time_offset(millisecond), 130 | %% be careful not to overlap the timings (history:get is inclusive) 131 | Samples = erlperf_history:get(GetHistoryTo - Interval + 1, GetHistoryTo), 132 | %% now invoke the handler 133 | {NewHandler, NewHeader} = run_handler(Handler, Fields, Header, Samples), 134 | State#state{next = Next, handler = NewHandler, header = NewHeader}. 135 | 136 | make_handler({_M, _F, _A} = MFA) -> 137 | MFA; 138 | make_handler(IoDevice) when is_pid(IoDevice); is_atom(IoDevice) -> 139 | {fd, IoDevice}; 140 | make_handler({fd, IoDevice}) when is_pid(IoDevice); is_atom(IoDevice) -> 141 | {fd, IoDevice}; 142 | make_handler(Filename) when is_list(Filename); is_binary(Filename) -> 143 | {ok, Fd} = file:open(Filename, [raw, append]), 144 | {fd, Fd}. 145 | 146 | run_handler(Handler, _Fields, Header, []) -> 147 | {Handler, Header}; 148 | 149 | %% handler: MFA callback 150 | run_handler({M, F, A}, Fields, Header, Samples) -> 151 | Filtered = [{Time, maps:with(Fields, Sample)} || {Time, Sample} <- Samples], 152 | {{M, F, M:F(Filtered, A)}, Header}; 153 | 154 | %% built-in handler: file/console output 155 | run_handler({fd, IoDevice}, Fields, Header, Samples) -> 156 | %% the idea of the formatter below is to print lines like this: 157 | %% Dane Time node sched ets memory <123.456.1> <0.123.0> 158 | %% 2022-11-12 08:35:16 node1@host 33.5% 16 128111 12345 159 | %% 2022-11-12 08:35:16 node1@host 33.5% 16 128111 9111 160 | 161 | %% collect all jobs from all samples 162 | Jobs = lists:usort(lists:foldl( 163 | fun ({_Time, #{jobs := Jobs}}, Acc) -> {Pids, _} = lists:unzip(Jobs), Pids ++ Acc end, 164 | [], Samples)), 165 | 166 | %% replace atom 'jobs' with list of Jobs. This is effectively lists:keyreplace, but with no key 167 | NewHeader = [if F =:= jobs -> Jobs; true -> F end || F <- Fields], 168 | 169 | %% format specific fields of samples 170 | Formatted = [ 171 | [formatter(F, if is_list(F) -> maps:get(jobs, Sample); true -> maps:get(F, Sample) end) || F <- NewHeader] 172 | || {_Time, Sample} <- Samples], 173 | 174 | NewLine = io_lib:nl(), 175 | BinNl = list_to_binary(NewLine), 176 | 177 | %% check if header has changed and print if it has 178 | NewHeader =/= Header andalso 179 | begin 180 | FmtHdr = [header(S) || S <- NewHeader] ++ [BinNl], 181 | ok = file:write(IoDevice, FmtHdr) 182 | end, 183 | 184 | %% print the actual line 185 | Data = [F ++ NewLine || F <- Formatted], 186 | ok = file:write(IoDevice, Data), 187 | {{fd, IoDevice}, NewHeader}. 188 | 189 | header(time) -> <<" date time TZ ">>; 190 | header(sched_util) -> <<" %sched">>; 191 | header(dcpu) -> <<" %dcpu">>; 192 | header(dio) -> <<" %dio">>; 193 | header(processes) -> <<" procs">>; 194 | header(ports) -> <<" ports">>; 195 | header(ets) -> <<" ets">>; 196 | header(memory_total) -> <<" mem_total">>; 197 | header(memory_processes) -> <<" mem_proc">>; 198 | header(memory_binary) -> <<" mem_bin">>; 199 | header(memory_ets) -> <<" mem_ets">>; 200 | header(Jobs) when is_list(Jobs) -> 201 | iolist_to_binary([io_lib:format("~16s", [pid_to_list(Pid)]) || Pid <- Jobs]); 202 | header(node) -> <<"node ">>. 
203 | 
204 | formatter(time, Time) ->
205 |     calendar:system_time_to_rfc3339(Time div 1000) ++ " ";
206 | formatter(Percent, Num) when Percent =:= sched_util; Percent =:= dcpu; Percent =:= dio ->
207 |     io_lib:format("~7.2f", [Num]);
208 | formatter(Number, Num) when Number =:= processes; Number =:= ports ->
209 |     io_lib:format("~8b", [Num]);
210 | formatter(ets, Num) ->
211 |     io_lib:format("~6b", [Num]);
212 | formatter(Size, Num) when Size =:= memory_total; Size =:= memory_processes; Size =:= memory_binary; Size =:= memory_ets ->
213 |     io_lib:format("~10s", [erlperf_file_log:format_size(Num)]);
214 | formatter(Jobs, JobsInSample) when is_list(Jobs) ->
215 |     %% here, all Jobs must be formatted, potentially as blanks (if they are not in JobsInSample)
216 |     [case lists:keyfind(Job, 1, JobsInSample) of
217 |          {Job, Num} -> io_lib:format("~16s", [erlperf_file_log:format_number(Num)]);
218 |          false -> "                " end
219 |     || Job <- Jobs];
220 | formatter(node, Node) ->
221 |     io_lib:format("~*s", [-22, Node]).
222 | 
--------------------------------------------------------------------------------
/src/erlperf_file_log.erl:
--------------------------------------------------------------------------------
 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov
 2 | %%% @doc
 3 | %%% Prints monitoring reports produced by {@link erlperf_monitor} to a file
 4 | %%% or an output device.
 5 | %%%
 6 | %%% When the server starts up, it joins the `{erlperf_monitor, Node}' process group
 7 | %%% in the `erlperf' scope. If {@link erlperf_monitor} is also running on
 8 | %%% the same node, reports are printed to the specified device or file.
 9 | %%%
10 | %%% See {@link erlperf_monitor} for a description of the monitoring report.
11 | %%%
12 | %%% `erlperf' leverages this service for verbose output during benchmarking.
13 | %%% @end
14 | -module(erlperf_file_log).
15 | -author("maximfca@gmail.com").
16 | 
17 | -behaviour(gen_server).
18 | 
19 | %% API
20 | -export([
21 |     start_link/0,
22 |     start_link/1,
23 |     %% leaky API...
24 |     format_number/1,
25 |     format_size/1,
26 |     format_duration/1
27 | ]).
28 | 
29 | %% gen_server callbacks
30 | -export([
31 |     init/1,
32 |     handle_call/3,
33 |     handle_cast/2,
34 |     handle_info/2
35 | ]).
36 | 
37 | %% @equiv start_link(erlang:group_leader())
38 | -spec start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}.
39 | start_link() ->
40 |     start_link(erlang:group_leader()).
41 | 
42 | %% @doc
43 | %% Starts the file log process.
44 | -spec start_link(Filename :: string() | file:io_device()) -> {ok, Pid :: pid()} | {error, Reason :: term()}.
45 | start_link(Filename) ->
46 |     gen_server:start_link(?MODULE, [Filename], []).
47 | 
48 | %%%===================================================================
49 | %%% gen_server callbacks
50 | 
51 | %% Repeat the header every 30 lines (by default)
52 | -define(LOG_REPEAT_HEADER, 30).
53 | 
54 | %% System monitor state
55 | -record(state, {
56 |     % file logger counter
57 |     log_counter = ?LOG_REPEAT_HEADER :: non_neg_integer(),
58 |     % when to print the header once again
59 |     log_limit = ?LOG_REPEAT_HEADER :: pos_integer(),
60 |     % file descriptor
61 |     log_file :: file:io_device(),
62 |     % current format line
63 |     format = "" :: string(),
64 |     % saved list of job IDs executed previously
65 |     jobs = [] :: [pid()]
66 | }).
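A minimal usage sketch (assuming the `erlperf' pg scope and {@link erlperf_monitor} are already running, as the verbose-mode wrapper in erlperf_cli arranges; the file name is arbitrary):

```erlang
{ok, _Pg} = pg:start_link(erlperf),
{ok, _Mon} = erlperf_monitor:start_link(),
{ok, _Log} = erlperf_file_log:start_link("/tmp/erlperf.log"),
%% from here on, roughly one monitoring line per second is appended to the file
```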
67 | 68 | %% @private 69 | init([Target]) -> 70 | % subscribe to monitor events 71 | ok = pg:join(erlperf, {erlperf_monitor, node()}, self()), 72 | WriteTo = if is_list(Target) -> {ok, LogFile} = file:open(Target, [raw, append]), LogFile; true -> Target end, 73 | {ok, #state{log_file = WriteTo}}. 74 | 75 | %% @private 76 | handle_call(_Request, _From, _State) -> 77 | erlang:error(notsup). 78 | 79 | %% @private 80 | handle_cast(_Request, _State) -> 81 | erlang:error(notsup). 82 | 83 | %% @private 84 | handle_info(#{jobs := Jobs, time := Time, sched_util := SchedUtil, dcpu := DCPU, dio := DIO, processes := Processes, 85 | ports := Ports, ets := Ets, memory_total := MemoryTotal, memory_processes := MemoryProcesses, 86 | memory_binary := MemoryBinary, memory_ets := MemoryEts}, #state{log_file = File} = State) -> 87 | {JobIds, Ts} = lists:unzip(Jobs), 88 | State1 = maybe_write_header(JobIds, State), 89 | % actual line 90 | TimeFormat = calendar:system_time_to_rfc3339(Time div 1000), 91 | Formatted = iolist_to_binary(io_lib:format(State1#state.format, [ 92 | TimeFormat, SchedUtil, DCPU, DIO, Processes, 93 | Ports, Ets, 94 | format_size(MemoryTotal), 95 | format_size(MemoryProcesses), 96 | format_size(MemoryBinary), 97 | format_size(MemoryEts) 98 | ] ++ [format_number(T) || T <- Ts])), 99 | ok = file:write(File, Formatted), 100 | {noreply, State1}. 101 | 102 | %%%=================================================================== 103 | %%% Internal functions 104 | 105 | maybe_write_header(Jobs, #state{log_counter = LC, log_limit = LL, jobs = Prev} = State) when LC >= LL; Jobs =/= Prev -> 106 | State#state{format = write_header(State#state.log_file, Jobs), log_counter = 0, jobs = Jobs}; 107 | maybe_write_header(_, State) -> 108 | State#state{log_counter = State#state.log_counter + 1}. 109 | 110 | write_header(File, Jobs) -> 111 | JobCount = length(Jobs), 112 | Format = "~s ~6.2f ~6.2f ~6.2f ~8b ~8b ~7b ~9s ~9s ~9s ~9s" ++ 113 | lists:concat(lists:duplicate(JobCount, "~13s")) ++ "~n", 114 | JobIds = list_to_binary(lists:flatten([io_lib:format(" ~12s", [pid_to_list(J)]) || J <- Jobs])), 115 | Header = <<"\nYYYY-MM-DDTHH:MM:SS-oo:oo Sched DCPU DIO Procs Ports ETS Mem Total Mem Proc Mem Bin Mem ETS", JobIds/binary, "\n">>, 116 | ok = file:write(File, Header), 117 | Format. 118 | 119 | %% @private 120 | %% @doc Formats size (bytes) rounded to 3 digits. 121 | %% Unlike @see format_number, used 1024 as a base, 122 | %% so 200 * 1024 is 200 Kb. 123 | -spec format_size(non_neg_integer()) -> string(). 124 | format_size(Num) when Num > 1024*1024*1024*1024 * 100 -> 125 | integer_to_list(round(Num / (1024*1024*1024*1024))) ++ " Tb"; 126 | format_size(Num) when Num > 1024*1024*1024 * 100 -> 127 | integer_to_list(round(Num / (1024*1024*1024))) ++ " Gb"; 128 | format_size(Num) when Num > 1024*1024*100 -> 129 | integer_to_list(round(Num / (1024 * 1024))) ++ " Mb"; 130 | format_size(Num) when Num > 1024 * 100 -> 131 | integer_to_list(round(Num / 1024)) ++ " Kb"; 132 | format_size(Num) -> 133 | integer_to_list(Num). 134 | 135 | %% @private 136 | %% @doc Formats number rounded to 3 digits. 137 | %% Example: 88 -> 88, 880000 -> 880 Ki, 100501 -> 101 Ki 138 | -spec format_number(non_neg_integer()) -> string(). 
139 | format_number(Num) when Num > 100000000000000 -> 140 | integer_to_list(round(Num / 1000000000000)) ++ " Ti"; 141 | format_number(Num) when Num > 100000000000 -> 142 | integer_to_list(round(Num / 1000000000)) ++ " Gi"; 143 | format_number(Num) when Num > 100000000 -> 144 | integer_to_list(round(Num / 1000000)) ++ " Mi"; 145 | format_number(Num) when Num > 100000 -> 146 | integer_to_list(round(Num / 1000)) ++ " Ki"; 147 | format_number(Num) -> 148 | integer_to_list(Num). 149 | 150 | %% @private 151 | %% @doc Formats time duration, from nanoseconds to seconds 152 | %% Example: 88 -> 88 ns, 88000 -> 88 us, 10000000 -> 10 ms 153 | -spec format_duration(non_neg_integer() | infinity) -> string(). 154 | format_duration(infinity) -> 155 | "inf"; 156 | format_duration(Num) when Num > 6000000000000 -> 157 | integer_to_list(round(Num / 60000000000)) ++ " m"; 158 | format_duration(Num) when Num > 100000000000 -> 159 | integer_to_list(round(Num / 1000000000)) ++ " s"; 160 | format_duration(Num) when Num > 100000000 -> 161 | integer_to_list(round(Num / 1000000)) ++ " ms"; 162 | format_duration(Num) when Num > 100000 -> 163 | integer_to_list(round(Num / 1000)) ++ " us"; 164 | format_duration(Num) -> 165 | integer_to_list(Num) ++ " ns". -------------------------------------------------------------------------------- /src/erlperf_history.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Collects, accumulates & filters cluster-wide monitoring events. 4 | %%% Essentially a simple in-memory database for quick cluster overview. 5 | %%% 6 | %%% History server helps to collect monitoring reports from multiple 7 | %%% nodes of a single Erlang cluster. Example setup: single primary 8 | %%% node running `erlperf_history' and {@link erlperf_cluster_monitor} 9 | %%% listens to reports sent by several more nodes in a cluster, running 10 | %%% continuous benchmarking jobs. Nodes may run the same Erlang code, 11 | %%% but using different hardware or OS version. Or, conversely, same 12 | %%% hardware and OS, but variants of Erlang code. See {@link erlperf_cluster_monitor} 13 | %%% for a code sample. 14 | %%% 15 | %%% 16 | %%% @end 17 | -module(erlperf_history). 18 | -author("maximfca@gmail.com"). 19 | 20 | -behaviour(gen_server). 21 | 22 | %% API 23 | -export([ 24 | start_link/0, 25 | start_link/1, 26 | get/1, 27 | get/2 28 | ]). 29 | 30 | %% gen_server callbacks 31 | -export([ 32 | init/1, 33 | handle_call/3, 34 | handle_cast/2, 35 | handle_info/2 36 | ]). 37 | 38 | -define(TABLE, ?MODULE). 39 | 40 | %% default: keep history for 120 seconds 41 | -define(DEFAULT_HISTORY_DURATION, 120000). 42 | 43 | %% @equiv start_link(120_000) 44 | -spec(start_link() -> 45 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 46 | start_link() -> 47 | start_link(?DEFAULT_HISTORY_DURATION). 48 | 49 | %% @doc 50 | %% Starts the history server and links it to the calling process. 51 | %% 52 | %% Designed for use as a part of a supervision tree. 53 | %% `Duration' is time (in milliseconds), how long to keep the 54 | %% reports for. Older reports are discarded. 55 | -spec(start_link(Duration :: pos_integer()) -> 56 | {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). 57 | start_link(Duration) -> 58 | gen_server:start_link({local, ?MODULE}, ?MODULE, [Duration], []). 59 | 60 | %% @doc 61 | %% Returns cluster history. 62 | %% 63 | %% Returns all reports since `From' timestamp to now, sorted by timestamp. 
64 | %% `From' is wall clock time, in milliseconds (e.g. `os:system_time(millisecond)'). 65 | -spec get(From :: integer()) -> [{Time :: non_neg_integer(), erlperf_monitor:monitor_sample()}]. 66 | get(From) -> 67 | get(From, os:system_time(millisecond)). 68 | 69 | %% @doc 70 | %% Returns cluster history reports between From and To (inclusive). 71 | %% 72 | %% `From' and `To' are wall clock time, in milliseconds (e.g. `os:system_time(millisecond)'). 73 | -spec get(From :: integer(), To :: integer()) -> [{Time :: non_neg_integer(), erlperf_monitor:monitor_sample()}]. 74 | get(From, To) -> 75 | % ets:fun2ms(fun ({{T, _}, _} = R) when T =< To, T >= From -> {T, R} end). 76 | ets:select(?TABLE, [{{{'$1', '_'}, '$2'},[{'=<', '$1', To}, {'>=', '$1', From}], [{{'$1', '$2'}}]}]). 77 | 78 | %%=================================================================== 79 | %% gen_server implementation 80 | 81 | %% Keep an ordered set of samples (node, sample) ordered by time. 82 | -record(state, { 83 | duration :: pos_integer() 84 | }). 85 | 86 | %% @private 87 | init([Duration]) -> 88 | ok = pg:join(erlperf, cluster_monitor, self()), 89 | ?TABLE = ets:new(?TABLE, [protected, ordered_set, named_table, {write_concurrency, true}]), 90 | {ok, #state{duration = Duration}}. 91 | 92 | %% @private 93 | handle_call(_Request, _From, _State) -> 94 | erlang:error(notsup). 95 | 96 | %% @private 97 | handle_cast(_Request, _State) -> 98 | erlang:error(notsup). 99 | 100 | %% @private 101 | handle_info(#{time := Time, node := Node} = Sample, State) -> 102 | ets:insert(?TABLE, {{Time, Node}, Sample}), 103 | {noreply, maybe_clean(State)}. 104 | 105 | %% =================================================================== 106 | %% Internal functions 107 | 108 | maybe_clean(#state{duration = Duration} =State) -> 109 | Expired = os:system_time(millisecond) - Duration, 110 | %% ets:fun2ms(fun ({{T, _}, _}) -> T =< Expired end). 111 | ets:select_delete(?TABLE, [{{{'$1', '_'},'_'},[{'=<','$1', Expired}],[true]}]), 112 | State. -------------------------------------------------------------------------------- /src/erlperf_job.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @doc 3 | %%% Job is an instance of a benchmark. 4 | %%% 5 | %%% Every job has a corresponding temporary Erlang module generated. Use 6 | %%% {@link source/1} to get the source code of the generated module. 7 | %%% The structure of this code is an implementation detail and may change 8 | %%% between releases. 9 | %%% 10 | %%% Job controls how many workers are executing runner code in 11 | %%% a tight loop. It does not restart a failing worker, user must ensure 12 | %%% proper error handing and reporting. If a worker process crashes, 13 | %%% standard CRASH REPORT message is printed to the log (console). 14 | %%% 15 | %%% Job accepts a {@link code_map()} containing at least a runner 16 | %%% function definition. 17 | %%% 18 | %%% See {@link callable()} for accepted function definitions. 19 | %%% 20 | %%% Different callable forms have different performance overhead. Overhead can be measured 21 | %%% with {@link erlperf:compare/2}: 22 | %%% ```erlang 23 | %%% erlperf:compare([ 24 | %%% #{runner => fun (V) -> rand:mwc59(V) end, init_runner => {rand, mwc59_seed, []}}, 25 | %%% #{runner => "run(V) -> rand:mwc59(V).", init_runner => {rand, mwc59_seed, []}} 26 | %%% ], #{}). 
27 | %%% [4371541,131460130]
28 | %%% '''
29 | %%% In the example above, the callable defined as a `fun' is 30 times slower than the code compiled
30 | %%% from the source. The difference is caused by the Erlang Runtime implementation, where
31 | %%% indirect calls via `fun' are considerably more expensive. As a rule of thumb, source
32 | %%% code provides the smallest overhead, followed by MFA tuples.
33 | %%%
34 | %%% You can mix & match various definition styles. In the example below, `init/0'
35 | %%% starts an extra {@link pg} scope, `done/0' stops it, and `init_runner/1' takes
36 | %%% the total heap size of the `pg' scope controller to pass it to the `runner/1'.
37 | %%% ```erlang
38 | %%% erlperf_job:start_link(
39 | %%%     #{
40 | %%%         runner => "run(Max) -> rand:uniform(Max).",
41 | %%%         init => {pg, start_link, [scope]},
42 | %%%         init_runner =>
43 | %%%             fun ({ok, Pid}) ->
44 | %%%                 {total_heap_size, THS} = erlang:process_info(Pid, total_heap_size),
45 | %%%                 THS
46 | %%%             end,
47 | %%%         done => fun ({ok, Pid}) -> gen_server:stop(Pid) end
48 | %%%     }
49 | %%% ).
50 | %%% '''
51 | %%% Same example defined with just the source code:
52 | %%% ```erlang
53 | %%% erlperf_job:start_link(
54 | %%%     #{
55 | %%%         runner => "runner(Max) -> rand:uniform(Max).",
56 | %%%         init => "init() -> pg:start_link().",
57 | %%%         init_runner => "init_runner({ok, Pid}) ->
58 | %%%             {total_heap_size, THS} = erlang:process_info(Pid, total_heap_size),
59 | %%%             THS.",
60 | %%%         done => "done({ok, Pid}) -> gen_server:stop(Pid)."
61 | %%%     }
62 | %%% ).
63 | %%% '''
64 | %%%
65 | %%% <h2>Runner function</h2>
66 | %%% Runner function represents code that is run in the tight loop, counting iterations
67 | %%% aggregated across all workers. To give an example, benchmarking a function that takes
68 | %%% exactly a millisecond to execute, with 2 workers, for 2 seconds, will result in
69 | %%% 4000 iterations in total. This would be the value returned by {@link sample/1}.
70 | %%%
71 | %%%
72 | %%% Runner definition can accept zero, one or two arguments.
73 | %%%
74 | %%% `runner/0' ignores the value returned by init_runner/0,1.
75 | %%%
76 | %%% `runner/1' accepts the value returned by init_runner/0,1. It is an error to define
77 | %%% `runner/1' without `init_runner/0,1' defined. This example prints "0" in a
78 | %%% tight loop, measuring {@link io:format/2} performance:
79 | %%% ```erlang
80 | %%% #{
81 | %%%     runner => "run(Init) -> io:format(\"~b~n\", [Init]).",
82 | %%%     init_runner => "0."
83 | %%% }
84 | %%% '''
85 | %%%
86 | %%% `runner/2' adds a second argument, the accumulator, initially set to the
87 | %%% value returned by init_runner/0,1. Subsequent invocations receive the
88 | %%% value returned by the previous runner invocation. Example:
89 | %%% ```erlang
90 | %%% #{
91 | %%%     runner => "run(Init, Acc) -> io:format(\"~b~n\", [Init + Acc]), Acc + 1.",
92 | %%%     init_runner => "0."
93 | %%% }
94 | %%% '''
95 | %%% Running this benchmark prints monotonically increasing numbers. This
96 | %%% may be useful to test stateful functions, for example, fast Random Number
97 | %%% Generators introduced in OTP 25:
98 | %%% ```bash
99 | %%% ./erlperf --init_runner 'rand:mwc59_seed().' 'run(_, Cur) -> rand:mwc59(Cur).'
100 | %%% Code                                    ||        QPS       Time
101 | %%% run(_, Cur) -> rand:mwc59(Cur).          1     123 Mi       8 ns
102 | %%% '''
103 | %%%
104 | %%%
105 | %%% <h2>Common Test usage</h2>
106 | %%%
107 | %%% Example using `erlperf_job' directly, as part of a Common Test test case:
108 | %%% ```erlang
109 | %%% benchmark_rand(Config) when is_list(Config) ->
110 | %%%     %% run timer:sleep(1000) for 5 seconds with 4 runners
111 | %%%     {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [1000]}}),
112 | %%%     Handle = erlperf_job:handle(Job),
113 | %%%     ok = erlperf_job:set_concurrency(Job, 4), %% 4 runner instances
114 | %%%     InitialIterations = erlperf_job:sample(Handle),
115 | %%%     timer:sleep(5000),
116 | %%%     IterationsIn5Sec = erlperf_job:sample(Handle) - InitialIterations,
117 | %%%     erlperf_job:request_stop(Job), %% use gen:stop(Job) for a synchronous call
118 | %%%     %% expect at least 16 iterations (and up to 20)
119 | %%%     ?assert(IterationsIn5Sec >= 16, {too_slow, IterationsIn5Sec}),
120 | %%%     ?assert(IterationsIn5Sec =< 20, {too_fast, IterationsIn5Sec}).
121 | %%% '''
122 | %%%
123 | %%% @end
124 | -module(erlperf_job).
125 | -author("maximfca@gmail.com").
126 | 
127 | -behaviour(gen_server).
128 | 
129 | %% Job API
130 | -export([
131 |     start/1,
132 |     start_link/1,
133 |     request_stop/1,
134 |     concurrency/1,
135 |     set_concurrency/2,
136 |     measure/2,
137 |     sample/1,
138 |     handle/1,
139 |     source/1,
140 |     set_priority/2
141 | ]).
142 | 
143 | %% gen_server callbacks
144 | -export([
145 |     init/1,
146 |     handle_call/3,
147 |     handle_cast/2,
148 |     handle_info/2,
149 |     terminate/2
150 | ]).
151 | 
152 | %% MFArgs: module, function, arguments.
153 | -type mfargs() :: {Module :: module(), Function :: atom(), Args :: [term()]}.
154 | %% `Module', `Function', `Args' accepted by {@link erlang:apply/3}.
155 | 
156 | %% Callable: one or more MFArgs, or a function object, or source code
157 | -type callable() ::
158 |     string() |
159 |     fun() |
160 |     fun((term()) -> term()) |
161 |     fun((term(), term()) -> term()) |
162 |     mfargs() |
163 |     [mfargs()].
164 | %% Function definition to use as a runner, init, done or init_runner.
165 | %%
166 | %% <ul>
167 | %% <ul><li>`string().' Erlang code ending with `.' (period). Example, zero arity:
168 | %% `"runner() -> timer:sleep(1)."', arity one: `"runner(T) -> timer:sleep(T)."',
169 | %% arity two: `"runner(Init, Acc) -> Acc + Init."'. It is allowed to omit the header
170 | %% for a zero arity function, so it becomes `"timer:sleep(1)."'</li>
171 | %% <li>`fun()' function accepting no arguments, example: `fun() -> timer:sleep(1000) end'</li>
172 | %% <li>`fun(term()) -> term()' function accepting one argument, example: `fun(Time) -> timer:sleep(Time) end'</li>
173 | %% <li>`fun(term(), term()) -> term()' function accepting two arguments, example: `fun(Init, Acc) -> Init + Acc end'</li>
174 | %% <li>`mfargs()' tuple accepted by {@link erlang:apply/3}.
175 | %% Example: `{rand, uniform, [10]}'</li>
176 | %% <li>`[mfargs()]' list of MFA tuples, example: `[{rand, uniform, [10]}]'.
177 | %% This functionality is experimental, and only used to replay a recorded call
178 | %% list. May not be supported in future releases.</li>
179 | %% </ul>
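%% As an illustration (not part of the original documentation), the same
%% benchmark can be expressed with several of the callable forms above;
%% a sketch using {@link start_link/1}:
%% ```erlang
%% erlperf_job:start_link(#{runner => "timer:sleep(1)."}),            %% source code string
%% erlperf_job:start_link(#{runner => fun() -> timer:sleep(1) end}),  %% fun object
%% erlperf_job:start_link(#{runner => {timer, sleep, [1]}}).          %% mfargs() tuple
%% '''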
180 | 181 | 182 | %% Benchmark code: init, init_runner, runner, done. 183 | -type code_map() :: #{ 184 | runner := callable(), 185 | init => callable(), 186 | init_runner => callable(), 187 | done => callable(), 188 | label => iodata() 189 | }. 190 | %% Code map contains definitions for: 191 | %% 192 | %%
193 | %% <ul><li>`init/0' - called once when starting the job for the first time.
194 | %% The call is made in the context of the job controller. It is
195 | %% guaranteed to run through the entire benchmark job. So if your
196 | %% benchmark needs to create additional resources - ETS tables, or
197 | %% linked processes, like extra {@link pg} scopes, - init/0 is a
198 | %% good choice. If init/0 fails, the entire job startup fails</li>
199 | %% <li>`init_runner/0,1' - called when the job starts a new worker. init_runner/1
200 | %% accepts the value returned by init/0. It is an error to omit init/0
201 | %% if init_runner/1 is defined. It is allowed to have init_runner/0
202 | %% when init/0 exists. The call to init_runner is made in the context of the
203 | %% worker process, so you can initialise process-local values (e.g. the
204 | %% process dictionary)</li>
205 | %% <li>`runner/0,1,2' defines the function that will be called in a tight loop.
206 | %% See the Runner Function section above for
207 | %% an overview of the runner function variants</li>
208 | %% <li>`done/0,1' - called when the job terminates, to clean up any resources
209 | %% that are not destroyed automatically. done/1 accepts the return value of init/0.
210 | %% The call is made in the context of the job controller</li>
211 | %% <li>`label' - runner label displayed in reports.
212 | %% By default, the runner code is converted to a string</li>
213 | %% </ul>
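%% A sketch (illustration only) of a code map exercising every field;
%% `start_my_server/0' is a hypothetical helper that returns a pid:
%% ```erlang
%% #{
%%     init => fun() -> start_my_server() end,               %% once per job
%%     init_runner => fun(Pid) -> Pid end,                   %% once per worker
%%     runner => fun(Pid) -> gen_server:call(Pid, ping) end, %% tight loop
%%     done => fun(Pid) -> gen_server:stop(Pid) end,         %% cleanup
%%     label => "server ping"
%% }
%% '''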
214 | 215 | %% Internal (opaque) type, please do not use 216 | -opaque handle() :: {module(), non_neg_integer()}. 217 | 218 | %% Temporary type until OTP25+ is everywhere, and OTP <25 support is no longer needed 219 | -type server_ref() :: gen_server:server_ref(). 220 | 221 | -export_type([mfargs/0, handle/0, callable/0, code_map/0]). 222 | 223 | %% @doc 224 | %% Starts the benchmark job. 225 | %% 226 | %% Job starts with no workers, use {@link set_concurrency/2} to start workers. 227 | -spec start(code_map()) -> {ok, pid()} | {error, term()}. 228 | start(#{runner := _MustHave} = Code) -> 229 | gen_server:start(?MODULE, generate(Code), []). 230 | 231 | %% @doc 232 | %% Starts the job and links it to the caller. 233 | %% 234 | %% Job starts with no workers, use {@link set_concurrency/2} to start workers. 235 | -spec start_link(code_map()) -> {ok, pid()} | {error, term()}. 236 | start_link(#{runner := _MustHave} = Code) -> 237 | gen_server:start_link(?MODULE, generate(Code), []). 238 | 239 | %% @doc 240 | %% Requests this job to stop. 241 | %% 242 | %% The job is stopped asynchronously. The caller should monitor the job process 243 | %% to find out when the job has actually stopped. 244 | -spec request_stop(server_ref()) -> ok. 245 | request_stop(JobId) -> 246 | gen_server:cast(JobId, stop). 247 | 248 | %% @doc 249 | %% Returns the number of concurrently running workers for this job. 250 | %% 251 | %% This number may be lower than the amount requested by {@link set_concurrency/2} 252 | %% if workers crash. 253 | -spec concurrency(server_ref()) -> Concurrency :: non_neg_integer(). 254 | concurrency(JobId) -> 255 | gen_server:call(JobId, concurrency). 256 | 257 | %% @doc 258 | %% Sets the number of concurrently running workers for this job. 259 | %% 260 | %% Does not reset counting. May never return if init_runner 261 | %% hangs and does not return control to the job. 262 | %% `Concurrency': number of processes to run. It can be higher than 263 | %% the current count (making the job start more workers), or 264 | %% lower, making the job stop some. 265 | %% 266 | %% Workers that crash are not restarted automatically. 267 | -spec set_concurrency(server_ref(), non_neg_integer()) -> ok. 268 | set_concurrency(JobId, Concurrency) -> 269 | gen_server:call(JobId, {set_concurrency, Concurrency}, infinity). 270 | 271 | %% @doc 272 | %% Runs the timed mode benchmark for a job, similar to {@link timer:tc/3}. 273 | %% 274 | %% Executes the runner `SampleCount' times. Returns time in microseconds. 275 | %% Has less overhead compared to continuous benchmarking, therefore can 276 | %% be used even for very fast functions. 277 | -spec measure(server_ref(), SampleCount :: non_neg_integer()) -> 278 | TimeUs :: non_neg_integer() | already_started. 279 | measure(JobId, SampleCount) -> 280 | gen_server:call(JobId, {measure, SampleCount}, infinity). 281 | 282 | %% @doc 283 | %% Returns the sampling handle for the job. 284 | %% 285 | %% The returned value is opaque, and is an implementation detail, 286 | %% do not use it in any capacity other than passing to {@link sample/1}. 287 | -spec handle(server_ref()) -> handle(). 288 | handle(JobId) -> 289 | gen_server:call(JobId, handle). 290 | 291 | %% @doc 292 | %% Returns the current iteration counter. 293 | %% 294 | %% The iteration counter (sample) monotonically grows by 1 295 | %% every time the runner function is called (without waiting 296 | %% for it to return, so a function that unconditionally crashes 297 | %% still generates a counter of 1). 
298 | -spec sample(Handle :: handle()) -> non_neg_integer() | undefined. 299 | sample({Module, Arity}) -> 300 | {call_count, Count} = erlang:trace_info({Module, Module, Arity}, call_count), 301 | Count. 302 | 303 | %%% Internal, not exported, record. 304 | -record(exec, { 305 | name :: atom(), %% generated module name (must be generated for tracing to work) 306 | source :: [string()], %% module source code 307 | binary :: binary(), %% generated bytecode 308 | init :: fun(() -> term()), %% init function 309 | init_runner :: fun((term()) -> term()), %% must accept 1 argument 310 | runner :: {fun((term()) -> term()), non_neg_integer()}, 311 | sample_runner :: {fun((non_neg_integer(), term()) -> term()), non_neg_integer()}, 312 | done :: fun((term()) -> term()) %% must accept 1 argument 313 | }). 314 | 315 | -type exec() :: #exec{}. 316 | 317 | %% @doc 318 | %% Returns the source code generated from the code map, or for a running job. 319 | -spec source(server_ref() | code_map()) -> [string()]. 320 | source(Code) when is_map(Code) -> 321 | #exec{source = Src} = generate(benchmark, Code), 322 | Src; 323 | source(JobId) -> 324 | gen_server:call(JobId, source). 325 | 326 | 327 | %% @doc 328 | %% Sets job process priority when there are workers running. 329 | %% 330 | %% Worker processes may utilise all schedulers, making the job 331 | %% process lose control over starting and stopping workers. 332 | %% By default, the job process sets 'high' priority when there are 333 | %% any workers running. 334 | %% Returns the previous setting. 335 | %% 336 | %% This function must be called before {@link set_concurrency/2}, 337 | %% otherwise it has no effect until all workers are stopped, and 338 | %% then restarted. 339 | -spec set_priority(server_ref(), erlang:priority_level()) -> erlang:priority_level(). 340 | set_priority(JobId, Priority) -> 341 | gen_server:call(JobId, {priority, Priority}). 342 | 343 | %%-------------------------------------------------------------------- 344 | %% Internal definitions 345 | 346 | -include_lib("kernel/include/logger.hrl"). 347 | 348 | -record(erlperf_job_state, { 349 | %% original spec 350 | exec :: exec(), 351 | %% return value of init/0 352 | init_result :: term(), 353 | %% continuous workers 354 | workers = [] :: [pid()], 355 | %% temporary workers (for sample_count call) 356 | sample_workers = #{} :: #{pid() => {pid(), reference()}}, 357 | %% priority to return to when no workers left 358 | initial_priority :: erlang:priority_level(), 359 | %% priority to set when workers are running 360 | priority = high :: erlang:priority_level() 361 | }). 362 | 363 | -type state() :: #erlperf_job_state{}. 
364 | 365 | %%%=================================================================== 366 | %%% gen_server callbacks 367 | 368 | %% @private 369 | init(#exec{name = Mod, binary = Bin, init = Init, runner = {_Fun, Arity}} = Exec) -> 370 | %% need to trap exits to avoid crashing and not cleaning up the loaded module 371 | erlang:process_flag(trap_exit, true), 372 | {module, Mod} = code:load_binary(Mod, Mod, Bin), 373 | %% run the init/0 if defined 374 | InitRet = 375 | try Init() 376 | catch 377 | Class:Reason:Stack -> 378 | %% clean up loaded module before crashing 379 | code:purge(Mod), 380 | code:delete(Mod), 381 | erlang:raise(Class, Reason, Stack) 382 | end, 383 | %% register in the monitor 384 | ok = erlperf_monitor:register(self(), {Mod, Arity}, 0), 385 | %% start tracing this module runner function 386 | 1 = erlang:trace_pattern({Mod, Mod, Arity}, true, [local, call_count]), 387 | {priority, Prio} = erlang:process_info(self(), priority), 388 | {ok, #erlperf_job_state{exec = Exec, init_result = InitRet, initial_priority = Prio}}. 389 | 390 | %% @private 391 | -spec handle_call(term(), {pid(), reference()}, state()) -> {reply, term(), state()}. 392 | handle_call(handle, _From, #erlperf_job_state{exec = #exec{name = Name, runner = {_Fun, Arity}}} = State) -> 393 | {reply, {Name, Arity}, State}; 394 | 395 | handle_call(concurrency, _From, #erlperf_job_state{workers = Workers} = State) -> 396 | {reply, length(Workers), State}; 397 | 398 | handle_call({measure, SampleCount}, From, #erlperf_job_state{sample_workers = SampleWorkers, 399 | exec = #exec{init_runner = InitRunner, sample_runner = SampleRunner}, 400 | init_result = IR} = State) when SampleWorkers =:= #{} -> 401 | {noreply, State#erlperf_job_state{sample_workers = 402 | start_sample_count(SampleCount, From, InitRunner, IR, SampleRunner)}}; 403 | 404 | handle_call({measure, _SampleCount}, _From, #erlperf_job_state{} = State) -> 405 | {reply, already_started, State}; 406 | 407 | handle_call(source, _From, #erlperf_job_state{exec = #exec{source = Source}} = State) -> 408 | {reply, Source, State}; 409 | 410 | handle_call({priority, Prio}, _From, #erlperf_job_state{priority = Old} = State) -> 411 | {reply, Old, State#erlperf_job_state{priority = Prio}}; 412 | 413 | handle_call({set_concurrency, Concurrency}, _From, #erlperf_job_state{workers = Workers} = State) -> 414 | {reply, ok, State#erlperf_job_state{workers = set_concurrency_impl(length(Workers), Concurrency, State)}}. 415 | 416 | %% @private 417 | handle_cast(stop, State) -> 418 | {stop, normal, State}. 419 | 420 | %% @private 421 | -spec handle_info(term(), state()) -> {noreply, state()}. 422 | handle_info({'EXIT', SampleWorker, Reason}, 423 | #erlperf_job_state{sample_workers = SampleWorkers} = State) when is_map_key(SampleWorker, SampleWorkers) -> 424 | {ReplyTo, MoreSW} = maps:take(SampleWorker, SampleWorkers), 425 | gen:reply(ReplyTo, Reason), 426 | {noreply, State#erlperf_job_state{sample_workers = MoreSW}}; 427 | 428 | handle_info({'EXIT', Worker, Reason}, #erlperf_job_state{workers = Workers} = State) when Reason =:= shutdown -> 429 | {noreply, State#erlperf_job_state{workers = lists:delete(Worker, Workers)}}; 430 | handle_info({'EXIT', Worker, Reason}, #erlperf_job_state{workers = Workers} = State) -> 431 | {stop, Reason, State#erlperf_job_state{workers = lists:delete(Worker, Workers)}}. 432 | 433 | %% @private 434 | -spec terminate(term(), state()) -> ok. 
435 | terminate(_Reason, #erlperf_job_state{init_result = IR, workers = Workers, exec = #exec{name = Mod, done = Done}} = State) -> 436 | %% terminate all workers first 437 | set_concurrency_impl(length(Workers), 0, State), 438 | %% call "done" for cleanup 439 | try Done(IR) 440 | catch 441 | Class:Reason:Stack -> 442 | %% duly note, but do not crash, it is pointless at this moment 443 | ?LOG_ERROR("Exception while executing 'done': ~s:~0p~n~0p", [Class, Reason, Stack]) 444 | after 445 | _ = code:purge(Mod), 446 | true = code:delete(Mod) 447 | end. 448 | 449 | %%%=================================================================== 450 | %%% Internal: runner implementation 451 | 452 | %% Single run 453 | start_sample_count(SampleCount, ReplyTo, InitRunner, InitRet, {SampleRunner, _}) -> 454 | Child = erlang:spawn_link( 455 | fun() -> 456 | %% no extra messaging needed here: the job traps exits, so the 'EXIT' reason delivers either the measured time or the init_runner/runner failure 457 | IRR = InitRunner(InitRet), 458 | T1 = erlang:monotonic_time(), 459 | SampleRunner(SampleCount, IRR), 460 | T2 = erlang:monotonic_time(), 461 | Time = erlang:convert_time_unit(T2 - T1, native, microsecond), 462 | exit(Time) 463 | end 464 | ), 465 | #{Child => ReplyTo}. 466 | 467 | set_concurrency_impl(OldConcurrency, Concurrency, #erlperf_job_state{workers = Workers, init_result = IR, exec = Exec, 468 | priority = Prio, initial_priority = InitialPrio}) -> 469 | case Concurrency - OldConcurrency of 470 | 0 -> 471 | Workers; 472 | NeedMore when NeedMore > 0 -> 473 | %% this process must run with higher priority to avoid being de-scheduled by runners 474 | OldConcurrency =:= 0 andalso erlang:process_flag(priority, Prio), 475 | Workers ++ add_workers(NeedMore, Exec, IR, []); 476 | NeedLess -> 477 | {Fire, Keep} = lists:split(-NeedLess, Workers), 478 | stop_workers(Fire), 479 | Keep =:= [] andalso erlang:process_flag(priority, InitialPrio), 480 | Keep 481 | end. 482 | 483 | add_workers(0, _ExecMap, _InitRet, NewWorkers) -> 484 | %% ensure all new workers completed their InitRunner routine 485 | [receive {Worker, init_runner} -> ok end || Worker <- NewWorkers], 486 | [Worker ! go || Worker <- NewWorkers], 487 | NewWorkers; 488 | add_workers(More, #exec{init_runner = InitRunner, runner = {Runner, _RunnerArity}} = Exec, InitRet, NewWorkers) -> 489 | Control = self(), 490 | %% spawn all processes, and then wait until they complete InitRunner 491 | Worker = erlang:spawn_link( 492 | fun () -> 493 | %% need to send a message even if init_runner fails, hence 'after' 494 | IRR = try InitRunner(InitRet) after Control ! {self(), init_runner} end, 495 | receive go -> ok end, 496 | Runner(IRR) 497 | end), 498 | add_workers(More - 1, Exec, InitRet, [Worker | NewWorkers]). 499 | 500 | stop_workers(Workers) -> 501 | %% try to stop concurrently 502 | [exit(Worker, kill) || Worker <- Workers], 503 | [receive {'EXIT', Worker, _Reason} -> ok end || Worker <- Workers]. 504 | 505 | %%%=================================================================== 506 | %%% Internal: code generation 507 | 508 | %% @doc Creates an Erlang module (text) based on the code map passed. 509 | %% Returns module name (may be generated), runner arity (for tracing purposes), 510 | %% and module source code (text). 511 | %% Exception: raises error with Reason = {generate, {FunName, Arity, ...}} 512 | %% 513 | %% Important: early erlperf versions were generating AST (forms) instead 514 | %% of source code, which isn't exactly supported - the AST is an internal 515 | %% representation that can change over time. 516 | -spec generate(code_map()) -> exec(). 
517 | generate(Code) -> 518 | Name = list_to_atom(lists:concat(["job_", os:getpid(), "_", erlang:unique_integer([positive])])), 519 | generate(Name, Code). 520 | 521 | generate(Name, #{runner := Runner} = Code) -> 522 | {InitFun, InitArity, InitExport, InitText} = generate_init(Name, maps:get(init, Code, error)), 523 | {IRFun, IRArity, IRExport, IRText} = generate_one(Name, init_runner, maps:get(init_runner, Code, error)), 524 | {DoneFun, DoneArity, DoneExport, DoneText} = generate_one(Name, done, maps:get(done, Code, error)), 525 | 526 | %% Separator: newline (as produced by ~n) 527 | Sep = io_lib:format("~n", []), 528 | 529 | %% RunnerArity: how many arguments the _original_ runner wants to accept. 530 | %% Example: run(State) is 1, and run() is 0. 531 | %% Pass two function names: one for sample_count mode, and one for continuous mode 532 | ContName = atom_to_list(Name), 533 | SampleCountName = list_to_atom(ContName ++ "_finite"), 534 | {RunnerFun, SampleRunnerFun, RunnerArity, RunArity, RunnerText} = generate_runner(Name, SampleCountName, Runner, Sep), 535 | RunnerExports = [{Name, RunArity}, {SampleCountName, RunArity + 1}], 536 | 537 | %% verify compatibility between the 4 pieces of code 538 | %% 1. done/1 requires init/0 return value 539 | DoneArity =:= 1 andalso InitArity =:= undefined andalso erlang:error({generate, {done, 1, requires, init}}), 540 | %% 2. init_runner/1 requires init/0 541 | IRArity =:= 1 andalso InitArity =:= undefined andalso erlang:error({generate, {init_runner, 1, requires, init}}), 542 | %% 3. runner/1,2 requires init_runner/0,1 543 | RunnerArity > 0 andalso IRArity =:= undefined andalso erlang:error({generate, {runner, RunnerArity, requires, init_runner}}), 544 | %% 4. runner/[3+] is not allowed 545 | RunnerArity > 2 andalso erlang:error({generate, {runner, RunnerArity, not_supported}}), 546 | %% 5. TODO: Verify there are no name clashes 547 | 548 | %% 549 | Exports = lists:concat(lists:join(", ", [io_lib:format("~s/~b", [F, Arity]) || {F, Arity} <- 550 | [InitExport, IRExport, DoneExport | RunnerExports], Arity =/= undefined])), 551 | 552 | Texts = [Text ++ Sep || Text <- [InitText, IRText, DoneText | RunnerText], Text =/= ""], 553 | 554 | Source = ["-module(" ++ atom_to_list(Name) ++ ")." ++ Sep, "-export([" ++ Exports ++ "])." ++ Sep | Texts], 555 | #exec{name = Name, binary = compile(Name, Source), init = InitFun, init_runner = IRFun, source = Source, 556 | runner = {RunnerFun, RunArity}, sample_runner = {SampleRunnerFun, RunArity}, done = DoneFun}. 557 | 558 | %% generates init/0 code 559 | generate_init(_Mod, Fun) when is_function(Fun, 0) -> 560 | {Fun, 0, {[], undefined}, ""}; 561 | generate_init(_Mod, {M, F, Args}) when is_atom(M), is_atom(F), is_list(Args) -> 562 | {fun () -> erlang:apply(M, F, Args) end, 0, {[], undefined}, ""}; 563 | generate_init(_Mod, [{M, F, Args} | _Tail] = MFAList) when is_atom(M), is_atom(F), is_list(Args) -> 564 | [erlang:error({generate, {init, 0, invalid}}) || 565 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 566 | {fun () -> [erlang:apply(M1, F1, A) || {M1, F1, A} <- MFAList] end, 0, {[], undefined}, ""}; 567 | generate_init(Mod, Text) when is_list(Text) -> 568 | case generate_text(init, Text, false) of 569 | {0, NewName, FullText} -> 570 | {fun () -> Mod:NewName() end, 0, {NewName, 0}, FullText}; 571 | {WrongArity, NewName, _} -> 572 | erlang:error({generate, {init, NewName, WrongArity}}) 573 | end; 574 | generate_init(_Mod, error) -> 575 | {fun () -> undefined end, undefined, undefined, ""}. 
576 | 577 | %% generates init_runner/1 or done/1 578 | generate_one(_Mod, _FunName, error) -> 579 | {fun (_Ignore) -> undefined end, undefined, {[], undefined}, ""}; 580 | generate_one(_Mod, _FunName, Fun) when is_function(Fun, 1) -> 581 | {Fun, 1, {[], undefined}, ""}; 582 | generate_one(_Mod, _FunName, Fun) when is_function(Fun, 0) -> 583 | {fun (_Ignore) -> Fun() end, 0, {[], undefined}, ""}; 584 | generate_one(_Mod, _FunName, {M, F, Args}) when is_atom(M), is_atom(F), is_list(Args) -> 585 | {fun (_Ignore) -> erlang:apply(M, F, Args) end, 0, {[], undefined}, ""}; 586 | generate_one(_Mod, FunName, [{M, F, Args} | _Tail] = MFAList) when is_atom(M), is_atom(F), is_list(Args) -> 587 | [erlang:error({generate, {FunName, 1, invalid, {M1, F1, A}}}) || 588 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 589 | {fun (_Ignore) -> [erlang:apply(M1, F1, A) || {M1, F1, A} <- MFAList] end, 0, {[], undefined}, ""}; 590 | generate_one(Mod, FunName, Text) when is_list(Text) -> 591 | case generate_text(FunName, Text, false) of 592 | {0, NewName, FullText} -> 593 | {fun (_Ignore) -> Mod:NewName() end, 0, {NewName, 0}, FullText}; 594 | {1, NewName, FullText} -> 595 | {fun (Arg) -> Mod:NewName(Arg) end, 1, {NewName, 1}, FullText}; 596 | {WrongArity, NewName, _} -> 597 | erlang:error({generate, {FunName, WrongArity, NewName}}) 598 | end. 599 | 600 | %% runner wrapper: 601 | %% Generates at least 2 functions, one for continuous, and one for 602 | %% sample-count benchmarking. 603 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 0) -> 604 | { 605 | fun (_Ignore) -> Mod:Mod(Fun) end, 606 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, Fun) end, 607 | 0, 1, 608 | [lists:concat([Mod, "(Fun) -> Fun(),", Sep, " ", Mod, "(Fun)."]), 609 | lists:concat([SampleCountName, "(0, _Fun) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Fun) ->", Sep, " Fun(),", 610 | Sep, " ", SampleCountName, "(Count - 1, Fun)."])] 611 | }; 612 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 1) -> 613 | { 614 | fun (Init) -> Mod:Mod(Init, Fun) end, 615 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Fun) end, 616 | 1, 2, 617 | [lists:concat([Mod, "(Init, Fun) ->", Sep, " Fun(Init),", Sep, " ", Mod, "(Init, Fun)."]), 618 | lists:concat([SampleCountName, "(0, _Init, _Fun) ->", Sep, " ok;", Sep, SampleCountName, 619 | "(Count, Init, Fun) ->", Sep, " Fun(Init),", Sep, " ", SampleCountName, "(Count - 1, Init, Fun)."])] 620 | }; 621 | generate_runner(Mod, SampleCountName, Fun, Sep) when is_function(Fun, 2) -> 622 | { 623 | fun (Init) -> Mod:Mod(Init, Init, Fun) end, 624 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Init, Fun) end, 625 | 2, 3, 626 | [lists:concat([Mod, "(Init, State, Fun) ->", Sep, " ", Mod, "(Init, Fun(Init, State), Fun)."]), 627 | lists:concat([SampleCountName, "(0, _Init, _State, _Fun) ->", Sep, " ok; ", SampleCountName, "(Count, Init, State, Fun) ->", 628 | Sep, " ", SampleCountName, "(Count - 1, Init, Fun(Init, State), Fun)."])] 629 | }; 630 | 631 | %% runner wrapper: MFA 632 | generate_runner(Mod, SampleCountName, {M, F, Args}, Sep) when is_atom(M), is_atom(F), is_list(Args) -> 633 | { 634 | fun (_Ignore) -> Mod:Mod(M, F, Args) end, 635 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, M, F, Args) end, 636 | 0, 3, 637 | [lists:concat([Mod, "(M, F, A) ->", Sep, " erlang:apply(M, F, A), ", Mod, "(M, F, A)."]), 638 | lists:concat([SampleCountName, "(0, _M, _F, _A) ->", 
Sep, " ok;", Sep, SampleCountName, 639 | "(Count, M, F, A) ->", Sep, " erlang:apply(M, F, A), ", Sep, " ", SampleCountName, "(Count - 1, M, F, A)."])] 640 | }; 641 | 642 | %% runner wrapper: MFAList 643 | generate_runner(Mod, SampleCountName, [{M, F, Args} | _Tail] = MFAList, Sep) when is_atom(M), is_atom(F), is_list(Args) -> 644 | [erlang:error({generate, {runner, 0, invalid, {M1, F1, A}}}) || 645 | {M1, F1, A} <- MFAList, not is_atom(M1) orelse not is_atom(F1) orelse not is_list(A)], 646 | { 647 | fun (_Ignore) -> Mod:Mod(MFAList) end, 648 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount, MFAList) end, 649 | 0, 1, 650 | [lists:concat([Mod, "(MFAList) -> [erlang:apply(M, F, A) || {M, F, A} <- MFAList], ", Mod, "(MFAList)."]), 651 | lists:concat([SampleCountName, "(0, _MFAList) -> ", Sep, " ok;", SampleCountName, 652 | "(Count, MFAList) ->", Sep, " [erlang:apply(M, F, A) || {M, F, A} <- MFAList], ", 653 | SampleCountName, "(Count - 1, MFAList)."])] 654 | }; 655 | 656 | generate_runner(Mod, SampleCountName, Text, Sep) when is_list(Text) -> 657 | case generate_text(runner, Text, true) of 658 | {0, NoDotText} -> 659 | %% very special case: embedding the text directly, without creating a new function 660 | %% at all. 661 | { 662 | fun (_Ignore) -> Mod:Mod() end, 663 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount) end, 664 | 0, 0, 665 | [lists:concat([Mod, "() ->", Sep, " ", NoDotText, ",", Sep, " ", Mod, "()."]), 666 | lists:concat([SampleCountName, "(0) ->", Sep, " ok;", Sep, SampleCountName, "(Count) ->", 667 | Sep, " ", NoDotText, ",", Sep, " ", SampleCountName, "(Count - 1)."]), 668 | ""] 669 | }; 670 | {0, NewName, FullText} -> 671 | { 672 | fun (_Ignore) -> Mod:Mod() end, 673 | fun (SampleCount, _Ignore) -> Mod:SampleCountName(SampleCount) end, 674 | 0, 0, 675 | [lists:concat([Mod, "() ->", Sep, " ", NewName, "(),", Sep, " ", Mod, "()."]), 676 | lists:concat([SampleCountName, "(0) ->", Sep, " ok;", Sep, SampleCountName, "(Count) ->", 677 | Sep, " ", NewName, "(),", Sep, " ", SampleCountName, "(Count - 1)."]), 678 | FullText] 679 | }; 680 | {1, NewName, FullText} -> 681 | { 682 | fun (Init) -> Mod:Mod(Init) end, 683 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init) end, 684 | 1, 1, 685 | [lists:concat([Mod, "(Init) ->", Sep, " ", NewName, "(Init),", Sep, " ", Mod, "(Init)."]), 686 | lists:concat([SampleCountName, "(0, _Init) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Init) ->", 687 | Sep, " ", NewName, "(Init),", Sep, " ", SampleCountName, "(Count - 1, Init).", Sep]), 688 | FullText] 689 | }; 690 | {2, NewName, FullText} -> 691 | { 692 | fun (Init) -> Mod:Mod(Init, Init) end, 693 | fun (SampleCount, Init) -> Mod:SampleCountName(SampleCount, Init, Init) end, 694 | 2, 2, 695 | [lists:concat([Mod, "(Init, State) ->", Sep, " ", Mod, "(Init, ", NewName, "(Init, State))."]), 696 | lists:concat([SampleCountName, "(0, _Init, _State) ->", Sep, " ok;", Sep, SampleCountName, "(Count, Init, State) -> ", 697 | SampleCountName, "(Count - 1, Init, ", NewName, "(Init, State))."]), 698 | FullText] 699 | } 700 | end; 701 | 702 | generate_runner(_Mod, _SampleCountName, Any, _Sep) -> 703 | erlang:error({generate, {parse, runner, Any}}). 
704 | 705 | %% generates function text 706 | generate_text(Name, Text, AllowRaw) when is_list(Text) -> 707 | case erl_scan:string(Text) of 708 | {ok, Scan, _} -> 709 | case erl_parse:parse_form(Scan) of 710 | {ok, {function, _, AnyName, Arity, _}} -> 711 | {Arity, AnyName, Text}; 712 | {error, _} -> 713 | % try if it's an expr 714 | case erl_parse:parse_exprs(Scan) of 715 | {ok, _Clauses} when AllowRaw -> 716 | {0, lists:droplast(Text)}; 717 | {ok, _Clauses} -> 718 | %% just wrap it in fun_name/0 719 | {0, Name, lists:concat([Name, "() -> ", Text])}; 720 | {error, {_Line, ParseMod, Es}} -> 721 | Errors = ParseMod:format_error(Es), 722 | erlang:error({generate, {parse, Name, Errors}}) 723 | end 724 | end; 725 | {error, ErrorInfo, ErrorLocation} -> 726 | error({generate, {scan, Name, ErrorInfo, ErrorLocation}}) 727 | end. 728 | 729 | %% @doc Compiles text string into a binary module ready for code loading. 730 | compile(Name, Lines) -> 731 | %% might not be the best way, but OTP simply does not have file:compile(Source, ...) 732 | %% Original design was to write the actual source file to temporary disk location, 733 | %% but for diskless or write-protected hosts it was less convenient. 734 | Tokens = [begin {ok, T, _} = erl_scan:string(Line), T end || Line <- Lines], 735 | Forms = [begin {ok, F} = erl_parse:parse_form(T), F end || T <- Tokens], 736 | 737 | case compile:forms(Forms, [no_spawn_compiler_process, binary, return]) of 738 | {ok, Name, Bin} -> 739 | Bin; 740 | {ok, Name, Bin, _Warnings} -> 741 | Bin; 742 | {error, Errors, Warnings} -> 743 | erlang:error({compile, Errors, Warnings}) 744 | end. 745 | -------------------------------------------------------------------------------- /src/erlperf_job_sup.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @private 3 | %%% Supervises statically started jobs. 4 | -module(erlperf_job_sup). 5 | -author("maximfca@gmail.com"). 6 | 7 | -behaviour(supervisor). 8 | 9 | -export([ 10 | start_link/0, 11 | init/1 12 | ]). 13 | 14 | -spec start_link() -> supervisor:startlink_ret(). 15 | start_link() -> 16 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 17 | 18 | -spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. 19 | init([]) -> 20 | {ok, { 21 | #{strategy => simple_one_for_one, 22 | intensity => 30, 23 | period => 60}, 24 | [ 25 | #{ 26 | id => erlperf_job, 27 | start => {erlperf_job, start_link, []}, 28 | modules => [erlperf_job] 29 | } 30 | ]}}. 31 | -------------------------------------------------------------------------------- /src/erlperf_monitor.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2019-2023, Maxim Fedorov 3 | %%% @doc 4 | %%% System monitor: reports scheduler, RAM, and benchmarks. 5 | %%% 6 | %%% Monitor is started by default when {@link erlperf} starts 7 | %%% as an application. Monitor is not started for ad-hoc 8 | %%% benchmarking (e.g. command-line, unless verbose logging 9 | %%% is requested). 10 | %%% 11 | %%% When started, the monitor provides periodic reports 12 | %%% about Erlang VM state, and registered jobs performance. 13 | %%% The reports are sent to all processes that joined 14 | %%% `{erlperf_monitor, Node}' or `cluster_monitor' process 15 | %%% group in `erlperf' scope. 16 | %%% 17 | %%% Reports can be received by any process, even the shell. 
Run 18 | %%% the following example in `rebar3 shell' of `erlperf': 19 | %%% ``` 20 | %%% (erlperf@ubuntu22)1> ok = pg:join(erlperf, cluster_monitor, self()). 21 | %%% ok 22 | %%% (erlperf@ubuntu22)2> erlperf:run(rand, uniform, []). 23 | %%% 14976933 24 | %%% (erlperf@ubuntu22)4> flush(). 25 | %%% Shell got {erlperf@ubuntu22,#{dcpu => 0.0,dio => 6.42619095979426e-4, 26 | %%% ets => 44,jobs => [],memory_binary => 928408, 27 | %%% memory_ets => 978056, 28 | %%% memory_processes => 8603392, 29 | %%% memory_total => 34952096,ports => 5, 30 | %%% processes => 95, 31 | %%% sched_util => 0.013187335960637163, 32 | %%% ''' 33 | %%% 34 | %%% Note that the monitor may report differently from the benchmark 35 | %%% run results. It is running with lower priority and may be significantly 36 | %%% affected by scheduler starvation, timing issues etc.. 37 | %%% 38 | %%% 39 | %%% 40 | %%% @end 41 | -module(erlperf_monitor). 42 | -author("maximfca@gmail.com"). 43 | 44 | -behaviour(gen_server). 45 | 46 | %% API 47 | -export([ 48 | start/0, 49 | start/1, 50 | start_link/0, 51 | start_link/1, 52 | register/3, 53 | unregister/1 54 | ]). 55 | 56 | %% gen_server callbacks 57 | -export([ 58 | init/1, 59 | handle_call/3, 60 | handle_cast/2, 61 | handle_info/2 62 | ]). 63 | 64 | 65 | -include_lib("kernel/include/logger.hrl"). 66 | 67 | -define(DEFAULT_TICK_INTERVAL_MS, 1000). 68 | 69 | 70 | -type monitor_sample() :: #{ 71 | time := integer(), 72 | node := node(), 73 | sched_util := float(), 74 | dcpu := float(), 75 | dio := float(), 76 | processes := integer(), 77 | ports := integer(), 78 | ets := integer(), 79 | memory_total := non_neg_integer(), 80 | memory_processes := non_neg_integer(), 81 | memory_binary := non_neg_integer(), 82 | memory_ets := non_neg_integer(), 83 | jobs => [{Job :: pid(), Cycles :: non_neg_integer()}] 84 | }. 85 | %% Monitoring report 86 | %% 87 | %%
88 | %% <ul><li>`time': timestamp when the report is generated, wall clock, milliseconds</li>
89 | %% <li>`node': originating Erlang node name</li>
90 | %% <li>`sched_util': normal scheduler utilisation, percentage. See {@link scheduler:utilization/1}</li>
91 | %% <li>`dcpu': dirty CPU scheduler utilisation, percentage</li>
92 | %% <li>`dio': dirty IO scheduler utilisation, percentage</li>
93 | %% <li>`processes': number of processes in the VM</li>
94 | %% <li>`ports': number of ports in the VM</li>
95 | %% <li>`ets': number of ETS tables created in the VM</li>
96 | %% <li>`memory_total': total VM memory usage, see {@link erlang:memory/1}</li>
97 | %% <li>`memory_processes': processes memory usage, see {@link erlang:memory/1}</li>
98 | %% <li>`memory_binary': binary memory usage</li>
99 | %% <li>`memory_ets': ETS memory usage</li>
100 | %% <li>`jobs': pairs of job process identifier and the iterations surplus
101 | %% since the last sample. With the default sampling interval of 1 second, this
102 | %% value is "requests/queries per second" (RPS/QPS)</li>
103 | %% </ul>
104 | 105 | -type start_options() :: #{ 106 | interval => pos_integer() 107 | }. 108 | %% Monitor startup options 109 | %% 110 | %%
111 | %% <ul><li>`interval': monitoring interval, 1000 ms by default</li>
112 | %% </ul>
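%% For example (illustration only), a monitor sampling twice per second could
%% be started with:
%% ```erlang
%% {ok, _Pid} = erlperf_monitor:start_link(#{interval => 500}).
%% '''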
113 | 114 | -export_type([monitor_sample/0, start_options/0]). 115 | 116 | %% @equiv start(#{interval => 1000}) 117 | -spec start() -> {ok, Pid :: pid()} | {error, Reason :: term()}. 118 | start() -> 119 | start(#{interval => ?DEFAULT_TICK_INTERVAL_MS}). 120 | 121 | %% @doc 122 | %% Starts the monitor. 123 | %% 124 | %% `Options' are used to change the monitor behaviour. 125 | %%
126 | %% <ul><li>`interval': time, in milliseconds, to wait between sample collection</li>
127 | %% </ul>
128 | -spec start(Options :: start_options()) -> {ok, Pid :: pid()} | {error, Reason :: term()}. 129 | start(Options) -> 130 | gen_server:start({local, ?MODULE}, ?MODULE, Options, []). 131 | 132 | %% @equiv start_link(#{interval => 1000}) 133 | -spec(start_link() -> {ok, Pid :: pid()} | {error, Reason :: term()}). 134 | start_link() -> 135 | start_link(#{interval => ?DEFAULT_TICK_INTERVAL_MS}). 136 | 137 | %% @doc 138 | %% Starts the monitor and links it to the current process. See {@link start/1} 139 | %% for options description. 140 | start_link(Options) -> 141 | gen_server:start_link({local, ?MODULE}, ?MODULE, Options, []). 142 | 143 | %% @doc 144 | %% Registers an {@link erlperf_job} to monitor. 145 | %% 146 | %% The running monitor queries every registered job, adding 147 | %% the number of iterations performed by all workers of 148 | %% that job to the report. 149 | %% This API is intended to be used by {@link erlperf_job} 150 | %% to enable VM monitoring while benchmarking. 151 | %% 152 | %% `Job' specifies the job process identifier; it is only 153 | %% used to detect when the job is stopped, to stop reporting 154 | %% counters for that job. 155 | %% 156 | %% `Handle' is the sampling handle, see {@link erlperf_job:handle/1}. 157 | %% 158 | %% `Initial' value should be provided when an existing job 159 | %% is registered, to avoid reporting accumulated counter value 160 | %% in the first report for that job. 161 | %% 162 | %% Always returns `ok', even when the monitor is not running. 163 | -spec register(pid(), term(), non_neg_integer()) -> ok. 164 | register(Job, Handle, Initial) -> 165 | gen_server:cast(?MODULE, {register, Job, Handle, Initial}). 166 | 167 | %% @doc 168 | %% Removes the job from monitoring. 169 | %% 170 | %% Stops reporting this job's performance. 171 | %% 172 | %% `Job' is the process identifier of the job. 173 | -spec unregister(pid()) -> ok. 174 | unregister(Job) -> 175 | gen_server:cast(?MODULE, {unregister, Job}). 176 | 177 | %%%=================================================================== 178 | %%% gen_server callbacks 179 | 180 | %% System monitor state 181 | -record(state, { 182 | % bi-map of job processes to counters 183 | jobs :: [{pid(), reference(), Handle :: erlperf_job:handle(), Prev :: integer()}], 184 | % scheduler data saved from last call 185 | sched_data :: [{pos_integer(), integer(), integer()}], 186 | % number of normal schedulers 187 | normal :: pos_integer(), 188 | % number of dirty schedulers 189 | dcpu :: pos_integer(), 190 | % 191 | tick = ?DEFAULT_TICK_INTERVAL_MS :: pos_integer(), 192 | next_tick :: integer() 193 | }). 194 | 195 | %% @private 196 | init(#{interval := Tick}) -> 197 | %% TODO: figure out if there is a way to find jobs after restart. 198 | %% ask a supervisor? but not all jobs are supervised... 199 | %% Jobs = [{Pid, erlperf_job:handle(Pid), 0} || 200 | %% {_, Pid, _, _} <- try supervisor:which_children(erlperf_job_sup) catch exit:{noproc, _} -> [] end], 201 | %% [monitor(process, Pid) || {Pid, _, _} <- Jobs], 202 | Jobs = [], 203 | %% enable scheduler utilisation calculation 204 | erlang:system_flag(scheduler_wall_time, true), 205 | Next = erlang:monotonic_time(millisecond) + Tick, 206 | erlang:start_timer(Next, self(), tick, [{abs, true}]), 207 | {ok, #state{ 208 | tick = Tick, 209 | jobs = Jobs, 210 | next_tick = Next, 211 | sched_data = lists:sort(erlang:statistics(scheduler_wall_time_all)), 212 | normal = erlang:system_info(schedulers), 213 | dcpu = erlang:system_info(dirty_cpu_schedulers)} 214 | }. 
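%% Illustration (not part of the original module): the tick handler below derives
%% utilisation as the ratio of active-time delta to total-time delta between two
%% scheduler_wall_time samples. A minimal standalone sketch of the same idea,
%% collapsed over all schedulers (assumes `scheduler_wall_time' statistics are
%% enabled, as done in init/1 above):
%%
%%   util(OldSample, NewSample) ->
%%       {Active, Total} = lists:foldl(
%%           fun({{I, A0, T0}, {I, A1, T1}}, {AccA, AccT}) ->
%%               {AccA + (A1 - A0), AccT + (T1 - T0)}
%%           end, {0, 0},
%%           lists:zip(lists:sort(OldSample), lists:sort(NewSample))),
%%       Active / Total.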
215 | 216 | %% @private 217 | handle_call(_Request, _From, _State) -> 218 | erlang:error(notsup). 219 | 220 | %% @private 221 | handle_cast({register, Job, Handle, Initial}, #state{jobs = Jobs} = State) -> 222 | MRef = monitor(process, Job), 223 | {noreply, State#state{jobs = [{Job, MRef, Handle, Initial} | Jobs]}}; 224 | handle_cast({unregister, Job}, #state{jobs = Jobs} = State) -> 225 | case lists:keyfind(Job, 1, Jobs) of 226 | {Job, MRef, _, _} -> 227 | demonitor(MRef, [flush]), 228 | {noreply, State#state{jobs = lists:keydelete(Job, 1, Jobs)}}; 229 | false -> 230 | {noreply, State} 231 | end. 232 | 233 | %% @private 234 | handle_info({'DOWN', _MRef, process, Pid, _Reason}, #state{jobs = Jobs} = State) -> 235 | {noreply, State#state{jobs = lists:keydelete(Pid, 1, Jobs)}}; 236 | 237 | %% @private 238 | handle_info({timeout, _, tick}, State) -> 239 | {noreply, handle_tick(State)}. 240 | 241 | %%%=================================================================== 242 | %%% Internal functions 243 | 244 | handle_tick(#state{sched_data = Data, normal = Normal, dcpu = Dcpu} = State) -> 245 | NewSched = lists:sort(erlang:statistics(scheduler_wall_time_all)), 246 | {NU, DU, DioU} = fold_normal(Data, NewSched, Normal, Dcpu, 0, 0), 247 | % add benchmarking info 248 | {Jobs, UpdatedJobs} = lists:foldl( 249 | fun ({Pid, MRef, Handle, Prev}, {J, Save}) -> 250 | Cycles = 251 | case erlperf_job:sample(Handle) of 252 | C when is_integer(C) -> C; 253 | undefined -> Prev %% job is stopped, race condition here 254 | end, 255 | {[{Pid, Cycles - Prev} | J], [{Pid, MRef, Handle, Cycles} | Save]} 256 | end, {[], []}, State#state.jobs), 257 | % 258 | Sample = #{ 259 | time => erlang:system_time(millisecond), 260 | node => node(), 261 | memory_total => erlang:memory(total), 262 | memory_processes => erlang:memory(processes), 263 | memory_binary => erlang:memory(binary), 264 | memory_ets => erlang:memory(ets), 265 | sched_util => NU * 100, 266 | dcpu => DU * 100, 267 | dio => DioU * 100, 268 | processes => erlang:system_info(process_count), 269 | ports => erlang:system_info(port_count), 270 | ets => erlang:system_info(ets_count), 271 | jobs => Jobs}, 272 | % notify local & global subscribers 273 | Subscribers = pg:get_members(erlperf, {erlperf_monitor, node()}) ++ pg:get_members(erlperf, cluster_monitor), 274 | [Pid ! Sample || Pid <- Subscribers], 275 | %% 276 | NextTick = State#state.next_tick + State#state.tick, 277 | erlang:start_timer(NextTick, self(), tick, [{abs, true}]), 278 | State#state{sched_data = NewSched, next_tick = NextTick, jobs = lists:reverse(UpdatedJobs)}. 279 | 280 | %% Iterates over normal scheduler 281 | fold_normal(Old, New, 0, Dcpu, AccActive, AccTotal) -> 282 | fold_dirty_cpu(Old, New, Dcpu, AccActive / AccTotal, 0, 0); 283 | fold_normal([{N, OldActive, OldTotal} | Old], 284 | [{N, NewActive, NewTotal} | New], Normal, Dcpu, AccActive, AccTotal) -> 285 | fold_normal(Old, New, Normal - 1, Dcpu, AccActive + (NewActive - OldActive), 286 | AccTotal + (NewTotal - OldTotal)). 287 | 288 | %% Iterates over DCPU 289 | fold_dirty_cpu(Old, New, 0, NormalPct, AccActive, AccTotal) -> 290 | fold_dirty_io(Old, New, NormalPct, AccActive / AccTotal, 0, 0); 291 | fold_dirty_cpu([{N, OldActive, OldTotal} | Old], 292 | [{N, NewActive, NewTotal} | New], Dcpu, NormalPct, AccActive, AccTotal) -> 293 | fold_dirty_cpu(Old, New, Dcpu - 1, NormalPct, AccActive + (NewActive - OldActive), 294 | AccTotal + (NewTotal - OldTotal)). 
295 | 296 | %% Remaining are dirty IO 297 | fold_dirty_io([], [], NormalPct, DcpuPct, AccActive, AccTotal) -> 298 | {NormalPct, DcpuPct, AccActive / AccTotal}; 299 | fold_dirty_io([{N, OldActive, OldTotal} | Old], 300 | [{N, NewActive, NewTotal} | New], NormalPct, DcpuPct, AccActive, AccTotal) -> 301 | fold_dirty_io(Old, New, NormalPct, DcpuPct, AccActive + (NewActive - OldActive), 302 | AccTotal + (NewTotal - OldTotal)). 303 | -------------------------------------------------------------------------------- /src/erlperf_sup.erl: -------------------------------------------------------------------------------- 1 | %%% @copyright (C) 2019-2023, Maxim Fedorov 2 | %%% @private 3 | %%% Top-level supervisor. Always starts process group scope 4 | %%% for `erlperf'. Depending on the configuration starts 5 | %%% a number of jobs or a cluster-wide monitoring solution. 6 | -module(erlperf_sup). 7 | -author("maximfca@gmail.com"). 8 | 9 | -behaviour(supervisor). 10 | 11 | -export([ 12 | start_link/0, 13 | init/1 14 | ]). 15 | 16 | -spec start_link() -> supervisor:startlink_ret(). 17 | start_link() -> 18 | supervisor:start_link({local, ?MODULE}, ?MODULE, []). 19 | 20 | -spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. 21 | init([]) -> 22 | SupFlags = #{strategy => rest_for_one, intensity => 2, period => 60}, 23 | 24 | ChildSpecs = [ 25 | %% start own pg scope, needed for cluster-wide operations 26 | %% even if the node-wide monitoring is not running, the scope 27 | %% needs to be up to send "job started" events for the cluster 28 | #{ 29 | id => pg, 30 | start => {pg, start_link, [erlperf]}, 31 | modules => [pg] 32 | }, 33 | 34 | %% monitoring 35 | #{ 36 | id => erlperf_monitor, 37 | start => {erlperf_monitor, start_link, []}, 38 | modules => [erlperf_monitor] 39 | }, 40 | 41 | %% supervisor for statically started jobs 42 | #{ 43 | id => erlperf_job_sup, 44 | start => {erlperf_job_sup, start_link, []}, 45 | type => supervisor, 46 | modules => [erlperf_job_sup] 47 | }], 48 | 49 | {ok, {SupFlags, ChildSpecs}}. 50 | -------------------------------------------------------------------------------- /test/erlperf_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Maxim Fedorov 3 | %%% @copyright (c) 2019-2023 Maxim Fedorov 4 | %%% @doc 5 | %%% Tests benchmark module, machine-readable output for benchmarks. 6 | %%% @end 7 | %%% ------------------------------------------------------------------- 8 | -module(erlperf_SUITE). 9 | -author("maximfca@gmail.com"). 10 | 11 | -include_lib("stdlib/include/assert.hrl"). 12 | 13 | -export([suite/0, all/0, groups/0, init_per_group/2, end_per_group/2]). 14 | 15 | -export([ 16 | start_link/0, 17 | init/1, 18 | handle_call/3, 19 | handle_cast/2 20 | ]). 21 | 22 | %% Continuous mode benchmarks 23 | -export([mfa/1, mfa_with_cv/1, 24 | mfa_with_tiny_cv/0, mfa_with_tiny_cv/1, 25 | mfa_concurrency/1, mfa_no_concurrency/1, 26 | code_extra_node/1, compare/1]). 27 | 28 | %% Timed mode 29 | -export([mfa_timed/1]). 30 | 31 | %% Concurrency estimation tests 32 | -export([mfa_squeeze/0, mfa_squeeze/1, 33 | squeeze_extended/0, squeeze_extended/1, 34 | squeeze_full/0, squeeze_full/1]). 35 | 36 | %% Tests for error handling 37 | -export([crasher/0, crasher/1, undefer/0, undefer/1, errors/0, errors/1]). 38 | 39 | -export([lock_contention/0, lock_contention/1]). 40 | 41 | -export([stat_calc/0, stat_calc/1, rand_stat/0, rand_stat/1]). 
42 | 43 | %% Record-replay tests 44 | -export([replay/1, do_anything/1]). 45 | 46 | -behaviour(gen_server). 47 | 48 | %%-------------------------------------------------------------------- 49 | %% COMMON TEST CALLBACK FUNCTIONS 50 | 51 | suite() -> 52 | [{timetrap, {seconds, 10}}]. 53 | 54 | groups() -> 55 | [ 56 | {continuous, [parallel], 57 | [mfa, mfa_with_cv, mfa_with_tiny_cv, mfa_concurrency, mfa_no_concurrency, code_extra_node, compare]}, 58 | {timed, [parallel], 59 | [mfa_timed]}, 60 | {concurrency, [], [mfa_squeeze, squeeze_extended, squeeze_full]}, 61 | {errors, [parallel], [crasher, undefer, errors]}, 62 | {overhead, [], [lock_contention]}, 63 | {statistics, [parallel], [stat_calc, rand_stat]}, 64 | {replay, [], [replay]} 65 | ]. 66 | 67 | init_per_group(squeeze, Config) -> 68 | case erlang:system_info(schedulers_online) of 69 | LowCPU when LowCPU < 3 -> 70 | {skip, {slow_cpu, LowCPU}}; 71 | _ -> 72 | Config 73 | end; 74 | init_per_group(_, Config) -> 75 | Config. 76 | 77 | end_per_group(_, Config) -> 78 | Config. 79 | 80 | all() -> 81 | [{group, continuous}, {group, concurrency}, {group, overhead}, 82 | {group, errors}, {group, statistics}, {group, replay}]. 83 | 84 | %%-------------------------------------------------------------------- 85 | %% Helpers: gen_server implementation 86 | init(Pid) -> 87 | {ok, Pid}. 88 | 89 | handle_call({sleep, Num}, _From, State) -> 90 | {reply, timer:sleep(Num), State}. 91 | 92 | handle_cast(_Req, _State) -> 93 | erlang:error(notsup). 94 | 95 | start_link() -> 96 | {ok, Pid} = gen_server:start_link(?MODULE, [], []), 97 | Pid. 98 | 99 | %%-------------------------------------------------------------------- 100 | %% TEST CASES 101 | 102 | mfa(Config) when is_list(Config) -> 103 | C = erlperf:run(timer, sleep, [1]), 104 | ?assert(C > 250 andalso C < 1101), 105 | %% extended report 106 | Extended = erlperf:run({timer, sleep, [1]}, #{report => extended, sample_duration => 100}), 107 | [?assert(Cs > 25 andalso Cs < 110) || Cs <- Extended], 108 | %% full report 109 | #{result := Result} = erlperf:run({timer, sleep, [1]}, #{report => full, sample_duration => 100}), 110 | #{average := Avg} = Result, 111 | ?assert(Avg > 25 andalso Avg < 110). 112 | 113 | mfa_with_cv(Config) when is_list(Config) -> 114 | %% basic report 115 | C = erlperf:run({timer, sleep, [1]}, #{cv => 0.05}), 116 | ?assert(C > 250 andalso C < 1101). 117 | 118 | mfa_with_tiny_cv() -> 119 | [{doc, "Tests benchmarking with very small coefficient of variation, potentially long"}, 120 | {timetrap, {seconds, 60}}]. 121 | 122 | mfa_with_tiny_cv(Config) when is_list(Config) -> 123 | C = erlperf:run({timer, sleep, [1]}, #{samples => 2, interval => 100, cv => 0.002}), 124 | ?assert(C > 250 andalso C < 1101). 125 | 126 | mfa_concurrency(Config) when is_list(Config) -> 127 | C = erlperf:run({timer, sleep, [1]}, #{concurrency => 2}), 128 | ?assert(C > 500 andalso C < 2202, {out_of_range, C, 500, 2202}). 
129 | 130 | compare(Config) when is_list(Config) -> 131 | [C1, C2] = erlperf:compare(["timer:sleep(1).", "timer:sleep(2)."], 132 | #{sample_duration => 100, report => extended}), 133 | ?assertEqual(3, length(C1), {not_extended, C1}), 134 | [?assert(L > R, {left, C1, right, C2}) || {L, R} <- lists:zip(C1, C2)], 135 | %% low-overhead comparison benchmark 136 | %% LEGACY/DEPRECATED: for timed mode, extended report has only 1 sample 137 | [[T1], [T2]] = erlperf:benchmark([#{runner => {timer, sleep, [1]}}, #{runner => "timer:sleep(2)."}], 138 | #{sample_duration => undefined, samples => 50, report => extended}, undefined), 139 | ?assert(is_integer(T1) andalso is_integer(T2)), 140 | ?assert(T1 < T2, {T1, T2}). 141 | 142 | mfa_no_concurrency(Config) when is_list(Config) -> 143 | C = erlperf:run( 144 | #{ 145 | runner => fun (Pid) -> gen_server:call(Pid, {sleep, 1}) end, 146 | init => {?MODULE, start_link, []}, 147 | init_runner => fun(Pid) -> Pid end, 148 | done => {gen_server, stop, []} 149 | }, 150 | #{concurrency => 4}), 151 | ?assert(C > 250 andalso C < 1101, {out_of_range, C, 250, 1101}). 152 | 153 | code_extra_node(Config) when is_list(Config) -> 154 | C = erlperf:run(#{ 155 | runner => "{ok, 1} = application:get_env(kernel, test), timer:sleep(1).", 156 | init => "application:set_env(kernel, test, 1)." 157 | }, 158 | #{concurrency => 2, sample_duration => 100, isolation => #{}}), 159 | ?assertEqual(undefined, application:get_env(kernel, test), {"isolation did not work"}), 160 | ?assert(C > 50 andalso C < 220, {out_of_range, C, 50, 220}). 161 | 162 | %%-------------------------------------------------------------------- 163 | %% timed mode 164 | 165 | mfa_timed(Config) when is_list(Config) -> 166 | %% basic report for 100 'timer:sleep(1) iterations' 167 | Time = erlperf:time({timer, sleep, [1]}, 100), 168 | ?assert(Time > 100 andalso Time < 300, {actual, Time}), %% between 100 and 300 ms 169 | %% extended report for 50 iterations 170 | Times = erlperf:run({timer, sleep, [1]}, #{samples => 5, report => extended, sample_duration => {timed, 50}}), 171 | ?assertEqual(5, length(Times), {times, Times}), 172 | [?assert(T > 50 andalso T < 150, {actual, T}) || T <- Times], %% every run between 50 and 150 ms. 173 | %% full report for 50 iterations 174 | Full = erlperf:run({timer, sleep, [1]}, #{samples => 5, report => full, sample_duration => {timed, 50}}), 175 | #{result := #{average := Avg, samples := FullSsamples}} = Full, 176 | ?assertEqual(5, length(FullSsamples)), 177 | ?assert(Avg > 50000.0 andalso Avg < 150000.0, {actual, Avg}), %% average run between 50 and 150 us (!us!) 178 | %% ensure 'warmup' is supported for timed runs 179 | Now = os:system_time(millisecond), 180 | Warmup = erlperf:run({timer, sleep, [1]}, #{samples => 5, warmup => 10, sample_duration => {timed, 50}}), 181 | ?assert(Warmup > 50 andalso Warmup < 150, {actual, Warmup}), %% between 50 and 150 ms 182 | Elapsed = os:system_time(millisecond) - Now, 183 | ?assert(Elapsed > 750, {warmup_missing, Elapsed}), 184 | ?assert(Elapsed < 3000, {warmup_slow, Elapsed}). 185 | 186 | %%-------------------------------------------------------------------- 187 | %% concurrency estimation test cases 188 | 189 | mfa_squeeze() -> 190 | [{doc, "Tests concurrency estimation mode with basic report"}]. 
191 | 192 | mfa_squeeze(Config) when is_list(Config) -> 193 | Scheds = erlang:system_info(schedulers_online), 194 | {QPS, CPU} = erlperf:run({rand, uniform, [1]}, #{sample_duration => 50}, #{}), 195 | ?assert(QPS > 0), 196 | ?assert(CPU > 1, {schedulers, Scheds, detected, CPU}). 197 | 198 | squeeze_extended() -> 199 | [{doc, "Tests concurrency estimation mode with extended report"}]. 200 | 201 | squeeze_extended(Config) when is_list(Config) -> 202 | {{QPS, CPU}, History} = erlperf:run({rand, uniform, [1]}, 203 | #{sample_duration => 50, warmup => 1, report => extended}, #{}), 204 | %% find the best historical result, and ensure it's 3 steps away from the last 205 | [Best | _] = lists:reverse(lists:keysort(1, History)), 206 | ?assertEqual({QPS, CPU}, Best), 207 | ?assertEqual(Best, lists:nth(4, History), History). 208 | 209 | squeeze_full() -> 210 | [{doc, "Tests concurrency estimation mode with full report"}]. 211 | 212 | squeeze_full(Config) when is_list(Config) -> 213 | Report = erlperf:run({rand, uniform, [1]}, #{sample_duration => 50, warmup => 1, report => full}, #{}), 214 | #{mode := concurrency, result := Best, history := History, sleep := sleep, 215 | run_options := #{concurrency := Concurrency}} = Report, 216 | #{time := Time} = Best, 217 | ct:pal("Best run took ~b ms,~n~p", [Time div 1000, Best]), 218 | %% taking 3 samples 219 | ?assert(Time >= 3 * 50000, {too_fast, Time}), 220 | ?assert(Time < 3 * 100000, {too_slow, Time}), 221 | ?assertEqual({Concurrency, Best}, lists:nth(4, History)). 222 | 223 | %%-------------------------------------------------------------------- 224 | %% error handling test cases 225 | 226 | crasher() -> 227 | [{doc, "Tests job that crashes"}]. 228 | 229 | crasher(Config) when is_list(Config) -> 230 | ?assertException(error, {benchmark, {'EXIT', _, _}}, 231 | erlperf:run({erlang, throw, [ball]}, #{concurrency => 2})). 232 | 233 | undefer() -> 234 | [{doc, "Tests job undefs - e.g. wrong module name"}]. 235 | 236 | undefer(Config) when is_list(Config) -> 237 | ?assertException(error, {benchmark, {'EXIT', _, {undef, _}}}, 238 | erlperf:run({'$cannot_be_this', throw, []}, #{concurrency => 2})). 239 | 240 | errors() -> 241 | [{doc, "Tests various error conditions"}]. 242 | 243 | errors(Config) when is_list(Config) -> 244 | ?assertException(error, {generate, {parse, init, _}}, 245 | erlperf:run(#{runner => {erlang, node, []}, init => []})), 246 | ?assertException(error, {generate, {parse, runner, _}}, 247 | erlperf:run(#{runner => []})), 248 | ?assertException(error, {generate, {parse, runner, _}}, 249 | erlperf:run(#{runner => {[]}})). 250 | 251 | %%-------------------------------------------------------------------- 252 | %% timer skew detection 253 | 254 | lock_contention() -> 255 | [{doc, "Ensures that benchmarking overhead when running multiple concurrent processes is not too high"}, 256 | {timetrap, {seconds, 20}}]. 
257 | 258 | lock_contention(Config) when is_list(Config) -> 259 | %% need at the very least 4 schedulers to create enough contention 260 | case erlang:system_info(schedulers_online) of 261 | Enough when Enough >= 4 -> 262 | Tuple = {lists:seq(1, 5000), list_to_tuple(lists:seq(1, 10000))}, 263 | Init = fun() -> ets:new(tab, [public, named_table]) end, 264 | Done = fun(Tab) -> ets:delete(Tab) end, 265 | Runner = fun() -> true = ets:insert(tab, Tuple) end, %% this inevitably causes lock contention 266 | %% take 50 samples of 10 ms, which should complete in about a second, and 10 extra warmup samples 267 | %% hoping that lock contention is detected at warmup 268 | Before = os:system_time(millisecond), 269 | Report = erlperf:run(#{runner => Runner, init => Init, done => Done}, 270 | #{concurrency => Enough * 4, samples => 50, sample_duration => 10, warmup => 10, report => full}), 271 | TimeSpent = os:system_time(millisecond) - Before, 272 | #{result := #{average := QPS}, sleep := DetectedSleepType} = Report, 273 | ?assertEqual(busy_wait, DetectedSleepType, {"Lock contention was not detected", Report}), 274 | ?assert(QPS > 0, {qps, QPS}), 275 | ?assert(TimeSpent > 500, {too_quick, TimeSpent, expected, 1000}), 276 | ?assert(TimeSpent < 3000, {too_slow, TimeSpent, expected, 1000}); 277 | NotEnough -> 278 | {skip, {not_enough_schedulers_online, NotEnough}} 279 | end. 280 | 281 | %%-------------------------------------------------------------------- 282 | %% statistics 283 | 284 | %% simplified delta-comparison 285 | -define(assertApprox(Expect, Expr), 286 | begin 287 | ((fun () -> 288 | X__X = (Expect), 289 | X__Y = (Expr), 290 | case (erlang:abs(X__Y - X__X) < 0.0001) of 291 | true -> ok; 292 | false -> erlang:error({assertEqual, 293 | [{module, ?MODULE}, 294 | {line, ?LINE}, 295 | {expression, (??Expr)}, 296 | {expected, X__X}, 297 | {value, X__Y}]}) 298 | end 299 | end)()) 300 | end). 301 | 302 | stat_calc() -> 303 | [{doc, "Tests correctness of statistical calculations over samples"}]. 304 | 305 | stat_calc(Config) when is_list(Config) -> 306 | %% generate with: [erlang:round(rand:normal(40, 100)) || _ <- lists:seq(1, 30)]. 307 | Sample = [36,42,42,47,51,39,37,32,41,32,15,44,41,46,50,36,48,33,35, 308 | 35,25,21,47,40,33,57,55,64,40,30], 309 | 310 | Stats = erlperf:report_stats(Sample), 311 | 312 | ?assertApprox(39.8, maps:get(average, Stats)), 313 | %% ?assertApprox(109.0620, maps:get(variance, Stats)), 314 | ?assertApprox(10.4432, maps:get(stddev, Stats)), 315 | ?assertEqual(40, maps:get(median, Stats)), 316 | ?assertEqual(15, maps:get(min, Stats)), 317 | ?assertEqual(64, maps:get(max, Stats)), 318 | %% ?assertApprox(47, maps:get({percentile, 0.75}, Stats)), 319 | ?assertApprox(64, maps:get(p99, Stats)). 320 | 321 | rand_stat() -> 322 | [{doc, "Use rand module to generate some wildly random results"}]. 
323 | 324 | rand_stat(Config) when is_list(Config) -> 325 | Report = erlperf:run({rand, uniform, []}, #{report => full, samples => 100, sample_duration => 5}), 326 | #{result := Result, mode := continuous, system := System} = Report, 327 | #{min := Min, max := Max, average := Avg, median := Mid, p99 := P99} = Result, 328 | %% just run some sanity checks assertions 329 | ?assertEqual(erlang:system_info(os_type), maps:get(os, System)), 330 | ?assert(is_map_key(cpu, System), {cpu_missing, System}), 331 | ?assert(Min < Max, {min, Min, max, Max}), 332 | ?assert(Avg > Min andalso Avg < Max, {avg, Avg, min, Min, max, Max}), 333 | ?assert(Mid > Min andalso Mid < Max, {median, Mid, min, Min, max, Max}), 334 | ?assert(P99 =< Max, {p99, P99, max, Max}). 335 | 336 | %%-------------------------------------------------------------------- 337 | %% record-replay 338 | 339 | replay(Config) when is_list(Config) -> 340 | spawn(fun () -> timer:sleep(10), do_anything(10) end), 341 | Trace = erlperf:record(?MODULE, '_', '_', 100), 342 | QPS = erlperf:run(Trace), 343 | ?assert(QPS > 10). 344 | 345 | do_anything(0) -> 346 | timer:sleep(1); 347 | do_anything(N) -> 348 | ?MODULE:do_anything(N - 1). -------------------------------------------------------------------------------- /test/erlperf_cli_SUITE.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Maxim Fedorov 3 | %%% @copyright (c) 2019-2023 Maxim Fedorov 4 | %%% ------------------------------------------------------------------- 5 | -module(erlperf_cli_SUITE). 6 | -author("maximfca@gmail.com"). 7 | 8 | -include_lib("stdlib/include/assert.hrl"). 9 | 10 | -export([suite/0, all/0]). 11 | 12 | -export([ 13 | simple/1, concurrent/1, verbose/1, zero/1, compare/1, 14 | usage/1, init/1, 15 | double/1, triple/1, pg/1, mfa/1, 16 | full_report/1, basic_timed_report/1, full_timed_report/1, 17 | recorded/1, 18 | squeeze/0, squeeze/1, step/1, 19 | init_all/0, init_all/1, 20 | label/1 21 | ]). 22 | 23 | %%-------------------------------------------------------------------- 24 | %% COMMON TEST CALLBACK FUNCTIONS 25 | 26 | suite() -> 27 | [{timetrap, {seconds, 20}}]. 28 | 29 | all() -> 30 | [simple, concurrent, verbose, zero, compare, squeeze, step, usage, init, double, 31 | triple, pg, mfa, full_report, basic_timed_report, full_timed_report, recorded, init_all, 32 | label]. 33 | 34 | %%-------------------------------------------------------------------- 35 | %% helper functions 36 | capture_io(Fun) -> 37 | ok = ct:capture_start(), 38 | Fun(), 39 | ok = ct:capture_stop(), 40 | lists:flatten(ct:capture_get()). 41 | 42 | %%-------------------------------------------------------------------- 43 | %% command-line testing 44 | 45 | parse_qps(QPST, "") -> list_to_integer(QPST); 46 | parse_qps(QPST, "Ki") -> list_to_integer(QPST) * 1000; 47 | parse_qps(QPST, "Mi") -> list_to_integer(QPST) * 1000000; 48 | parse_qps(QPST, "Gi") -> list_to_integer(QPST) * 1000000000; 49 | parse_qps(QPST, "Ti") -> list_to_integer(QPST) * 1000000000000. 50 | 51 | parse_duration(TT, "ns") -> list_to_integer(TT); 52 | parse_duration(TT, "us") -> list_to_integer(TT) * 1000; 53 | parse_duration(TT, "ms") -> list_to_integer(TT) * 1000000; 54 | parse_duration(TT, "s") -> list_to_integer(TT) * 1000000000; 55 | parse_duration(TT, "m") -> list_to_integer(TT) * 60 * 1000000000. 56 | 57 | filtersplit(Str, Sep) -> 58 | [L || L <- string:split(Str, Sep, all), L =/= ""]. 

parse_out(Out) ->
    [Header | Lines] = filtersplit(Out, "\n"),
    case filtersplit(Header, " ") of
        ["Code", "||", "QPS", "Time"] ->
            [begin
                case filtersplit(Ln, " ") of
                    [Code, ConcT, QPST, TT, TTU] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, ""), parse_duration(TT, TTU)};
                    [Code, ConcT, QPST, QU, TT, TTU] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, QU), parse_duration(TT, TTU)}
                end
            end || Ln <- Lines];
        ["Code", "||", "QPS", "Time", "Rel"] ->
            [begin
                case filtersplit(Ln, " ") of
                    [Code, ConcT, "0", "inf", Rel] ->
                        {Code, list_to_integer(ConcT), 0, infinity,
                            list_to_integer(lists:droplast(Rel))};
                    [Code, ConcT, QPST, TT, TTU, Rel] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, ""), parse_duration(TT, TTU),
                            list_to_integer(lists:droplast(Rel))};
                    [Code, ConcT, QPST, QU, TT, TTU, Rel] ->
                        {Code, list_to_integer(ConcT), parse_qps(QPST, QU), parse_duration(TT, TTU),
                            list_to_integer(lists:droplast(Rel))}
                end
            end || Ln <- Lines];
        ["Code", "||", "Samples", "Avg", "StdDev", "Median", "P99", "Iteration" | Rel] ->
            [begin
                [Code, ConcT, Samples, Avg0 | T1] = filtersplit(Ln, " "),
                {Avg, T2} = maybe_unit(Avg0, T1),
                [StdDevPercent, Median0 | T3] = T2,
                {Median, T4} = maybe_unit(Median0, T3),
                [P990 | T5] = T4,
                {P99, [TT, TU | T6]} = maybe_unit(P990, T5),
                ?assertEqual($%, lists:last(StdDevPercent)),
                StdDev = list_to_float(lists:droplast(StdDevPercent)),
                Returned = [Code, list_to_integer(ConcT), list_to_integer(Samples), Avg,
                    StdDev, Median, P99, parse_duration(TT, TU)],
                case Rel of
                    [] ->
                        list_to_tuple(Returned);
                    ["Rel"] ->
                        ?assertEqual($%, lists:last(T6)),
                        Relative = list_to_integer(lists:droplast(T6)),
                        list_to_tuple(Returned ++ [Relative])
                end
            end || Ln <- Lines];
        Unparsed ->
            ct:pal("Unknown header: ~p", [Unparsed]),
            ?assert(false)
    end.

maybe_unit(Num, [[U, $i] | Tail]) ->
    {parse_qps(Num, [U, $i]), Tail};
maybe_unit(Num, [TimeUnit | Tail]) when TimeUnit =:= "m"; TimeUnit =:= "s"; TimeUnit =:= "ms"; TimeUnit =:= "ns"; TimeUnit =:= "us" ->
    {parse_duration(Num, TimeUnit), Tail};
maybe_unit(Num, Rem) ->
    {list_to_integer(Num), Rem}.

%%--------------------------------------------------------------------
%% TEST CASES

% erlperf 'timer:sleep(1).' -d 100
simple(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun() -> erlperf_cli:main([Code, "-d", "100"]) end),
    [{Code, 1, C, T}] = parse_out(Out),
    ?assert(C > 25 andalso C < 110, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

concurrent(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun() -> erlperf_cli:main([Code, "-d", "100", "-c", "8"]) end),
    [{Code, 8, C, T}] = parse_out(Out),
    ?assert(C > 8 * 25 andalso C < 8 * 110, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(1).' -v
verbose(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "-v"]) end),
    Lines = filtersplit(Out, "\n"),
    %% TODO: actually verify that stuff printed is monitoring stuff
    ?assert(length(Lines) > 3),
    %% expect the first 12 lines to contain the generated source code
    Generated = lists:sublist(Lines, 1, 12),
    ?assertEqual(Generated, [
        ">>>>>>>>>>>>>>> timer:sleep(1). ",
        "-module(benchmark).",
        "-export([benchmark/0, benchmark_finite/1]).", "benchmark() ->",
        " timer:sleep(1),", " benchmark().",
        "benchmark_finite(0) ->", " ok;", "benchmark_finite(Count) ->",
        " timer:sleep(1),", " benchmark_finite(Count - 1).",
        "<<<<<<<<<<<<<<< "]),
    %% parse the last 2 lines
    [{Code, 1, C, T}] = parse_out(lists:join("\n", lists:sublist(Lines, length(Lines) - 1, 2))),
    ?assert(C > 250 andalso C < 1101, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(100).' 'timer:sleep(200).' -d 10
zero(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["timer:sleep(100).", "timer:sleep(200).", "-d", "10"]) end),
    % Code Concurrency Throughput Time Rel
    % timer:sleep(200). 1 0 inf 0%
    % timer:sleep(100). 1 0 inf 0%
    [{_Code, 1, 0, infinity, 0}, {_Code2, 1, 0, infinity, 0}] = parse_out(Out).

% erlperf 'timer:sleep(1).' 'timer:sleep(2).' -d 100 -s 5 -w 1 -c 2
compare(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "timer:sleep(2).", "-s", "5", "-d", "100", "-w", "1", "-c", "2"]) end),
    % Code Concurrency Throughput Time Rel
    % timer:sleep(1). 2 950 100 ns 100%
    % timer:sleep(2). 2 475 200 ns 50%
    [{_Code, 2, C, T, R}, {_Code2, 2, C2, T2, R2}] = parse_out(Out),
    ?assert(C > 66 andalso C < 220, {qps, C}),
    ?assert(C2 > 50 andalso C2 < 110, {qps, C2}),
    ?assert(T < T2),
    ?assert(R > R2).

squeeze() ->
    [{doc, "Tests concurrency estimation (squeeze) mode via command line"}, {timetrap, {seconds, 30}}].

% erlperf 'timer:sleep(1).' --duration 50 --squeeze --min 2 --max 4 --threshold 2
squeeze(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "--duration", "50", "--squeeze", "--min", "2", "--max", "4", "--threshold", "2"]) end),
    [{_Code, 4, C, T}] = parse_out(Out),
    ?assert(C > 50 andalso C < 220, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf 'timer:sleep(1).' --duration 50 --squeeze --min 1 --max 25 --step 10
step(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["timer:sleep(1).", "--duration", "50", "--squeeze", "--min", "1", "--max", "25", "--step", "10"]) end),
    [{_Code, 20, C, T}] = parse_out(Out),
    ?assert(C > 400 andalso C < 600, {qps, C}),
    ?assert(T > 1000000 andalso T < 3000000, {time, T}).

% erlperf -q
usage(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["-q"]) end),
    Line1 = "Error: erlperf: required argument missing: code",
    ?assertEqual(Line1, lists:sublist(Out, length(Line1))),
    Out2 = capture_io(fun () -> erlperf_cli:main(["--un code"]) end),
    ?assertEqual("Error: erlperf: unrecognised argument: --un code", lists:sublist(Out2, 48)),
    ok.

% erlperf '{file,_}=code:is_loaded(pool).' --init 'code:ensure_loaded(pool).' --done 'code:purge(pool), code:delete(pool).'
init(Config) when is_list(Config) ->
    Code = "{file,_}=code:is_loaded(pool).",
    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, "--init", "code:ensure_loaded(pool).", "--done", "code:purge(pool), code:delete(pool)."])
    end),
    % verify 'done' was done
    ?assertEqual(false, code:is_loaded(pool)),
    % verify output
    [{_Code, 1, C, T}] = parse_out(Out),
    ?assert(C > 50, {qps, C}),
    ?assert(T > 0, {time, T}).

% erlperf 'runner(X) -> timer:sleep(X).' --init_runner '1.' 'runner(Y) -> timer:sleep(Y).' --init_runner '2.' -s 2 --duration 100
double(Config) when is_list(Config) ->
    Code = "runner(X)->timer:sleep(X).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "--init_runner", "1.", Code, "--init_runner", "2.", "-s", "2",
        "--duration", "100"]) end),
    [{Code, 1, C, T, R}, {Code, 1, C2, T2, R2}] = parse_out(Out),
    ?assert(C > 25 andalso C < 110, {qps, C}),
    ?assert(C2 > 25 andalso C2 < 55, {qps, C2}),
    ?assert(T < T2),
    ?assert(R > R2).

triple(Config) when is_list(Config) ->
    Out = capture_io(fun () -> erlperf_cli:main(["timer:sleep(1).", "-s", "2", "--duration", "100",
        "timer:sleep(2).", "timer:sleep(3)."]) end),
    [_, _, {_, 1, C3, _T3, R3}] = parse_out(Out),
    ?assert(C3 >= 20 andalso C3 =< 30, {"expected between 20 and 30, got", C3}),
    ?assert(R3 >= 40 andalso R3 =< 60, {"expected between 40 and 60, got", R3}),
    ok.

% erlperf 'runner(S)->pg:join(S,g,self()),pg:leave(S,g,self()).' --init_runner '{ok,Scope}=pg:start_link(scope),Scope.'
pg(Config) when is_list(Config) ->
    ?assertEqual(undefined, whereis(scope)), %% ensure scope is not left over
    Code = "runner(S)->pg:join(S,g,self()),pg:leave(S,g,self()).",
    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, "--init_runner", "{ok,Scope}=pg:start_link(scope),Scope."])
    end),
    ?assertEqual(undefined, whereis(scope)), %% ensure runner exited
    [{_Code, 1, C, _T}] = parse_out(Out),
    ?assert(C > 100, {qps, C}).

% erlperf '{rand, uniform, [4]}'
mfa(Config) when is_list(Config) ->
    Code = "{rand,uniform,[4]}",
    Out = capture_io(fun () -> erlperf_cli:main([Code]) end),
    [{Code, 1, _C, _T}] = parse_out(Out).

% erlperf 'timer:sleep(1).' -d 100 -r full
full_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    AllOut = capture_io(fun () -> erlperf_cli:main([Code, "-d", "100", "-r", "full"]) end),
    [[$O, $S | _], A1] = string:split(AllOut, "\n"), %% test that the first line is the OS
    [[$C, $P, $U | _], A2] = string:split(A1, "\n"),
    [[$V, $M, $ , $:, $ | VM], A3] = string:split(A2, "\n"), %% extract VM
    [_, Out] = string:split(A3, "\n"),
    ?assertEqual(string:trim(erlang:system_info(system_version)), string:trim(VM, both)),
    [{Code, 1, Samples, Avg, Dev, Med, P99, Time}] = parse_out(Out),
    ?assertEqual(3, Samples),
    ?assert(Med =< P99),
    ?assert(Dev < 50, {deviation, Dev}),
    ?assert(Avg > 25 andalso Avg < 110, {avg, Avg}),
    ?assert(Time > 1000000 andalso Time < 3000000, {time, Time}).

% erlperf 'timer:sleep(1).' -r basic -s 3 -l 50
basic_timed_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    Out = capture_io(fun () -> erlperf_cli:main([Code, "-r", "basic", "-s", "3", "-l", "50"]) end),
    [{_Code, 1, QPS, IterTime}] = parse_out(Out),
    ct:pal("Basic Timed Report:~n~p", [Out]),
    ?assert(QPS > 250 andalso QPS < 1100, {qps, QPS}), %% QPS of 'timer:sleep(1)' is ~500
    ?assert(IterTime >= 1000000 andalso IterTime < 3000000, {time, IterTime}). %% single iteration of timer:sleep(1)

% erlperf 'timer:sleep(1).' -r full -l 100 -s 5
full_timed_report(Config) when is_list(Config) ->
    Code = "timer:sleep(1).",
    AllOut = capture_io(fun () -> erlperf_cli:main([Code, "-r", "full", "-l", "100", "-s", "5"]) end),
    ct:pal("Full Timed Report:~n~p", [AllOut]),
    [[$O, $S | _], A1] = string:split(AllOut, "\n"), %% test that the first line is the OS
    [[$C, $P, $U | _], A2] = string:split(A1, "\n"),
    [[$V, $M, $ , $:, $ | VM], A3] = string:split(A2, "\n"), %% extract VM
    [_, Out] = string:split(A3, "\n"),
    ?assertEqual(string:trim(erlang:system_info(system_version)), string:trim(VM, both)),
    [{Code, 1, Samples, Avg, Dev, Med, P99, Time}] = parse_out(Out),
    ?assertEqual(5, Samples),
    ?assert(Med =< P99),
    ?assert(Dev < 50, {deviation, Dev}),
    ?assert(Avg >= 200000000 andalso Avg < 400000000, {avg, Avg}), %% average time to complete 100 iterations of sleep(1)
    ?assert(Time >= 1000000 andalso Time < 3000000, {time, Time}). %% single timer:sleep(1) time, in ns


% erlperf ./recorded.list --init 'ets:file2tab("ets.tab").'
recorded(Config) ->
    % write the ETS table down to a file
    Priv = proplists:get_value(priv_dir, Config),
    EtsFile = filename:join(Priv, "ets.tab"),
    RecFile = filename:join(Priv, "recorded.list"),
    test_ets_tab = ets:new(test_ets_tab, [named_table, public, ordered_set]),
    [true = ets:insert(test_ets_tab, {N, rand:uniform(100)}) || N <- lists:seq(1, 100)],
    ok = ets:tab2file(test_ets_tab, EtsFile),
    true = ets:delete(test_ets_tab),
    %
    ok = file:write_file(RecFile, term_to_binary(
        [
            {ets, insert, [test_ets_tab, {100, 40}]},
            {ets, delete, [test_ets_tab, 100]}
        ])),
    %
    Out = capture_io(fun () -> erlperf_cli:main(
        [RecFile, "--init", "ets:file2tab(\"" ++ EtsFile ++ "\")."])
    end),
    [LN1, LN2] = string:split(Out, "\n"),
    ?assertEqual(["Code", "||", "QPS", "Time"], string:lexemes(LN1, " ")),
    ?assertMatch(["[{ets,insert,[test_ets_tab,{100,40}]},", "...]", "1" | _], string:lexemes(LN2, " ")),
    ok.

init_all() ->
    [{doc, "Test init_all, done_all, init_runner_all options"}].

%% ./erlperf 'runner(X)->timer:sleep(X).' 'runner(X)->timer:sleep(X).' 'runner(X)->timer:sleep(X).'
%%   --init_all '5.' --init '1.' --init_runner_all 'ir(Z) -> Z * 2.' --init_runner '5.' --init_runner '2.' --done_all '2.'
init_all(Config) when is_list(Config) ->
    Code = "runner(X)->timer:sleep(X).",
    Code2 = "runner(Y)->timer:sleep(Y).",
    %% how this test works:
    %% --init_all returns 5 for all 3 jobs; for code #1, --init is overridden to return 1.
    %% --init_runner_all returns 2x of the init result, but overrides for #1 and #2 return 5 and 2,
    %% so the resulting delays are 5, 2 and 10 ms.
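    %% Worked out per job (matches the assertions below):
    %%   job #1 (Code):  --init "1." (override), --init_runner "5." (override) -> sleeps 5 ms
    %%   job #2 (Code2): --init_all "5.",        --init_runner "2." (override) -> sleeps 2 ms
    %%   job #3 (Code):  --init_all "5.",        --init_runner_all: ir(5) = 10 -> sleeps 10 ms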

    Out = capture_io(fun () -> erlperf_cli:main(
        [Code, Code2, Code, "--init_all", "5.", "--init", "1.", "--init_runner_all", "ir(Z) -> Z * 2.",
            "--init_runner", "5.", "--init_runner", "2.",
            "--done_all", "2.", "-s", "2", "--duration", "100"]) end), %% unrelated parts to make the test quicker
    [{Code2, 1, C1, _, R}, {Code, 1, C2, _, R2}, {Code, 1, C3, _, R3}] = parse_out(Out),
    %% tests sorting as well
    ?assert(C1 > 25 andalso C1 < 55, {qps, C1}), %% 2 ms delay
    ?assert(C2 > 10 andalso C2 < 25, {qps, C2}), %% 5 ms delay
    ?assert(C3 > 5 andalso C3 < 11, {qps, C3}), %% 10 ms delay
    ?assert(R > R2), %% the 2 ms delay job has a higher relative rate than the 5 ms one
    ?assert(R2 > R3). %% the 5 ms delay job has a higher relative rate than the 10 ms one

% erlperf 'foo.' --label bar
label(Config) when is_list(Config) ->
    Out = capture_io(
        fun () -> erlperf_cli:main(["foo.", "--label", "bar"]) end),
    [{Label, _, _, _}] = parse_out(Out),
    ?assertEqual("bar", Label).
--------------------------------------------------------------------------------
/test/erlperf_cluster_monitor_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests the combination of erlperf_monitor, erlperf_cluster_monitor,
%%% erlperf_history and erlperf_job. This is an integration test
%%% for the entire cluster monitoring subsystem.
%%% @end
-module(erlperf_cluster_monitor_SUITE).
-author("maximfca@gmail.com").

%% Common Test headers
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([suite/0, all/0]).

%% Test cases
-export([monitor_cluster/0, monitor_cluster/1]).

-export([handle_update/2]).

suite() ->
    [{timetrap, {seconds, 20}}].

all() ->
    [monitor_cluster].

%%--------------------------------------------------------------------
%% TEST CASES

monitor_cluster() ->
    [{doc, "Tests 3 separate cluster monitors watching the same data"}].

monitor_cluster(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, HistPid} = erlperf_history:start_link(),

    Control = self(),
    LogFile = filename:join(proplists:get_value(priv_dir, Config), "cluster_log.txt"),
    ok = ct:capture_start(),

    %% TODO: deliberately omit memory fields?
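    %% Field names mirror erlperf_monitor sample maps: sched_util is normal
    %% scheduler utilisation, dcpu/dio cover dirty CPU/IO schedulers, and the
    %% memory_* fields correspond to erlang:memory/1 categories (a reading of
    %% the field names documented here for convenience, not asserted by this test).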
    AllFields = [time, node, sched_util, dcpu, dio, processes, ports, ets,
        memory_total, memory_processes, memory_binary, memory_ets, jobs],

    %% start the cluster monitor
    {ok, ClusterHandlePid} = erlperf_cluster_monitor:start_link({?MODULE, handle_update, [Control]}, 1000, AllFields),
    %% start another cluster monitor (this one printing to the console)
    {ok, ClusterMonPid} = erlperf_cluster_monitor:start_link(),
    %% start a 3rd cluster monitor printing to a file
    {ok, ClusterFilePid} = erlperf_cluster_monitor:start_link(LogFile, 1000, AllFields),

    Started = os:system_time(millisecond),
    %% simulate jobs on 3 nodes, each node sending 3 data samples (the monitors are expected to eventually catch those)
    LocalJobs = [{self(), 100}, {Pg, 200}],
    Node2Jobs = [{HistPid, 500}],
    Nodes = [{node(), LocalJobs}, {'node2@localhost', Node2Jobs}, {'node3@localhost', []}],
    Times = [Started + Seq * 1000 || Seq <- lists:seq(1, 3)],
    %% common message template
    Template = #{sched_util => 0.1, dcpu => 0.1, dio => 0.1, processes => 10, ports => 20, ets => 30,
        memory_total => 100, memory_processes => 10, memory_binary => 20, memory_ets => 30},
    Samples = [HistPid ! Template#{node => Node, jobs => Jobs, time => Time} || {Node, Jobs} <- Nodes, Time <- Times],

    %% wait for 3 monitoring handler calls from the cluster monitor (3 seconds)
    RawHandlerHistory = poll_history([], 3),
    ClusterHandlerHistory = [S || {_T, S} <- RawHandlerHistory],
    RawHistory = erlperf_history:get(Started),
    History = [S || {_T, S} <- RawHistory],

    %% capture text output
    ct:capture_stop(),
    Console = ct:capture_get(),

    {ok, FileBin} = file:read_file(LogFile),
    [ok = gen:stop(Pid) || Pid <- [ClusterFilePid, ClusterMonPid, ClusterHandlePid, HistPid, Pg]],

    %% all 5 sources should be identical: sent samples, handler history, raw history,
    %% console output and the parsed log file

    ct:pal("File:~n~s", [FileBin]),

    %% compare Samples to ClusterHandlerHistory
    ?assertEqual([], ClusterHandlerHistory -- Samples, {extra_events, ClusterHandlerHistory, expected, Samples}),
    ?assertEqual([], Samples -- ClusterHandlerHistory, {missing_events, Samples, expected, ClusterHandlerHistory}),

    %% Samples to History
    ?assertEqual([], History -- Samples, {extra_events, History, expected, Samples}),
    ?assertEqual([], Samples -- History, {missing_events, Samples, expected, History}),

    %% flatten + split lines of console output
    NewLine = io_lib:nl(),
    [ConsoleHdr | ConsoleData] = string:split(lists:flatten(Console), NewLine, all),
    [FileHdr | FileData] = string:split(binary_to_list(FileBin), NewLine, all),

    %% compare headers and the first 3 lines of data
    ?assertEqual(ConsoleHdr, FileHdr),
    ?assertEqual(lists:sublist(ConsoleData, 1, 3), lists:sublist(FileData, 1, 3)),

    %% TODO: parse the first 3 lines of file/console output and find those samples
    %?assertEqual([ExpectedHeader | ExpectedData], lists:sublist(FileLines, 1, 4)),
    %?assertEqual([ExpectedHeader | ExpectedData], lists:sublist(ConsoleLines, 1, 4)),
    ok.

handle_update(Sample, [Control]) ->
    Control ! {monitor, Sample},
    [Control].

-define(INTERVAL, 1000).
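
%% Note: handle_update/2 above follows the handler shape this suite relies on:
%% it receives a list of samples plus the handler state, and returns the
%% (possibly updated) state. Here it simply forwards every sample to the test
%% process, where poll_history/2 below accumulates them.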

poll_history(Events, 0) ->
    lists:reverse(Events);
poll_history(Events, Count) ->
    % collect cluster_monitor events too
    receive
        {monitor, Sample} ->
            poll_history(Sample ++ Events, Count - 1)
    after 5000 ->
        erlang:error(timeout)
    end.
--------------------------------------------------------------------------------
/test/erlperf_file_log_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests erlperf_file_log
%%% @end
-module(erlperf_file_log_SUITE).
-author("maximfca@gmail.com").

%% Include stdlib header to enable ?assert() for readable output
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0
]).

%% Test cases
-export([
    file_log/0, file_log/1,
    formatters/0, formatters/1
]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [formatters, file_log].

formatters() ->
    [{doc, "Basic tests for formatters like Ki (Kilo-calls) and Kb (Kilo-bytes)"}].

formatters(Config) when is_list(Config) ->
    ?assertEqual("88", erlperf_file_log:format_size(88)),
    ?assertEqual("88000", erlperf_file_log:format_number(88000)),
    ?assertEqual("881 Mb", erlperf_file_log:format_size(881 * 1024 * 1024)),
    ?assertEqual("881 Mb", erlperf_file_log:format_size(881 * 1024 * 1024)),
    ?assertEqual("123 Gb", erlperf_file_log:format_size(123 * 1024 * 1024 * 1024)),
    % rounding
    ?assertEqual("42", erlperf_file_log:format_number(42)),
    ?assertEqual("432 Ki", erlperf_file_log:format_number(431992)),
    ?assertEqual("333 Mi", erlperf_file_log:format_number(333000000)),
    ?assertEqual("999 Gi", erlperf_file_log:format_number(998500431992)).

file_log() ->
    [{doc, "Tests console and file logging sanity and equality"}].

file_log(Config) when is_list(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, Mon} = erlperf_monitor:start_link(),
    Filename = filename:join(proplists:get_value(priv_dir, Config), "file_log_manual.txt"),
    ok = ct:capture_start(),
    {ok, FileLog} = erlperf_file_log:start_link(Filename),
    {ok, ConsoleLog} = erlperf_file_log:start_link(erlang:group_leader()),
    erlperf:run(timer, sleep, [1]),
    ok = ct:capture_stop(),
    [gen:stop(Srv) || Srv <- [ConsoleLog, FileLog, Mon, Pg]],
    ConsoleLines = ct:capture_get(),
    Console = list_to_binary(lists:concat(ConsoleLines)),
    {ok, Logs} = file:read_file(Filename),
    ?assertEqual(Logs, Console),
    ?assert(length(ConsoleLines) > 3, {"at least a header and 3 samples are expected to be printed", ConsoleLines}),
    %% the header must contain the job pid
    [Hdr, S1, S2, S3 | _] = [string:trim(lists:last(string:lexemes(Line, " "))) || Line <- ConsoleLines],
    ?assert(is_pid(list_to_pid(Hdr)), {not_a_pid, Hdr}),
    Samples = [list_to_integer(S) || S <- [S1, S2, S3]],
    [?assert(Sample > 10 andalso Sample < 1000) || Sample <- Samples].
--------------------------------------------------------------------------------
/test/erlperf_history_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% Smoke tests for erlperf_history.
-module(erlperf_history_SUITE).
-author("maximfca@gmail.com").
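
%% erlperf_history accumulates monitor samples published via the `erlperf` pg
%% scope and discards them after the configured retention time (1 second in
%% the test below).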

%% Common Test headers
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([suite/0, all/0]).

%% Test cases
-export([basic/1]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [basic].

%%--------------------------------------------------------------------
%% TEST CASES

basic(Config) when is_list(Config) ->
    {ok, Pg} = pg:start_link(erlperf),
    {ok, HistoryServer} = erlperf_history:start_link(1000), %% keep history for 1 second
    %% simulate a number of samples via pg
    Template = #{sched_util => 0.05},
    FutureTime = os:system_time(millisecond) + 500, %% half a second in the future, +100 ms for samples
    Nodes = [node(), 'second@anywhere'],
    Samples = [Template#{time => FutureTime + Seq * 10, node => Node} || Seq <- lists:seq(1, 10), Node <- Nodes],
    [HistoryServer ! S || S <- Samples],
    sys:get_state(HistoryServer),
    %% all of these samples must still be available
    {_Times, FullHistory} = lists:unzip(erlperf_history:get(FutureTime, FutureTime + 1000)),
    ?assertEqual(Samples, FullHistory),
    %% fire cleanup
    timer:sleep(2000),
    HistoryServer ! Template#{time => FutureTime - 1000, node => node()},
    sys:get_state(HistoryServer),
    %% ensure cleanup worked
    ?assertEqual([], erlperf_history:get(FutureTime, FutureTime + 1000)),
    %% ensure get/1,2 work
    gen:stop(HistoryServer),
    gen:stop(Pg).
--------------------------------------------------------------------------------
/test/erlperf_job_SUITE.erl:
--------------------------------------------------------------------------------
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests all combinations of code maps accepted by erlperf_job
%%% @end
-module(erlperf_job_SUITE).
-author("maximfca@gmail.com").

%% Include stdlib header to enable ?assert() for readable output
-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0,
    groups/0
]).

%% Runner variants test cases
-export([
    runner_code/1, runner_mfa/1, runner_mfa_list/1, runner_fun/1, runner_mod_fun/1,
    runner_fun1/1, runner_fun2/1, runner_code_fun/1, runner_code_fun1/1,
    runner_code_fun2/1, runner_code_name2/1]).

%% Basic test cases
-export([priority/0, priority/1, overhead/0, overhead/1, module/0, module/1]).

%% internal exports
-export([recv/0, recv/1]).

suite() ->
    [{timetrap, {seconds, 120}}].

groups() ->
    [{variants, [parallel], [runner_code, runner_mfa, runner_mfa_list, runner_fun, runner_mod_fun,
        runner_fun1, runner_fun2, runner_code_fun, runner_code_fun1,
        runner_code_fun2, runner_code_name2]}].

all() ->
    [{group, variants}, priority, overhead, module].

%%--------------------------------------------------------------------
%% Convenience helpers

recv() ->
    recv(check).

recv(check) ->
    receive
        {Ref, ReplyTo} ->
            ReplyTo ! Ref
    end.

%%--------------------------------------------------------------------
%% Runner definitions


%%--------------------------------------------------------------------
%% TEST CASES

priority() ->
    [{doc, "Tests job controller priority setting"}].
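
%% Minimal usage sketch of the API exercised below (illustrative only):
%%   {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [1]}}),
%%   high = erlperf_job:set_priority(Job, max), %% returns the previous priority
%%   ok = erlperf_job:set_concurrency(Job, 1).  %% priority boost applies while workers run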

priority(Config) when is_list(Config) ->
    {ok, Job} = erlperf_job:start_link(#{runner => {?MODULE, recv, []}}),
    high = erlperf_job:set_priority(Job, max),
    ok = erlperf_job:set_concurrency(Job, 1),
    {priority, max} = erlang:process_info(Job, priority),
    ok = erlperf_job:set_concurrency(Job, 0),
    {priority, normal} = erlang:process_info(Job, priority),
    gen:stop(Job).

overhead() ->
    [{doc, "Compares timed and continuous mode, may be failing sporadically"}].

overhead(Config) when is_list(Config) ->
    SampleCount = 10000000,
    %% must use code (it's the fastest method); cannot use sleep (imprecise and slow),
    %% and cannot rely on message passing because its timing cannot be controlled
    {ok, Job} = erlperf_job:start_link(#{runner => "rand:uniform(1000)."}),
    Sampler = erlperf_job:handle(Job),
    TimeUs = erlperf_job:measure(Job, SampleCount),
    %% measure the same thing now with a continuous benchmark
    ok = erlperf_job:set_concurrency(Job, 1),
    %% fetch a sample, sleeping ~ the same time the timed run took
    Start = erlperf_job:sample(Sampler),
    timer:sleep(TimeUs div 1000 + 1),
    Finish = erlperf_job:sample(Sampler),
    gen:stop(Job),
    ContinuousQPS = Finish - Start,
    Effy = ContinuousQPS * 100 div SampleCount,
    ct:pal("Continuous benchmarking efficiency: ~b% (~b time for ~b, ~b continuous)~n",
        [Effy, TimeUs, SampleCount, ContinuousQPS]),
    ?assert(Effy > 50, {efficiency, Effy}).

module() ->
    [{doc, "Tests that the generated module gets unloaded after the job stops"}].

module(Config) when is_list(Config) ->
    sys:module_info(), %% just in case it wasn't loaded yet
    PreJob = code:all_loaded(),
    {ok, Job} = erlperf_job:start_link(#{runner => "ok."}),
    InJob = code:all_loaded() -- PreJob,
    gen:stop(Job),
    PostJob = code:all_loaded(),
    ?assertEqual([], PostJob -- PreJob),
    ?assert(length(InJob) == 1, InJob).

%%--------------------------------------------------------------------
%% Code map variations

%% The code below is a simple hack to run the variants with some parallelism.
%% The original code just had RunnerVariants as a list comprehension.

runner_code(Config) when is_list(Config) ->
    variants("erlperf_job_SUITE:recv().", ?FUNCTION_NAME).

runner_mfa(Config) when is_list(Config) ->
    variants({?MODULE, recv, [check]}, ?FUNCTION_NAME).

runner_mfa_list(Config) when is_list(Config) ->
    variants([{?MODULE, recv, [check]}, {erlang, unique_integer, []}], ?FUNCTION_NAME).

runner_fun(Config) when is_list(Config) ->
    variants(fun () -> recv(check) end, ?FUNCTION_NAME).

runner_mod_fun(Config) when is_list(Config) ->
    variants(fun ?MODULE:recv/0, ?FUNCTION_NAME).

runner_fun1(Config) when is_list(Config) ->
    variants(fun (1) -> recv(check), 1 end, ?FUNCTION_NAME).

runner_fun2(Config) when is_list(Config) ->
    variants(fun (1, 1) -> recv(check), 1 end, ?FUNCTION_NAME).

runner_code_fun(Config) when is_list(Config) ->
    variants("runner() -> erlperf_job_SUITE:recv(check).", ?FUNCTION_NAME).

runner_code_fun1(Config) when is_list(Config) ->
    variants("runner(1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).

runner_code_fun2(Config) when is_list(Config) ->
    variants("runner(1, 1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).
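
%% Together with runner_code_name2/1 below, the cases above cover every runner
%% form accepted by erlperf_job: a code string, an MFA tuple, a list of MFA
%% tuples, funs of arity 0..2, and source code with an explicitly named head.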

runner_code_name2(Config) when is_list(Config) ->
    variants("baz(1, 1) -> erlperf_job_SUITE:recv(check), 1.", ?FUNCTION_NAME).

variants(Runner, ProcName) ->
    ProcStr = atom_to_list(ProcName),
    Sep = io_lib:format("~n ", []),
    %% register this process so that init/init_runner/done can send messages to it
    register(ProcName, self()),
    %%
    InitVariants = [
        undefined,
        {erlang, send, [ProcName, init]},
        fun () -> erlang:send(ProcName, {init, self()}) end,
        lists:concat(["erlang:send(" ++ ProcStr ++ ", {init, self()})."]),
        lists:concat(["init() -> erlang:send(" ++ ProcStr ++ ", {init, self()})."]),
        lists:concat(["foo() -> erlang:send(" ++ ProcStr ++ ", {init, self()})."])
    ],
    InitRunnerVariants = [
        undefined,
        {erlang, send, [ProcName, 1]},
        fun () -> erlang:send(ProcName, 1) end,
        fun (_) -> erlang:send(ProcName, 1) end,
        "erlang:send(" ++ ProcStr ++ ", 1).",
        lists:concat(["init_runner() ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."]),
        lists:concat(["init_runner(_) ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."]),
        lists:concat(["bar(_) ->", Sep, "erlang:send(" ++ ProcStr ++ ", 1)."])
    ],
    DoneVariants = [
        undefined,
        {erlang, send, [ProcName, done]},
        fun () -> erlang:send(ProcName, done) end,
        fun (_) -> erlang:send(ProcName, done) end,
        lists:concat(["erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["done() -> erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["done(_) -> erlang:send(" ++ ProcStr ++ ", done)."]),
        lists:concat(["buzz(_) -> erlang:send(" ++ ProcStr ++ ", done)."])
    ],
    %% try all variants
    Variants = [#{init => Init, init_runner => InitRunner, runner => Runner, done => Done}
        || Init <- InitVariants, InitRunner <- InitRunnerVariants, Done <- DoneVariants],
    %% filter "undefined" entries from the map
    Maps = [maps:filter(fun (_Key, Value) -> Value =/= undefined end, Variant)
        || Variant <- Variants],

    %% generate code for each variant and measure performance
    [measure_variant(Variant) || Variant <- Maps].

measure_variant(Code) ->
    try
        {ok, Job} = erlperf_job:start_link(Code),
        Handle = erlperf_job:handle(Job),
        %% wait for the "init" function to complete; when possible, ensure the message was sent from the job process
        is_map_key(init, Code) andalso
            receive
                InitResult ->
                    ?assert((InitResult =:= init) orelse (InitResult =:= {init, Job}), {bad_init_result, InitResult})
            after 1000 -> throw({init, timeout})
            end,
        %% ensure it does not crash attempting to do a single measurement,
        %% basic sanity check that timed mode returns time > 0
        TimeUs = measure_timed(Job, is_map_key(init_runner, Code)),
        ?assert(TimeUs > 1, {timed_mode_too_fast, TimeUs}),
        %%
        ok = erlperf_job:set_concurrency(Job, 1),
        %% wait for 1 worker to get started
        is_map_key(init_runner, Code) andalso expect_message(1, init_runner),
        %% whitebox...
        {erlperf_job_state, _, _, [Worker], _, _, _} = sys:get_state(Job),
        %% by now, the function may have been _called_ once (but not yet returned)
        Before = erlperf_job:sample(Handle),
        ?assert(Before =:= 0 orelse Before =:= 1, {unexpected_sample, Before}),
        BumpCount = 50,
        %% do exactly BumpCount iterations
        [Worker ! {Seq, self()} || Seq <- lists:seq(1, BumpCount)],
        %% receive BumpCount replies
        [receive Seq -> ok end || Seq <- lists:seq(1, BumpCount)],
        %% by now, the extra 50 calls have happened
        After = erlperf_job:sample(Handle),
        ?assert(After >= BumpCount, {unexpected_after, After}),

        %% stop the job
        gen:stop(Job),
        is_map_key(done, Code) andalso expect_message(done, done),
        %% must not have anything left in the message queue
        receive
            Unexpected ->
                ?assert(false, {unexpected_message, Unexpected})
        after 0 -> ok
        end
    catch error:{generate, {What, Arity, requires, Dependency}} ->
        %% verify this combination is indeed invalid
        ?assertNot(is_map_key(Dependency, Code)),
        ?assert((What =:= init_runner andalso Arity =:= 1) orelse (What =:= runner andalso Arity > 0)
            orelse (What =:= done andalso Arity =:= 1))
        %% io:format(user, "Invalid combination: ~s/~b requires ~s~n~n", [What, Arity, Dependency])
    end.

expect_message(Expect, Operation) ->
    receive
        Message ->
            ?assertEqual(Expect, Message, {Operation, Message})
    after
        1000 ->
            throw({Operation, timeout})
    end.

measure_timed(Job, InitRunnerPresent) ->
    Iterations = 10,
    Control = self(),
    spawn_link(
        fun () ->
            TimeUs = erlperf_job:measure(Job, Iterations),
            Control ! {time, TimeUs}
        end),

    %% timed mode starts exactly 1 worker
    InitRunnerPresent andalso expect_message(1, init_runner),
    Worker = find_timed_worker(Job),

    %% send exactly Iterations messages
    [Worker ! {Seq, self()} || Seq <- lists:seq(1, Iterations)],
    [receive Seq -> ok end || Seq <- lists:seq(1, Iterations)],
    receive
        {time, Time} ->
            Time
    end.

find_timed_worker(Job) ->
    {erlperf_job_state, _, _, _, TimedWorkers, _, _} = sys:get_state(Job),
    case map_size(TimedWorkers) of
        1 -> hd(maps:keys(TimedWorkers));
        0 -> timer:sleep(1), find_timed_worker(Job)
    end.
--------------------------------------------------------------------------------
/test/erlperf_monitor_SUITE.erl:
--------------------------------------------------------------------------------
%%%-------------------------------------------------------------------
%%% @author Maxim Fedorov
%%% @copyright (c) 2019-2023 Maxim Fedorov
%%% @doc
%%% Tests monitor
%%% @end

-module(erlperf_monitor_SUITE).

-include_lib("stdlib/include/assert.hrl").

%% Test server callbacks
-export([
    suite/0,
    all/0
]).

%% Test cases
-export([
    subscribe/0, subscribe/1
]).

suite() ->
    [{timetrap, {seconds, 10}}].

all() ->
    [subscribe].

%%--------------------------------------------------------------------
%% TEST CASES

subscribe() ->
    [{doc, "Tests monitoring subscription"}].
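
%% Subscription works through pg: joining the {erlperf_monitor, node()} group
%% in the erlperf scope delivers one sample map per interval, e.g. (shape
%% illustrative, values not from a real run):
%%   #{jobs => [{JobPid, IterationsDone}], sched_util => 0.1, ...}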

subscribe(_Config) ->
    {ok, Mon} = erlperf_sup:start_link(), %% instead of starting the app
    ok = pg:join(erlperf, {erlperf_monitor, node()}, self()),
    % start a benchmark job and watch it run with 4 concurrent workers
    {ok, Job} = erlperf_job:start_link(#{runner => {timer, sleep, [10]}}),
    ok = erlperf_job:set_concurrency(Job, 4),
    ?assertEqual(4, erlperf_job:concurrency(Job)),
    % collect 2 update samples (roughly 2 seconds)
    First = receive_updates(Job, 0, 2),
    ok = erlperf_job:set_concurrency(Job, 2),
    Second = receive_updates(Job, 0, 1),
    ok = gen_server:stop(Job),
    ?assert(First > 0),
    ?assert(Second > 0),
    ?assert(First > Second),
    pg:leave(erlperf, {erlperf_monitor, node()}, self()),
    gen:stop(Mon).

receive_updates(_, Total, 0) ->
    Total;
receive_updates(Job, Total, Count) ->
    receive
        #{jobs := [{Job, Cycles}]} ->
            receive_updates(Job, Total + Cycles, Count - 1);
        Other ->
            ?assertEqual([], Other)
    end.
--------------------------------------------------------------------------------