├── .gitattributes
├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── bench.zig
└── zig.mod


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.zig text eol=lf
2 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [Hejsil]
2 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: github-actions
4 |   directory: "/"
5 |   schedule:
6 |     interval: daily
7 |     time: "11:00"
8 |   open-pull-requests-limit: 10
9 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |   pull_request:
 5 |   schedule:
 6 |     - cron: '0 0 * * *' # scheduled run once a day at 00:00
 7 | 
 8 | jobs:
 9 |   test: # runs the tests in bench.zig once per optimize mode listed below
10 |     strategy:
11 |       matrix:
12 |         optimize: [Debug, ReleaseSafe, ReleaseFast, ReleaseSmall]
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |         with:
17 |           submodules: recursive
18 |       - uses: goto-bus-stop/setup-zig@v2
19 |         with:
20 |           version: master # follows Zig master, not pinned to a release
21 |       - run: zig test bench.zig -O${{ matrix.optimize }}
22 |   lint: # fails when bench.zig is not formatted the way `zig fmt` expects
23 |     runs-on: ubuntu-latest
24 |     steps:
25 |       - uses: actions/checkout@v4
26 |       - uses: goto-bus-stop/setup-zig@v2
27 |         with:
28 |           version: master # follows Zig master, not pinned to a release
29 |       - run: zig fmt --check bench.zig
30 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | zig-cache
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Jimmi Holst Christensen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # zig-bench
  2 | 
  3 | A simple benchmarking library for Zig.
  4 | 
  5 | ```
  6 | Test [0/2] test "Debug benchmark"... 
  7 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
  8 | ----------------------------------------------------------------------------
  9 | sum_slice(block=16)       100000         90       2690        243        107
 10 | sum_slice(block=32)       100000        170       1760        338        190
 11 | sum_slice(block=64)       100000        320       2340        476        352
 12 | sum_slice(block=128)      100000        630       2290        862        678
 13 | sum_slice(block=256)      100000       1270       3170       2402       1336
 14 | sum_slice(block=512)      100000       2550       8490       4835       2651
 15 | sum_reader(block=16)      100000        990       2640       1592       1039
 16 | sum_reader(block=32)      100000       1930       3890       3292       2012
 17 | sum_reader(block=64)      100000       3830       6250       6806       3962
 18 | sum_reader(block=128)      63673       7660      12830      15703       7852
 19 | sum_reader(block=256)      31967      15360      22190      31847      15641
 20 | sum_reader(block=512)      16031      30800      34690      59444      31191
 21 | Test [1/2] test "Debug benchmark generics"... 
 22 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 23 | ----------------------------------------------------------------------------
 24 | sum_vectors(vec4f16)      100000       2730      13390       3620       2775
 25 | sum_vectors(vec4f32)      100000       1289       5800       1277       1296
 26 | sum_vectors(vec4f64)      100000       1389       6870       1358       1400
 27 | sum_vectors(vec8f16)      100000       4400       9680       4613       4479
 28 | sum_vectors(vec8f32)      100000       1389       5180       1231       1400
 29 | sum_vectors(vec8f64)      100000       1390       6170       2260       1457
 30 | sum_vectors(vec16f16)      61088       8090      13980      15455       8184
 31 | sum_vectors(vec16f32)     100000       1399       4560       2069       1441
 32 | sum_vectors(vec16f64)     100000       1440       6080       1664       1475
 33 | All 2 tests passed.
 34 | Test [0/2] test "ReleaseSafe benchmark"... 
 35 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 36 | ----------------------------------------------------------------------------
 37 | sum_slice(block=16)       100000          9       3550        164         18
 38 | sum_slice(block=32)       100000          9        940         22         18
 39 | sum_slice(block=64)       100000         49       1530         66         52
 40 | sum_slice(block=128)      100000         89       1280        102         92
 41 | sum_slice(block=256)      100000        169       1690        210        171
 42 | sum_slice(block=512)      100000        319       5530        724        329
 43 | sum_reader(block=16)      100000         60       2840        180         69
 44 | sum_reader(block=32)      100000        110       3059        288        121
 45 | sum_reader(block=64)      100000        209       2810        323        224
 46 | sum_reader(block=128)     100000        400       1780        387        431
 47 | sum_reader(block=256)     100000        790       2220        681        843
 48 | sum_reader(block=512)     100000       1550       4300       3805       1669
 49 | Test [1/2] test "ReleaseSafe benchmark generics"... 
 50 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 51 | ----------------------------------------------------------------------------
 52 | sum_vectors(vec4f16)      100000       1269       3790       1799       1283
 53 | sum_vectors(vec4f32)      100000        319       1680        300        328
 54 | sum_vectors(vec4f64)      100000        319       1860        355        329
 55 | sum_vectors(vec8f16)      100000       2399       5010       5014       2420
 56 | sum_vectors(vec8f32)      100000        319       2660        641        329
 57 | sum_vectors(vec8f64)      100000        319       7740       1019        330
 58 | sum_vectors(vec16f16)     100000       4599       9970      22580       4636
 59 | sum_vectors(vec16f32)     100000        319       4310       1231        330
 60 | sum_vectors(vec16f64)     100000        429       4070       1783        439
 61 | All 2 tests passed.
 62 | Test [0/2] test "ReleaseFast benchmark"... 
 63 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 64 | ----------------------------------------------------------------------------
 65 | sum_slice(block=16)       100000         19       2840        128         21
 66 | sum_slice(block=32)       100000         19       1600         78         20
 67 | sum_slice(block=64)       100000         19       1970         74         21
 68 | sum_slice(block=128)      100000         19       1530         68         21
 69 | sum_slice(block=256)      100000         39       1250         74         44
 70 | sum_slice(block=512)      100000         59       1150         85         68
 71 | sum_reader(block=16)      100000         19       1170         21         20
 72 | sum_reader(block=32)      100000         19       1650         74         20
 73 | sum_reader(block=64)      100000         19       1250         34         20
 74 | sum_reader(block=128)     100000         19       1240         32         20
 75 | sum_reader(block=256)     100000         39       2180        177         44
 76 | sum_reader(block=512)     100000         59       2470        148         68
 77 | Test [1/2] test "ReleaseFast benchmark generics"... 
 78 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 79 | ----------------------------------------------------------------------------
 80 | sum_vectors(vec4f16)      100000       1259       8590       1678       1284
 81 | sum_vectors(vec4f32)      100000        319       1440        279        327
 82 | sum_vectors(vec4f64)      100000        319       1760        303        327
 83 | sum_vectors(vec8f16)      100000       2399       5260       1861       2417
 84 | sum_vectors(vec8f32)      100000        319       2080        434        327
 85 | sum_vectors(vec8f64)      100000        319       1710        329        328
 86 | sum_vectors(vec16f16)     100000       4599       9010       3883       4634
 87 | sum_vectors(vec16f32)     100000        319       2800        356        329
 88 | sum_vectors(vec16f64)     100000        429       1750        404        436
 89 | All 2 tests passed.
 90 | Test [0/2] test "ReleaseSmall benchmark"... 
 91 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
 92 | ----------------------------------------------------------------------------
 93 | sum_slice(block=16)       100000         19       2760        247         27
 94 | sum_slice(block=32)       100000         29       5090        363         37
 95 | sum_slice(block=64)       100000         50       2640        177         63
 96 | sum_slice(block=128)      100000         90       1830        157        102
 97 | sum_slice(block=256)      100000        169       5860        733        201
 98 | sum_slice(block=512)      100000        330       3690       1560        365
 99 | sum_reader(block=16)      100000        219       1430        276        226
100 | sum_reader(block=32)      100000        420       1870        460        432
101 | sum_reader(block=64)      100000        819       2690        770        837
102 | sum_reader(block=128)     100000       1629       5390       1696       1649
103 | sum_reader(block=256)     100000       3240       9080       3240       3274
104 | sum_reader(block=512)      76638       6469       9780       5302       6524
105 | Test [1/2] test "ReleaseSmall benchmark generics"... 
106 | Benchmark             Iterations    Min(ns)    Max(ns)   Variance   Mean(ns)
107 | ----------------------------------------------------------------------------
108 | sum_vectors(vec4f16)      100000       4859      16710       5250       4902
109 | sum_vectors(vec4f32)      100000        319       1650        326        328
110 | sum_vectors(vec4f64)      100000        319       1470        295        327
111 | sum_vectors(vec8f16)      100000       3980       9070       3382       4254
112 | sum_vectors(vec8f32)      100000        319       3740        459        328
113 | sum_vectors(vec8f64)      100000        319       4100        534        330
114 | sum_vectors(vec16f16)      79800       6219      15130      10000       6265
115 | sum_vectors(vec16f32)     100000        319       3340        455        330
116 | sum_vectors(vec16f64)     100000        429       2020        454        438
117 | All 2 tests passed.
118 | ```
119 | 


--------------------------------------------------------------------------------
/bench.zig:
--------------------------------------------------------------------------------
  1 | const std = @import("std");
  2 | 
  3 | const debug = std.debug;
  4 | const io = std.io;
  5 | const math = std.math;
  6 | const mem = std.mem;
  7 | const meta = std.meta;
  8 | const time = std.time;
  9 | 
 10 | const Decl = std.builtin.Type.Declaration;
 11 | 
 12 | /// Runs every function declared in `B` as a benchmark and prints a table
 13 | /// (iterations, min, max, variance and mean runtime in ns) to stderr.
 14 | ///
 15 | /// Optional declarations read from `B`: `args` (inputs; each function runs once
 16 | /// per input), `arg_names` (row labels; the input index is printed when a name
 17 | /// is missing), `min_iterations` and `max_iterations`. A benchmark stops at
 18 | /// `max_iterations` or 500ms of measured runtime, but not before `min_iterations`.
 19 | pub fn benchmark(comptime B: type) !void {
 20 |     const args = if (@hasDecl(B, "args")) B.args else [_]void{{}};
 21 |     const arg_names = if (@hasDecl(B, "arg_names")) B.arg_names else [_]u8{};
 22 |     const min_iterations = if (@hasDecl(B, "min_iterations")) B.min_iterations else 10000;
 23 |     const max_iterations = if (@hasDecl(B, "max_iterations")) B.max_iterations else 100000;
 24 |     const max_time = 500 * time.ns_per_ms;
 25 |     // The sample buffer must also fit `min_iterations` samples when that is the
 26 |     // larger bound; sizing it by `max_iterations` alone indexes out of bounds.
 27 |     const max_samples = @max(min_iterations, max_iterations);
 28 |     if (max_samples == 0) @compileError("Benchmarks need at least one iteration.");
 29 | 
 30 |     const functions = comptime blk: {
 31 |         var res: []const Decl = &[_]Decl{};
 32 |         for (meta.declarations(B)) |decl| {
 33 |             if (@typeInfo(@TypeOf(@field(B, decl.name))) == .Fn) res = res ++ [_]Decl{decl};
 34 |         }
 35 |         break :blk res;
 36 |     };
 37 |     if (functions.len == 0) @compileError("No benchmarks to run.");
 38 | 
 39 |     // Dry run against a null writer to learn how wide each column has to be.
 40 |     const min_width = blk: {
 41 |         const writer = io.null_writer;
 42 |         var res = [_]u64{ 0, 0, 0, 0, 0, 0 };
 43 |         res = try printBenchmark(
 44 |             writer,
 45 |             res,
 46 |             "Benchmark",
 47 |             formatter("{s}", ""),
 48 |             formatter("{s}", "Iterations"),
 49 |             formatter("{s}", "Min(ns)"),
 50 |             formatter("{s}", "Max(ns)"),
 51 |             formatter("{s}", "Variance"),
 52 |             formatter("{s}", "Mean(ns)"),
 53 |         );
 54 |         inline for (functions) |f| {
 55 |             var i: usize = 0;
 56 |             while (i < args.len) : (i += 1) {
 57 |                 const max = math.maxInt(u32);
 58 |                 res = if (i < arg_names.len)
 59 |                     try printBenchmark(writer, res, f.name, formatter("{s}", arg_names[i]), max, max, max, max, max)
 60 |                 else
 61 |                     try printBenchmark(writer, res, f.name, i, max, max, max, max, max);
 62 |             }
 63 |         }
 64 |         break :blk res;
 65 |     };
 66 | 
 67 |     var _stderr = std.io.bufferedWriter(std.io.getStdErr().writer());
 68 |     const stderr = _stderr.writer();
 69 |     try stderr.writeAll("\n");
 70 |     _ = try printBenchmark(
 71 |         stderr,
 72 |         min_width,
 73 |         "Benchmark",
 74 |         formatter("{s}", ""),
 75 |         formatter("{s}", "Iterations"),
 76 |         formatter("{s}", "Min(ns)"),
 77 |         formatter("{s}", "Max(ns)"),
 78 |         formatter("{s}", "Variance"),
 79 |         formatter("{s}", "Mean(ns)"),
 80 |     );
 81 |     try stderr.writeAll("\n");
 82 |     for (min_width) |w| try stderr.writeByteNTimes('-', w);
 83 |     try stderr.writeByteNTimes('-', min_width.len - 1);
 84 |     try stderr.writeAll("\n");
 85 |     try stderr.context.flush();
 86 | 
 87 |     var timer = try time.Timer.start();
 88 |     inline for (functions) |def| {
 89 |         inline for (args, 0..) |arg, index| {
 90 |             var runtimes: [max_samples]u64 = undefined;
 91 |             var min: u64 = math.maxInt(u64);
 92 |             var max: u64 = 0;
 93 |             var runtime_sum: u128 = 0;
 94 |             var i: usize = 0;
 95 |             while (i < min_iterations or
 96 |                 (i < max_iterations and runtime_sum < max_time)) : (i += 1)
 97 |             {
 98 |                 timer.reset();
 99 |                 const res = switch (@TypeOf(arg)) {
100 |                     void => @field(B, def.name)(),
101 |                     else => @field(B, def.name)(arg),
102 |                 };
103 |                 runtimes[i] = timer.read();
104 |                 runtime_sum += runtimes[i];
105 |                 if (runtimes[i] < min) min = runtimes[i];
106 |                 if (runtimes[i] > max) max = runtimes[i];
107 |                 if (@TypeOf(res) != void) std.mem.doNotOptimizeAway(&res);
108 |             }
109 |             const runtime_mean: u64 = @intCast(runtime_sum / i);
110 | 
111 |             // Square the deviation in u128: an i64 product overflows as soon as
112 |             // a sample lies more than ~3s away from the mean.
113 |             var d_sq_sum: u128 = 0;
114 |             for (runtimes[0..i]) |runtime| {
115 |                 const d: u128 = if (runtime > runtime_mean) runtime - runtime_mean else runtime_mean - runtime;
116 |                 d_sq_sum += d * d;
117 |             }
118 |             const variance = d_sq_sum / i;
119 |             if (index < arg_names.len) {
120 |                 const arg_name = formatter("{s}", arg_names[index]);
121 |                 _ = try printBenchmark(stderr, min_width, def.name, arg_name, i, min, max, variance, runtime_mean);
122 |             } else {
123 |                 _ = try printBenchmark(stderr, min_width, def.name, index, i, min, max, variance, runtime_mean);
124 |             }
125 |             try stderr.writeAll("\n");
126 |             try stderr.context.flush();
127 |         }
128 |     }
129 | }
130 | 
131 | /// Writes one table row to `writer` and returns, per column, how many bytes
132 | /// were written. The name column (`func_name`, plus `arg_name` in parentheses
133 | /// unless it formats to an empty string) is left-aligned, the remaining five
134 | /// columns are right-aligned; each is padded to its `min_widths` entry and
135 | /// columns are separated by one space.
136 | fn printBenchmark(
137 |     writer: anytype,
138 |     min_widths: [6]u64,
139 |     func_name: []const u8,
140 |     arg_name: anytype,
141 |     iterations: anytype,
142 |     min_runtime: anytype,
143 |     max_runtime: anytype,
144 |     variance: anytype,
145 |     mean_runtime: anytype,
146 | ) ![6]u64 {
147 |     // Slicing "(" and ")" to length 0 or 1 drops them for an empty name.
148 |     const paren_len = @intFromBool(std.fmt.count("{}", .{arg_name}) != 0);
149 |     var widths: [6]u64 = undefined;
150 |     widths[0] = try alignedPrint(writer, .left, min_widths[0], "{s}{s}{}{s}", .{
151 |         func_name,
152 |         "("[0..paren_len],
153 |         arg_name,
154 |         ")"[0..paren_len],
155 |     });
156 |     inline for (.{ iterations, min_runtime, max_runtime, variance, mean_runtime }, 1..) |value, col| {
157 |         try writer.writeAll(" ");
158 |         widths[col] = try alignedPrint(writer, .right, min_widths[col], "{}", .{value});
159 |     }
160 |     return widths;
161 | }
162 | 
163 | /// Pairs `value` with `fmt_str`, so that printing the result anywhere, even
164 | /// through a plain "{}" placeholder, renders `value` using `fmt_str`.
165 | fn formatter(comptime fmt_str: []const u8, value: anytype) Formatter(fmt_str, @TypeOf(value)) {
166 |     return Formatter(fmt_str, @TypeOf(value)){ .value = value };
167 | }
168 | 
169 | /// Returns a wrapper type around a `T` whose `format` method discards the
170 | /// format specifier and options it is called with and always prints `value`
171 | /// with `fmt_str` instead. Callers in this file use it to print strings
172 | /// through columns that are formatted with "{}".
173 | fn Formatter(comptime fmt_str: []const u8, comptime T: type) type {
174 |     return struct {
175 |         value: T,
176 | 
177 |         pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
178 |             // The call-site specifier and options are intentionally unused.
179 |             try std.fmt.format(writer, fmt_str, .{self.value});
180 |         }
181 |     };
182 | }
183 | 
184 | /// Prints `args` via `fmt`, aligned per `dir` within `width` bytes by padding
185 | /// with spaces (longer text is never truncated); returns the bytes written.
186 | fn alignedPrint(writer: anytype, dir: enum { left, right }, width: u64, comptime fmt: []const u8, args: anytype) !u64 {
187 |     const padding = width -| std.fmt.count(fmt, args);
188 |     var counting = io.countingWriter(writer);
189 |     const out = counting.writer();
190 |     if (dir == .right) try out.writeByteNTimes(' ', padding);
191 |     try out.print(fmt, args);
192 |     if (dir == .left) try out.writeByteNTimes(' ', padding);
193 |     return counting.bytes_written;
194 | }
195 | 
196 | test "benchmark" {
197 |     try benchmark(struct {
198 |         // The functions will be benchmarked with the following inputs.
199 |         // If not present, then it is assumed that the functions
200 |         // take no input.
201 |         pub const args = [_][]const u8{
202 |             &([_]u8{ 1, 10, 100 } ** 16),
203 |             &([_]u8{ 1, 10, 100 } ** 32),
204 |             &([_]u8{ 1, 10, 100 } ** 64),
205 |             &([_]u8{ 1, 10, 100 } ** 128),
206 |             &([_]u8{ 1, 10, 100 } ** 256),
207 |             &([_]u8{ 1, 10, 100 } ** 512),
208 |         };
209 | 
210 |         // You can specify `arg_names` to give the inputs more meaningful
211 |         // names. If the index of the input exceeds the available string
212 |         // names, the index is used as a backup.
213 |         pub const arg_names = [_][]const u8{
214 |             "block=16",
215 |             "block=32",
216 |             "block=64",
217 |             "block=128",
218 |             "block=256",
219 |             "block=512",
220 |         };
221 | 
222 |         // Lower and upper bound on how many iterations each benchmark runs.
223 |         // If not present then a default will be used.
224 |         pub const min_iterations = 1000;
225 |         pub const max_iterations = 100000;
226 | 
227 |         pub fn sum_slice(slice: []const u8) u64 {
228 |             var res: u64 = 0;
229 |             for (slice) |item|
230 |                 res += item;
231 | 
232 |             return res;
233 |         }
234 | 
235 |         pub fn sum_reader(slice: []const u8) u64 {
236 |             var stream = io.fixedBufferStream(slice);
237 |             // Hold the reader by value; `&stream.reader()` points at a temporary.
238 |             const reader = stream.reader();
239 |             var res: u64 = 0;
240 |             while (reader.readByte()) |c| {
241 |                 res += c;
242 |             } else |_| {}
243 |             return res;
244 |         }
245 |     });
246 | }
247 | 
248 | test "benchmark generics" {
249 |     try benchmark(struct {
250 |         // Inputs may be types: each entry is passed as the `comptime T` argument.
251 |         pub const args = [_]type{
252 |             @Vector(4, f16),  @Vector(4, f32),  @Vector(4, f64),
253 |             @Vector(8, f16),  @Vector(8, f32),  @Vector(8, f64),
254 |             @Vector(16, f16), @Vector(16, f32), @Vector(16, f64),
255 |         };
256 | 
257 |         pub const arg_names = [_][]const u8{
258 |             "vec4f16",  "vec4f32",  "vec4f64",
259 |             "vec8f16",  "vec8f32",  "vec8f64",
260 |             "vec16f16", "vec16f32", "vec16f64",
261 |         };
262 | 
263 |         // Adds 512 all-ones vectors of type `T` onto an all-ones start value.
264 |         pub fn sum_vectors(comptime T: type) T {
265 |             const Elem = @typeInfo(T).Vector.child;
266 |             const ones: T = @splat(@as(Elem, 1));
267 |             const inputs = [1]T{ones} ** 512;
268 | 
269 |             var total = ones;
270 |             for (inputs) |v| total += v;
271 |             return total;
272 |         }
273 |     });
274 | }
275 | 


--------------------------------------------------------------------------------
/zig.mod:
--------------------------------------------------------------------------------
1 | id: e9rgqrtrxhfenatqmdgecjf4hbpewyhywyke7aj8p4kz753v
2 | name: bench
3 | main: bench.zig
4 | license: MIT
5 | description: Simple benchmarking library
6 | dependencies:
7 | 


--------------------------------------------------------------------------------