├── .gitignore ├── examples │ ├── rust-parallel-example │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── benches │ │ │ └── binary-tree.rs │ │ ├── src │ │ │ └── lib.rs │ │ └── Cargo.lock │ └── zig-parallel-example │ └── main.zig ├── bench │ ├── rayon-tree-sum-100M.csv │ ├── spice-tree-sum-1000.csv │ ├── spice-tree-sum-100M.csv │ ├── rayon-tree-sum-1000.csv │ ├── plot.py │ ├── criterion-to-csv.py │ ├── README.md │ ├── rayon-tree-sum-100M.svg │ ├── spice-tree-sum-100M.svg │ ├── spice-tree-sum-1000.svg │ └── rayon-tree-sum-1000.svg ├── LICENSE.txt ├── .github │ └── workflows │ └── test.yml ├── Makefile ├── src │ └── root.zig └── README.md
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.zig-cache 2 | /zig-out 3 |
-------------------------------------------------------------------------------- /examples/rust-parallel-example/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 |
-------------------------------------------------------------------------------- /bench/rayon-tree-sum-100M.csv: -------------------------------------------------------------------------------- 1 | Baseline,7.4849190632000004 2 | Rayon 1 thread,22.9933083354 3 | Rayon 2 threads,11.7585642422 4 | Rayon 4 threads,5.898856567 5 | Rayon 8 threads,2.9989583844 6 | Rayon 16 threads,1.6433929402 7 | Rayon 32 threads,1.651134205 8 |
-------------------------------------------------------------------------------- /bench/spice-tree-sum-1000.csv: -------------------------------------------------------------------------------- 1 | Baseline,1.9870600000000007 2 | Spice 1 thread,2.2875200000000007 3 | Spice 2 threads,2.3040000000000003 4 | Spice 4 threads,2.2950600000000008 5 | Spice 8 threads,2.2884 6 | Spice 16 threads,2.28916 7 | Spice 32 threads,2.288220000000001 8 |
-------------------------------------------------------------------------------- /bench/spice-tree-sum-100M.csv: -------------------------------------------------------------------------------- 1 | Baseline,3.6320248672 2 | Spice 1 thread,3.927454584600001 3 | Spice 2 threads,2.011768004 4 | Spice 4 threads,1.0827650286000001 5 | Spice 8 threads,0.5992898036000001 6 | Spice 16 threads,0.3624978568 7 | Spice 32 threads,0.3843630439999999 8 |
-------------------------------------------------------------------------------- /bench/rayon-tree-sum-1000.csv: -------------------------------------------------------------------------------- 1 | Baseline,1.5597891883229127 2 | Rayon 1 thread,23.51176644639185 3 | Rayon 2 threads,16.82203207978065 4 | Rayon 4 threads,14.939625885334602 5 | Rayon 8 threads,18.66874537774096 6 | Rayon 16 threads,24.183746144849145 7 | Rayon 32 threads,105.44453796411226 8 |
-------------------------------------------------------------------------------- /examples/rust-parallel-example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "parallel-example" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rayon = "1.10" 8 | 9 | [dev-dependencies] 10 | criterion = "0.5" 11 | 12 | [[bench]] 13 | name = "binary-tree" 14 | harness = false
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD Zero Clause License 2 | 3 | Copyright (c) 2024 Magnus Holm 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted. 
7 | 8 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 9 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 11 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 13 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 | PERFORMANCE OF THIS SOFTWARE. 15 |
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: "Tests" 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | paths: ["**/*.zig", "build.zig.zon"] 7 | pull_request: 8 | schedule: 9 | - cron: "0 3 * * 5" 10 | workflow_dispatch: 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: mlugg/setup-zig@v2 18 | with: 19 | version: master 20 | - name: Formatting 21 | run: zig fmt --check src/*.zig 22 | - name: Build executable 23 | run: zig build -Doptimize=ReleaseFast -Dexamples 24 | - name: Run small benchmark 25 | run: ./zig-out/bin/spice-example -n 10000 --baseline -t 1 -t 2 -t 4 26 | # According to https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners 27 | # `ubuntu-latest` has 4 vCPUs so we expect some speed-up here.
-------------------------------------------------------------------------------- /examples/rust-parallel-example/benches/binary-tree.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use parallel_example::Node; 3 | 4 | fn criterion_benchmark(c: &mut Criterion) { 5 | for n in [1000, 100_000_000] { 6 | let mut group = c.benchmark_group(format!("tree-sum-{}", n)); 7 | group.sample_size(50); 8 | let root = Node::make_balanced_tree(1, n); 9 | group.bench_with_input(BenchmarkId::new("Baseline", 1), &root, |b, root| { 10 | b.iter(|| root.sum()) 11 | }); 12 | for num_threads in [1, 2, 4, 8, 16, 32] { 13 | let pool = rayon::ThreadPoolBuilder::new() 14 | .num_threads(num_threads) 15 | .build() 16 | .unwrap(); 17 | group.bench_with_input(BenchmarkId::new("Rayon", num_threads), &root, |b, root| { 18 | b.iter(|| root.sum_rayon(&pool)) 19 | }); 20 | } 21 | } 22 | } 23 | 24 | criterion_group!(benches, criterion_benchmark); 25 | criterion_main!(benches); 26 |
-------------------------------------------------------------------------------- /bench/plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os 4 | import sys 5 | 6 | filename, title, outfile = sys.argv[1:] 7 | 8 | data = [row.strip().split(",") for row in open(filename)] 9 | 10 | # `barh` prints from bottom to top. 
11 | data.reverse() 12 | 13 | labels = [row[0] for row in data] 14 | values = [float(row[1]) for row in data] 15 | 16 | # https://github.com/system-fonts/modern-font-stacks#humanist 17 | plt.rcParams["font.family"] = "Seravek, Gill Sans Nova, Ubuntu, Calibri, DejaVu Sans, source-sans-pro, sans-serif" 18 | # This makes sure we don't embed the font into the SVG: 19 | plt.rcParams['svg.fonttype'] = 'none' 20 | 21 | plt.figure(figsize=(5, 2.7)) 22 | plt.title(title, fontdict={'fontweight': 'bold'}, pad=10) 23 | plt.grid(axis='x', color='#ccc') 24 | bars = plt.barh(labels, values, height=0.4) 25 | plt.bar_label(bars, ["{:.2f} ns".format(value) for value in values], padding=3) 26 | plt.xlabel("total wall time divided by node count [nanoseconds]", labelpad=10) 27 | 28 | ax = plt.gca() 29 | ax.set_axisbelow(True) 30 | ax.spines['top'].set_visible(False) 31 | ax.spines['right'].set_visible(False) 32 | # ax.spines['bottom'].set_visible(False) 33 | # ax.tick_params(axis='x', colors='#ccc') 34 | plt.tight_layout() 35 | 36 | plt.savefig(outfile) 37 |
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all bench bench-spice bench-rayon plot plot-spice plot-rayon 2 | 3 | all: plot 4 | 5 | N := 1000 100M 6 | 7 | # Expands SI suffixes (100k => 100000) 8 | expand_SI = $(subst k,000,$(subst M,000k,$(1))) 9 | 10 | bench: bench-spice bench-rayon 11 | bench-spice: $(foreach n,$(N),bench/spice-tree-sum-$(n).csv) 12 | bench-rayon: $(foreach n,$(N),bench/rayon-tree-sum-$(n).csv) 13 | 14 | plot: plot-spice plot-rayon 15 | plot-spice: $(foreach n,$(N),bench/spice-tree-sum-$(n).svg) 16 | plot-rayon: $(foreach n,$(N),bench/rayon-tree-sum-$(n).svg) 17 | 18 | ## Spice 19 | 20 | bench/spice-tree-sum-%.csv: 21 | zig build -Doptimize=ReleaseFast -Dexamples 22 | ./zig-out/bin/spice-example -n $(call expand_SI,$*) --csv $@ 23 | 24 | bench/spice-tree-sum-%.svg: bench/spice-tree-sum-%.csv bench/plot.py 25 | python3 bench/plot.py $< "Time to calculate sum of binary tree of $* nodes" $@ 26 | 27 | ## Rayon 28 | 29 | bench/rayon-tree-sum-%.csv: | examples/rust-parallel-example/target/criterion 30 | python3 bench/criterion-to-csv.py tree-sum-$(call expand_SI,$*) > $@ 31 | 32 | examples/rust-parallel-example/target/criterion: 33 | (cd examples/rust-parallel-example && cargo bench) 34 | 35 | bench/rayon-tree-sum-%.svg: bench/rayon-tree-sum-%.csv bench/plot.py 36 | python3 bench/plot.py $< "Time to calculate sum of binary tree of $* nodes" $@ 37 |
-------------------------------------------------------------------------------- /examples/rust-parallel-example/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub struct Node { 2 | value: i64, 3 | left: Option<Box<Node>>, 4 | right: Option<Box<Node>>, 5 | } 6 | 7 | impl Node { 8 | pub fn make_balanced_tree(from: i64, to: i64) -> Self { 9 | let value = from + (to - from) / 2; 10 | Node { 11 | value, 12 | left: (value > from).then(|| Self::make_balanced_tree(from, value - 1).into()), 13 | right: (value < to).then(|| Self::make_balanced_tree(value + 1, to).into()), 14 | } 15 | } 16 | 17 | pub fn sum(&self) -> i64 { 18 | match (&self.left, &self.right) { 19 | (Some(left), Some(right)) => self.value + left.sum() + right.sum(), 20 | (Some(child), _) | (_, Some(child)) => self.value + child.sum(), 21 | (None, None) => self.value, 22 | } 23 | } 24 | 25 | pub fn sum_rayon(&self, pool: &rayon::ThreadPool) -> i64 { 26 | match (&self.left, 
&self.right) { 27 | (Some(left), Some(right)) => { 28 | let (left, right) = pool.join(|| left.sum_rayon(pool), || right.sum_rayon(pool)); 29 | self.value + left + right 30 | } 31 | (Some(child), _) | (_, Some(child)) => self.value + child.sum_rayon(pool), 32 | (None, None) => self.value, 33 | } 34 | } 35 | } 36 |
-------------------------------------------------------------------------------- /bench/criterion-to-csv.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from glob import glob 3 | import json 4 | import os 5 | import sys 6 | 7 | name = sys.argv[1] 8 | n = int(name.split("-")[-1]) 9 | 10 | target_directory = "examples/rust-parallel-example/target/criterion" 11 | 12 | BenchData = namedtuple('BenchData', ('nano_mean', 'threadcount')) 13 | BenchRow = namedtuple('BenchRow', ('name', 'value', 'sort_key')) 14 | 15 | def read_data(path): 16 | bench = json.loads(open(os.path.join(path, "benchmark.json")).read()) 17 | est = json.loads(open(os.path.join(path, "estimates.json")).read()) 18 | nano_mean = est["mean"]["point_estimate"] 19 | threadcount = int(bench["value_str"]) 20 | return BenchData(nano_mean, threadcount) 21 | 22 | rows = [] 23 | 24 | baseline_data = read_data(os.path.join(target_directory, name, "Baseline", "1", "new")) 25 | rows.append(BenchRow("Baseline", baseline_data.nano_mean / n, (1, 0))) 26 | 27 | for bench_dir in glob(os.path.join(target_directory, name, "Rayon", "*", "new")): 28 | bench_data = read_data(bench_dir) 29 | threadcount = bench_data.threadcount 30 | threadlabel = "1 thread" if threadcount == 1 else f"{threadcount} threads" 31 | rows.append(BenchRow(f"Rayon {threadlabel}", bench_data.nano_mean / n, (2, threadcount))) 32 | 33 | rows.sort(key=lambda row: row.sort_key) 34 | 35 | for row in rows: 36 | print(f"{row.name},{row.value}") 37 |
-------------------------------------------------------------------------------- /examples/zig-parallel-example/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | const spice = @import("spice"); 4 | const parg = @import("parg"); 5 | 6 | const usage = 7 | \\usage: spice-example 8 | \\ 9 | \\Builds a perfectly balanced binary tree (with integer values) 10 | \\and benchmarks how quickly we can sum all its values. 11 | \\ 12 | \\--baseline and -t/--threads define the benchmarks that 13 | \\will be executed. If none of these are present it defaults to 14 | \\--baseline -t 1 -t 2 -t 4 -t 8 -t 16 -t 32 15 | \\ 16 | \\OPTIONS 17 | \\ -n (required) 18 | \\ Define the size of the binary tree (number of nodes). 19 | \\ 20 | \\ -t, --threads 21 | \\ Run a benchmark using the given number of threads. 22 | \\ This can be passed multiple times and multiple benchmarks 23 | \\ will be run. 24 | \\ 25 | \\ --baseline 26 | \\ When present, also run the baseline version. 27 | \\ 28 | \\ --csv 29 | \\ Output the benchmark results as CSV. 30 | \\ 31 | \\ -h, --help 32 | \\ Show this message. 
33 | \\ 34 | ; 35 | 36 | const Node = struct { 37 | val: i64, 38 | left: ?*Node = null, 39 | right: ?*Node = null, 40 | 41 | fn sum(self: *const Node) i64 { 42 | var res = self.val; 43 | if (self.left) |child| res += child.sum(); 44 | if (self.right) |child| res += child.sum(); 45 | return res; 46 | } 47 | }; 48 | 49 | fn balancedTree(allocator: std.mem.Allocator, from: i64, to: i64) !*Node { 50 | var node = try allocator.create(Node); 51 | node.* = .{ .val = from + @divTrunc(to - from, 2) }; 52 | if (node.val > from) { 53 | node.left = try balancedTree(allocator, from, node.val - 1); 54 | } 55 | if (node.val < to) { 56 | node.right = try balancedTree(allocator, node.val + 1, to); 57 | } 58 | return node; 59 | } 60 | 61 | fn sum(t: *spice.Task, node: *Node) i64 { 62 | var res: i64 = node.val; 63 | 64 | if (node.left) |left_child| { 65 | if (node.right) |right_child| { 66 | var fut = spice.Future(*Node, i64).init(); 67 | fut.fork(t, sum, right_child); 68 | res += t.call(i64, sum, left_child); 69 | if (fut.join(t)) |val| { 70 | res += val; 71 | } else { 72 | res += t.call(i64, sum, right_child); 73 | } 74 | return res; 75 | } 76 | 77 | res += t.call(i64, sum, left_child); 78 | } 79 | 80 | if (node.right) |right_child| { 81 | res += t.call(i64, sum, right_child); 82 | } 83 | 84 | return res; 85 | } 86 | 87 | const BaselineTreeSum = struct { 88 | pub fn writeName(self: *BaselineTreeSum, writer: *std.Io.Writer) !void { 89 | _ = self; 90 | try writer.print("Baseline", .{}); 91 | } 92 | 93 | pub fn init(self: *BaselineTreeSum, allocator: std.mem.Allocator, io: std.Io) void { 94 | _ = self; 95 | _ = allocator; 96 | _ = io; 97 | } 98 | 99 | pub fn deinit(self: *BaselineTreeSum) void { 100 | _ = self; 101 | } 102 | 103 | pub fn run(self: *BaselineTreeSum, input: *Node) i64 { 104 | _ = self; 105 | return input.sum(); 106 | } 107 | }; 108 | 109 | const SpiceTreeSum = struct { 110 | num_threads: usize, 111 | thread_pool: spice.ThreadPool = undefined, 112 | 113 | pub fn writeName(self: *SpiceTreeSum, writer: *std.Io.Writer) !void { 114 | if (self.num_threads == 1) { 115 | try writer.print("Spice 1 thread", .{}); 116 | } else { 117 | try writer.print("Spice {} threads", .{self.num_threads}); 118 | } 119 | } 120 | 121 | pub fn init(self: *SpiceTreeSum, allocator: std.mem.Allocator, io: std.Io) void { 122 | self.thread_pool = spice.ThreadPool.init(allocator, io); 123 | self.thread_pool.start(.{ .background_worker_count = self.num_threads - 1 }); 124 | } 125 | 126 | pub fn deinit(self: *SpiceTreeSum) void { 127 | self.thread_pool.deinit(); 128 | } 129 | 130 | pub fn run(self: *SpiceTreeSum, input: *Node) i64 { 131 | return self.thread_pool.call(i64, sum, input); 132 | } 133 | }; 134 | 135 | const n_samples = 50; 136 | const warmup_duration = 3 * std.time.ns_per_s; 137 | 138 | const Runner = struct { 139 | allocator: std.mem.Allocator, 140 | io: std.Io, 141 | n: usize, 142 | csv: ?std.fs.File.Writer = null, 143 | 144 | pub fn run(self: *Runner, bench: anytype, input: anytype) !void { 145 | var out = std.fs.File.stdout(); 146 | var out_buf: [512]u8 = undefined; 147 | var outw = out.writer(&out_buf); 148 | 149 | var name_buf: [255]u8 = undefined; 150 | var fbs = std.Io.Writer.fixed(&name_buf); 151 | try bench.writeName(&fbs); 152 | const name = fbs.buffered(); 153 | 154 | try outw.interface.print("{s}:\n", .{name}); 155 | try outw.interface.print(" Warming up...\n", .{}); 156 | 157 | bench.init(self.allocator, self.io); 158 | defer bench.deinit(); 159 | 160 | { 161 | var timer = std.time.Timer.start() catch 
@panic("timer error"); 162 | var warmup_iter: usize = 0; 163 | while (true) { 164 | const output = bench.run(input); 165 | warmup_iter += 1; 166 | if (timer.read() >= warmup_duration) { 167 | try outw.interface.print(" Warmup iterations: {}\n", .{warmup_iter}); 168 | try outw.interface.print(" Warmup result: {}\n\n", .{output}); 169 | try outw.interface.flush(); 170 | break; 171 | } 172 | } 173 | } 174 | 175 | try outw.interface.print(" Running {} times...\n", .{n_samples}); 176 | try outw.interface.flush(); 177 | var sample_times: [n_samples]f64 = undefined; 178 | for (0..n_samples) |i| { 179 | var timer = std.time.Timer.start() catch @panic("timer error"); 180 | std.mem.doNotOptimizeAway(bench.run(input)); 181 | const dur = timer.read(); 182 | sample_times[i] = @as(f64, @floatFromInt(dur)) / @as(f64, @floatFromInt(self.n)); 183 | } 184 | 185 | const mean = memSum(f64, &sample_times) / n_samples; 186 | 187 | try outw.interface.print(" Mean: {d} ns\n Min: {d} ns\n Max: {d} ns\n", .{ 188 | mean, 189 | std.mem.min(f64, &sample_times), 190 | std.mem.max(f64, &sample_times), 191 | }); 192 | 193 | try outw.interface.print("\n", .{}); 194 | try outw.interface.flush(); 195 | 196 | if (self.csv) |*csv| { 197 | try csv.interface.print("{s},{d}\n", .{ name, mean }); 198 | try csv.interface.flush(); 199 | } 200 | } 201 | }; 202 | 203 | fn memSum(comptime T: type, slice: []const T) T { 204 | var result: T = 0; 205 | for (slice) |val| { 206 | result += val; 207 | } 208 | return result; 209 | } 210 | 211 | fn failArgs(comptime format: []const u8, args: anytype) noreturn { 212 | var buf: [512]u8 = undefined; 213 | var err = std.fs.File.stderr(); 214 | var writer = err.writer(&buf); 215 | writer.interface.print("invalid arguments: " ++ format ++ "\n", args) catch @panic("failed to print to stderr"); 216 | std.process.exit(1); 217 | } 218 | 219 | pub fn main() !void { 220 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 221 | defer { 222 | const check = gpa.deinit(); 223 | if (check == .leak) { 224 | std.debug.print("memory leaked\n", .{}); 225 | std.process.exit(1); 226 | } 227 | } 228 | 229 | var arena = std.heap.ArenaAllocator.init(gpa.allocator()); 230 | defer arena.deinit(); 231 | 232 | var threaded = std.Io.Threaded.init(gpa.allocator()); 233 | defer threaded.deinit(); 234 | 235 | var n: ?usize = null; 236 | var csv_buf: [512]u8 = undefined; 237 | var csv_file: std.fs.File = undefined; 238 | var csv: ?std.fs.File.Writer = null; 239 | var enable_baseline = false; 240 | var num_threads_list = std.ArrayList(usize).empty; 241 | defer num_threads_list.deinit(gpa.allocator()); 242 | var defaults = true; 243 | var show_usage = false; 244 | var no_args = true; 245 | 246 | var p = try parg.parseProcess(arena.allocator(), .{}); 247 | defer p.deinit(); 248 | 249 | const program_name = p.nextValue() orelse @panic("no executable name"); 250 | _ = program_name; 251 | 252 | while (p.next()) |token| { 253 | no_args = false; 254 | switch (token) { 255 | .flag => |flag| { 256 | if (flag.isShort("n")) { 257 | const n_str = p.nextValue() orelse failArgs("-n requires a value", .{}); 258 | n = std.fmt.parseInt(usize, n_str, 10) catch failArgs("-n must be an integer", .{}); 259 | } else if (flag.isLong("csv")) { 260 | const csv_path = p.nextValue() orelse failArgs("--csv requires a value", .{}); 261 | csv_file = try std.fs.cwd().createFile(csv_path, .{}); 262 | csv = csv_file.writer(&csv_buf); 263 | } else if (flag.isLong("baseline")) { 264 | enable_baseline = true; 265 | defaults = false; 266 | } else if 
(flag.isShort("t") or flag.isLong("threads")) { 267 | const num_threads_str = p.nextValue() orelse failArgs("{f} requires a value", .{flag}); 268 | const num_threads = std.fmt.parseInt(usize, num_threads_str, 10) catch failArgs("{f} must be an integer", .{flag}); 269 | try num_threads_list.append(gpa.allocator(), num_threads); 270 | defaults = false; 271 | } else if (flag.isShort("h") or flag.isLong("help")) { 272 | show_usage = true; 273 | } else { 274 | failArgs("{f} is a not a valid flag", .{flag}); 275 | } 276 | }, 277 | .arg => |arg| { 278 | failArgs("{s}", .{arg}); 279 | }, 280 | .unexpected_value => |val| { 281 | failArgs("{s}", .{val}); 282 | }, 283 | } 284 | } 285 | 286 | if (show_usage or no_args) { 287 | std.debug.print(usage, .{}); 288 | std.process.exit(0); 289 | } 290 | 291 | if (n == null) { 292 | failArgs("-n is required.", .{}); 293 | } 294 | 295 | if (defaults) { 296 | enable_baseline = true; 297 | try num_threads_list.appendSlice(gpa.allocator(), &[_]usize{ 1, 2, 4, 8, 16, 32 }); 298 | } 299 | 300 | const root = try balancedTree(arena.allocator(), 0, @intCast(n.?)); 301 | 302 | var runner = Runner{ 303 | .allocator = gpa.allocator(), 304 | .io = threaded.io(), 305 | .n = n.?, 306 | .csv = csv, 307 | }; 308 | 309 | if (enable_baseline) { 310 | var baseline: BaselineTreeSum = .{}; 311 | try runner.run(&baseline, root); 312 | } 313 | 314 | for (num_threads_list.items) |num_threads| { 315 | var bench: SpiceTreeSum = .{ .num_threads = num_threads }; 316 | try runner.run(&bench, root); 317 | } 318 | 319 | if (csv) |_| { 320 | csv_file.close(); 321 | } 322 | } 323 | -------------------------------------------------------------------------------- /bench/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark of Spice and Rayon 2 | 3 | Date: August 2024. 4 | 5 | ## Introduction 6 | 7 | [Rayon][rayon] is high-quality data-parallelism library written in Rust based on the well-known technique of _work-stealing fork/join_. 8 | [Spice](..), written in Zig, is an experimental implementation of _heartbeat scheduling_ which claims to have a much smaller overhead. 9 | We'd like to understand how these two techniques compares against each other. 10 | Rayon also provides a set of API around `ParallelIterator`. 11 | We're not focusing on these since it's not comparable to the API which Spice provides. 12 | 13 | Evaluations of parallel frameworks are often summarized along the lines of "we implemented X algorithms, ran it on a machine with 48 cores and saw a (geometric) mean improvement of 34x". 14 | This is a fine way of validating that it works for a wide range of problems, but it's hard to draw conclusions from the final result. 15 | It's also not very interesting for a comparison between Spice and Rayon because they are widely different (experimental vs production-ready; Zig vs Rust). 16 | 17 | This benchmark therefore rather focuses on a _single_ simple problem and tries to maximize its learning from a small set of focused experiments. 18 | Further benchmarks are recommended to validate the findings. 19 | 20 | ## Key findings and recommendations 21 | 22 | - Rayon adds roughly **15 nanoseconds** overhead for a single invocation of `rayon::join`. 23 | This means the smallest amount of work should take around **~1 microsecond** for the overhead be negligible (<1%). 24 | - Rayon shows **good linear scalability**: ~14x performance improvement when going from 1 to 16 threads. 
25 | This was when the total duration of the program was on the scale of **seconds**. 26 | - Rayon struggled a lot when the overall duration was on the scale of **microseconds**. 27 | At 16 threads we saw _worse_ performance than using only 1. 28 | In this scenario, using more threads (32) than cores (16) was **detrimental** to performance (~60x slowdown). 29 | - Spice has sub-nanosecond overhead for a single invocation of `fork/join`. 30 | This means the smallest amount of work should take around **tens of nanoseconds** for the overhead to be negligible (<1%). 31 | - Spice shows **subpar scalability**: 32 | The speed-up of using 16 threads was merely ~11x (compared to Rayon's ~14x). 33 | 34 | ## Methodology 35 | 36 | There are three areas we'd like to focus on: 37 | 38 | - **Understand the fixed overhead:** 39 | We expect there to be an overhead of writing a program in a parallel-enabled style compared to a baseline implementation. 40 | This overhead is most likely _constant_: Every instance of possible parallelism now takes a few extra nanoseconds. 41 | To determine this overhead we can compare a baseline implementation with a parallel-enabled implementation which only uses a single thread (see the worked example after this list). 42 | - **Understand scalability:** 43 | Ignoring the overhead, we'd like the program to be twice as fast when given twice the number of threads. 44 | - **Understand contention and synchronization:** 45 | Multiple threads can interact with each other in ways that are hard to predict. 46 | 47 | To explore these areas we've decided to focus on _summing the values of a perfectly balanced binary tree_. 48 | 49 |
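As a worked example of the overhead measurement (using the 100M-node results reported below): Rayon on a single thread spends 22.99 ns per node against a baseline of 7.48 ns, i.e. roughly 22.99 − 7.48 ≈ 15.5 ns of fork/join bookkeeping per node, while Spice on a single thread spends 3.93 ns against a baseline of 3.63 ns, i.e. roughly 0.3 ns per node.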
50 | **Baseline Zig implementation** 51 | 52 | ```zig 53 | const Node = struct { 54 | val: i64, 55 | left: ?*Node = null, 56 | right: ?*Node = null, 57 | 58 | fn sum(self: *const Node) i64 { 59 | var res = self.val; 60 | if (self.left) |child| res += child.sum(); 61 | if (self.right) |child| res += child.sum(); 62 | return res; 63 | } 64 | }; 65 | 66 | fn balancedTree(allocator: std.mem.Allocator, from: i64, to: i64) !*Node { 67 | var node = try allocator.create(Node); 68 | node.* = .{ .val = from + @divTrunc(to - from, 2) }; 69 | if (node.val > from) { 70 | node.left = try balancedTree(allocator, from, node.val - 1); 71 | } 72 | if (node.val < to) { 73 | node.right = try balancedTree(allocator, node.val + 1, to); 74 | } 75 | return node; 76 | } 77 | ``` 78 | 79 |
80 | 81 |
82 | **Baseline Rust implementation** 83 | 84 | ```rust 85 | struct Node<T> { 86 | value: T, 87 | left: Option<Box<Node<T>>>, 88 | right: Option<Box<Node<T>>>, 89 | } 90 | 91 | fn sum(node: &Node<i64>) -> i64 { 92 | let mut result = node.value; 93 | if let Some(child) = &node.left { 94 | result += sum(child); 95 | } 96 | if let Some(child) = &node.right { 97 | result += sum(child); 98 | } 99 | return result; 100 | } 101 | 102 | fn make_balanced_tree(from: i64, to: i64) -> Node<i64> { 103 | let value = from + (to-from)/2; 104 | return Node { 105 | value: value, 106 | left: (value > from).then(|| Box::new(make_balanced_tree(from, value - 1))), 107 | right: (value < to).then(|| Box::new(make_balanced_tree(value + 1, to))), 108 | }; 109 | } 110 | ``` 111 | 112 |
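The parallel variants under test wrap this exact traversal in each framework's fork/join primitive. For reference, below is the Spice version, quoted from `examples/zig-parallel-example/main.zig` (the comments are added here and reflect our reading of the example, not official Spice documentation); the Rayon equivalent is `sum_rayon` in `examples/rust-parallel-example/src/lib.rs`, built on `pool.join`.

```zig
fn sum(t: *spice.Task, node: *Node) i64 {
    var res: i64 = node.val;

    if (node.left) |left_child| {
        if (node.right) |right_child| {
            // Offer the right subtree as a job that other workers may pick up...
            var fut = spice.Future(*Node, i64).init();
            fut.fork(t, sum, right_child);

            // ...while this thread sums the left subtree in the meantime.
            res += t.call(i64, sum, left_child);

            if (fut.join(t)) |val| {
                // Another worker completed the forked job; use its result.
                res += val;
            } else {
                // The job was never picked up, so sum the right subtree ourselves.
                res += t.call(i64, sum, right_child);
            }
            return res;
        }

        res += t.call(i64, sum, left_child);
    }

    if (node.right) |right_child| {
        res += t.call(i64, sum, right_child);
    }

    return res;
}
```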
113 | 114 | This program is very small and easy to reason about. 115 | The actual unit of work is minimal, which means that any overhead is easily observable. 116 | This program is not representative of real-world use cases, but it will give us valuable insights nonetheless: 117 | 118 | - The structure of the tree is _optimal_ for parallelism. 119 | If we're not able to speed up a _perfectly_ balanced problem, why would it do any better on a messy real-life problem? 120 | - Running the baseline and comparing it with a "parallel, but locked to a single thread" version should precisely tell us the overhead. 121 | - Running with n=100 million nodes will represent a quite decent chunk of work. 122 | - Running with n=1000 nodes will highlight what happens when we have a very small amount of total work. 123 | - Running with a varying number of threads (1, 2, 4, 8, 16, 32) will show how well each implementation scales. 124 | 125 | If we're seeing sub-linear scalability when increasing the number of threads there are three possible causes: 126 | 127 | 1. **Idle threads:** The scheduler is not able to precisely give threads enough work. 128 | These threads end up being idle (e.g. waiting for a lock). 129 | This leads to higher overall latency, but leaves the system with additional resources. 130 | In a practical setting this isn't too bad since it's common for a system to have additional work to do. 131 | 2. **Competing threads:** The threads could be actively working and competing for the _same_ work. 132 | This means they are actively wasting effort. 133 | This also leads to higher overall latency, but is much worse than the previous scenario. 134 | 3. **System limitation:** Despite being perfectly balanced, there are still shared resources in this benchmark. 135 | There are quite a lot of memory reads, which will all compete for the same memory bandwidth. 136 | We're also running in the cloud where we don't have full visibility into what's actually happening behind the scenes. 137 | 138 | ## Results 139 | 140 | - Commit: `0b697d6d7af98e3db3501933c45302521b606a93` 141 | - Command: `make bench` 142 | 143 | These benchmarks were executed in Google Cloud (the `europe-west4` region) on a `c4-standard-16` instance which has 16 vCPUs and 60 GB of memory. 144 | No other significant workloads were running at the same time, and the instance came directly from a TERMINATING state into the RUNNING state before the benchmarks were started. 145 | The operating system is NixOS 24.05. 146 | 147 | The numbers reported here are from a single execution of `make bench`, but during development of Spice this benchmark suite has been executed frequently and the numbers reported are consistent with earlier executions. 148 | 149 | ``` 150 | $ uname -a 151 | Linux 6.6.41 #1-NixOS SMP PREEMPT_DYNAMIC Thu Jul 18 11:21:27 UTC 2024 x86_64 GNU/Linux 152 | ``` 153 | 154 | ### Spice 155 | 156 | The Spice benchmarks were compiled with `ReleaseFast` and executed under a benchmark runner which does a warmup phase (3 seconds) followed by taking 50 samples. 157 | We report the mean total time scaled by the number of nodes in the tree. 158 | 159 | ``` 160 | $ zig version 161 | 0.14.0-dev.564+75cf7fca9 162 | ``` 163 | 164 | ![](spice-tree-sum-100M.svg) 165 | ![](spice-tree-sum-1000.svg) 166 | 167 | ### Rayon 168 | 169 | The Rayon benchmarks were executed using [Criterion](https://docs.rs/criterion/latest/criterion/) which does a warmup phase followed by taking 50 samples. 170 | We report the mean total time scaled by the number of nodes in the tree. 
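For example, at n = 100M a reported value of 7.48 ns/node corresponds to a mean total wall time of roughly 7.48 ns × 10⁸ ≈ 0.75 s per benchmark iteration.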
171 | 172 | ``` 173 | $ rustc --version 174 | rustc 1.77.2 (25ef9e3d8 2024-04-09) (built from a source tarball) 175 | $ cargo --version 176 | cargo 1.77.1 177 | ``` 178 | 179 | ![](rayon-tree-sum-100M.svg) 180 | ![](rayon-tree-sum-1000.svg) 181 | 182 | ## Discussion 183 | 184 | **Baseline implementation:** 185 | Both languages perform better on the small tree compared to the large tree. 186 | This can be explained by the smaller tree being able to fit in cache. 187 | Interestingly, the Rust implementation is _twice_ as slow as Zig's (7.48 ns vs 3.63 ns) for the large tree. 188 | It's not obvious why this is the case. 189 | (Another interesting anecdote: On an M3 Pro, Spice using 1 thread is actually _faster_ than the baseline implementation despite it definitely containing more instructions. This shows that it can be hard to reason about performance in the presence of caches, pipelining and branch prediction.) 190 | 191 | **Overhead:** 192 | Looking at the case of 100 million nodes we see that Rayon adds roughly ~15-20 ns of overhead. 193 | Considering the overall amount of work per node is just ~7 ns, this makes Rayon unsuitable for parallelizing this problem: 194 | Using 4 threads was only barely faster than the baseline implementation, while wasting 4x the resources. 195 | The minimum amount of work should rather be in the range of _microseconds_. 196 | Spice on the other hand has a sub-nanosecond overhead and is capable of reducing the latency by ~3.3x using 4 cores. 197 | 198 | **Scalability:** 199 | Rayon shows good scalability _when the tree is large enough_: 200 | Latency is reduced by ~14x by using 16 cores. 201 | In comparison, Spice only achieves an ~11x speed-up in the same scenario. 202 | Considering the design of Spice, this is most likely caused by poor scheduling and idle threads, _not_ competing threads. 203 | 204 | **Contention:** 205 | For the case where we have a small tree (1000 nodes) we see that Rayon struggles a lot. 206 | The initial overhead is similar (~19 ns), but there's no real performance gain from increasing the number of threads. 207 | There are some _slight_ improvements going from 1 to 4 threads, but the overall latency becomes _worse_ as the thread count increases. 208 | At 32 threads it suddenly becomes ~60x slower than the baseline implementation. 209 | This behavior is consistent with competing threads (e.g. retries in a lock-free queue). 210 | Spice on the other hand shows consistent performance regardless of the number of threads. 211 | This is because the overall duration is so short that none of the threads actually gets scheduled any work to do. 212 | 213 | ## Future work 214 | 215 | To explore this area further we recommend: 216 | 217 | - **Explore the scheduling choices of Spice:** 218 | Spice showed subpar scalability, most likely related to its scheduling mechanism. 219 | Further exploration into the heartbeat mechanism could possibly unlock increased scalability. 220 | - **Increase the smallest unit of work:** 221 | Run a benchmark with highly CPU-bound and configurable work (e.g. SHA-1 hashing repeated `x` times). 222 | Increase the duration of the smallest unit of work and measure the overhead. 223 | This can validate whether our assumption that the overhead is constant actually holds. 224 | - **Run a parallel-baseline version:** 225 | For this particular problem there is a trivial fixed way of parallelizing it: 226 | With e.g. 4 threads, take the 4 nodes that are grandchildren of the root and evaluate their sum in each thread. 
227 | Then, in the main thread, wait for the results and finally sum them together with the rest. 228 | This should have a minimal amount of overhead in terms of thread scalability and should give an upper bound on how well this problem scales. 229 | - **Run benchmarks with performance counters at high scale:** 230 | Performance counters should be able to tell whether threads are _idle_ or _competing_ as we scale. 231 | 232 | [rayon]: https://docs.rs/rayon/latest/rayon/ 233 |
-------------------------------------------------------------------------------- /bench/rayon-tree-sum-100M.svg: -------------------------------------------------------------------------------- [matplotlib SVG bar chart. Title: "Time to calculate sum of binary tree of 100M nodes"; x-axis: total wall time divided by node count [nanoseconds]; bars: Baseline 7.48 ns, Rayon 1 thread 22.99 ns, Rayon 2 threads 11.76 ns, Rayon 4 threads 5.90 ns, Rayon 8 threads 3.00 ns, Rayon 16 threads 1.64 ns, Rayon 32 threads 1.65 ns. Generated by bench/plot.py from rayon-tree-sum-100M.csv.]
-------------------------------------------------------------------------------- /bench/spice-tree-sum-100M.svg: -------------------------------------------------------------------------------- [matplotlib SVG bar chart. Title: "Time to calculate sum of binary tree of 100M nodes"; x-axis: total wall time divided by node count [nanoseconds]; bars: Baseline 3.63 ns, Spice 1 thread 3.93 ns, Spice 2 threads 2.01 ns, Spice 4 threads 1.08 ns, Spice 8 threads 0.60 ns, Spice 16 threads 0.36 ns, Spice 32 threads 0.38 ns. Generated by bench/plot.py from spice-tree-sum-100M.csv.]
-------------------------------------------------------------------------------- /bench/spice-tree-sum-1000.svg: -------------------------------------------------------------------------------- [matplotlib SVG bar chart. Title: "Time to calculate sum of binary tree of 1000 nodes"; x-axis: total wall time divided by node count [nanoseconds]; bars: Baseline 1.99 ns, Spice 1 thread 2.29 ns, Spice 2 threads 2.30 ns, Spice 4 threads 2.30 ns, Spice 8 threads 2.29 ns, Spice 16 threads 2.29 ns, Spice 32 threads 2.29 ns. Generated by bench/plot.py from spice-tree-sum-1000.csv.]
-------------------------------------------------------------------------------- /bench/rayon-tree-sum-1000.svg: -------------------------------------------------------------------------------- [matplotlib SVG bar chart. Title: "Time to calculate sum of binary tree of 1000 nodes"; x-axis: total wall time divided by node count [nanoseconds]; bars: Baseline 1.56 ns, Rayon 1 thread 23.51 ns, Rayon 2 threads 16.82 ns, Rayon 4 threads 14.94 ns, Rayon 8 threads 18.67 ns, Rayon 16 threads 24.18 ns, Rayon 32 threads 105.44 ns. Generated by bench/plot.py from rayon-tree-sum-1000.csv.]
-------------------------------------------------------------------------------- /examples/rust-parallel-example/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anes" 16 | version = "0.1.6" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.7" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" 25 | 26 | [[package]] 27 | name = "autocfg" 28 | version = "1.3.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 31 | 32 | [[package]] 33 | name = "bumpalo" 34 | version = "3.16.0" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" 37 | 38 | [[package]] 39 | name = "cast" 40 | version = "0.3.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 43 | 44 | [[package]] 45 | name = "cfg-if" 46 | version = "1.0.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 49 | 50 | [[package]] 51 | name = "ciborium" 52 | version = "0.2.2" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 55 | dependencies = [ 56 | "ciborium-io", 57 | "ciborium-ll", 58 | "serde", 59 | ] 60 | 61 | [[package]] 62 | name = "ciborium-io" 63 | version = "0.2.2" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 66 | 67 | [[package]] 68 | name = "ciborium-ll" 69 | version = "0.2.2" 70 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 72 | dependencies = [ 73 | "ciborium-io", 74 | "half", 75 | ] 76 | 77 | [[package]] 78 | name = "clap" 79 | version = "4.5.7" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" 82 | dependencies = [ 83 | "clap_builder", 84 | ] 85 | 86 | [[package]] 87 | name = "clap_builder" 88 | version = "4.5.7" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" 91 | dependencies = [ 92 | "anstyle", 93 | "clap_lex", 94 | ] 95 | 96 | [[package]] 97 | name = "clap_lex" 98 | version = "0.7.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" 101 | 102 | [[package]] 103 | name = "criterion" 104 | version = "0.5.1" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 107 | dependencies = [ 108 | "anes", 109 | "cast", 110 | "ciborium", 111 | "clap", 112 | "criterion-plot", 113 | "is-terminal", 114 | "itertools", 115 | "num-traits", 116 | "once_cell", 117 | "oorandom", 118 | "plotters", 119 | "rayon", 120 | "regex", 121 | "serde", 122 | "serde_derive", 123 | "serde_json", 124 | "tinytemplate", 125 | "walkdir", 126 | ] 127 | 128 | [[package]] 129 | name = "criterion-plot" 130 | version = "0.5.0" 131 | source = "registry+https://github.com/rust-lang/crates.io-index" 132 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 133 | dependencies = [ 134 | "cast", 135 | "itertools", 136 | ] 137 | 138 | [[package]] 139 | name = "crossbeam-deque" 140 | version = "0.8.5" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 143 | dependencies = [ 144 | "crossbeam-epoch", 145 | "crossbeam-utils", 146 | ] 147 | 148 | [[package]] 149 | name = "crossbeam-epoch" 150 | version = "0.9.18" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 153 | dependencies = [ 154 | "crossbeam-utils", 155 | ] 156 | 157 | [[package]] 158 | name = "crossbeam-utils" 159 | version = "0.8.20" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 162 | 163 | [[package]] 164 | name = "crunchy" 165 | version = "0.2.2" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 168 | 169 | [[package]] 170 | name = "either" 171 | version = "1.12.0" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 174 | 175 | [[package]] 176 | name = "half" 177 | version = "2.4.1" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 180 | dependencies = [ 181 | "cfg-if", 182 | "crunchy", 183 | ] 184 | 185 | [[package]] 186 | name = "hermit-abi" 187 | version = "0.3.9" 188 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 190 | 191 | [[package]] 192 | name = "is-terminal" 193 | version = "0.4.12" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 196 | dependencies = [ 197 | "hermit-abi", 198 | "libc", 199 | "windows-sys", 200 | ] 201 | 202 | [[package]] 203 | name = "itertools" 204 | version = "0.10.5" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 207 | dependencies = [ 208 | "either", 209 | ] 210 | 211 | [[package]] 212 | name = "itoa" 213 | version = "1.0.11" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 216 | 217 | [[package]] 218 | name = "js-sys" 219 | version = "0.3.69" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" 222 | dependencies = [ 223 | "wasm-bindgen", 224 | ] 225 | 226 | [[package]] 227 | name = "libc" 228 | version = "0.2.155" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 231 | 232 | [[package]] 233 | name = "log" 234 | version = "0.4.21" 235 | source = "registry+https://github.com/rust-lang/crates.io-index" 236 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 237 | 238 | [[package]] 239 | name = "memchr" 240 | version = "2.7.2" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 243 | 244 | [[package]] 245 | name = "num-traits" 246 | version = "0.2.19" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 249 | dependencies = [ 250 | "autocfg", 251 | ] 252 | 253 | [[package]] 254 | name = "once_cell" 255 | version = "1.19.0" 256 | source = "registry+https://github.com/rust-lang/crates.io-index" 257 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 258 | 259 | [[package]] 260 | name = "oorandom" 261 | version = "11.1.3" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 264 | 265 | [[package]] 266 | name = "parallel-example" 267 | version = "0.1.0" 268 | dependencies = [ 269 | "criterion", 270 | "rayon", 271 | ] 272 | 273 | [[package]] 274 | name = "plotters" 275 | version = "0.3.6" 276 | source = "registry+https://github.com/rust-lang/crates.io-index" 277 | checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" 278 | dependencies = [ 279 | "num-traits", 280 | "plotters-backend", 281 | "plotters-svg", 282 | "wasm-bindgen", 283 | "web-sys", 284 | ] 285 | 286 | [[package]] 287 | name = "plotters-backend" 288 | version = "0.3.6" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" 291 | 292 | [[package]] 293 | name = "plotters-svg" 294 | version = "0.3.6" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = 
"81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" 297 | dependencies = [ 298 | "plotters-backend", 299 | ] 300 | 301 | [[package]] 302 | name = "proc-macro2" 303 | version = "1.0.85" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" 306 | dependencies = [ 307 | "unicode-ident", 308 | ] 309 | 310 | [[package]] 311 | name = "quote" 312 | version = "1.0.36" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 315 | dependencies = [ 316 | "proc-macro2", 317 | ] 318 | 319 | [[package]] 320 | name = "rayon" 321 | version = "1.10.0" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 324 | dependencies = [ 325 | "either", 326 | "rayon-core", 327 | ] 328 | 329 | [[package]] 330 | name = "rayon-core" 331 | version = "1.12.1" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 334 | dependencies = [ 335 | "crossbeam-deque", 336 | "crossbeam-utils", 337 | ] 338 | 339 | [[package]] 340 | name = "regex" 341 | version = "1.10.5" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" 344 | dependencies = [ 345 | "aho-corasick", 346 | "memchr", 347 | "regex-automata", 348 | "regex-syntax", 349 | ] 350 | 351 | [[package]] 352 | name = "regex-automata" 353 | version = "0.4.7" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 356 | dependencies = [ 357 | "aho-corasick", 358 | "memchr", 359 | "regex-syntax", 360 | ] 361 | 362 | [[package]] 363 | name = "regex-syntax" 364 | version = "0.8.4" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 367 | 368 | [[package]] 369 | name = "ryu" 370 | version = "1.0.18" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 373 | 374 | [[package]] 375 | name = "same-file" 376 | version = "1.0.6" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 379 | dependencies = [ 380 | "winapi-util", 381 | ] 382 | 383 | [[package]] 384 | name = "serde" 385 | version = "1.0.203" 386 | source = "registry+https://github.com/rust-lang/crates.io-index" 387 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 388 | dependencies = [ 389 | "serde_derive", 390 | ] 391 | 392 | [[package]] 393 | name = "serde_derive" 394 | version = "1.0.203" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 397 | dependencies = [ 398 | "proc-macro2", 399 | "quote", 400 | "syn", 401 | ] 402 | 403 | [[package]] 404 | name = "serde_json" 405 | version = "1.0.117" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" 408 | dependencies = [ 409 | "itoa", 410 | 
"ryu", 411 | "serde", 412 | ] 413 | 414 | [[package]] 415 | name = "syn" 416 | version = "2.0.66" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" 419 | dependencies = [ 420 | "proc-macro2", 421 | "quote", 422 | "unicode-ident", 423 | ] 424 | 425 | [[package]] 426 | name = "tinytemplate" 427 | version = "1.2.1" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 430 | dependencies = [ 431 | "serde", 432 | "serde_json", 433 | ] 434 | 435 | [[package]] 436 | name = "unicode-ident" 437 | version = "1.0.12" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 440 | 441 | [[package]] 442 | name = "walkdir" 443 | version = "2.5.0" 444 | source = "registry+https://github.com/rust-lang/crates.io-index" 445 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 446 | dependencies = [ 447 | "same-file", 448 | "winapi-util", 449 | ] 450 | 451 | [[package]] 452 | name = "wasm-bindgen" 453 | version = "0.2.92" 454 | source = "registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 456 | dependencies = [ 457 | "cfg-if", 458 | "wasm-bindgen-macro", 459 | ] 460 | 461 | [[package]] 462 | name = "wasm-bindgen-backend" 463 | version = "0.2.92" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" 466 | dependencies = [ 467 | "bumpalo", 468 | "log", 469 | "once_cell", 470 | "proc-macro2", 471 | "quote", 472 | "syn", 473 | "wasm-bindgen-shared", 474 | ] 475 | 476 | [[package]] 477 | name = "wasm-bindgen-macro" 478 | version = "0.2.92" 479 | source = "registry+https://github.com/rust-lang/crates.io-index" 480 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" 481 | dependencies = [ 482 | "quote", 483 | "wasm-bindgen-macro-support", 484 | ] 485 | 486 | [[package]] 487 | name = "wasm-bindgen-macro-support" 488 | version = "0.2.92" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" 491 | dependencies = [ 492 | "proc-macro2", 493 | "quote", 494 | "syn", 495 | "wasm-bindgen-backend", 496 | "wasm-bindgen-shared", 497 | ] 498 | 499 | [[package]] 500 | name = "wasm-bindgen-shared" 501 | version = "0.2.92" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" 504 | 505 | [[package]] 506 | name = "web-sys" 507 | version = "0.3.69" 508 | source = "registry+https://github.com/rust-lang/crates.io-index" 509 | checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" 510 | dependencies = [ 511 | "js-sys", 512 | "wasm-bindgen", 513 | ] 514 | 515 | [[package]] 516 | name = "winapi-util" 517 | version = "0.1.8" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 520 | dependencies = [ 521 | "windows-sys", 522 | ] 523 | 524 | [[package]] 525 | name = "windows-sys" 526 | version = "0.52.0" 527 | source = "registry+https://github.com/rust-lang/crates.io-index" 528 | 
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 529 | dependencies = [ 530 | "windows-targets", 531 | ] 532 | 533 | [[package]] 534 | name = "windows-targets" 535 | version = "0.52.5" 536 | source = "registry+https://github.com/rust-lang/crates.io-index" 537 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 538 | dependencies = [ 539 | "windows_aarch64_gnullvm", 540 | "windows_aarch64_msvc", 541 | "windows_i686_gnu", 542 | "windows_i686_gnullvm", 543 | "windows_i686_msvc", 544 | "windows_x86_64_gnu", 545 | "windows_x86_64_gnullvm", 546 | "windows_x86_64_msvc", 547 | ] 548 | 549 | [[package]] 550 | name = "windows_aarch64_gnullvm" 551 | version = "0.52.5" 552 | source = "registry+https://github.com/rust-lang/crates.io-index" 553 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 554 | 555 | [[package]] 556 | name = "windows_aarch64_msvc" 557 | version = "0.52.5" 558 | source = "registry+https://github.com/rust-lang/crates.io-index" 559 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 560 | 561 | [[package]] 562 | name = "windows_i686_gnu" 563 | version = "0.52.5" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 566 | 567 | [[package]] 568 | name = "windows_i686_gnullvm" 569 | version = "0.52.5" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 572 | 573 | [[package]] 574 | name = "windows_i686_msvc" 575 | version = "0.52.5" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 578 | 579 | [[package]] 580 | name = "windows_x86_64_gnu" 581 | version = "0.52.5" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 584 | 585 | [[package]] 586 | name = "windows_x86_64_gnullvm" 587 | version = "0.52.5" 588 | source = "registry+https://github.com/rust-lang/crates.io-index" 589 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 590 | 591 | [[package]] 592 | name = "windows_x86_64_msvc" 593 | version = "0.52.5" 594 | source = "registry+https://github.com/rust-lang/crates.io-index" 595 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 596 | -------------------------------------------------------------------------------- /src/root.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | // The overall design of Spice is as follows: 4 | // - ThreadPool spawns threads which acts as background workers. 5 | // - A Worker, while executing, will share one piece of work (`shared_job`). 6 | // - A Worker, while waiting, will look for shared jobs by other workers. 7 | 8 | pub const ThreadPoolConfig = struct { 9 | /// The number of background workers. If `null` this chooses a sensible 10 | /// default based on your system (i.e. number of cores). 11 | background_worker_count: ?usize = null, 12 | 13 | /// How often a background thread is interrupted to find more work. 
14 |     heartbeat_interval: usize = 100 * std.time.ns_per_us,
15 | };
16 | 
17 | pub const ThreadPool = struct {
18 |     allocator: std.mem.Allocator,
19 |     io: std.Io,
20 |     mutex: std.Thread.Mutex = .{},
21 |     /// List of all workers.
22 |     workers: std.ArrayListUnmanaged(*Worker) = .{},
23 |     /// List of all background workers.
24 |     background_threads: std.ArrayListUnmanaged(std.Thread) = .{},
25 |     /// The background thread which drives the heartbeat.
26 |     heartbeat_thread: ?std.Thread = null,
27 |     /// A pool for the JobExecuteState, to minimize allocations.
28 |     execute_state_pool: std.heap.MemoryPool(JobExecuteState),
29 |     /// This is used to signal that more jobs are now ready.
30 |     job_ready: std.Thread.Condition = .{},
31 |     /// This is used to wait for the background workers to be available initially.
32 |     workers_ready: std.Thread.Semaphore = .{},
33 |     /// This is set to true once we're trying to stop.
34 |     is_stopping: bool = false,
35 | 
36 |     /// A counter which we increment whenever we share a job.
37 |     /// This is used to prioritize always picking the oldest job.
38 |     time: usize = 0,
39 | 
40 |     heartbeat_interval: usize,
41 | 
42 |     pub fn init(allocator: std.mem.Allocator, io: std.Io) ThreadPool {
43 |         return ThreadPool{
44 |             .allocator = allocator,
45 |             .io = io,
46 |             .execute_state_pool = .empty,
47 |             .heartbeat_interval = undefined,
48 |         };
49 |     }
50 | 
51 |     /// Starts the thread pool. This should only be invoked once.
52 |     pub fn start(self: *ThreadPool, config: ThreadPoolConfig) void {
53 |         const actual_count = config.background_worker_count orelse (std.Thread.getCpuCount() catch @panic("getCpuCount error")) - 1;
54 | 
55 |         self.heartbeat_interval = config.heartbeat_interval;
56 |         self.background_threads.ensureUnusedCapacity(self.allocator, actual_count) catch @panic("OOM");
57 |         self.workers.ensureUnusedCapacity(self.allocator, actual_count) catch @panic("OOM");
58 | 
59 |         for (0..actual_count) |_| {
60 |             const thread = std.Thread.spawn(.{}, backgroundWorker, .{self}) catch @panic("spawn error");
61 |             self.background_threads.append(self.allocator, thread) catch @panic("OOM");
62 |         }
63 | 
64 |         self.heartbeat_thread = std.Thread.spawn(.{}, heartbeatWorker, .{self}) catch @panic("spawn error");
65 | 
66 |         // Wait for all of them to be ready:
67 |         for (0..actual_count) |_| {
68 |             self.workers_ready.wait();
69 |         }
70 |     }
71 | 
72 |     pub fn deinit(self: *ThreadPool) void {
73 |         // Tell all background workers to stop:
74 |         {
75 |             self.mutex.lock();
76 |             defer self.mutex.unlock();
77 | 
78 |             self.is_stopping = true;
79 |             self.job_ready.broadcast();
80 |         }
81 | 
82 |         // Wait for background workers to stop:
83 |         for (self.background_threads.items) |thread| {
84 |             thread.join();
85 |         }
86 | 
87 |         if (self.heartbeat_thread) |thread| {
88 |             thread.join();
89 |         }
90 | 
91 |         // Free up memory:
92 |         self.background_threads.deinit(self.allocator);
93 |         self.workers.deinit(self.allocator);
94 |         self.execute_state_pool.deinit(self.allocator);
95 |         self.* = undefined;
96 |     }
97 | 
98 |     fn backgroundWorker(self: *ThreadPool) void {
99 |         var w = Worker{ .pool = self };
100 |         var first = true;
101 | 
102 |         self.mutex.lock();
103 |         defer self.mutex.unlock();
104 | 
105 |         self.workers.append(self.allocator, &w) catch @panic("OOM");
106 | 
107 |         // We don't bother removing ourselves from the workers list on exit since
108 |         // this only happens when the whole thread pool is destroyed anyway.
109 | 
110 |         while (true) {
111 |             if (self.is_stopping) break;
112 | 
113 |             if (self._popReadyJob()) |job| {
114 |                 // Release the lock while executing the job.
115 |                 self.mutex.unlock();
116 |                 defer self.mutex.lock();
117 | 
118 |                 w.executeJob(job);
119 | 
120 |                 continue; // Go straight to another attempt of finding more work.
121 |             }
122 | 
123 |             if (first) {
124 |                 // Register that we are ready.
125 |                 self.workers_ready.post();
126 |                 first = false;
127 |             }
128 | 
129 |             self.job_ready.wait(&self.mutex);
130 |         }
131 |     }
132 | 
133 |     fn heartbeatWorker(self: *ThreadPool) void {
134 |         // We try to make sure that each worker receives a heartbeat at the
135 |         // fixed interval by going through the workers-list one by one.
136 |         var i: usize = 0;
137 | 
138 |         while (true) {
139 |             var to_sleep: u64 = self.heartbeat_interval;
140 | 
141 |             {
142 |                 self.mutex.lock();
143 |                 defer self.mutex.unlock();
144 | 
145 |                 if (self.is_stopping) break;
146 | 
147 |                 const workers = self.workers.items;
148 |                 if (workers.len > 0) {
149 |                     i %= workers.len;
150 |                     workers[i].heartbeat.store(true, .monotonic);
151 |                     i += 1;
152 |                     to_sleep /= workers.len;
153 |                 }
154 |             }
155 | 
156 |             self.io.sleep(std.Io.Duration.fromNanoseconds(to_sleep), .awake) catch |err| std.debug.panic("sleep error: {}", .{err});
157 |         }
158 |     }
159 | 
160 |     pub fn call(self: *ThreadPool, comptime T: type, func: anytype, arg: anytype) T {
161 |         // Create a one-off worker:
162 | 
163 |         var worker = Worker{ .pool = self };
164 |         {
165 |             self.mutex.lock();
166 |             defer self.mutex.unlock();
167 | 
168 |             self.workers.append(self.allocator, &worker) catch @panic("OOM");
169 |         }
170 | 
171 |         defer {
172 |             self.mutex.lock();
173 |             defer self.mutex.unlock();
174 | 
175 |             for (self.workers.items, 0..) |worker_ptr, idx| {
176 |                 if (worker_ptr == &worker) {
177 |                     _ = self.workers.swapRemove(idx);
178 |                     break;
179 |                 }
180 |             }
181 |         }
182 | 
183 |         var t = worker.begin();
184 |         return t.call(T, func, arg);
185 |     }
186 | 
187 |     /// The core logic of the heartbeat. Every executing worker invokes this periodically.
188 |     fn heartbeat(self: *ThreadPool, worker: *Worker) void {
189 |         @branchHint(.cold);
190 | 
191 |         self.mutex.lock();
192 |         defer self.mutex.unlock();
193 | 
194 |         if (worker.shared_job == null) {
195 |             if (worker.job_head.shift()) |job| {
196 |                 // Allocate an execute state for it:
197 |                 const execute_state = self.execute_state_pool.create(self.allocator) catch @panic("OOM");
198 |                 execute_state.* = .{
199 |                     .result = undefined,
200 |                 };
201 |                 job.setExecuteState(execute_state);
202 | 
203 |                 worker.shared_job = job;
204 |                 worker.job_time = self.time;
205 |                 self.time += 1;
206 | 
207 |                 self.job_ready.signal(); // wake up one thread
208 |             }
209 |         }
210 | 
211 |         worker.heartbeat.store(false, .monotonic);
212 |     }
213 | 
214 |     /// Waits for (a shared) job to be completed.
215 |     /// This returns `false` if it turns out the job was not actually started.
216 |     fn waitForJob(self: *ThreadPool, worker: *Worker, job: *Job) bool {
217 |         const exec_state = job.getExecuteState();
218 | 
219 |         {
220 |             self.mutex.lock();
221 |             defer self.mutex.unlock();
222 | 
223 |             if (worker.shared_job == job) {
224 |                 // This is the job we attempted to share with someone else, but no one picked it up in time.
225 |                 worker.shared_job = null;
226 |                 self.execute_state_pool.destroy(exec_state);
227 |                 return false;
228 |             }
229 | 
230 |             // Help out by picking up more work if it's available.
231 |             while (!exec_state.done.isSet()) {
232 |                 if (self._popReadyJob()) |other_job| {
233 |                     self.mutex.unlock();
234 |                     defer self.mutex.lock();
235 | 
236 |                     worker.executeJob(other_job);
237 |                 } else {
238 |                     break;
239 |                 }
240 |             }
241 |         }
242 | 
243 |         exec_state.done.wait();
244 |         return true;
245 |     }
246 | 
247 |     /// Finds a job that's ready to be executed.
248 |     fn _popReadyJob(self: *ThreadPool) ?*Job {
249 |         var best_worker: ?*Worker = null;
250 | 
251 |         for (self.workers.items) |other_worker| {
252 |             if (other_worker.shared_job) |_| {
253 |                 if (best_worker) |best| {
254 |                     if (other_worker.job_time < best.job_time) {
255 |                         // Pick this one instead if it's older.
256 |                         best_worker = other_worker;
257 |                     }
258 |                 } else {
259 |                     best_worker = other_worker;
260 |                 }
261 |             }
262 |         }
263 | 
264 |         if (best_worker) |worker| {
265 |             defer worker.shared_job = null;
266 |             return worker.shared_job;
267 |         }
268 | 
269 |         return null;
270 |     }
271 | 
272 |     fn destroyExecuteState(self: *ThreadPool, exec_state: *JobExecuteState) void {
273 |         self.mutex.lock();
274 |         defer self.mutex.unlock();
275 | 
276 |         self.execute_state_pool.destroy(exec_state);
277 |     }
278 | };
279 | 
280 | pub const Worker = struct {
281 |     pool: *ThreadPool,
282 |     job_head: Job = Job.head(),
283 | 
284 |     /// A job (guaranteed to be in executing state) which other workers can pick up.
285 |     shared_job: ?*Job = null,
286 |     /// The time when the job was shared. Used for prioritizing which job to pick up.
287 |     job_time: usize = 0,
288 | 
289 |     /// The heartbeat value. This is set to `true` to signal we should do a heartbeat action.
290 |     heartbeat: std.atomic.Value(bool) = std.atomic.Value(bool).init(true),
291 | 
292 |     pub fn begin(self: *Worker) Task {
293 |         std.debug.assert(self.job_head.isTail());
294 | 
295 |         return Task{
296 |             .worker = self,
297 |             .job_tail = &self.job_head,
298 |         };
299 |     }
300 | 
301 |     fn executeJob(self: *Worker, job: *Job) void {
302 |         var t = self.begin();
303 |         job.handler.?(&t, job);
304 |     }
305 | };
306 | 
307 | pub const Task = struct {
308 |     worker: *Worker,
309 |     job_tail: *Job,
310 | 
311 |     pub inline fn tick(self: *Task) void {
312 |         if (self.worker.heartbeat.load(.monotonic)) {
313 |             self.worker.pool.heartbeat(self.worker);
314 |         }
315 |     }
316 | 
317 |     pub inline fn call(self: *Task, comptime T: type, func: anytype, arg: anytype) T {
318 |         return callWithContext(
319 |             self.worker,
320 |             self.job_tail,
321 |             T,
322 |             func,
323 |             arg,
324 |         );
325 |     }
326 | };
327 | 
328 | // The following function's signature is actually extremely critical. We take in all of
329 | // the task state (worker, job_tail) as parameters. The reason for this
330 | // is that Zig/LLVM is really good at passing parameters in registers, but struggles to
331 | // do the same for "fields in structs". Inside we construct a fresh Task around
332 | // these parameters before handing it to the user-provided function.
333 | fn callWithContext(
334 |     worker: *Worker,
335 |     job_tail: *Job,
336 |     comptime T: type,
337 |     func: anytype,
338 |     arg: anytype,
339 | ) T {
340 |     var t = Task{
341 |         .worker = worker,
342 |         .job_tail = job_tail,
343 |     };
344 |     t.tick();
345 |     return @call(.always_inline, func, .{
346 |         &t,
347 |         arg,
348 |     });
349 | }
350 | 
351 | pub const JobState = enum {
352 |     pending,
353 |     queued,
354 |     executing,
355 | };
356 | 
357 | // A job represents something which _potentially_ could be executed on a different thread.
358 | // The jobs form a doubly-linked list: You call `push` to append a job and `pop` to remove it.
359 | const Job = struct {
360 |     handler: ?*const fn (t: *Task, job: *Job) void,
361 |     prev_or_null: ?*anyopaque,
362 |     next_or_state: ?*anyopaque,
363 | 
364 |     // This struct gets placed on the stack in _every_ frame so we're very cautious
365 |     // about the size of it. There are three possible states, but we don't use a union(enum)
366 |     // since this would actually increase the size.
367 |     //
368 |     // 1. pending: handler is null. prev_or_null/next_or_state are undefined.
369 |     // 2. queued: handler is set. prev_or_null is `prev`, next_or_state is `next`.
370 |     // 3. executing: handler is set. prev_or_null is null, next_or_state is `*JobExecuteState`.
371 | 
372 |     /// Returns a new job which can be used for the head of a list.
373 |     fn head() Job {
374 |         return Job{
375 |             .handler = undefined,
376 |             .prev_or_null = null,
377 |             .next_or_state = null,
378 |         };
379 |     }
380 | 
381 |     pub fn pending() Job {
382 |         return Job{
383 |             .handler = null,
384 |             .prev_or_null = undefined,
385 |             .next_or_state = undefined,
386 |         };
387 |     }
388 | 
389 |     pub fn state(self: Job) JobState {
390 |         if (self.handler == null) return .pending;
391 |         if (self.prev_or_null != null) return .queued;
392 |         return .executing;
393 |     }
394 | 
395 |     pub fn isTail(self: Job) bool {
396 |         return self.next_or_state == null;
397 |     }
398 | 
399 |     fn getExecuteState(self: *Job) *JobExecuteState {
400 |         std.debug.assert(self.state() == .executing);
401 |         return @ptrCast(@alignCast(self.next_or_state));
402 |     }
403 | 
404 |     pub fn setExecuteState(self: *Job, execute_state: *JobExecuteState) void {
405 |         std.debug.assert(self.state() == .executing);
406 |         self.next_or_state = execute_state;
407 |     }
408 | 
409 |     /// Pushes the job onto a stack.
410 |     fn push(self: *Job, tail: **Job, handler: *const fn (task: *Task, job: *Job) void) void {
411 |         std.debug.assert(self.state() == .pending);
412 |         defer std.debug.assert(self.state() == .queued);
413 | 
414 |         self.handler = handler;
415 |         tail.*.next_or_state = self; // tail.next = self
416 |         self.prev_or_null = tail.*; // self.prev = tail
417 |         self.next_or_state = null; // self.next = null
418 |         tail.* = self; // tail = self
419 |     }
420 | 
421 |     fn pop(self: *Job, tail: **Job) void {
422 |         std.debug.assert(self.state() == .queued);
423 |         std.debug.assert(tail.* == self);
424 |         const prev: *Job = @ptrCast(@alignCast(self.prev_or_null));
425 |         prev.next_or_state = null; // prev.next = null
426 |         tail.* = @ptrCast(@alignCast(self.prev_or_null)); // tail = self.prev
427 |         self.* = undefined;
428 |     }
429 | 
430 |     fn shift(self: *Job) ?*Job {
431 |         const job = @as(?*Job, @ptrCast(@alignCast(self.next_or_state))) orelse return null;
432 | 
433 |         std.debug.assert(job.state() == .queued);
434 | 
435 |         const next: ?*Job = @ptrCast(@alignCast(job.next_or_state));
436 |         // Now we have: self -> job -> next.
437 | 
438 |         // If there is no `next` then it means that `tail` actually points to `job`.
439 |         // In this case we can't remove `job` since we're not able to also update the tail.
440 |         if (next == null) return null;
441 | 
442 |         defer std.debug.assert(job.state() == .executing);
443 | 
444 |         next.?.prev_or_null = self; // next.prev = self
445 |         self.next_or_state = next; // self.next = next
446 | 
447 |         // Turn the job into "executing" state.
448 | job.prev_or_null = null; 449 | job.next_or_state = undefined; 450 | return job; 451 | } 452 | }; 453 | 454 | const max_result_words = 4; 455 | 456 | const JobExecuteState = struct { 457 | done: std.Thread.ResetEvent = .unset, 458 | result: ResultType, 459 | 460 | const ResultType = [max_result_words]u64; 461 | 462 | fn resultPtr(self: *JobExecuteState, comptime T: type) *T { 463 | if (@sizeOf(T) > @sizeOf(ResultType)) { 464 | @compileError("value is too big to be returned by background thread"); 465 | } 466 | 467 | const bytes = std.mem.sliceAsBytes(&self.result); 468 | return std.mem.bytesAsValue(T, bytes); 469 | } 470 | }; 471 | 472 | pub fn Future(comptime Input: type, Output: type) type { 473 | return struct { 474 | const Self = @This(); 475 | 476 | job: Job, 477 | input: Input, 478 | 479 | pub inline fn init() Self { 480 | return Self{ .job = Job.pending(), .input = undefined }; 481 | } 482 | 483 | /// Schedules a piece of work to be executed by another thread. 484 | /// After this has been called you MUST call `join` or `tryJoin`. 485 | pub inline fn fork( 486 | self: *Self, 487 | task: *Task, 488 | comptime func: fn (task: *Task, input: Input) Output, 489 | input: Input, 490 | ) void { 491 | const handler = struct { 492 | fn handler(t: *Task, job: *Job) void { 493 | const fut: *Self = @fieldParentPtr("job", job); 494 | const exec_state = job.getExecuteState(); 495 | const value = t.call(Output, func, fut.input); 496 | exec_state.resultPtr(Output).* = value; 497 | exec_state.done.set(); 498 | } 499 | }.handler; 500 | self.input = input; 501 | self.job.push(&task.job_tail, handler); 502 | } 503 | 504 | /// Waits for the result of `fork`. 505 | /// This is only safe to call if `fork` was _actually_ called. 506 | /// Use `tryJoin` if you conditionally called it. 507 | pub inline fn join( 508 | self: *Self, 509 | task: *Task, 510 | ) ?Output { 511 | std.debug.assert(self.job.state() != .pending); 512 | return self.tryJoin(task); 513 | } 514 | 515 | /// Waits for the result of `fork`. 516 | /// This function is safe to call even if you didn't call `fork` at all. 517 | pub inline fn tryJoin( 518 | self: *Self, 519 | task: *Task, 520 | ) ?Output { 521 | switch (self.job.state()) { 522 | .pending => return null, 523 | .queued => { 524 | self.job.pop(&task.job_tail); 525 | return null; 526 | }, 527 | .executing => return self.joinExecuting(task), 528 | } 529 | } 530 | 531 | fn joinExecuting(self: *Self, task: *Task) ?Output { 532 | @branchHint(.cold); 533 | 534 | const w = task.worker; 535 | const pool = w.pool; 536 | const exec_state = self.job.getExecuteState(); 537 | 538 | if (pool.waitForJob(w, &self.job)) { 539 | const result = exec_state.resultPtr(Output).*; 540 | pool.destroyExecuteState(exec_state); 541 | return result; 542 | } 543 | 544 | return null; 545 | } 546 | }; 547 | } 548 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spice: Parallelism with sub-nanosecond overhead 2 | 3 | ![Time to calculate sum of binary tree of 100M nodes with Spice](bench/spice-tree-sum-100M.svg) 4 | 5 | **Spice** uses [_heartbeat scheduling_][hb] to accomplish extremely efficient parallelism in Zig: 6 | 7 | - **Sub-nanosecond overhead:** 8 | Turning your function into a parallelism-enabled function adds less than a nanosecond of overhead. 9 | - **Contention-free:** 10 | Threads will never compete (i.e. spin) over the same work. 
11 | Adding more threads to the system will not make your program any slower, but the extra threads might be completely idle since there's nothing useful to do.
12 | 
13 | _(Update, September 2024: [Chili](https://github.com/dragostis/chili) is a Rust port of the ideas presented here. Check it out!)_
14 | 
15 | The benchmark in the figure above (summing over the nodes in a binary tree) is typically one of the worst cases for parallelism frameworks:
16 | The actual operation is extremely fast so any sort of overhead will have a measurable impact.
17 | 
18 | Here's the _exact_ same benchmark in [Rayon][rayon], an excellent library in Rust for doing parallelism.
19 | Both implementations follow the same fork/join API which gives code that is very easy to read and reason about.
20 | None of the findings here would surprise anyone who knows Rayon deeply, and there are ways of getting better performance out of Rayon by using different techniques.
21 | This comes at the cost of the code becoming more complicated and/or behaving suboptimally on different types of input.
22 | The purpose of this benchmark is not to discourage use of Rayon (on the contrary!), but rather to demonstrate that it _is_ possible to have both simple code and good parallelism.
23 | See [issue #5](https://github.com/judofyr/spice/issues/5) for a longer discussion.
24 | 
25 | ![Time to calculate sum of binary tree of 100M nodes with Rayon](bench/rayon-tree-sum-100M.svg)
26 | 
27 | The overhead here is roughly ~15 ns (from 7.48 ns to 22.99 ns), which means that at 4 threads we're "back" to the sequential performance - just using four times as much CPU.
28 | Luckily we _are_ able to get linear speed-up (in terms of threads) initially.
29 | These benchmarks were run on a `c4-standard-16` instance in Google Cloud with 16 cores.
30 | Rayon itself shows a nice ~14x speed-up (from 22.99 ns to 1.64 ns) at 16 threads, but compared to the _baseline_ this ends up only being ~4.5x due to the overhead.
31 | 
32 | In comparison, Spice scales slightly worse:
33 | It only got ~11x speed-up when going from 1 to 16 threads.
34 | However, due to its low overhead this is also essentially the speed-up compared to the baseline.
35 | 
36 | (It's not entirely clear why the Zig baseline implementation is twice as fast as the Rust implementation.
37 | The [compiled assembly (godbolt)][rust-vs-zig] shows that Rust saves five registers on the stack while Zig only saves three, but why?
38 | For the purpose of this benchmark it shouldn't matter since we're only comparing against the baseline of each language.)
39 | 
40 | It becomes even more interesting if we're summing the nodes of a much smaller tree:
41 | 
42 | ![Time to calculate sum of binary tree of 1000 nodes with Rayon](bench/rayon-tree-sum-1000.svg)
43 | 
44 | In this scenario the total duration of our program is very short:
45 | The baseline implementation takes a few microseconds in total to run.
46 | For some reason the overhead is a bit higher (~19 ns), but more concerning is that performance becomes _worse_ the _more_ threads we add.
47 | At 32 threads it's in total **60 times slower**.
48 | 
49 | (In this case we're using 32 threads on a machine which only has 16 cores.
50 | It's not given that we would see the same slowdown for a machine with 32 cores.
51 | Nonetheless, this scaling behavior is concerning.)
52 | 
53 | The conventional wisdom for parallelism therefore ends up being "it's not worth it unless you have _enough work_ to parallelize".
54 | The example above is typically presented as a "bad fit for parallelism".
55 | This is understandable and pragmatic, but in practice it makes it a lot more difficult to _actually_ parallelize your code:
56 | 
57 | - What exactly is "enough work"?
58 |   You might need to do a lot of benchmarking with different types of input to understand this.
59 | - It might be difficult to detect how much work a certain input does.
60 |   For instance, in our binary tree we don't know the full size of it.
61 |   There's no obvious way for us to say "if the tree is small enough, don't run the parallelized code" since by only looking at the root we don't know its size.
62 | - As we've seen, the potential slowdown can be extreme.
63 |   What if 90% of your workload is like this?
64 | - As your program evolves and your code does more (or fewer) _things_, the definition of "enough work" will also naturally change.
65 | 
66 | The goal of Spice is for you **to never have to worry about your program becoming slower by making it parallel**.
67 | If you're looking to maximize performance you should of course do elaborate benchmarking, but _generally_ with Spice you can add parallelism and there will be _practically_ no overhead.
68 | 
69 | The last example of summing over 1000 nodes behaves as follows in Spice:
70 | 
71 | ![Time to calculate sum of binary tree of 1000 nodes with Spice](bench/spice-tree-sum-1000.svg)
72 | 
73 | What's happening here is that Spice discovers that the duration is too short, so none of the multi-threading kicks in.
74 | All the extra threads here are sleeping, giving the cores time to execute other programs.
75 | 
76 | Spice is **primarily a research project**.
77 | Read along to learn more about it, but if you're considering using it in production you should be aware of its [many limitations](#limitations).
78 | 
79 | _(See the [bench/](bench/) directory for more details about these specific benchmarks.)_
80 | 
81 | ## Table of Contents
82 | 
83 | - [Using Spice](#using-spice)
84 | - [Work-stealing and its inefficiencies](#work-stealing-and-its-inefficiencies)
85 | - [Implementation details](#implementation-details)
86 |   - [Optimizing for static dispatch](#optimizing-for-static-dispatch)
87 |   - [Low-overhead heartbeating signaling](#low-overhead-heartbeating-signaling)
88 |   - [Global mutex is fine when there's no contention](#global-mutex-is-fine-when-theres-no-contention)
89 |   - [Branch-free doubly-linked list](#branch-free-doubly-linked-list)
90 |   - [Minimizing the stack usage](#minimizing-the-stack-usage)
91 |   - [Passing values around in registers](#passing-values-around-in-registers)
92 | - [Benchmarks](#benchmarks)
93 | - [Acknowledgments](#acknowledgments)
94 | - [Limitations](#limitations)
95 | - [FAQ](#faq)
96 | 
97 | ## Using Spice
98 | 
99 | The following example demonstrates how Spice works:
100 | 
101 | ```zig
102 | const spice = @import("spice");
103 | 
104 | // (1) Add task as a parameter.
105 | fn sum(t: *spice.Task, node: *const Node) i64 {
106 |     var res: i64 = node.val;
107 | 
108 |     if (node.left) |left_child| {
109 |         if (node.right) |right_child| {
110 |             var fut = spice.Future(*const Node, i64).init();
111 | 
112 |             // (3) Call `fork` to set up work for another thread.
113 |             fut.fork(t, sum, right_child);
114 | 
115 |             // (4) Do some work yourself.
116 |             res += t.call(i64, sum, left_child);
117 | 
118 |             if (fut.join(t)) |val| {
119 |                 // (5) Wait for the other thread to complete the work.
120 |                 res += val;
121 |             } else {
122 |                 // (6) ... or do it yourself.
123 |                 res += t.call(i64, sum, right_child);
124 |             }
125 |             return res;
126 |         }
127 | 
128 |         res += t.call(i64, sum, left_child);
129 |     }
130 | 
131 |     if (node.right) |right_child| {
132 |         // (2) Recursive calls must use `t.call`
133 |         res += t.call(i64, sum, right_child);
134 |     }
135 | 
136 |     return res;
137 | }
138 | ```
139 | 
140 | 1. Every parallel function needs to take a _task_ as a parameter.
141 |    This is used to coordinate the work.
142 | 2. You should never call your function directly, but instead use `t.call` which will call it for you (in the right way).
143 | 3. Call `fork` to set up a piece of work which can be done by a different thread.
144 |    This can be called multiple times to set up multiple pieces of work.
145 | 4. After that your function should do some meaningful work itself.
146 | 5. Call `join` to wait for the work done by the other thread.
147 | 6. _However_, `join` might return `null` and this signals that _no other thread picked up the work_.
148 |    In this case you must do the work yourself.
149 | 
150 | Here we repeat ourselves in step 3 and 6:
151 | In both places we refer to `sum` and `right_child`.
152 | It's possible to hide this duplication behind some helper function, _but_ this example demonstrates a core idea behind Spice:
153 | 
154 | **Not every piece of work comes from the queue.**
155 | You call `fork` to signal that there's something which _can_ be executed by another thread, but if all the other threads are busy then you fall back to executing it as if the fork never happened.
156 | 
157 | This principle is core to how Spice achieves its low and predictable overhead:
158 | If there's no parallelism possible then all Spice is doing on the hot path is pushing and popping the queue (without ever looking at any of the items).
159 | 
160 | The actual coordination with other threads happens on a _fixed heartbeat_:
161 | Every 100 microseconds or so a thread will look at its current work queue and dispatch the top-most item to another waiting thread.
162 | Since the heartbeat happens very infrequently (compared to the clock speed) we also don't need to worry so much about what we're doing during the heartbeat.
163 | Even if we spend _hundreds_ of nanoseconds the _total_ overhead becomes small since we do it rarely.
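
To make the end-to-end wiring concrete, here is a minimal driver sketch (not code from this repository: `parallelSum` is a hypothetical name, and the allocator and `std.Io` instance are assumed to be provided by the caller) using the `ThreadPool` API from `src/root.zig`:

```zig
const std = @import("std");
const spice = @import("spice");

// Hypothetical driver: `Node` and `sum` are the declarations from the
// example above.
fn parallelSum(allocator: std.mem.Allocator, io: std.Io, root: *const Node) i64 {
    var pool = spice.ThreadPool.init(allocator, io);
    // `.{}` picks the defaults from `ThreadPoolConfig`: one background
    // worker per remaining core and a 100 microsecond heartbeat interval.
    pool.start(.{});
    defer pool.deinit();

    // `call` registers a one-off worker for the current thread and then
    // invokes `sum` with a fresh `*spice.Task`.
    return pool.call(i64, sum, root);
}
```

In a long-running program you would typically keep the pool alive and reuse it across many `call`s rather than creating one per invocation.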
164 | 
165 | ## Work-stealing and its inefficiencies
166 | 
167 | Spice provides the [fork/join model][fj] which has typically been implemented using [**work-stealing**][wb].
168 | Let's have a look at work-stealing:
169 | 
170 | - Every thread has its own local _work queue_.
171 |   Every piece of work in the system gets put onto this queue.
172 | - The same thread will pick up work from this queue and execute it.
173 |   This might lead to more work being added (onto the same queue).
174 | - At some point, the local work queue for a thread will become empty.
175 |   The thread will then attempt to _steal_ work from another thread:
176 |   It takes a chunk of the work from the _end_ of another thread's queue and places it into its own.
177 | - Since each thread pulls work from the _beginning_ of its queue and other threads steal from the _end_, we expect there to be little contention on these queues.
178 | 
179 | However, there are three major sources of inefficiency in this design:
180 | 
181 | **Every piece of work is a _dynamic dispatch_.**
182 | In compiled languages (such as C) function calls are "practically" free because everything about the called function can be known statically.
183 | This is a scenario which compilers and CPUs have been optimized for _decades_ to execute efficiently.
184 | Work-stealing systems _don't_ use this functionality, but instead put every piece of work behind a generic "call this dynamic function" indirection.
185 | It's a small piece of overhead, but it does add up.
186 | 
187 | **The "local" work queue isn't really local.**
188 | Yes, it's true that every thread has a single queue that it pushes work onto, _but_ this is far from a "local" queue as is typically described in concurrent algorithms.
189 | This is a queue which _every_ thread at _any_ point might steal from.
190 | In reality, work-stealing systems with N threads have N global queues, where each queue only has a single producer, but everyone is a consumer.
191 | Why does this distinction matter?
192 | _Because all operations on these queues have to use atomic operations._
193 | Atomic operations, especially stores, are far more expensive than regular, _local_ stores.
194 | 
195 | **Spinning works great … until it doesn't.**
196 | The queues in work-stealing systems are typically implemented using _spinning_:
197 | Every thread will optimistically try to acquire a single item from the queue, and if there's contention with another thread it will _try again_ in a loop.
198 | This typically gives great performance … **until it doesn't**.
199 | It can be very hard to reason about this or replicate it since under one set of conditions everything is fine, but _suddenly_ during contention the system will slow down to a halt (i.e. 10x-100x slower).
200 | 
201 | Spice directly tackles all of these inefficiencies:
202 | 
203 | 1. The dynamic dispatch of the work queue is only used when work is sent to another thread.
204 |    Work done _within_ a single thread will use regular function calls outside of the work queue.
205 | 2. The work queue is truly local:
206 |    Pushing to it involves (1) one memory store to a pointer to somewhere on the stack, (2) one memory store to the current stack frame, (3) one register store.
207 |    None of these operations need to synchronize with other threads.
208 | 3. There isn't a single `while`-loop in Spice which doesn't also contain a `wait()`-call which will suspend the thread.
209 |    There is no spinning.
210 | 
211 | ## Implementation details
212 | 
213 | Let's dive further into how Spice is implemented to achieve its efficient parallelism.
214 | 
215 | ### Optimizing for static dispatch
216 | 
217 | A fork/join program has a set of code blocks which are executed in parallel, and once they finish the `join` action completes:
218 | 
219 | ```
220 | join(
221 |   fork { code1 }
222 |   fork { code2 }
223 |   fork { code3 }
224 | )
225 | ```
226 | 
227 | In Spice this is represented as:
228 | 
229 | ```
230 | job1 = fork { code1 }  // Place on the queue
231 | job2 = fork { code2 }  // Place on the queue
232 | 
233 | code3  // Run right away
234 | 
235 | if (job2.isExecuting()) {
236 |   // Job was picked up by another thread. Wait for it.
237 |   job2.wait()
238 | } else {
239 |   code2
240 | }
241 | 
242 | if (job1.isExecuting()) {
243 |   // Job was picked up by another thread. Wait for it.
244 |   job1.wait()
245 | } else {
246 |   code1
247 | }
248 | ```
249 | 
250 | Notice that `code1` and `code2` have been duplicated _inside_ the function.
251 | This is actually a _good_ thing.
252 | Most of the time the job will _not_ be picked up by another thread.
253 | In this case, our program nicely turns into the sequential version (although in reverse order) with a few extra branches which are all very predictable.
254 | This is friendly both for the code optimizer (e.g. it can now inline the function call) and the CPU.
255 | 
256 | ### Low-overhead heartbeating signaling
257 | 
258 | The core idea of heartbeat scheduling is to do scheduling _locally_ and at a _low frequency_:
259 | Every 100 microseconds or so we'd like every thread to look at its local work queue and send work to a different thread.
260 | The low frequency is key to eliminating overall overhead.
261 | If we're only doing something every 100 microseconds we can actually spend 100 nanoseconds (an eternity!) and still only introduce 0.1% overhead.
262 | 
263 | Operating systems have built-in support for _signaling_, but signals are very hard to reason about.
264 | The user code gets paused at _any_ random point and it's hard to safely continue running.
265 | For this reason, Spice uses a cooperative approach instead:
266 | The user code has to call `tick()`, which detects whether a heartbeat should happen.
267 | This function call is automatically made for you whenever you use the `call`-helper.
268 | 
269 | It's critical that this function is efficient when a heartbeat **isn't** happening.
270 | This is after all the common case (as the heartbeat is only happening every ~100 microseconds).
271 | 
272 | ```zig
273 | pub inline fn tick(self: *Task) void {
274 |     if (self.worker.heartbeat.load(.monotonic)) {
275 |         self.worker.pool.heartbeat(self.worker);
276 |     }
277 | }
278 | ```
279 | 
280 | In Spice we spawn a separate heartbeat thread whose sole purpose is to periodically flip each worker's atomic heartbeat value from `false` to `true`.
281 | The `tick()` function then reads this atomic value and starts its heartbeat code when it's `true`.
282 | 
283 | A key part of reducing the overhead of the ticking is to make sure the heartbeat function itself is marked as _cold_.
284 | This prevents the mere presence of the function call from using up any registers.
285 | Without this the overhead is significantly higher.
286 | 
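Condensed, the heartbeat thread is essentially the following loop (a simplified sketch of `heartbeatWorker` from `src/root.zig`; the real version also takes the pool mutex, checks for shutdown and sleeps through `std.Io`):

```zig
// One dedicated thread round-robins over all workers, setting each worker's
// flag so that every worker sees a heartbeat roughly once per interval.
fn heartbeatLoop(pool: *ThreadPool) void {
    var i: usize = 0;
    while (true) {
        var to_sleep: u64 = pool.heartbeat_interval;
        const workers = pool.workers.items;
        if (workers.len > 0) {
            i %= workers.len;
            workers[i].heartbeat.store(true, .monotonic);
            i += 1;
            to_sleep /= workers.len; // spread the beats evenly across workers
        }
        std.Thread.sleep(to_sleep);
    }
}
```

Note that the cost per beat is a single relaxed atomic store, independent of how much work the workers have queued up.
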
287 | ### Global mutex is fine when there's no contention
288 | 
289 | If you look inside the codebase of Spice you will find that each thread pool has a single mutex which is locked all over the place.
290 | An immediate reaction would be "oh no, a global mutex is terrible" and you might be tempted to replace it.
291 | 
292 | _However_, there's no problem with a global mutex _until you're being blocked_.
293 | And you can only be blocked if two conditions occur:
294 | 
295 | 1. A thread is holding the lock for a _long_ time.
296 | 2. Multiple threads are trying to acquire the lock at the same time.
297 | 
298 | **Neither** of these is true for Spice.
299 | The heartbeating ensures that typically only a single thread is executing a heartbeat at any given time.
300 | In addition, no user code is executed while the lock is held.
301 | We're only protecting trivial memory reads/writes which will complete in constant time.
302 | 
303 | ### Branch-free doubly-linked list
304 | 
305 | We're using a doubly-linked list to keep track of the work queue:
306 | `fork()` appends to the end, `join()` pops from the end (if it's still there), and we pop from the _beginning_ when we want to send work to a background worker.
307 | 
308 | [Appending into a doubly-linked list](https://github.com/ziglang/zig/blob/cb308ba3ac2d7e3735d1cb42ef085edb1e6db723/lib/std/linked_list.zig#L267-L275) typically looks like this:
309 | 
310 | ```zig
311 | pub fn append(list: *Self, new_node: *Node) void {
312 |     if (list.last) |last| {
313 |         // Insert after last.
314 |         list.insertAfter(last, new_node);
315 |     } else {
316 |         // Empty list.
317 |         list.prepend(new_node);
318 |     }
319 | }
320 | ```
321 | 
322 | Notice that there's a conditional here: If the list is empty we need to do something special.
323 | Most of the time the list will of course _not_ be empty.
324 | To eliminate the branch we can make sure that the list is _never_ empty:
325 | We define a sentinel node (the "head") which always represents the beginning of the list.
326 | The tail pointer will start by pointing to this head node.
327 | 
328 | This means that both pushing and popping are completely branch-free, and these are operations we do at _every_ recursive function call.
329 | 
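For illustration, the sentinel approach turns both operations into straight-line code (a sketch with illustrative names; the real implementation is `Job.push`/`Job.pop` in `src/root.zig`, which additionally packs `prev`/`next` into two overloaded pointer fields):

```zig
const std = @import("std");

const Node = struct {
    prev: *Node = undefined,
    next: ?*Node = null,
};

// Usage: `var head = Node{}; var tail: *Node = &head;` — the sentinel head
// means `tail` is always valid, so appending never branches on "empty list".
fn push(tail: **Node, node: *Node) void {
    tail.*.next = node; // tail.next = node
    node.prev = tail.*; // node.prev = tail
    node.next = null; // node becomes the new end of the list
    tail.* = node; // tail = node
}

// Popping the most recently pushed node is equally branch-free.
fn pop(tail: **Node, node: *Node) void {
    std.debug.assert(tail.* == node); // only the tail can be popped
    node.prev.next = null; // unlink from the previous node
    tail.* = node.prev; // move the tail back one step
}
```
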
330 | ### Minimizing the stack usage
331 | 
332 | A `Future` in Spice has two possible states: It's either _queued_ or _executing_.
333 | The heartbeat is responsible for taking a _queued_ future and starting to _execute_ it.
334 | And as we already know: Heartbeating happens rarely so we expect many futures to be queued without ever executing.
335 | 
336 | An early prototype of Spice used a _tagged union_ to store the future on the stack.
337 | This turns out to be suboptimal because (1) stack usage matters for performance (at least in this benchmark) and (2) there's quite a lot of additional state needed to keep track of futures which are _executing_.
338 | 
339 | To minimize stack usage Spice therefore uses two techniques:
340 | 
341 | 1. Execution state is placed in a separate (pool-allocated) struct.
342 |    The queued (but not executed) futures therefore do not need to consume any of this space.
343 | 2. We manually create a tagged union where we use the fact that the _executing_ state only needs a single pointer while the _queued_ state is guaranteed to have a `prev` pointer.
344 |    Whether the first field is `null` therefore decides which of these it is.
345 |    (Maybe a smart enough compiler would be able to do this optimization for us.)
346 | 
347 | ```zig
348 | const Future = struct {
349 |     prev_or_null: ?*anyopaque,
350 |     next_or_state: ?*anyopaque,
351 | };
352 | 
353 | // A future which is _queued_ has:
354 | //   prev_or_null = pointer to prev future
355 | //   next_or_state = pointer to next future
356 | 
357 | // A future which is _executing_ has:
358 | //   prev_or_null = null
359 | //   next_or_state = ExecuteState
360 | 
361 | const ExecuteState = struct {
362 |     requester: *Worker,
363 |     done: std.Thread.ResetEvent = .{},
364 |     result: ResultType,
365 |     // Any number of fields.
366 | };
367 | ```
368 | 
369 | ### Passing values around in registers
370 | 
371 | Spice works with a `Task` struct which has two fields:
372 | A pointer to the owning worker and a pointer to the tail of the work queue.
373 | For optimal performance these should be passed in registers across all function boundaries.
374 | However, with LLVM, passing a struct will very often cause it to be passed on the stack.
375 | 
376 | To work around this we define a _separate_ function where `worker` and `job_tail` are actual parameters.
377 | We place the parameters into a struct and pass a pointer to this struct into the user-defined function.
378 | We make sure this function call is always inlined:
379 | 
380 | ```zig
381 | fn callWithContext(
382 |     worker: *Worker,
383 |     job_tail: *Job,
384 |     comptime T: type,
385 |     func: anytype,
386 |     arg: anytype,
387 | ) T {
388 |     var t = Task{
389 |         .worker = worker,
390 |         .job_tail = job_tail,
391 |     };
392 |     return @call(.always_inline, func, .{
393 |         &t,
394 |         arg,
395 |     });
396 | }
397 | ```
398 | 
399 | This causes the `callWithContext`-function to be the _actual_ function which LLVM works on, and since this has pointers as parameters it will happily pass these directly in registers.
400 | 
401 | ## Benchmarks
402 | 
403 | The initial development of Spice has been focused around a single benchmark which is described in detail in [bench/](bench/).
404 | 
405 | ## Acknowledgments
406 | 
407 | Spice was made possible thanks to the research into _heartbeat scheduling_:
408 | 
409 | ["The best multicore-parallelization refactoring you've never heard of"](https://arxiv.org/abs/2307.10556) gives an _excellent_ introduction to the concepts of heartbeat scheduling.
410 | It's a very short paper which focuses entirely on a single use case, but describes everything in a manner which can be generalized.
411 | The solution presented in this paper is based around turning all the code into continuation-passing style which enables switching between sequential and parallel execution.
412 | Spice started out as an experiment with this approach, but it turned out to have quite high overhead (>10 nanoseconds).
413 | 
414 | Going backwards in time, ["Heartbeat scheduling: provable efficiency for nested parallelism"](https://www.chargueraud.org/research/2018/heartbeat/heartbeat.pdf) was the first paper introducing "heartbeat scheduling".
415 | This paper provides excellent information about the concepts, but the implementation is based around integrating this into an interpreter, and the focus is primarily on theoretical guarantees as opposed to raw performance.
416 | 
417 | ["Task parallel assembly language for uncompromising parallelism"](https://paragon.cs.northwestern.edu/papers/2021-PLDI-TPAL-Rainey.pdf) is a follow-up paper which improves the performance by defining a custom assembly language and using OS signaling for heartbeats.
418 | This is a fascinating line of research, but it's difficult to integrate into an existing language.
419 | 
420 | ## Limitations
421 | 
422 | There are _many_ limitations in the current implementation of Spice:
423 | 
424 | - **Rough edges when you're using it wrong:** Spice is quite particular about how it should be used (most notably around `fork` and `join`).
425 |   If you use it wrong, weird things can happen.
426 |   This should be improved by adding more compile-time checks, debug-mode assertions, or changing the overall API.
427 | - **Lack of tests:** Spice contains a lot of gnarly concurrent code, but has zero test coverage.
428 |   This would have to be improved before Spice can be responsibly used for critical tasks.
429 | - **Lack of support for arrays/slices:** Probably _the_ most common use case for fine-grained parallelism is to do something for every element of an array/slice.
430 |   There should be native, efficient support for this use case.
431 | - **Lack of documentation:** There's no good documentation of how to use it.
432 | - **Lack of further benchmarks:** This has only been tested on a single small benchmark.
433 |   This benchmark _should_ be quite representative (see [bench/](bench/) for more details), but further benchmarks are needed to validate these findings.
434 | - **@panic-heavy:** Spice is quite optimistic in its error handling and uses `@panic` extensively.
435 |   To be considered a proper Zig library there needs to be way more consideration of how error cases are handled.
436 | - **Lack of testing with ReleaseSafe:**
437 |   `ReleaseSafe` is an extremely nice feature of Zig.
438 |   Further benchmarking and testing is needed to understand how well Spice can work here.
439 | 
440 | Luckily the whole codebase is ~500 lines so it shouldn't be _too_ difficult to make progress in these areas.
441 | 
442 | There are currently no plans for any active development on Spice to improve this (as the original author doesn't have the time).
443 | Any improvements in forks and/or re-implementations in other languages are highly encouraged!
444 | 
445 | ## FAQ
446 | 
447 | **Question: Why is it called "Spice"?**
448 | 
449 | Answer: This project enables _fine-grained_ parallelism. Sand is extremely fine-grained. Sand forms in dunes. Spice.
450 | Also: It's a hot take on parallelism.
451 | 
452 | **Question: Why is it implemented in Zig?**
453 | 
454 | Answer: Why not?
455 | This describes a _generic approach_ to parallelism that should be possible to implement in multiple languages.
456 | Maybe I'll end up implementing something similar in another language as well?
457 | I don't know yet.
458 | If you think this is interesting for _your_ language of choice I would encourage you to explore this area.
459 | 
460 | **Question: But if you did it in Rust we could have _safe_ parallelism?**
461 | 
462 | Answer:
463 | Yeah, that sounds very cool.
464 | I'm not at all opposed to it.
465 | _That said_, I've been exploring many different techniques and variants while developing Spice.
466 | Many of my initial ideas were definitely not "safe" by any means, but I was able to express these ideas in Zig, look at the assembly and measure the performance in benchmarks.
467 | I'd probably only have been able to explore a fraction of the ideas if I had been limited by Rust's strict semantics in the _initial_ phase of this project.
468 | If I have to turn this into a production-ready system I might decide to use Rust.
469 | 470 | [hb]: https://www.andrew.cmu.edu/user/mrainey/heartbeat/heartbeat.html 471 | [rayon]: https://docs.rs/rayon/latest/rayon/ 472 | [wb]: https://en.wikipedia.org/wiki/Work_stealing 473 | [fj]: https://en.wikipedia.org/wiki/Fork%E2%80%93join_model 474 | [rust-vs-zig]: https://godbolt.org/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1AAvPMFJL6yAngGVG6AMKpaAVxYMJAZlIOAMngMmABy7gBGmMQgAGykAA6oCoS2DM5uHt7xickCAUGhLBFRsZaY1ilCBEzEBGnunlw%2BpeUCldUEeSHhkTEWVTV1GY197Z0FRTEAlBaorsTI7BxoDAoEANTBGJhrAKReACJrq8Su1rsA7ABCOxoAgmsPawBuYiBreNHSN/eP9FQEbz2ADEAFSbLC7A5rBiuWi0UjfR5rYhmBAAyGg8HbPaHGFwhF3RGPOKuMJrKgMI7uCCWKhvEHLVYbLaTd6fC7XO5IpEvYjIzAKSGHWkAOhetD2nJ%2B3PeVDWNMwtCoIr%2BBFZO3OjmQCDo6A1jn5gp2ACZrlDtbqRQpqZNJUSZXg5QqlSKUcA0erNRbaHrNYbdqacWtvegrTa7VyZfyCHNKcQBRHpRd9t8NSmvFLviSyRS1iwmIEIKynqg8OgOfa1gB6KtrAAqbuAkTWBAQ22tLAAtFRXAwWpSiGsImtZgQSQRKwB9IXMrBhlgQXtYGhBdC2jOp877DjTWicACsvE83F4qE4AC0zEdZvNscavDxSACOFpJtMANYSACcIq40X3XiSMaQHGvuwEABySNIe4cJIR6aKenC8AoIAaE%2BCHTHAsBIGgLBxHQkTkJQuH4fQUTGAQJwMO%2BfB0AQkQoRAYQIaQYSBNUACenCPmxzDEBxADyYTaGUz6PrhbCCAJDC0FxL68FgYSuMAjhiLQKEnqQWD5kY4jyVpeDxuUTwCixmCqGUrj0dxvCBPRMFaPoeBhMQnHOFgLGUXgLA2aQJnEGEiSYPsmA6cAtCBKA8nTFQBjAAoABqeCYAA7gJcSML5/CCCIYjsFIMiCIoKjqPpujGvohgmGYTlhChkDTKgcQ2AIGmdgJawAEqKpgTBKECfUEKe/kolg9VFhYPX9vYDBOC49R6P4gRdIUPRcFkSQtak81DBtOQMGM3RROtzRbW0Aw7Q0k1WGd/QdMt4xrSMF3pFdqyjA9R0SNMCg3gs336Ae8H6WeHBrKYwAtlR77yrghAkAGD6TLwz6vtMbZMFgUQTZ%2BoEivu5zRBokhfsaX77vunxgYDsHA45oPIah6HRaQWGICgWxw0QZAUNQBHMGwWWyLl4gFdl8hKGoLHlZVRggCcqzXaJKQzXNr0gBVS35F9f57VtgyeBVCSbSkh2rcdJRTbd7QGxrSv9ud93a%2BbEglHdtsVe9NRmxMf4/X9izHKc6xYnsjh1ns2AVpGDziq47D1gSSaqm86VbWHlyoKoYeh144eRwXXjYEnSJumiqfNSkGdZznWxhxHReF8Xm4poSdzZuSlIdhADBbICxrRLnjgfJIkesp2kdspI0fJ5g6wsFZhqwusQa93OceYImSKOms9DrEIqBsBAIbqlCJrRGvmAqpg/wz1GS%2B0CvgZQt3J9b48aaVjve9rAfR8nzOc%2Bl9XSohXlcSspcBTLwDGaYU1I34bhjsmSs8YYzEDjFAx%2BiY0w7hpoeUgx56acE6q4Jkv05gLERsaFGGEPwa2NCKL84FjTgS8F4fcXAuDGg0NENh0FOBwQISxBmFgmao1wWzCAOFD6kUIrzEiBEojEC4OBDQaEaCPwYpQZi%2BleKcV8no/iQkRLWF8hJRgBBpKyRYopZSqk4QaUfNpKqelHL4CMjYEyGlHLmUstZTSdlFQsQii5Nyfd9JeR8ppfygUlAhTChFOWLNYpMHiklVK6VMqaXFiLfK0hxbFSlmVPQBg5YKwILVcajVK6tU4J2A0TUCCdnoCZCUBwvDDUiKNUysABbsEwPgLafkxDx04Nw40PA3z2y2qrW260tYrV9nrFIczlm5E%2Bi7E6VsKju0unoU6OyPrOyWV7WoeyTp3R9mtf2FD2DGl3EDIRINODIlIQQZAawuAilUSKDQsNBkIxNF4e5NDop0OBSKY05wCbnEkOcc4GgEWSGBTTQRhDEIcEZmhcRUyYLUKeUQzFzM0Z%2BQYirSQQA%3D%3D%3D 475 | --------------------------------------------------------------------------------