├── LICENSE
├── README.md
├── asm_386.s
├── asm_amd64.s
├── asm_arm.s
├── asm_arm64.s
├── asm_mips.s
├── asm_mips64.s
├── asm_ppc64.s
├── asm_s390x.s
├── bench_reports
├── darwin_arm64_m1
│ ├── 1.3.0.txt
│ ├── 1.4.0.txt
│ ├── 1.5.0.txt
│ ├── 2.0.0.txt
│ ├── 2.1.0.txt
│ ├── 2.2.0.txt
│ ├── 2.2.1.txt
│ ├── 2.3.0.txt
│ ├── 2.4.0.txt
│ ├── 2.4.0_alternate.txt
│ ├── 2.4.0_on_battery.txt
│ ├── 2.5.0.txt
│ ├── 2.5.1.txt
│ ├── 2.5.2.txt
│ ├── 2.7.0.txt
│ └── 2.7.1.txt
├── raspian_arm32.txt
├── ubuntu_amd64_16core.txt
├── ubuntu_intel_xeon.txt
└── windows_amd64_16core.txt
├── benchmarks
├── cgo_test
│ └── cgobench.go
├── e2e
│ ├── benchmark_test.go
│ └── benchsuite_test.go
├── selector
│ └── main.go
└── simple
│ ├── main.go
│ └── main_test.go
├── constants
├── constants_386.go
├── constants_amd64.go
├── constants_arm.go
├── constants_arm64.go
├── constants_mips.go
├── constants_mips64.go
├── constants_mips64le.go
├── constants_mipsle.go
├── constants_ppc64x.go
├── constants_riscv64.go
├── constants_s390x.go
└── constants_wasm.go
├── examples
├── selector
│ └── main.go
└── simple
│ └── main.go
├── go.mod
├── go.sum
├── lib_runtime_fastrand.go
├── lib_runtime_fastrand_1.22.go
├── lib_runtime_linkage.go
├── select_list.go
├── selector.go
├── thread_parker.go
└── zenq.go
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Anish Mukherjee
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZenQ
2 |
3 | > A low-latency thread-safe queue in golang implemented using a lock-free ringbuffer and runtime internals
4 |
5 | Based on the [LMAX Disruptor Pattern](https://lmax-exchange.github.io/disruptor/disruptor.html)
6 |
7 | ## Features
8 |
9 | * Much faster than native channels in both SPSC (single-producer-single-consumer) and MPSC (multi-producer-single-consumer) modes in terms of `time/op`
10 | * More resource efficient in terms of `memory_allocation/op` and `num_allocations/op` evident while benchmarking large batch size inputs
11 | * Handles the case where NUM_WRITER_GOROUTINES > NUM_CPU_CORES much better than native channels
12 | * Selection from multiple ZenQs just like golang's `select{}` ensuring fair selection and no starvation
13 | * Closing a ZenQ
14 |
15 | Benchmarks to support the above claims [here](#benchmarks)
16 |
17 | ## Installation
18 |
19 | You need Golang [1.19.x](https://go.dev/dl/) or above
20 |
21 | ```bash
22 | $ go get github.com/alphadose/zenq/v2
23 | ```
24 |
25 | ## Usage
26 |
27 | 1. Simple Read/Write
28 | ```go
29 | package main
30 |
31 | import (
32 | "fmt"
33 |
34 | "github.com/alphadose/zenq/v2"
35 | )
36 |
37 | type payload struct {
38 | alpha int
39 | beta string
40 | }
41 |
42 | func main() {
43 | zq := zenq.New[payload](10)
44 |
45 | for j := 0; j < 5; j++ {
46 | go func() {
47 | for i := 0; i < 20; i++ {
48 | zq.Write(payload{
49 | alpha: i,
50 | beta: fmt.Sprint(i),
51 | })
52 | }
53 | }()
54 | }
55 |
56 | for i := 0; i < 100; i++ {
57 | if data, queueOpen := zq.Read(); queueOpen {
58 | fmt.Printf("%+v\n", data)
59 | }
60 | }
61 | }
62 | ```
63 |
64 | 2. **Selection** from multiple ZenQs just like golang's native `select{}`. The selection process is fair, i.e. no single ZenQ gets starved
65 | ```go
66 | package main
67 |
68 | import (
69 | "fmt"
70 |
71 | "github.com/alphadose/zenq/v2"
72 | )
73 |
74 | type custom1 struct {
75 | alpha int
76 | beta string
77 | }
78 |
79 | type custom2 struct {
80 | gamma int
81 | }
82 |
83 | const size = 100
84 |
85 | var (
86 | zq1 = zenq.New[int](size)
87 | zq2 = zenq.New[string](size)
88 | zq3 = zenq.New[custom1](size)
89 | zq4 = zenq.New[*custom2](size)
90 | )
91 |
92 | func main() {
93 | go looper(intProducer)
94 | go looper(stringProducer)
95 | go looper(custom1Producer)
96 | go looper(custom2Producer)
97 |
98 | for i := 0; i < 40; i++ {
99 |
100 | // Selection occurs here
101 | if data := zenq.Select(zq1, zq2, zq3, zq4); data != nil {
102 | switch data.(type) {
103 | case int:
104 | fmt.Printf("Received int %d\n", data)
105 | case string:
106 | fmt.Printf("Received string %s\n", data)
107 | case custom1:
108 | fmt.Printf("Received custom data type number 1 %#v\n", data)
109 | case *custom2:
110 | fmt.Printf("Received pointer %#v\n", data)
111 | }
112 | }
113 | }
114 | }
115 |
116 | func intProducer(ctr int) { zq1.Write(ctr) }
117 |
118 | func stringProducer(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) }
119 |
120 | func custom1Producer(ctr int) { zq3.Write(custom1{alpha: ctr, beta: fmt.Sprint(ctr)}) }
121 |
122 | func custom2Producer(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) }
123 |
124 | func looper(producer func(ctr int)) {
125 | for i := 0; i < 10; i++ {
126 | producer(i)
127 | }
128 | }
129 | ```
130 |
131 | ## Benchmarks
132 |
133 | Benchmarking code available [here](./benchmarks)
134 |
135 | Note that if you run the benchmarks with the `-race` flag, ZenQ will perform slower because the race detector slows
136 | down the atomic operations in golang. Under normal circumstances, ZenQ will outperform golang native channels.
137 |
138 | ### Hardware Specs
139 |
140 | ```
141 | ❯ neofetch
142 | 'c. alphadose@ReiEki.local
143 | ,xNMM. ----------------------
144 | .OMMMMo OS: macOS 12.3 21E230 arm64
145 | OMMM0, Host: MacBookAir10,1
146 | .;loddo:' loolloddol;. Kernel: 21.4.0
147 | cKMMMMMMMMMMNWMMMMMMMMMM0: Uptime: 6 hours, 41 mins
148 | .KMMMMMMMMMMMMMMMMMMMMMMMWd. Packages: 86 (brew)
149 | XMMMMMMMMMMMMMMMMMMMMMMMX. Shell: zsh 5.8
150 | ;MMMMMMMMMMMMMMMMMMMMMMMM: Resolution: 1440x900
151 | :MMMMMMMMMMMMMMMMMMMMMMMM: DE: Aqua
152 | .MMMMMMMMMMMMMMMMMMMMMMMMX. WM: Rectangle
153 | kMMMMMMMMMMMMMMMMMMMMMMMMWd. Terminal: iTerm2
154 | .XMMMMMMMMMMMMMMMMMMMMMMMMMMk Terminal Font: FiraCodeNerdFontComplete-Medium 16 (normal)
155 | .XMMMMMMMMMMMMMMMMMMMMMMMMK. CPU: Apple M1
156 | kMMMMMMMMMMMMMMMMMMMMMMd GPU: Apple M1
157 | ;KMMMMMMMWXXWMMMMMMMk. Memory: 1370MiB / 8192MiB
158 | .cooc,. .,coo:.
159 |
160 | ```
161 |
162 | ### Terminology
163 |
164 | * NUM_WRITERS -> The number of goroutines concurrently writing to ZenQ/Channel
165 | * INPUT_SIZE -> The number of input payloads to be passed through ZenQ/Channel from producers to consumer
166 |
167 | ```bash
168 | Computed from benchstat of 30 benchmarks each via go test -benchmem -bench=. benchmarks/simple/*.go
169 |
170 | name time/op
171 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1%
172 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1%
173 | _Chan_NumWriters3_InputSize60000-8 5.27ms ± 3%
174 | _ZenQ_NumWriters3_InputSize60000-8 2.36ms ± 2%
175 | _Chan_NumWriters8_InputSize6000000-8 671ms ± 2%
176 | _ZenQ_NumWriters8_InputSize6000000-8 234ms ± 6%
177 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4%
178 | _ZenQ_NumWriters100_InputSize6000000-8 309ms ± 2%
179 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
180 | _ZenQ_NumWriters1000_InputSize7000000-8 389ms ± 4%
181 | _Chan_Million_Blocking_Writers-8 10.4s ± 2%
182 | _ZenQ_Million_Blocking_Writers-8 2.32s ±21%
183 |
184 | name alloc/op
185 | _Chan_NumWriters1_InputSize600-8 0.00B
186 | _ZenQ_NumWriters1_InputSize600-8 0.00B
187 | _Chan_NumWriters3_InputSize60000-8 109B ±68%
188 | _ZenQ_NumWriters3_InputSize60000-8 24.6B ±107%
189 | _Chan_NumWriters8_InputSize6000000-8 802B ±241%
190 | _ZenQ_NumWriters8_InputSize6000000-8 1.18kB ±100%
191 | _Chan_NumWriters100_InputSize6000000-8 44.2kB ±41%
192 | _ZenQ_NumWriters100_InputSize6000000-8 10.7kB ±38%
193 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 8%
194 | _ZenQ_NumWriters1000_InputSize7000000-8 90.6kB ±10%
195 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
196 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3%
197 |
198 | name allocs/op
199 | _Chan_NumWriters1_InputSize600-8 0.00
200 | _ZenQ_NumWriters1_InputSize600-8 0.00
201 | _Chan_NumWriters3_InputSize60000-8 0.00
202 | _ZenQ_NumWriters3_InputSize60000-8 0.00
203 | _Chan_NumWriters8_InputSize6000000-8 2.76 ±190%
204 | _ZenQ_NumWriters8_InputSize6000000-8 5.47 ±83%
205 | _Chan_NumWriters100_InputSize6000000-8 159 ±26%
206 | _ZenQ_NumWriters100_InputSize6000000-8 25.1 ±39%
207 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 6%
208 | _ZenQ_NumWriters1000_InputSize7000000-8 47.3 ±31%
209 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
210 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
211 | ```
212 |
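213 | The above results show that ZenQ is more efficient than channels in all 3 metrics, i.e. `time/op`, `alloc/op` and `allocs/op`, for the following tested cases (the one exception is the allocation figures of the 8-writer case, where the variance is too high to separate the two):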
214 |
215 | 1. SPSC
216 | 2. MPSC with NUM_WRITER_GOROUTINES < NUM_CPU_CORES
217 | 3. MPSC with NUM_WRITER_GOROUTINES > NUM_CPU_CORES
218 |
219 |
220 | ## Cherry on the Cake
221 |
222 | In SPSC mode ZenQ is faster than channels by **92 seconds** for an input size of 6 * 10<sup>8</sup> elements
223 |
224 | ```bash
225 | ❯ go run benchmarks/simple/main.go
226 |
227 | With Input Batch Size: 60 and Num Concurrent Writers: 1
228 |
229 | Native Channel Runner completed transfer in: 26.916µs
230 | ZenQ Runner completed transfer in: 20.292µs
231 | ====================================================================
232 |
233 | With Input Batch Size: 600 and Num Concurrent Writers: 1
234 |
235 | Native Channel Runner completed transfer in: 135.75µs
236 | ZenQ Runner completed transfer in: 105.792µs
237 | ====================================================================
238 |
239 | With Input Batch Size: 6000 and Num Concurrent Writers: 1
240 |
241 | Native Channel Runner completed transfer in: 2.100209ms
242 | ZenQ Runner completed transfer in: 510.792µs
243 | ====================================================================
244 |
245 | With Input Batch Size: 6000000 and Num Concurrent Writers: 1
246 |
247 | Native Channel Runner completed transfer in: 1.241481917s
248 | ZenQ Runner completed transfer in: 226.068209ms
249 | ====================================================================
250 |
251 | With Input Batch Size: 600000000 and Num Concurrent Writers: 1
252 |
253 | Native Channel Runner completed transfer in: 1m55.074638875s
254 | ZenQ Runner completed transfer in: 22.582667917s
255 | ====================================================================
256 | ```
257 |
--------------------------------------------------------------------------------
/asm_386.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVL TLS, r
5 | #define g(r) 0(r)(TLS*1)
6 |
7 | TEXT ·GetG(SB),NOSPLIT,$0-4 // GetG: no stack-split check, 0-byte local frame, 4 bytes of arguments/results (one 32-bit result)
8 | get_tls(CX) // macro above: MOVL TLS, CX
9 | MOVL g(CX), AX // macro above: read the word at 0(CX)(TLS*1) into AX
10 | MOVL AX, gp+0(FP) // store AX in the result slot at 0(FP); NOTE(review): named gp here, ret in asm_ppc64.s/asm_s390x.s — confirm against the Go declaration
11 | RET
12 |
--------------------------------------------------------------------------------
/asm_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVQ TLS, r
5 | #define g(r) 0(r)(TLS*1)
6 |
7 | TEXT ·GetG(SB),NOSPLIT,$0-8 // GetG: no stack-split check, 0-byte local frame, 8 bytes of arguments/results (one 64-bit result)
8 | get_tls(CX) // macro above: MOVQ TLS, CX
9 | MOVQ g(CX), AX // macro above: read the quadword at 0(CX)(TLS*1) into AX
10 | MOVQ AX, gp+0(FP) // store AX in the result slot at 0(FP); NOTE(review): named gp here, ret in asm_ppc64.s/asm_s390x.s — confirm against the Go declaration
11 | RET
12 |
--------------------------------------------------------------------------------
/asm_arm.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVW g, r
5 |
6 | TEXT ·GetG(SB),NOSPLIT,$0-4 // GetG: no stack-split check, 0-byte local frame, 4 bytes of arguments/results (one 32-bit result)
7 | get_tls(R1) // macro above: MOVW g, R1 — copy the assembler's g register into R1
8 | MOVW R1, gp+0(FP) // store R1 in the result slot at 0(FP); NOTE(review): named gp here, ret in asm_ppc64.s/asm_s390x.s — confirm against the Go declaration
9 | RET
10 |
--------------------------------------------------------------------------------
/asm_arm64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVD g, r
5 |
6 | TEXT ·GetG(SB),NOSPLIT,$0-8 // GetG: no stack-split check, 0-byte local frame, 8 bytes of arguments/results (one 64-bit result)
7 | get_tls(R1) // macro above: MOVD g, R1 — copy the assembler's g register into R1
8 | MOVD R1, gp+0(FP) // store R1 in the result slot at 0(FP); NOTE(review): named gp here, ret in asm_ppc64.s/asm_s390x.s — confirm against the Go declaration
9 | RET
10 |
--------------------------------------------------------------------------------
/asm_mips.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVW g, r
5 |
6 | TEXT ·GetG(SB),NOSPLIT,$0-4 // GetG: no stack-split check, 0-byte local frame, 4 bytes of arguments/results (one 32-bit result)
7 | get_tls(R1) // macro above: MOVW g, R1 — 32-bit MIPS moves integer registers with MOVW; MOVD is the float64 move and rejects general-purpose registers
8 | MOVW R1, gp+0(FP) // store the 32-bit value in the 4-byte result slot at 0(FP)
9 | RET
10 |
--------------------------------------------------------------------------------
/asm_mips64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | #define get_tls(r) MOVV g, r
5 |
6 | TEXT ·GetG(SB),NOSPLIT,$0-8 // GetG: no stack-split check, 0-byte local frame, 8 bytes of arguments/results (one 64-bit result)
7 | get_tls(R1) // macro above: MOVV g, R1 — mips64 moves 64-bit integer registers with MOVV; MOVD is the float64 move and rejects general-purpose registers
8 | MOVV R1, gp+0(FP) // store the 64-bit value in the 8-byte result slot at 0(FP)
9 | RET
10 |
--------------------------------------------------------------------------------
/asm_ppc64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | TEXT ·GetG(SB), NOSPLIT, $0-8 // GetG: no stack-split check, 0-byte local frame, 8 bytes of arguments/results (one 64-bit result)
5 | MOVD g, R8 // copy the assembler's g register into R8
6 | MOVD R8, ret+0(FP) // store R8 in the result slot at 0(FP)
7 | RET
8 |
--------------------------------------------------------------------------------
/asm_s390x.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | #include "go_asm.h"
3 |
4 | TEXT ·GetG(SB), NOSPLIT, $0-8 // GetG: no stack-split check, 0-byte local frame, 8 bytes of arguments/results (one 64-bit result)
5 | MOVD g, R8 // copy the assembler's g register into R8
6 | MOVD R8, ret+0(FP) // store R8 in the result slot at 0(FP)
7 | RET
8 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/1.3.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 24.6µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 16.5µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 6.21ms ± 2%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.85ms ± 0%
6 | _Chan_NumWriters8_InputSize6000000-8 735ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 417ms ± 0%
8 | _Chan_NumWriters100_InputSize6000000-8 1.61s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 741ms ± 3%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 0%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 1.05s ± 1%
12 | _Chan_Million_Blocking_Writers-8 10.0s ±13%
13 | _ZenQ_Million_Blocking_Writers-8 7.01s ±44%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 106B ±88%
19 | _ZenQ_NumWriters3_InputSize60000-8 28.9B ±111%
20 | _Chan_NumWriters8_InputSize6000000-8 946B ±267%
21 | _ZenQ_NumWriters8_InputSize6000000-8 885B ±163%
22 | _Chan_NumWriters100_InputSize6000000-8 46.7kB ±25%
23 | _ZenQ_NumWriters100_InputSize6000000-8 16.2kB ±66%
24 | _Chan_NumWriters1000_InputSize7000000-8 484kB ±10%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 62.4kB ±82%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 95.9MB ± 0%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 3.07 ±193%
35 | _ZenQ_NumWriters8_InputSize6000000-8 2.07 ±142%
36 | _Chan_NumWriters100_InputSize6000000-8 166 ±15%
37 | _ZenQ_NumWriters100_InputSize6000000-8 53.5 ±50%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.74k ± 7%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 525 ±39%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/1.4.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 3%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.67ms ± 6%
6 | _Chan_NumWriters8_InputSize6000000-8 679ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 313ms ± 5%
8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 516ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 445ms ± 9%
12 | _Chan_Million_Blocking_Writers-8 10.8s ± 1%
13 | _ZenQ_Million_Blocking_Writers-8 11.0s ± 4%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 95.0B ±65%
19 | _ZenQ_NumWriters3_InputSize60000-8 34.8B ±127%
20 | _Chan_NumWriters8_InputSize6000000-8 878B ±272%
21 | _ZenQ_NumWriters8_InputSize6000000-8 671B ±222%
22 | _Chan_NumWriters100_InputSize6000000-8 46.0kB ±31%
23 | _ZenQ_NumWriters100_InputSize6000000-8 13.3kB ±100%
24 | _Chan_NumWriters1000_InputSize7000000-8 488kB ± 8%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 2.37kB ±210%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 95.5MB ± 0%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.77 ±225%
35 | _ZenQ_NumWriters8_InputSize6000000-8 1.40 ±186%
36 | _Chan_NumWriters100_InputSize6000000-8 164 ±20%
37 | _ZenQ_NumWriters100_InputSize6000000-8 31.8 ±100%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.79k ± 3%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 5.50 ±227%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 995k ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/1.5.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 18.1µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.52ms ± 3%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.67ms ± 6%
6 | _Chan_NumWriters8_InputSize6000000-8 680ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 308ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.56s ± 6%
9 | _ZenQ_NumWriters100_InputSize6000000-8 519ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 441ms ±11%
12 | _Chan_Million_Blocking_Writers-8 10.4s ± 3%
13 | _ZenQ_Million_Blocking_Writers-8 8.56s ±24%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 110B ±68%
19 | _ZenQ_NumWriters3_InputSize60000-8 23.6B ±107%
20 | _Chan_NumWriters8_InputSize6000000-8 585B ±234%
21 | _ZenQ_NumWriters8_InputSize6000000-8 411B ±299%
22 | _Chan_NumWriters100_InputSize6000000-8 44.7kB ±35%
23 | _ZenQ_NumWriters100_InputSize6000000-8 19.7kB ±78%
24 | _Chan_NumWriters1000_InputSize7000000-8 483kB ±10%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 1.13kB ±602%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 95.5MB ± 0%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.20 ±218%
35 | _ZenQ_NumWriters8_InputSize6000000-8 0.90 ±344%
36 | _Chan_NumWriters100_InputSize6000000-8 163 ±18%
37 | _ZenQ_NumWriters100_InputSize6000000-8 47.0 ±79%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.79k ± 6%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 2.00 ±550%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 995k ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.0.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 2%
4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 2%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.85ms ± 2%
6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 2%
7 | _ZenQ_NumWriters8_InputSize6000000-8 180ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4%
9 | _ZenQ_NumWriters100_InputSize6000000-8 206ms ± 6%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 300ms ± 7%
12 | _Chan_Million_Blocking_Writers-8 10.8s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 11.8s ± 5%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 114B ±53%
19 | _ZenQ_NumWriters3_InputSize60000-8 29.7B ±105%
20 | _Chan_NumWriters8_InputSize6000000-8 547B ±362%
21 | _ZenQ_NumWriters8_InputSize6000000-8 941B ±114%
22 | _Chan_NumWriters100_InputSize6000000-8 45.8kB ±33%
23 | _ZenQ_NumWriters100_InputSize6000000-8 6.86kB ±73%
24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 5%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 42.6kB ±26%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 48.0MB ±12%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 1.63 ±268%
35 | _ZenQ_NumWriters8_InputSize6000000-8 3.87 ±107%
36 | _Chan_NumWriters100_InputSize6000000-8 162 ±29%
37 | _ZenQ_NumWriters100_InputSize6000000-8 17.3 ±74%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 3%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 28.1 ±46%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.1.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.6µs ± 0%
4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 4%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 1%
6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 2%
7 | _ZenQ_NumWriters8_InputSize6000000-8 150ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 4%
9 | _ZenQ_NumWriters100_InputSize6000000-8 162ms ± 9%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 313ms ±19%
12 | _Chan_Million_Blocking_Writers-8 10.5s ± 3%
13 | _ZenQ_Million_Blocking_Writers-8 10.4s ± 6%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 104B ±54%
19 | _ZenQ_NumWriters3_InputSize60000-8 22.2B ±91%
20 | _Chan_NumWriters8_InputSize6000000-8 813B ±307%
21 | _ZenQ_NumWriters8_InputSize6000000-8 690B ±115%
22 | _Chan_NumWriters100_InputSize6000000-8 42.6kB ±36%
23 | _ZenQ_NumWriters100_InputSize6000000-8 5.92kB ±118%
24 | _Chan_NumWriters1000_InputSize7000000-8 475kB ±11%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 41.6kB ±34%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 47.4MB ± 9%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.77 ±225%
35 | _ZenQ_NumWriters8_InputSize6000000-8 2.63 ±52%
36 | _Chan_NumWriters100_InputSize6000000-8 157 ±17%
37 | _ZenQ_NumWriters100_InputSize6000000-8 14.3 ±116%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 30.3 ±42%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.2.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 1%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 4%
6 | _Chan_NumWriters8_InputSize6000000-8 686ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 153ms ± 3%
8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 166ms ± 7%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 318ms ±12%
12 | _Chan_Million_Blocking_Writers-8 10.8s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 10.3s ± 5%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 97.2B ±60%
19 | _ZenQ_NumWriters3_InputSize60000-8 28.5B ±121%
20 | _Chan_NumWriters8_InputSize6000000-8 922B ±297%
21 | _ZenQ_NumWriters8_InputSize6000000-8 860B ±87%
22 | _Chan_NumWriters100_InputSize6000000-8 43.8kB ±39%
23 | _ZenQ_NumWriters100_InputSize6000000-8 6.18kB ±69%
24 | _Chan_NumWriters1000_InputSize7000000-8 472kB ±11%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 38.9kB ±47%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 46.3MB ±10%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.83 ±182%
35 | _ZenQ_NumWriters8_InputSize6000000-8 3.25 ±54%
36 | _Chan_NumWriters100_InputSize6000000-8 161 ±24%
37 | _ZenQ_NumWriters100_InputSize6000000-8 15.0 ±67%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 26.2 ±37%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.2.1.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 38.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 1%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 1%
6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 254ms ± 3%
8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 298ms ± 1%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 409ms ± 1%
12 | _Chan_Million_Blocking_Writers-8 10.5s ± 1%
13 | _ZenQ_Million_Blocking_Writers-8 1.99s ±16%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 17.5B ±163%
19 | _ZenQ_NumWriters3_InputSize60000-8 13.4B ±348%
20 | _Chan_NumWriters8_InputSize6000000-8 123B ±148%
21 | _ZenQ_NumWriters8_InputSize6000000-8 545B ±56%
22 | _Chan_NumWriters100_InputSize6000000-8 36.1kB ±49%
23 | _ZenQ_NumWriters100_InputSize6000000-8 9.32kB ±32%
24 | _Chan_NumWriters1000_InputSize7000000-8 479kB ± 8%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 89.3kB ± 5%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 1.10 ±173%
35 | _ZenQ_NumWriters8_InputSize6000000-8 3.19 ±57%
36 | _Chan_NumWriters100_InputSize6000000-8 140 ±32%
37 | _ZenQ_NumWriters100_InputSize6000000-8 21.8 ±33%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 46.5 ±27%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.3.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 38.3µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.54ms ± 6%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 2%
6 | _Chan_NumWriters8_InputSize6000000-8 680ms ± 3%
7 | _ZenQ_NumWriters8_InputSize6000000-8 254ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 5%
9 | _ZenQ_NumWriters100_InputSize6000000-8 292ms ± 3%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 408ms ± 3%
12 | _Chan_Million_Blocking_Writers-8 10.6s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 1.98s ±26%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 17.4B ±267%
19 | _ZenQ_NumWriters3_InputSize60000-8 15.6B ±291%
20 | _Chan_NumWriters8_InputSize6000000-8 132B ±118%
21 | _ZenQ_NumWriters8_InputSize6000000-8 248B ±227%
22 | _Chan_NumWriters100_InputSize6000000-8 35.7kB ±45%
23 | _ZenQ_NumWriters100_InputSize6000000-8 2.74kB ±181%
24 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 7%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 949B ±265%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 5%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 1.30 ±131%
35 | _ZenQ_NumWriters8_InputSize6000000-8 1.57 ±219%
36 | _Chan_NumWriters100_InputSize6000000-8 139 ±33%
37 | _ZenQ_NumWriters100_InputSize6000000-8 6.14 ±193%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 2.70 ±344%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.4.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 2%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.53ms ± 5%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.61ms ± 3%
6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 247ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 1%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 410ms ± 2%
12 | _Chan_Million_Blocking_Writers-8 10.8s ± 1%
13 | _ZenQ_Million_Blocking_Writers-8 2.54s ±11%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 119B ±58%
19 | _ZenQ_NumWriters3_InputSize60000-8 29.3B ±100%
20 | _Chan_NumWriters8_InputSize6000000-8 647B ±306%
21 | _ZenQ_NumWriters8_InputSize6000000-8 467B ±309%
22 | _Chan_NumWriters100_InputSize6000000-8 42.1kB ±31%
23 | _ZenQ_NumWriters100_InputSize6000000-8 2.19kB ±213%
24 | _Chan_NumWriters1000_InputSize7000000-8 482kB ± 7%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 1.42kB ±252%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.13 ±181%
35 | _ZenQ_NumWriters8_InputSize6000000-8 0.87 ±362%
36 | _Chan_NumWriters100_InputSize6000000-8 157 ±29%
37 | _ZenQ_NumWriters100_InputSize6000000-8 5.47 ±284%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.78k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 3.27 ±267%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.4.0_alternate.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.8µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 2%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.65ms ± 5%
6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 251ms ± 5%
8 | _Chan_NumWriters100_InputSize6000000-8 1.57s ± 6%
9 | _ZenQ_NumWriters100_InputSize6000000-8 300ms ± 4%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 411ms ± 3%
12 | _Chan_Million_Blocking_Writers-8 10.6s ± 1%
13 | _ZenQ_Million_Blocking_Writers-8 1.99s ±19%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 108B ±64%
19 | _ZenQ_NumWriters3_InputSize60000-8 27.5B ±111%
20 | _Chan_NumWriters8_InputSize6000000-8 818B ±248%
21 | _ZenQ_NumWriters8_InputSize6000000-8 545B ±175%
22 | _Chan_NumWriters100_InputSize6000000-8 44.9kB ±34%
23 | _ZenQ_NumWriters100_InputSize6000000-8 2.15kB ±210%
24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 8%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 771B ±354%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.37 ±238%
35 | _ZenQ_NumWriters8_InputSize6000000-8 1.66 ±202%
36 | _Chan_NumWriters100_InputSize6000000-8 162 ±19%
37 | _ZenQ_NumWriters100_InputSize6000000-8 4.87 ±229%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 1.73 ±362%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.4.0_on_battery.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 0%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.42ms ± 7%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.60ms ± 1%
6 | _Chan_NumWriters8_InputSize6000000-8 687ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 243ms ± 5%
8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 2%
9 | _ZenQ_NumWriters100_InputSize6000000-8 295ms ± 3%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 404ms ± 3%
12 | _Chan_Million_Blocking_Writers-8 8.02s ±33%
13 | _ZenQ_Million_Blocking_Writers-8 1.71s ±17%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 101B ±65%
19 | _ZenQ_NumWriters3_InputSize60000-8 28.1B ±113%
20 | _Chan_NumWriters8_InputSize6000000-8 891B ±191%
21 | _ZenQ_NumWriters8_InputSize6000000-8 664B ±163%
22 | _Chan_NumWriters100_InputSize6000000-8 43.1kB ±40%
23 | _ZenQ_NumWriters100_InputSize6000000-8 2.75kB ±154%
24 | _Chan_NumWriters1000_InputSize7000000-8 483kB ± 4%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 554B ±626%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 124MB ± 3%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.83 ±147%
35 | _ZenQ_NumWriters8_InputSize6000000-8 1.48 ±237%
36 | _Chan_NumWriters100_InputSize6000000-8 156 ±27%
37 | _ZenQ_NumWriters100_InputSize6000000-8 6.23 ±157%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.78k ± 4%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 1.54 ±550%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.5.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 2%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.52ms ± 3%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.64ms ± 2%
6 | _Chan_NumWriters8_InputSize6000000-8 686ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 244ms ± 5%
8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 2%
9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 405ms ± 4%
12 | _Chan_Million_Blocking_Writers-8 10.6s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 1.92s ±20%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 101B ±75%
19 | _ZenQ_NumWriters3_InputSize60000-8 19.0B ±132%
20 | _Chan_NumWriters8_InputSize6000000-8 672B ±317%
21 | _ZenQ_NumWriters8_InputSize6000000-8 1.05kB ±94%
22 | _Chan_NumWriters100_InputSize6000000-8 43.3kB ±30%
23 | _ZenQ_NumWriters100_InputSize6000000-8 11.7kB ±41%
24 | _Chan_NumWriters1000_InputSize7000000-8 475kB ± 8%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 88.8kB ± 2%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 4%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.37 ±196%
35 | _ZenQ_NumWriters8_InputSize6000000-8 4.77 ±89%
36 | _Chan_NumWriters100_InputSize6000000-8 162 ±19%
37 | _ZenQ_NumWriters100_InputSize6000000-8 27.6 ±41%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 4%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 45.0 ±18%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.5.1.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 0%
4 | _Chan_NumWriters3_InputSize60000-8 5.54ms ± 5%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 2%
6 | _Chan_NumWriters8_InputSize6000000-8 687ms ± 2%
7 | _ZenQ_NumWriters8_InputSize6000000-8 243ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4%
9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 409ms ± 2%
12 | _Chan_Million_Blocking_Writers-8 10.4s ± 4%
13 | _ZenQ_Million_Blocking_Writers-8 1.83s ±10%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 117B ±63%
19 | _ZenQ_NumWriters3_InputSize60000-8 22.1B ±122%
20 | _Chan_NumWriters8_InputSize6000000-8 1.01kB ±196%
21 | _ZenQ_NumWriters8_InputSize6000000-8 1.12kB ±89%
22 | _Chan_NumWriters100_InputSize6000000-8 42.6kB ±37%
23 | _ZenQ_NumWriters100_InputSize6000000-8 11.3kB ±28%
24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 7%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 90.5kB ± 6%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 4%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 3.43 ±162%
35 | _ZenQ_NumWriters8_InputSize6000000-8 5.23 ±53%
36 | _Chan_NumWriters100_InputSize6000000-8 158 ±20%
37 | _ZenQ_NumWriters100_InputSize6000000-8 26.3 ±29%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 2%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 48.3 ±28%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.5.2.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.49ms ± 3%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 2%
6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 1%
7 | _ZenQ_NumWriters8_InputSize6000000-8 244ms ± 4%
8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1%
9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 408ms ± 1%
12 | _Chan_Million_Blocking_Writers-8 10.5s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 1.94s ±12%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 106B ±71%
19 | _ZenQ_NumWriters3_InputSize60000-8 26.5B ±119%
20 | _Chan_NumWriters8_InputSize6000000-8 634B ±339%
21 | _ZenQ_NumWriters8_InputSize6000000-8 1.10kB ±81%
22 | _Chan_NumWriters100_InputSize6000000-8 43.3kB ±42%
23 | _ZenQ_NumWriters100_InputSize6000000-8 10.4kB ±43%
24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 6%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 89.3kB ± 4%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 3%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.03 ±195%
35 | _ZenQ_NumWriters8_InputSize6000000-8 5.17 ±74%
36 | _Chan_NumWriters100_InputSize6000000-8 157 ±27%
37 | _ZenQ_NumWriters100_InputSize6000000-8 24.1 ±45%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 4%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 45.2 ±17%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.7.0.txt:
--------------------------------------------------------------------------------
1 | name time/op
2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1%
3 | _ZenQ_NumWriters1_InputSize600-8 18.0µs ± 1%
4 | _Chan_NumWriters3_InputSize60000-8 5.35ms ± 3%
5 | _ZenQ_NumWriters3_InputSize60000-8 2.39ms ± 5%
6 | _Chan_NumWriters8_InputSize6000000-8 674ms ± 2%
7 | _ZenQ_NumWriters8_InputSize6000000-8 236ms ± 2%
8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 6%
9 | _ZenQ_NumWriters100_InputSize6000000-8 312ms ± 2%
10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1%
11 | _ZenQ_NumWriters1000_InputSize7000000-8 397ms ± 4%
12 | _Chan_Million_Blocking_Writers-8 11.0s ± 2%
13 | _ZenQ_Million_Blocking_Writers-8 2.59s ±10%
14 |
15 | name alloc/op
16 | _Chan_NumWriters1_InputSize600-8 0.00B
17 | _ZenQ_NumWriters1_InputSize600-8 0.00B
18 | _Chan_NumWriters3_InputSize60000-8 114B ±82%
19 | _ZenQ_NumWriters3_InputSize60000-8 23.6B ±112%
20 | _Chan_NumWriters8_InputSize6000000-8 733B ±260%
21 | _ZenQ_NumWriters8_InputSize6000000-8 1.02kB ±121%
22 | _Chan_NumWriters100_InputSize6000000-8 43.7kB ±40%
23 | _ZenQ_NumWriters100_InputSize6000000-8 11.2kB ±54%
24 | _Chan_NumWriters1000_InputSize7000000-8 474kB ± 7%
25 | _ZenQ_NumWriters1000_InputSize7000000-8 90.0kB ± 6%
26 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
27 | _ZenQ_Million_Blocking_Writers-8 121MB ± 4%
28 |
29 | name allocs/op
30 | _Chan_NumWriters1_InputSize600-8 0.00
31 | _ZenQ_NumWriters1_InputSize600-8 0.00
32 | _Chan_NumWriters3_InputSize60000-8 0.00
33 | _ZenQ_NumWriters3_InputSize60000-8 0.00
34 | _Chan_NumWriters8_InputSize6000000-8 2.18 ±175%
35 | _ZenQ_NumWriters8_InputSize6000000-8 5.13 ±56%
36 | _Chan_NumWriters100_InputSize6000000-8 157 ±30%
37 | _ZenQ_NumWriters100_InputSize6000000-8 26.3 ±56%
38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 4%
39 | _ZenQ_NumWriters1000_InputSize7000000-8 47.1 ±29%
40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
42 |
--------------------------------------------------------------------------------
/bench_reports/darwin_arm64_m1/2.7.1.txt:
--------------------------------------------------------------------------------
1 | 'c. alphadose@ReiEki.local
2 | ,xNMM. ----------------------
3 | .OMMMMo OS: macOS 12.3 21E230 arm64
4 | OMMM0, Host: MacBookAir10,1
5 | .;loddo:' loolloddol;. Kernel: 21.4.0
6 | cKMMMMMMMMMMNWMMMMMMMMMM0: Uptime: 6 hours, 27 mins
7 | .KMMMMMMMMMMMMMMMMMMMMMMMWd. Packages: 98 (brew)
8 | XMMMMMMMMMMMMMMMMMMMMMMMX. Shell: zsh 5.8
9 | ;MMMMMMMMMMMMMMMMMMMMMMMM: Resolution: 1440x900
10 | :MMMMMMMMMMMMMMMMMMMMMMMM: DE: Aqua
11 | .MMMMMMMMMMMMMMMMMMMMMMMMX. WM: Rectangle
12 | kMMMMMMMMMMMMMMMMMMMMMMMMWd. Terminal: iTerm2
13 | .XMMMMMMMMMMMMMMMMMMMMMMMMMMk Terminal Font: FiraCodeNerdFontComplete-Medium 16 (normal) / FiraCodeNerdFontComplete-Medium 14 (non-ascii)
14 | .XMMMMMMMMMMMMMMMMMMMMMMMMK. CPU: Apple M1
15 | kMMMMMMMMMMMMMMMMMMMMMMd GPU: Apple M1
16 | ;KMMMMMMMWXXWMMMMMMMk. Memory: 1345MiB / 8192MiB
17 | .cooc,. .,coo:.
18 |
19 |
20 |
21 | name time/op
22 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1%
23 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1%
24 | _Chan_NumWriters3_InputSize60000-8 5.27ms ± 3%
25 | _ZenQ_NumWriters3_InputSize60000-8 2.36ms ± 2%
26 | _Chan_NumWriters8_InputSize6000000-8 671ms ± 2%
27 | _ZenQ_NumWriters8_InputSize6000000-8 234ms ± 6%
28 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4%
29 | _ZenQ_NumWriters100_InputSize6000000-8 309ms ± 2%
30 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0%
31 | _ZenQ_NumWriters1000_InputSize7000000-8 389ms ± 4%
32 | _Chan_Million_Blocking_Writers-8 10.4s ± 2%
33 | _ZenQ_Million_Blocking_Writers-8 2.32s ±21%
34 |
35 | name alloc/op
36 | _Chan_NumWriters1_InputSize600-8 0.00B
37 | _ZenQ_NumWriters1_InputSize600-8 0.00B
38 | _Chan_NumWriters3_InputSize60000-8 109B ±68%
39 | _ZenQ_NumWriters3_InputSize60000-8 24.6B ±107%
40 | _Chan_NumWriters8_InputSize6000000-8 802B ±241%
41 | _ZenQ_NumWriters8_InputSize6000000-8 1.18kB ±100%
42 | _Chan_NumWriters100_InputSize6000000-8 44.2kB ±41%
43 | _ZenQ_NumWriters100_InputSize6000000-8 10.7kB ±38%
44 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 8%
45 | _ZenQ_NumWriters1000_InputSize7000000-8 90.6kB ±10%
46 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
47 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3%
48 |
49 | name allocs/op
50 | _Chan_NumWriters1_InputSize600-8 0.00
51 | _ZenQ_NumWriters1_InputSize600-8 0.00
52 | _Chan_NumWriters3_InputSize60000-8 0.00
53 | _ZenQ_NumWriters3_InputSize60000-8 0.00
54 | _Chan_NumWriters8_InputSize6000000-8 2.76 ±190%
55 | _ZenQ_NumWriters8_InputSize6000000-8 5.47 ±83%
56 | _Chan_NumWriters100_InputSize6000000-8 159 ±26%
57 | _ZenQ_NumWriters100_InputSize6000000-8 25.1 ±39%
58 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 6%
59 | _ZenQ_NumWriters1000_InputSize7000000-8 47.3 ±31%
60 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
61 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
62 |
--------------------------------------------------------------------------------
/bench_reports/raspian_arm32.txt:
--------------------------------------------------------------------------------
1 | `.::///+:/-. --///+//-:`` alphadose@neverwinter
2 | `+oooooooooooo: `+oooooooooooo: -------------------
3 | /oooo++//ooooo: ooooo+//+ooooo. OS: Raspbian GNU/Linux 11 (bullseye) armv7l
4 | `+ooooooo:-:oo- +o+::/ooooooo: Host: Raspberry Pi 4 Model B Rev 1.5
5 | `:oooooooo+`` `.oooooooo+- Kernel: 5.15.32-v7l+
6 | `:++ooo/. :+ooo+/.` Uptime: 1 hour, 58 mins
7 | ...` `.----.` ``.. Packages: 569 (dpkg)
8 | .::::-``:::::::::.`-:::-` Shell: bash 5.1.4
9 | -:::-` .:::::::-` `-:::- Terminal: /dev/pts/0
10 | `::. `.--.` `` `.---.``.::` CPU: BCM2711 (4) @ 1.800GHz
11 | .::::::::` -::::::::` ` Memory: 68MiB / 3838MiB
12 | .::` .:::::::::- `::::::::::``::.
13 | -:::` ::::::::::. ::::::::::.`:::-
14 | :::: -::::::::. `-:::::::: ::::
15 | -::- .-:::-.``....``.-::-. -::-
16 | .. `` .::::::::. `..`..
17 | -:::-` -::::::::::` .:::::`
18 | :::::::` -::::::::::` :::::::.
19 | .::::::: -::::::::. ::::::::
20 | `-:::::` ..--.` ::::::.
21 | `...` `...--..` `...`
22 | .::::::::::
23 | `.-::::-`
24 |
25 |
26 | goos: linux
27 | goarch: arm
28 | name time/op
29 | _Chan_NumWriters1_InputSize600-4 230µs ± 4%
30 | _ZenQ_NumWriters1_InputSize600-4 186µs ± 5%
31 | _Chan_NumWriters3_InputSize60000-4 28.2ms ± 3%
32 | _ZenQ_NumWriters3_InputSize60000-4 12.8ms ± 0%
33 | _Chan_NumWriters8_InputSize6000000-4 4.14s ±10%
34 | _ZenQ_NumWriters8_InputSize6000000-4 1.32s ± 1%
35 | _Chan_NumWriters100_InputSize6000000-4 5.97s ± 5%
36 | _ZenQ_NumWriters100_InputSize6000000-4 1.48s ± 5%
37 | _Chan_NumWriters1000_InputSize7000000-4 7.23s ± 6%
38 | _ZenQ_NumWriters1000_InputSize7000000-4 2.09s ± 4%
39 | _Chan_Million_Blocking_Writers-4 20.3s ± 2%
40 | _ZenQ_Million_Blocking_Writers-4 6.96s ± 4%
41 |
42 | name alloc/op
43 | _Chan_NumWriters1_InputSize600-4 0.00B
44 | _ZenQ_NumWriters1_InputSize600-4 0.00B
45 | _Chan_NumWriters3_InputSize60000-4 227B ±27%
46 | _ZenQ_NumWriters3_InputSize60000-4 77.9B ±91%
47 | _Chan_NumWriters8_InputSize6000000-4 499B ±189%
48 | _ZenQ_NumWriters8_InputSize6000000-4 1.49kB ± 4%
49 | _Chan_NumWriters100_InputSize6000000-4 27.5kB ±19%
50 | _ZenQ_NumWriters100_InputSize6000000-4 27.7kB ±42%
51 | _Chan_NumWriters1000_InputSize7000000-4 290kB ± 5%
52 | _ZenQ_NumWriters1000_InputSize7000000-4 135kB ± 8%
53 | _Chan_Million_Blocking_Writers-4 325MB ± 0%
54 | _ZenQ_Million_Blocking_Writers-4 76.2MB ± 3%
55 |
56 | name allocs/op
57 | _Chan_NumWriters1_InputSize600-4 0.00
58 | _ZenQ_NumWriters1_InputSize600-4 0.00
59 | _Chan_NumWriters3_InputSize60000-4 1.00 ± 0%
60 | _ZenQ_NumWriters3_InputSize60000-4 0.00
61 | _Chan_NumWriters8_InputSize6000000-4 4.30 ±109%
62 | _ZenQ_NumWriters8_InputSize6000000-4 19.2 ± 9%
63 | _Chan_NumWriters100_InputSize6000000-4 171 ±13%
64 | _ZenQ_NumWriters100_InputSize6000000-4 194 ±25%
65 | _Chan_NumWriters1000_InputSize7000000-4 1.84k ± 3%
66 | _ZenQ_NumWriters1000_InputSize7000000-4 1.09k ± 4%
67 | _Chan_Million_Blocking_Writers-4 2.00M ± 0%
68 | _ZenQ_Million_Blocking_Writers-4 1.00M ± 0%
69 |
--------------------------------------------------------------------------------
/bench_reports/ubuntu_amd64_16core.txt:
--------------------------------------------------------------------------------
1 | goos: linux
2 | goarch: amd64
3 | cpu: AMD Ryzen 7 5800H with Radeon Graphics
4 |
5 | name time/op
6 | _Chan_NumWriters1_InputSize600-16 23.4µs ± 4%
7 | _ZenQ_NumWriters1_InputSize600-16 33.1µs ± 4%
8 | _Chan_NumWriters3_InputSize60000-16 2.59ms ± 3%
9 | _ZenQ_NumWriters3_InputSize60000-16 1.79ms ± 1%
10 | _Chan_NumWriters8_InputSize6000000-16 334ms ± 6%
11 | _ZenQ_NumWriters8_InputSize6000000-16 162ms ± 4%
12 | _Chan_NumWriters100_InputSize6000000-16 515ms ± 6%
13 | _ZenQ_NumWriters100_InputSize6000000-16 170ms ± 3%
14 | _Chan_NumWriters1000_InputSize7000000-16 1.76s ± 3%
15 | _ZenQ_NumWriters1000_InputSize7000000-16 273ms ± 2%
16 | _Chan_Million_Blocking_Writers-16 4.52s ± 5%
17 | _ZenQ_Million_Blocking_Writers-16 1.27s ±14%
18 |
19 | name alloc/op
20 | _Chan_NumWriters1_InputSize600-16 0.00B
21 | _ZenQ_NumWriters1_InputSize600-16 0.00B
22 | _Chan_NumWriters3_InputSize60000-16 91.7B ±51%
23 | _ZenQ_NumWriters3_InputSize60000-16 4.00B ± 0%
24 | _Chan_NumWriters8_InputSize6000000-16 487B ±275%
25 | _ZenQ_NumWriters8_InputSize6000000-16 879B ±111%
26 | _Chan_NumWriters100_InputSize6000000-16 30.0kB ±47%
27 | _ZenQ_NumWriters100_InputSize6000000-16 23.2kB ±54%
28 | _Chan_NumWriters1000_InputSize7000000-16 463kB ±11%
29 | _ZenQ_NumWriters1000_InputSize7000000-16 129kB ±10%
30 | _Chan_Million_Blocking_Writers-16 553MB ± 0%
31 | _ZenQ_Million_Blocking_Writers-16 124MB ± 3%
32 |
33 | name allocs/op
34 | _Chan_NumWriters1_InputSize600-16 0.00
35 | _ZenQ_NumWriters1_InputSize600-16 0.00
36 | _Chan_NumWriters3_InputSize60000-16 0.00
37 | _ZenQ_NumWriters3_InputSize60000-16 0.00
38 | _Chan_NumWriters8_InputSize6000000-16 1.57 ±219%
39 | _ZenQ_NumWriters8_InputSize6000000-16 3.48 ±44%
40 | _Chan_NumWriters100_InputSize6000000-16 87.8 ±40%
41 | _ZenQ_NumWriters100_InputSize6000000-16 54.3 ±54%
42 | _Chan_NumWriters1000_InputSize7000000-16 1.67k ± 9%
43 | _ZenQ_NumWriters1000_InputSize7000000-16 63.5 ±10%
44 | _Chan_Million_Blocking_Writers-16 2.00M ± 0%
45 | _ZenQ_Million_Blocking_Writers-16 1.00M ± 0%
46 |
--------------------------------------------------------------------------------
/bench_reports/ubuntu_intel_xeon.txt:
--------------------------------------------------------------------------------
1 | .-/+oossssoo+/-. manas@dell-Precision-Tower-5810
2 | `:+ssssssssssssssssss+:` -------------------------------
3 | -+ssssssssssssssssssyyssss+- OS: Ubuntu 20.04.3 LTS x86_64
4 | .ossssssssssssssssssdMMMNysssso. Host: Precision Tower 5810
5 | /ssssssssssshdmmNNmmyNMMMMhssssss/ Kernel: 5.11.0-27-generic
6 | +ssssssssshmydMMMMMMMNddddyssssssss+ Uptime: 2 hours, 49 mins
7 | /sssssssshNMMMyhhyyyyhmNMMMNhssssssss/ Packages: 3061 (dpkg), 9 (snap)
8 | .ssssssssdMMMNhsssssssssshNMMMdssssssss. Shell: zsh 5.8
9 | +sssshhhyNMMNyssssssssssssyNMMMysssssss+ Resolution: 1920x1080
10 | ossyNMMMNyMMhsssssssssssssshmmmhssssssso DE: Plasma
11 | ossyNMMMNyMMhsssssssssssssshmmmhssssssso WM: KWin
12 | +sssshhhyNMMNyssssssssssssyNMMMysssssss+ Theme: Breeze [Plasma], Breeze [GTK2/3]
13 | .ssssssssdMMMNhsssssssssshNMMMdssssssss. Icons: breeze [Plasma], breeze [GTK2/3]
14 | /sssssssshNMMMyhhyyyyhdNMMMNhssssssss/ Terminal: terminator
15 | +sssssssssdmydMMMMMMMMddddyssssssss+ CPU: Intel Xeon E5-1620 v4 (8) @ 3.800GHz
16 | /ssssssssssshdmNNNNmyNMMMMhssssss/ GPU: NVIDIA GeForce GTX 1080
17 | .ossssssssssssssssssdMMMNysssso. Memory: 2875MiB / 64244MiB
18 | -+sssssssssssssssssyyyssss+-
19 | `:+ssssssssssssssssss+:`
20 | .-/+oossssoo+/-.
21 |
22 | name time/op
23 | _Chan_NumWriters1_InputSize600-8 90.8µs ± 7%
24 | _ZenQ_NumWriters1_InputSize600-8 52.8µs ±15%
25 | _Chan_NumWriters3_InputSize60000-8 12.2ms ± 5%
26 | _ZenQ_NumWriters3_InputSize60000-8 5.14ms ± 3%
27 | _Chan_NumWriters8_InputSize6000000-8 1.48s ± 9%
28 | _ZenQ_NumWriters8_InputSize6000000-8 429ms ± 3%
29 | _Chan_NumWriters100_InputSize6000000-8 1.78s ± 7%
30 | _ZenQ_NumWriters100_InputSize6000000-8 453ms ± 4%
31 | _Chan_NumWriters1000_InputSize7000000-8 3.95s ± 5%
32 | _ZenQ_NumWriters1000_InputSize7000000-8 545ms ± 4%
33 | _Chan_Million_Blocking_Writers-8 7.49s ± 1%
34 | _ZenQ_Million_Blocking_Writers-8 2.04s ± 5%
35 |
36 | name alloc/op
37 | _Chan_NumWriters1_InputSize600-8 0.00B
38 | _ZenQ_NumWriters1_InputSize600-8 0.00B
39 | _Chan_NumWriters3_InputSize60000-8 202B ±81%
40 | _ZenQ_NumWriters3_InputSize60000-8 94.4B ±64%
41 | _Chan_NumWriters8_InputSize6000000-8 333B ±104%
42 | _ZenQ_NumWriters8_InputSize6000000-8 1.62kB ±124%
43 | _Chan_NumWriters100_InputSize6000000-8 41.6kB ±28%
44 | _ZenQ_NumWriters100_InputSize6000000-8 15.4kB ±46%
45 | _Chan_NumWriters1000_InputSize7000000-8 485kB ± 8%
46 | _ZenQ_NumWriters1000_InputSize7000000-8 136kB ± 8%
47 | _Chan_Million_Blocking_Writers-8 553MB ± 0%
48 | _ZenQ_Million_Blocking_Writers-8 123MB ± 3%
49 |
50 | name allocs/op
51 | _Chan_NumWriters1_InputSize600-8 0.00
52 | _ZenQ_NumWriters1_InputSize600-8 0.00
53 | _Chan_NumWriters3_InputSize60000-8 0.00
54 | _ZenQ_NumWriters3_InputSize60000-8 0.00
55 | _Chan_NumWriters8_InputSize6000000-8 3.59 ±123%
56 | _ZenQ_NumWriters8_InputSize6000000-8 8.24 ±46%
57 | _Chan_NumWriters100_InputSize6000000-8 156 ±19%
58 | _ZenQ_NumWriters100_InputSize6000000-8 36.2 ±46%
59 | _Chan_NumWriters1000_InputSize7000000-8 1.80k ± 4%
60 | _ZenQ_NumWriters1000_InputSize7000000-8 76.4 ±31%
61 | _Chan_Million_Blocking_Writers-8 2.00M ± 0%
62 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0%
63 |
--------------------------------------------------------------------------------
/bench_reports/windows_amd64_16core.txt:
--------------------------------------------------------------------------------
1 | goos: windows
2 | goarch: amd64
3 | cpu: AMD Ryzen 7 5800H with Radeon Graphics
4 |
5 | name time/op
6 | _Chan_NumWriters1_InputSize600-16 24.5µs ± 5%
7 | _ZenQ_NumWriters1_InputSize600-16 17.7µs ± 2%
8 | _Chan_NumWriters3_InputSize60000-16 4.75ms ± 3%
9 | _ZenQ_NumWriters3_InputSize60000-16 1.86ms ± 1%
10 | _Chan_NumWriters8_InputSize6000000-16 800ms ± 5%
11 | _ZenQ_NumWriters8_InputSize6000000-16 150ms ± 1%
12 | _Chan_NumWriters100_InputSize6000000-16 1.66s ± 1%
13 | _ZenQ_NumWriters100_InputSize6000000-16 160ms ± 1%
14 | _Chan_NumWriters1000_InputSize7000000-16 1.95s ± 1%
15 | _ZenQ_NumWriters1000_InputSize7000000-16 269ms ± 1%
16 | _Chan_Million_Blocking_Writers-16 5.79s ± 2%
17 | _ZenQ_Million_Blocking_Writers-16 1.37s ± 6%
18 |
19 | name alloc/op
20 | _Chan_NumWriters1_InputSize600-16 0.00B
21 | _ZenQ_NumWriters1_InputSize600-16 0.00B
22 | _Chan_NumWriters3_InputSize60000-16 150B ±57%
23 | _ZenQ_NumWriters3_InputSize60000-16 20.6B ±201%
24 | _Chan_NumWriters8_InputSize6000000-16 472B ±283%
25 | _ZenQ_NumWriters8_InputSize6000000-16 1.05kB ±58%
26 | _Chan_NumWriters100_InputSize6000000-16 43.7kB ±38%
27 | _ZenQ_NumWriters100_InputSize6000000-16 29.7kB ±17%
28 | _Chan_NumWriters1000_InputSize7000000-16 484kB ± 7%
29 | _ZenQ_NumWriters1000_InputSize7000000-16 120kB ±14%
30 | _Chan_Million_Blocking_Writers-16 553MB ± 0%
31 | _ZenQ_Million_Blocking_Writers-16 128MB ± 4%
32 |
33 | name allocs/op
34 | _Chan_NumWriters1_InputSize600-16 0.00
35 | _ZenQ_NumWriters1_InputSize600-16 0.00
36 | _Chan_NumWriters3_InputSize60000-16 0.00
37 | _ZenQ_NumWriters3_InputSize60000-16 0.00
38 | _Chan_NumWriters8_InputSize6000000-16 2.00 ±150%
39 | _ZenQ_NumWriters8_InputSize6000000-16 3.90 ±28%
40 | _Chan_NumWriters100_InputSize6000000-16 148 ±34%
41 | _ZenQ_NumWriters100_InputSize6000000-16 68.3 ±24%
42 | _Chan_NumWriters1000_InputSize7000000-16 1.79k ± 5%
43 | _ZenQ_NumWriters1000_InputSize7000000-16 62.3 ±36%
44 | _Chan_Million_Blocking_Writers-16 2.00M ± 0%
45 | _ZenQ_Million_Blocking_Writers-16 1.00M ± 0%
46 |
--------------------------------------------------------------------------------
/benchmarks/cgo_test/cgobench.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "runtime"
6 | "sync"
7 | "unsafe"
8 | _ "unsafe"
9 | )
10 |
11 | /*
12 | #include <stdlib.h>
13 | */
14 | import "C"
15 |
// noescape is a body-less declaration bound to the runtime symbol
// runtime.noescape by the go:linkname directive below; its behaviour is
// whatever that runtime function implements.
// NOTE(review): presumably hides p from escape analysis, as the name
// suggests — confirm against the runtime source.
//
//go:linkname noescape runtime.noescape
func noescape(p unsafe.Pointer) unsafe.Pointer
18 |
// memmove is a body-less declaration bound to runtime.memmove by the
// go:linkname directive below.
// NOTE(review): presumably copies n bytes from `from` to `to` — confirm
// against the runtime source.
//
//go:linkname memmove runtime.memmove
func memmove(to, from unsafe.Pointer, n uintptr)
21 |
// memclrNoHeapPointers is a body-less declaration bound to
// runtime.memclrNoHeapPointers by the go:linkname directive below.
// NOTE(review): presumably zeroes n bytes starting at ptr, for memory that
// holds no heap pointers — confirm against the runtime source.
//
//go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers
func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
24 |
// mallocgc is a body-less declaration bound to runtime.mallocgc by the
// go:linkname directive below. It takes a byte size, an opaque type
// descriptor pointer and a needzero flag, and returns an unsafe.Pointer.
// NOTE(review): the runtime's signature for this symbol is not part of the
// public API and may differ between Go versions — verify before relying on it.
//
//go:linkname mallocgc runtime.mallocgc
func mallocgc(size uintptr, typ unsafe.Pointer, needzero bool) unsafe.Pointer
27 |
28 | func alloc[T any](sample T, size uintptr) unsafe.Pointer {
29 | length := unsafe.Sizeof(sample) * size
30 | return mallocgc(length, nil, true)
31 | }
32 |
// getIndexAt returns ptr advanced by offset bytes.
//
// offset is a byte count, not an element index: callers scale it by the
// element size themselves. The type parameter T is not used by the body.
func getIndexAt[T any](ptr unsafe.Pointer, offset uintptr) unsafe.Pointer {
	return unsafe.Add(ptr, offset)
}
36 |
// block is the element type of the slice that main fills and reads back
// through raw pointer arithmetic.
type block struct {
	Data  int     // set by main to the slot index
	Kooky string  // set by main to "wutface<index>"
	Endy  float64 // set by main to the slot index as a float64
	// Last *uint
}
43 |
44 | func main() {
45 | // a := make([]int32, 0, 3)
46 | // a = append(a, 10, 20, 30)
47 | // t := unsafe.Pointer(&a[0])
48 | // fmt.Println(*(*int32)(unsafe.Pointer(uintptr(t) + 2*unsafe.Sizeof(int32(0)))))
49 | // return
50 |
51 | const n = uintptr(100)
52 | t := make([]block, n, n)
53 | k := unsafe.Pointer(&t[0])
54 | // k := alloc(block{Data: 1, Kooky: "2", Endy: 3.2, Last: new(uint)}, n)
55 | // k := C.calloc(C.ulong(unsafe.Sizeof(block{})), C.ulong(n))
56 | // unsafe.Slice(k, n)
57 | // memclrNoHeapPointers(k, n)
58 | // t := (*[]block)(k)
59 | // runtime.KeepAlive((*[n]block)(k))
60 | // for i := uintptr(0); i < n; i++ {
61 | // slot := getIndexAt[block](k, i*unsafe.Sizeof(block{}))
62 | // slot.Data = int(i)
63 | // slot.Kooky = fmt.Sprintf("wutface%d", i)
64 | // slot.Endy = float64(i)
65 | // slot.Last = new(uint)
66 | // *slot.Last = uint(i)
67 | // }
68 | // for i := uintptr(0); i < n; i++ {
69 | // fmt.Printf("%#v\n", t[i])
70 | // }
71 | // return
72 | var wg sync.WaitGroup
73 | wg.Add(int(n))
74 | for i := uintptr(0); i < n; i++ {
75 | slot := unsafe.Pointer(uintptr(k) + i*unsafe.Sizeof(block{}))
76 | (*block)(slot).Data = int(i)
77 | (*block)(slot).Kooky = fmt.Sprintf("wutface%d", i)
78 | (*block)(slot).Endy = float64(i)
79 | // (*block)(slot).Last = new(uint)
80 | // *(*block)(slot).Last = uint(i)
81 | }
82 | for i := uintptr(0); i < n; i++ {
83 | j := i
84 | go func() {
85 | slot := unsafe.Pointer(uintptr(k) + j*unsafe.Sizeof(block{}))
86 | // *(*block)(slot).Last++
87 | fmt.Println(uintptr(slot), " ", *(*block)(slot))
88 | runtime.GC()
89 | wg.Done()
90 | }()
91 | }
92 | wg.Wait()
93 | }
94 |
--------------------------------------------------------------------------------
/benchmarks/e2e/benchmark_test.go:
--------------------------------------------------------------------------------
1 | package zenq_test
2 |
3 | import (
4 | "fmt"
5 | "runtime"
6 | "sync"
7 | "testing"
8 |
9 | "github.com/alphadose/zenq/v2"
10 | )
11 |
// bufferSize is the capacity passed to both queues under benchmark: the
// buffered channel (make) and the ZenQ (zenq.New). 1 << 12 is 4096.
const bufferSize = 1 << 12
13 |
// Payload is the element type of both queues under benchmark. It mixes
// scalar, string and slice fields; the channel writers send its zero value.
type Payload struct {
	first   byte
	second  int64
	third   float64
	fourth  string
	fifth   complex64
	sixth   []rune
	seventh bool
}
23 |
// test describes one produce/consume benchmark scenario:
//
//   - writers:   number of producer goroutines
//   - readers:   number of consumer goroutines
//   - inputSize: total number of items, split evenly across the writers and,
//     separately, across the readers
type test struct {
	writers, readers, inputSize int
}
29 |
// testCases lists the scenarios shared by the channel and ZenQ benchmarks.
// init checks that every inputSize divides evenly by both writers and readers.
var testCases = []test{
	// Balanced producer/consumer counts with small inputs.
	{writers: 1, readers: 1, inputSize: 1e3},
	{writers: 3, readers: 3, inputSize: 3e3},
	{writers: 8, readers: 8, inputSize: 8e3},
	// Twice as many writers as buffer slots feeding one reader (4 items per writer).
	{writers: bufferSize * 2, readers: 1, inputSize: bufferSize * 2 * 4},
	// One writer feeding twice as many readers as buffer slots (4 items per reader).
	{writers: 1, readers: bufferSize * 2, inputSize: bufferSize * 2 * 4},
	// Many goroutines on both sides with large inputs.
	{writers: 100, readers: 100, inputSize: 6e6},
	{writers: 1e3, readers: 1e3, inputSize: 7e6},
}
39 |
40 | func init() {
41 | for _, t := range testCases {
42 | if t.inputSize%t.writers != 0 {
43 | panic(fmt.Sprintf("input size %d should be dividable by writers %d", t.inputSize, t.writers))
44 | }
45 | if t.inputSize%t.readers != 0 {
46 | panic(fmt.Sprintf("input size %d should be dividable by readers %d", t.inputSize, t.readers))
47 | }
48 | }
49 | }
50 |
51 | func BenchmarkChan_ProduceConsume(b *testing.B) {
52 | for _, t := range testCases {
53 | t := t
54 | b.Run(fmt.Sprintf("W%d/R%d/Size%d", t.writers, t.readers, t.inputSize), func(b *testing.B) {
55 | for i := 0; i < b.N; i++ {
56 | benchmarkProduceConsumeChan(b, t)
57 | }
58 | })
59 | }
60 | }
61 |
62 | func benchmarkProduceConsumeChan(b *testing.B, t test) {
63 | q := make(chan Payload, bufferSize)
64 | defer runtime.KeepAlive(q)
65 |
66 | writesPerProducer := t.inputSize / t.writers
67 | readsPerConsumer := t.inputSize / t.readers
68 |
69 | var wg sync.WaitGroup
70 | wg.Add(t.writers)
71 |
72 | // b.ResetTimer()
73 |
74 | for writer := 0; writer < t.writers; writer++ {
75 | go func() {
76 | defer wg.Done()
77 | for i := 0; i < writesPerProducer; i++ {
78 | q <- Payload{}
79 | }
80 | }()
81 | }
82 |
83 | wg.Add(t.readers)
84 | for reader := 0; reader < t.readers; reader++ {
85 | go func() {
86 | defer wg.Done()
87 | for i := 0; i < readsPerConsumer; i++ {
88 | <-q
89 | }
90 | }()
91 | }
92 |
93 | wg.Wait()
94 | }
95 |
96 | func BenchmarkZenQ_ProduceConsume(b *testing.B) {
97 | for _, t := range testCases {
98 | t := t
99 | b.Run(fmt.Sprintf("W%d/R%d/Size%d", t.writers, t.readers, t.inputSize), func(b *testing.B) {
100 | for i := 0; i < b.N; i++ {
101 | benchmarkProduceConsumeZenQ(b, t)
102 | }
103 | })
104 | }
105 | }
106 |
107 | func benchmarkProduceConsumeZenQ(b *testing.B, t test) {
108 | q := zenq.New[Payload](bufferSize)
109 | defer runtime.KeepAlive(q)
110 |
111 | writesPerProducer := t.inputSize / t.writers
112 | readsPerConsumer := t.inputSize / t.readers
113 |
114 | var wg sync.WaitGroup
115 | wg.Add(t.writers)
116 |
117 | // b.ResetTimer()
118 |
119 | for writer := 0; writer < t.writers; writer++ {
120 | go func() {
121 | defer wg.Done()
122 | for i := 0; i < writesPerProducer; i++ {
123 | q.Write(Payload{})
124 | }
125 | }()
126 | }
127 |
128 | wg.Add(t.readers)
129 | for reader := 0; reader < t.readers; reader++ {
130 | go func() {
131 | defer wg.Done()
132 | for i := 0; i < readsPerConsumer; i++ {
133 | q.Read()
134 | }
135 | }()
136 | }
137 |
138 | wg.Wait()
139 | }
140 |
141 | func BenchmarkChan_New(b *testing.B) {
142 | b.Run("struct{}", func(b *testing.B) {
143 | b.ReportAllocs()
144 | for i := 0; i < b.N; i++ {
145 | ch := make(chan struct{}, bufferSize)
146 | runtime.KeepAlive(ch)
147 | }
148 | })
149 | b.Run("byte", func(b *testing.B) {
150 | b.ReportAllocs()
151 | for i := 0; i < b.N; i++ {
152 | ch := make(chan byte, bufferSize)
153 | runtime.KeepAlive(ch)
154 | }
155 | })
156 | b.Run("int64", func(b *testing.B) {
157 | b.ReportAllocs()
158 | for i := 0; i < b.N; i++ {
159 | ch := make(chan int64, bufferSize)
160 | runtime.KeepAlive(ch)
161 | }
162 | })
163 | }
164 |
165 | func BenchmarkZenQ_New(b *testing.B) {
166 | b.Run("struct{}", func(b *testing.B) {
167 | b.ReportAllocs()
168 | for i := 0; i < b.N; i++ {
169 | zq := zenq.New[struct{}](bufferSize)
170 | runtime.KeepAlive(zq)
171 | }
172 | })
173 | b.Run("byte", func(b *testing.B) {
174 | b.ReportAllocs()
175 | for i := 0; i < b.N; i++ {
176 | zq := zenq.New[byte](bufferSize)
177 | runtime.KeepAlive(zq)
178 | }
179 | })
180 | b.Run("int64", func(b *testing.B) {
181 | b.ReportAllocs()
182 | for i := 0; i < b.N; i++ {
183 | zq := zenq.New[int64](bufferSize)
184 | runtime.KeepAlive(zq)
185 | }
186 | })
187 | }
188 |
189 | func BenchmarkZenQ_BackgroundSelectWait(b *testing.B) {
190 | const N = 1e4
191 | q := zenq.New[struct{}](bufferSize)
192 |
193 | // create background waiters
194 | for i := 0; i < N; i++ {
195 | go func() {
196 | alt := zenq.New[struct{}](bufferSize)
197 | zenq.Select(q, alt)
198 | }()
199 | }
200 |
201 | b.ResetTimer()
202 |
203 | a := zenq.New[int](bufferSize)
204 | for i := 0; i < b.N; i++ {
205 | a.Write(i)
206 | runtime.Gosched()
207 | a.Read()
208 | }
209 |
210 | // release background waiters
211 | for i := 0; i < N; i++ {
212 | q.Write(struct{}{})
213 | }
214 | }
215 |
216 | func BenchmarkChan_BackgroundSelectWait(b *testing.B) {
217 | const N = 1e4
218 | q := make(chan struct{})
219 |
220 | // create background waiters
221 | for i := 0; i < N; i++ {
222 | go func() {
223 | x := make(chan struct{})
224 | select {
225 | case <-q:
226 | case <-x:
227 | }
228 | }()
229 | }
230 |
231 | b.ResetTimer()
232 |
233 | a := make(chan int, bufferSize)
234 | for i := 0; i < b.N; i++ {
235 | a <- i
236 | runtime.Gosched()
237 | <-a
238 | }
239 |
240 | // release background waiters
241 | for i := 0; i < N; i++ {
242 | q <- struct{}{}
243 | }
244 | }
245 |
--------------------------------------------------------------------------------
/benchmarks/e2e/benchsuite_test.go:
--------------------------------------------------------------------------------
1 | package zenq_test
2 |
3 | import (
4 | "sync"
5 | "testing"
6 |
7 | "github.com/alphadose/zenq/v2"
8 | )
9 |
10 | // wrapper for chan to have exactly the same api as zenq.
11 | type Chan[T any] struct {
12 | ch chan T
13 | }
14 |
15 | func NewChan[T any]() Chan[T] {
16 | return Chan[T]{ch: make(chan T, bufferSize)}
17 | }
18 |
19 | func (ch Chan[T]) Read() T { return <-ch.ch }
20 | func (ch Chan[T]) Write(v T) { ch.ch <- v }
21 |
// BenchmarkChan_Suite runs a fixed set of queue workloads against the Chan
// wrapper around a native buffered channel. BenchmarkZenq_Suite runs the same
// workloads, under the same sub-benchmark names, against ZenQ.
func BenchmarkChan_Suite(b *testing.B) {
	type Queue = Chan[int]
	ctor := NewChan[int]

	// Single: one goroutine alternates one write and one read on one queue.
	b.Run("Single", func(b *testing.B) {
		q := ctor()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			q.Write(i)
			_ = q.Read()
		}
	})

	// Uncontended: every RunParallel worker owns a private queue and performs
	// 100 write/read pairs per pb.Next iteration.
	b.Run("Uncontended/x100", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			q := ctor()
			for pb.Next() {
				for i := 0; i < 100; i++ {
					q.Write(i)
					_ = q.Read()
				}
			}
		})
	})

	// Contended: same loop as Uncontended, but all workers share one queue.
	b.Run("Contended/x100", func(b *testing.B) {
		q := ctor()
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				for i := 0; i < 100; i++ {
					q.Write(i)
					_ = q.Read()
				}
			}
		})
	})

	// Multiple: P independent queues, each with one dedicated writer and one
	// dedicated reader goroutine moving b.N items.
	// NOTE(review): the sub-benchmark name says x100 while P is 1000.
	b.Run("Multiple/x100", func(b *testing.B) {
		const P = 1000
		qs := [P]Queue{}
		for i := range qs {
			qs[i] = ctor()
		}

		b.ResetTimer()

		var wg sync.WaitGroup
		wg.Add(P * 2)
		for i := 0; i < P; i++ {
			go func(q Queue) {
				defer wg.Done()
				for i := 0; i < b.N; i++ {
					var v int
					q.Write(v)
				}
			}(qs[i])
			go func(q Queue) {
				defer wg.Done()
				for i := 0; i < b.N; i++ {
					_ = q.Read()
				}

			}(qs[i])
		}
		wg.Wait()
	})

	// ProducerConsumer/x1: one writer and one reader goroutine share a queue;
	// each calls work() after every operation.
	b.Run("ProducerConsumer/x1", func(b *testing.B) {
		q := ctor()
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)
		go func() {
			defer wg.Done()
			for i := 0; i < b.N; i++ {
				var v int
				q.Write(v)
				work()
			}
		}()

		go func() {
			defer wg.Done()
			for i := 0; i < b.N; i++ {
				_ = q.Read()
				work()
			}
		}()
		wg.Wait()
	})

	// ProducerConsumer/x100: b.RunParallel is invoked twice concurrently on the
	// same b, once with a writing body and once with a reading body; each body
	// performs 100 operations per pb.Next iteration on the shared queue.
	b.Run("ProducerConsumer/x100", func(b *testing.B) {
		q := ctor()
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)

		go func() {
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					for i := 0; i < 100; i++ {
						q.Write(0)
						work()
					}
				}
			})
			wg.Done()
		}()

		go func() {
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					for i := 0; i < 100; i++ {
						_ = q.Read()
						work()
					}
				}
			})
			wg.Done()
		}()
		wg.Wait()
	})

	// PingPong: two goroutines hand a value back and forth over two queues;
	// the first writes to q1 then reads q2, the second reads q1 then writes q2.
	b.Run("PingPong/x1", func(b *testing.B) {
		q1 := ctor()
		q2 := ctor()
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)

		go func() {
			for i := 0; i < b.N; i++ {
				var v int
				q1.Write(v)
				work()
				_ = q2.Read()
			}
			wg.Done()
		}()

		go func() {
			for i := 0; i < b.N; i++ {
				var v int
				_ = q1.Read()
				work()
				q2.Write(v)
			}
			wg.Done()
		}()
		wg.Wait()
	})
}
174 |
// BenchmarkZenq_Suite runs a fixed set of queue workloads against
// zenq.ZenQ[int] queues of size bufferSize. BenchmarkChan_Suite runs the same
// workloads, under the same sub-benchmark names, against a native channel.
func BenchmarkZenq_Suite(b *testing.B) {
	type Queue = zenq.ZenQ[int]
	ctor := zenq.New[int]

	// Single: one goroutine alternates one write and one read on one queue.
	b.Run("Single", func(b *testing.B) {
		q := ctor(bufferSize)
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			q.Write(i)
			_, _ = q.Read()
		}
	})

	// Uncontended: every RunParallel worker owns a private queue and performs
	// 100 write/read pairs per pb.Next iteration.
	b.Run("Uncontended/x100", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			q := ctor(bufferSize)
			for pb.Next() {
				for i := 0; i < 100; i++ {
					q.Write(i)
					_, _ = q.Read()
				}
			}
		})
	})

	// Contended: same loop as Uncontended, but all workers share one queue.
	b.Run("Contended/x100", func(b *testing.B) {
		q := ctor(bufferSize)
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				for i := 0; i < 100; i++ {
					q.Write(i)
					_, _ = q.Read()
				}
			}
		})
	})

	// Multiple: P independent queues, each with one dedicated writer and one
	// dedicated reader goroutine moving b.N items.
	// NOTE(review): the sub-benchmark name says x100 while P is 1000.
	b.Run("Multiple/x100", func(b *testing.B) {
		const P = 1000
		qs := [P]*Queue{}
		for i := range qs {
			qs[i] = ctor(bufferSize)
		}

		b.ResetTimer()

		var wg sync.WaitGroup
		wg.Add(P * 2)
		for i := 0; i < P; i++ {
			go func(q *Queue) {
				defer wg.Done()
				for i := 0; i < b.N; i++ {
					var v int
					q.Write(v)
				}
			}(qs[i])
			go func(q *Queue) {
				defer wg.Done()
				for i := 0; i < b.N; i++ {
					_, _ = q.Read()
				}

			}(qs[i])
		}
		wg.Wait()
	})

	// ProducerConsumer/x1: one writer and one reader goroutine share a queue;
	// each calls work() after every operation.
	b.Run("ProducerConsumer/x1", func(b *testing.B) {
		q := ctor(bufferSize)
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)
		go func() {
			defer wg.Done()
			for i := 0; i < b.N; i++ {
				var v int
				q.Write(v)
				work()
			}
		}()

		go func() {
			defer wg.Done()
			for i := 0; i < b.N; i++ {
				_, _ = q.Read()
				work()
			}
		}()
		wg.Wait()
	})

	// ProducerConsumer/x100: b.RunParallel is invoked twice concurrently on the
	// same b, once with a writing body and once with a reading body; each body
	// performs 100 operations per pb.Next iteration on the shared queue.
	b.Run("ProducerConsumer/x100", func(b *testing.B) {
		q := ctor(bufferSize)
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)

		go func() {
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					for i := 0; i < 100; i++ {
						q.Write(0)
						work()
					}
				}
			})
			wg.Done()
		}()

		go func() {
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					for i := 0; i < 100; i++ {
						_, _ = q.Read()
						work()
					}
				}
			})
			wg.Done()
		}()
		wg.Wait()
	})

	// PingPong: two goroutines hand a value back and forth over two queues;
	// the first writes to q1 then reads q2, the second reads q1 then writes q2.
	b.Run("PingPong/x1", func(b *testing.B) {
		q1 := ctor(bufferSize)
		q2 := ctor(bufferSize)
		b.ResetTimer()
		var wg sync.WaitGroup
		wg.Add(2)

		go func() {
			for i := 0; i < b.N; i++ {
				var v int
				q1.Write(v)
				work()
				_, _ = q2.Read()
			}
			wg.Done()
		}()

		go func() {
			for i := 0; i < b.N; i++ {
				var v int
				_, _ = q1.Read()
				work()
				q2.Write(v)
			}
			wg.Done()
		}()
		wg.Wait()
	})
}
327 |
// work stands in for per-item processing in the suite benchmarks. Its body is
// empty; the go:noinline directive keeps the compiler from inlining the call.
//
//go:noinline
func work() {
	// really tiny amount of work
}
332 |
--------------------------------------------------------------------------------
/benchmarks/selector/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "time"
6 |
7 | "github.com/alphadose/zenq/v2"
8 | )
9 |
// custom1 is the struct payload carried by value through zq3 and ch3.
type custom1 struct {
	alpha int
	beta  string
}

// custom2 is a single-field struct; zq4 and ch4 carry pointers to it.
type custom2 struct {
	gamma int
}

const (
	// capacity given to every queue and channel declared below
	bufferSize = 8

	// number of producer goroutines each selector starts; looper divides
	// throughput by this value to get each producer's share
	numProducers = 4
)

var (
	// total number of items transferred in the current run; assigned by main
	// before each pair of runs
	throughput int

	// input batch size
	testcases = []int{60, 600, 6e3, 6e5}

	// ZenQ instances consumed together by zenqSelector, one per payload type
	zq1 = zenq.New[int](bufferSize)
	zq2 = zenq.New[string](bufferSize)
	zq3 = zenq.New[custom1](bufferSize)
	zq4 = zenq.New[*custom2](bufferSize)

	// native channels consumed together by chanSelector, one per payload type
	ch1 = make(chan int, bufferSize)
	ch2 = make(chan string, bufferSize)
	ch3 = make(chan custom1, bufferSize)
	ch4 = make(chan *custom2, bufferSize)
)
41 |
42 | func zenqSelector() {
43 | go looper(intProducer1)
44 | go looper(stringProducer1)
45 | go looper(custom1Producer1)
46 | go looper(custom2Producer1)
47 |
48 | var ctr = 0
49 |
50 | var startTime time.Time = time.Now()
51 | for i := 0; i < throughput; i++ {
52 | if d := zenq.Select(zq1, zq2, zq3, zq4); d != nil {
53 | ctr++
54 | }
55 | }
56 |
57 | if ctr != throughput {
58 | panic("Data Loss")
59 | }
60 | fmt.Printf("ZenQ Select Runner completed transfer in: %v\n", time.Since(startTime))
61 | }
62 |
63 | func chanSelector() {
64 | go looper(intProducer2)
65 | go looper(stringProducer2)
66 | go looper(custom1Producer2)
67 | go looper(custom2Producer2)
68 |
69 | var ctr = 0
70 |
71 | var startTime time.Time = time.Now()
72 | for i := 0; i < throughput; i++ {
73 | select {
74 | case <-ch1:
75 | ctr++
76 | case <-ch2:
77 | ctr++
78 | case <-ch3:
79 | ctr++
80 | case <-ch4:
81 | ctr++
82 | }
83 |
84 | }
85 |
86 | if ctr != throughput {
87 | panic("Data Loss")
88 | }
89 | fmt.Printf("Chan Select Runner completed transfer in: %v\n", time.Since(startTime))
90 | }
91 |
92 | func main() {
93 | for _, tput := range testcases {
94 | throughput = tput
95 | fmt.Printf("With Input Batch Size: %d and Num Concurrent Writers: %d\n", throughput, numProducers)
96 | fmt.Print("\n")
97 |
98 | // Run tests
99 | chanSelector()
100 | zenqSelector()
101 | fmt.Print("====================================================================\n\n")
102 | }
103 | }
104 |
105 | func intProducer1(ctr int) { zq1.Write(ctr) }
106 |
107 | func stringProducer1(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) }
108 |
109 | func custom1Producer1(ctr int) { zq3.Write(custom1{alpha: ctr, beta: fmt.Sprint(ctr)}) }
110 |
111 | func custom2Producer1(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) }
112 |
113 | func intProducer2(ctr int) { ch1 <- ctr }
114 |
115 | func stringProducer2(ctr int) { ch2 <- fmt.Sprint(ctr * 10) }
116 |
117 | func custom1Producer2(ctr int) { ch3 <- custom1{alpha: ctr, beta: fmt.Sprint(ctr)} }
118 |
119 | func custom2Producer2(ctr int) { ch4 <- &custom2{gamma: 1 << ctr} }
120 |
121 | func looper(producer func(ctr int)) {
122 | for i := 0; i < throughput/numProducers; i++ {
123 | producer(i)
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
/benchmarks/simple/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "time"
6 |
7 | "github.com/alphadose/zenq/v2"
8 | )
9 |
// Payload is the example item written to and read from both queues.
type Payload struct {
	first   byte
	second  int64
	third   float64
	fourth  string
	fifth   complex64
	sixth   []rune
	seventh bool
}

// NewPayload allocates a Payload with every field except seventh set to a
// fixed non-zero value; seventh keeps its zero value.
func NewPayload() *Payload {
	p := new(Payload)
	p.first = 1
	p.second = 2
	p.third = 3.0
	p.fourth = "4"
	p.fifth = 3 + 4i
	p.sixth = []rune("🐈⚔️👍🌏💥🦖")
	return p
}
31 |
const (
	// capacity of both the native channel and the ZenQ declared below
	bufferSize = 1 << 12
)

var (
	// reference payload: producers send copies of it and validatePayload
	// compares every received item against it
	pl Payload = *NewPayload()

	// number of items transferred in the current run; starts at the first
	// batch size and is reassigned by main (and by the test runners)
	currSize uint64 = throughput[0]

	// input batch size
	throughput = []uint64{60, 600, 6e3, 6e6, 6e8}
	// throughput = []uint64{5}

	// Number of writers/producers which will be writing to the queue concurrently
	numConcurrentWriters uint64 = 1

	// native channel
	ch chan Payload = make(chan Payload, bufferSize)

	// ZenQ
	zq *zenq.ZenQ[Payload] = zenq.New[Payload](bufferSize)
)
54 |
55 | func validatePayload(param Payload) {
56 | if param.first != pl.first || param.second != pl.second || param.third != pl.third || param.fourth != pl.fourth || param.fifth != pl.fifth || len(param.sixth) != len(pl.sixth) || param.seventh != pl.seventh {
57 | panic("Loss of data integrity")
58 | }
59 | }
60 |
61 | func chanProducer() {
62 | epochs := currSize / numConcurrentWriters
63 | for i := uint64(0); i < epochs; i++ {
64 | ch <- pl
65 | }
66 | }
67 |
68 | func chanConsumer() {
69 | for i := uint64(0); i < currSize; i++ {
70 | validatePayload(<-ch)
71 | }
72 | }
73 |
74 | func chanRunner() {
75 | for i := uint64(0); i < numConcurrentWriters; i++ {
76 | go chanProducer()
77 | }
78 | chanConsumer()
79 | }
80 |
81 | func zenqProducer() {
82 | epochs := currSize / numConcurrentWriters
83 | for i := uint64(0); i < epochs; i++ {
84 | zq.Write(pl)
85 | }
86 | }
87 |
88 | func zenqConsumer() {
89 | var data Payload
90 | for i := uint64(0); i < currSize; i++ {
91 | data, _ = zq.Read()
92 | validatePayload(data)
93 | }
94 | }
95 |
96 | func zenqRunner() {
97 | for i := uint64(0); i < numConcurrentWriters; i++ {
98 | go zenqProducer()
99 | }
100 | zenqConsumer()
101 | }
102 |
// measureTime runs callback once and prints the elapsed wall-clock time,
// labelled with runnerName.
func measureTime(callback func(), runnerName string) {
	begin := time.Now()
	callback()
	elapsed := time.Since(begin)
	fmt.Printf("%s Runner completed transfer in: %v\n", runnerName, elapsed)
}
108 |
109 | // drain the channel and zenQ
110 | func cleanup() {
111 | for len(ch) > 0 {
112 | <-ch
113 | }
114 | zq.Reset()
115 | }
116 |
117 | func main() {
118 | cleanup()
119 | for _, tput := range throughput {
120 | currSize = tput
121 | fmt.Printf("With Input Batch Size: %d and Num Concurrent Writers: %d\n", currSize, numConcurrentWriters)
122 | fmt.Print("\n")
123 |
124 | // Run tests
125 | measureTime(chanRunner, "Native Channel")
126 | measureTime(zenqRunner, "ZenQ")
127 | fmt.Print("====================================================================\n\n")
128 | }
129 | }
130 |
--------------------------------------------------------------------------------
/benchmarks/simple/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func zenqTestRunner(numWriters uint64, size uint64, b *testing.B) {
8 | currSize = size
9 | numConcurrentWriters = numWriters
10 |
11 | cleanup()
12 | b.ResetTimer()
13 | for n := 0; n < b.N; n++ {
14 | zenqRunner()
15 | }
16 | }
17 |
18 | func chanTestRunner(numWriters uint64, size uint64, b *testing.B) {
19 | currSize = size
20 | numConcurrentWriters = numWriters
21 |
22 | cleanup()
23 | b.ResetTimer()
24 | for n := 0; n < b.N; n++ {
25 | chanRunner()
26 | }
27 | }
28 |
29 | func Benchmark_Chan_NumWriters1_InputSize600(b *testing.B) { chanTestRunner(1, 6e2, b) }
30 |
31 | func Benchmark_ZenQ_NumWriters1_InputSize600(b *testing.B) { zenqTestRunner(1, 6e2, b) }
32 |
33 | func Benchmark_Chan_NumWriters3_InputSize60000(b *testing.B) { chanTestRunner(3, 6e4, b) }
34 |
35 | func Benchmark_ZenQ_NumWriters3_InputSize60000(b *testing.B) { zenqTestRunner(3, 6e4, b) }
36 |
37 | func Benchmark_Chan_NumWriters8_InputSize6000000(b *testing.B) { chanTestRunner(8, 6e6, b) }
38 |
39 | func Benchmark_ZenQ_NumWriters8_InputSize6000000(b *testing.B) { zenqTestRunner(8, 6e6, b) }
40 |
41 | func Benchmark_Chan_NumWriters100_InputSize6000000(b *testing.B) { chanTestRunner(100, 6e6, b) }
42 |
43 | func Benchmark_ZenQ_NumWriters100_InputSize6000000(b *testing.B) { zenqTestRunner(100, 6e6, b) }
44 |
45 | func Benchmark_Chan_NumWriters1000_InputSize7000000(b *testing.B) { chanTestRunner(1e3, 7e6, b) }
46 |
47 | func Benchmark_ZenQ_NumWriters1000_InputSize7000000(b *testing.B) { zenqTestRunner(1e3, 7e6, b) }
48 |
49 | func Benchmark_Chan_Million_Blocking_Writers(b *testing.B) { chanTestRunner(1e6, 1e7, b) }
50 |
51 | func Benchmark_ZenQ_Million_Blocking_Writers(b *testing.B) { zenqTestRunner(1e6, 1e7, b) }
52 |
--------------------------------------------------------------------------------
/constants/constants_386.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on 386.
const CacheLinePadSize = 64
4 |
--------------------------------------------------------------------------------
/constants/constants_amd64.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on amd64.
const CacheLinePadSize = 64
4 |
--------------------------------------------------------------------------------
/constants/constants_arm.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on arm.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_arm64.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on arm64.
const CacheLinePadSize = 64
4 |
--------------------------------------------------------------------------------
/constants/constants_mips.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on mips.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_mips64.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on mips64.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_mips64le.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on mips64le.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_mipsle.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on mipsle.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_ppc64x.go:
--------------------------------------------------------------------------------
1 | //go:build ppc64 || ppc64le
2 |
3 | package constants
4 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on ppc64
// and ppc64le.
const CacheLinePadSize = 128
6 |
--------------------------------------------------------------------------------
/constants/constants_riscv64.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on riscv64.
const CacheLinePadSize = 32
4 |
--------------------------------------------------------------------------------
/constants/constants_s390x.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on s390x.
const CacheLinePadSize = 256
4 |
--------------------------------------------------------------------------------
/constants/constants_wasm.go:
--------------------------------------------------------------------------------
1 | package constants
2 |
// CacheLinePadSize is the cache-line padding size, in bytes, used on wasm.
const CacheLinePadSize = 64
4 |
--------------------------------------------------------------------------------
/examples/selector/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/alphadose/zenq/v2"
7 | )
8 |
// custom1 is the struct payload carried by value through zq3.
type custom1 struct {
	alpha int
	beta  string
}

// custom2 is a single-field struct; zq4 carries pointers to it.
type custom2 struct {
	gamma int
}

// size is the capacity requested for every queue below.
const size = 100

// One ZenQ per payload type; main selects across all four.
var (
	zq1 = zenq.New[int](size)
	zq2 = zenq.New[string](size)
	zq3 = zenq.New[custom1](size)
	zq4 = zenq.New[*custom2](size)
)
26 |
27 | func main() {
28 | go looper(intProducer)
29 | go looper(stringProducer)
30 | go looper(custom1Producer)
31 | go looper(custom2Producer)
32 |
33 | for i := 0; i < 40; i++ {
34 |
35 | // Selection occurs here
36 | if data := zenq.Select(zq1, zq2, zq3, zq4); data != nil {
37 | switch data.(type) {
38 | case int:
39 | fmt.Printf("Received int %d\n", data)
40 | case string:
41 | fmt.Printf("Received string %s\n", data)
42 | case custom1:
43 | fmt.Printf("Received custom data type number 1 %#v\n", data)
44 | case *custom2:
45 | fmt.Printf("Received pointer %#v\n", data)
46 | }
47 | }
48 | }
49 | }
50 |
51 | func intProducer(ctr int) { zq1.Write(ctr) }
52 |
53 | func stringProducer(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) }
54 |
55 | func custom1Producer(ctr int) { zq3.Write(custom1{alpha: ctr, beta: fmt.Sprint(ctr)}) }
56 |
57 | func custom2Producer(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) }
58 |
// looper invokes producer ten times, passing 0 through 9 in order.
func looper(producer func(ctr int)) {
	const rounds = 10
	for n := 0; n != rounds; n++ {
		producer(n)
	}
}
64 |
--------------------------------------------------------------------------------
/examples/simple/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "runtime"
6 |
7 | "github.com/alphadose/zenq/v2"
8 | )
9 |
// payload is the item type written to and read from the queue in this example.
type payload struct {
	alpha int
	beta  string
}
14 |
15 | func main() {
16 | zq := zenq.New[payload](10)
17 |
18 | for j := 0; j < 5; j++ {
19 | go func() {
20 | for i := 0; i < 20; i++ {
21 | zq.Write(payload{
22 | alpha: i,
23 | beta: fmt.Sprint(i),
24 | })
25 | }
26 | }()
27 | }
28 |
29 | // For lowest latency and best performance, allocate the ZenQ.Read() calling goroutine an entire OS thread
30 | // by calling runtime.LockOSThread()
31 | // Note:- If you have a single core then doing this will cause a deadlock
32 | runtime.LockOSThread()
33 | defer runtime.UnlockOSThread()
34 |
35 | for i := 0; i < 100; i++ {
36 | if data, queueOpen := zq.Read(); queueOpen {
37 | fmt.Printf("%+v\n", data)
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/alphadose/zenq/v2
2 |
3 | go 1.19
4 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alphadose/ZenQ/271950c9b7fa1f6b907dca7ffcdf91022ddaac00/go.sum
--------------------------------------------------------------------------------
/lib_runtime_fastrand.go:
--------------------------------------------------------------------------------
1 | //go:build !go1.22
2 |
3 | package zenq
4 |
5 | import (
6 | _ "unsafe"
7 | )
8 |
// Fastrand is bound to runtime.fastrand by the go:linkname directive below and
// has no body here. This file is only built for Go versions before 1.22.
//
//go:linkname Fastrand runtime.fastrand
func Fastrand() uint32
11 |
--------------------------------------------------------------------------------
/lib_runtime_fastrand_1.22.go:
--------------------------------------------------------------------------------
1 | //go:build go1.22
2 |
3 | package zenq
4 |
5 | import (
6 | _ "unsafe"
7 | )
8 |
// Fastrand is bound to runtime.cheaprand by the go:linkname directive below and
// has no body here. This file is only built for Go 1.22 and later.
//
//go:linkname Fastrand runtime.cheaprand
func Fastrand() uint32
11 |
--------------------------------------------------------------------------------
/lib_runtime_linkage.go:
--------------------------------------------------------------------------------
1 | package zenq
2 |
3 | import (
4 | "runtime"
5 | "unsafe"
6 | _ "unsafe"
7 |
8 | "github.com/alphadose/zenq/v2/constants"
9 | )
10 |
// cacheLinePadding is a blank filler of constants.CacheLinePadSize bytes.
// NOTE(review): presumably placed between hot fields so that they land on
// different cache lines; its uses are outside this part of the file.
type cacheLinePadding struct {
	_ [constants.CacheLinePadSize]byte
}
14 |
// The declarations in this file link ZenQ against unexported parts of the Go
// runtime so that it can use scheduling primitives such as goready() and
// mcall() for low-level scheduling of goroutines.

// mutex is the argument type of the linknamed lock, unlock and goparkunlock.
// NOTE(review): it presumably has to keep the same layout as the runtime's own
// mutex; confirm against the Go versions being targeted.
type mutex struct {
	// Futex-based impl treats it as uint32 key,
	// while sema-based impl as M* waitm.
	// Used to be a union, but unions break precise GC.
	key uintptr
}
24 |
// The functions below are used for scheduling goroutines with exclusive control.
// Shifting to the below flow will remove the spinning and mutex lock implementations.
//
// Each bodyless declaration is bound to the runtime symbol named in its
// go:linkname directive.

// lock is bound to runtime.lock.
//
//go:linkname lock runtime.lock
func lock(l *mutex)

// nanotime is bound to runtime.nanotime.
//
//go:linkname nanotime runtime.nanotime
func nanotime() int64

// unlock is bound to runtime.unlock.
//
//go:linkname unlock runtime.unlock
func unlock(l *mutex)

// goparkunlock is bound to runtime.goparkunlock.
// NOTE(review): the parameter list has to match the runtime's definition for
// the Go version in use; confirm when upgrading.
//
//go:linkname goparkunlock runtime.goparkunlock
func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int)

// GetG returns the pointer to the current goroutine
// defined in the asm files
func GetG() unsafe.Pointer

// Fastlog2 is bound to runtime.fastlog2.
//
//go:linkname Fastlog2 runtime.fastlog2
func Fastlog2(x float64) float64

// goready is bound to runtime.goready. The goroutine is passed as an
// unsafe.Pointer; safe_ready calls it once the goroutine is observed waiting.
//
//go:linkname goready runtime.goready
func goready(goroutinePtr unsafe.Pointer, traceskip int)

// gopark is bound to runtime.gopark.
// NOTE(review): the parameter list has to match the runtime's definition for
// the Go version in use; confirm when upgrading.
//
//go:linkname gopark runtime.gopark
func gopark(unlockf func(unsafe.Pointer, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int)
52 |
// Active spinning runtime support.
// runtime_canSpin reports whether spinning makes sense at the moment.
//
//go:linkname runtime_canSpin sync.runtime_canSpin
func runtime_canSpin(i int) bool

// runtime_doSpin does active spinning.
// //go:linkname runtime_doSpin sync.runtime_doSpin
// func runtime_doSpin()

// runtime_doSpin is a local replacement for the linknamed declaration that is
// commented out above: it calls spin (bound to runtime.procyield) with a
// count of 30.
func runtime_doSpin() {
	spin(30)
}

// osyield is bound to runtime.osyield.
//
//go:linkname osyield runtime.osyield
func osyield()

// runtime_nanotime is bound to sync.runtime_nanotime.
//
//go:linkname runtime_nanotime sync.runtime_nanotime
func runtime_nanotime() int64
71 |
// Semacquire waits until *s > 0 and then atomically decrements it.
// It is intended as a simple sleep primitive for use by the synchronization
// library and should not be used directly.
//
//go:linkname runtime_Semacquire sync.runtime_Semacquire
func runtime_Semacquire(s *uint32)

// SemacquireMutex is like Semacquire, but for profiling contended Mutexes.
// If lifo is true, queue waiter at the head of wait queue.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_SemacquireMutex's caller.
//
//go:linkname runtime_SemacquireMutex sync.runtime_SemacquireMutex
func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int)

// Semrelease atomically increments *s and notifies a waiting goroutine
// if one is blocked in Semacquire.
// It is intended as a simple wakeup primitive for use by the synchronization
// library and should not be used directly.
// If handoff is true, pass count directly to the first waiter.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_Semrelease's caller.
//
//go:linkname runtime_Semrelease sync.runtime_Semrelease
func runtime_Semrelease(s *uint32, handoff bool, skipframes int)
94 |
// The declarations below have no Go body; each is bound to the runtime symbol
// named in its go:linkname directive. Goroutines are passed around as
// unsafe.Pointer values.

// goyield is bound to runtime.goyield.
//
//go:linkname goyield runtime.goyield
func goyield()

// mcall is bound to runtime.mcall. In this file it is called with gosched_m.
//
//go:linkname mcall runtime.mcall
func mcall(fn func(unsafe.Pointer))

// park_m is bound to runtime.park_m.
//
//go:linkname park_m runtime.park_m
func park_m(gp unsafe.Pointer)

// fastrandn is bound to runtime.fastrandn.
//
//go:linkname fastrandn runtime.fastrandn
func fastrandn(n uint32) uint32

// throw is bound to runtime.throw.
//
//go:linkname throw runtime.throw
func throw(s string)

// Readgstatus is bound to runtime.readgstatus. safe_ready polls it to learn
// when a goroutine has reached _Gwaiting.
//
//go:linkname Readgstatus runtime.readgstatus
func Readgstatus(gp unsafe.Pointer) uint32

// casgstatus is bound to runtime.casgstatus. fast_park uses it to move the
// goroutine from _Grunning to _Gwaiting.
//
//go:linkname casgstatus runtime.casgstatus
func casgstatus(gp unsafe.Pointer, oldval, newval uint32)

// dropg is bound to runtime.dropg.
//
//go:linkname dropg runtime.dropg
func dropg()

// schedule is bound to runtime.schedule.
//
//go:linkname schedule runtime.schedule
func schedule()

// mallocgc is bound to runtime.mallocgc.
//
//go:linkname mallocgc runtime.mallocgc
func mallocgc(size uintptr, typ unsafe.Pointer, needzero bool) unsafe.Pointer

// sysFree is bound to runtime.sysFree.
//
//go:linkname sysFree runtime.sysFree
func sysFree(v unsafe.Pointer, n uintptr, sysStat unsafe.Pointer)

// sysFreeOS is bound to runtime.sysFreeOS.
//
//go:linkname sysFreeOS runtime.sysFreeOS
func sysFreeOS(v unsafe.Pointer, n uintptr)

// gosched_m is bound to runtime.gosched_m. It is handed to mcall by safe_ready
// and wait on single-core systems.
//
//go:linkname gosched_m runtime.gosched_m
func gosched_m(gp unsafe.Pointer)

// spin is bound to runtime.procyield; cycles is the count passed through to it.
//
//go:linkname spin runtime.procyield
func spin(cycles uint32)

// noescape is bound to runtime.noescape.
//
//go:linkname noescape runtime.noescape
func noescape(p unsafe.Pointer) unsafe.Pointer

// ProcPin and ProcUnpin disable pre-emption for the calling goroutine and
// can be used to guarantee consistent latency.
//
//go:linkname ProcPin runtime.procPin
func ProcPin() int

// ProcUnpin is bound to runtime.procUnpin and is the counterpart of ProcPin.
//
//go:linkname ProcUnpin runtime.procUnpin
func ProcUnpin()

// memequal is bound to runtime.memequal.
//
//go:linkname memequal runtime.memequal
func memequal(a, b unsafe.Pointer, size uintptr) bool

// Load8 is bound to runtime/internal/atomic.Load8.
//
//go:linkname Load8 runtime/internal/atomic.Load8
func Load8(ptr *uint8) uint8

// And8 is bound to runtime/internal/atomic.And8.
//
//go:linkname And8 runtime/internal/atomic.And8
func And8(ptr *uint8, val uint8)

// Or8 is bound to runtime/internal/atomic.Or8.
//
//go:linkname Or8 runtime/internal/atomic.Or8
func Or8(ptr *uint8, val uint8)

// Store8 is bound to runtime/internal/atomic.Store8.
//
//go:linkname Store8 runtime/internal/atomic.Store8
func Store8(ptr *uint8, val uint8)
162 |
// fast_park is the package's custom parking function.
// It is meant to be passed to mcall (see Select, Write and selectSender), which
// supplies gp. It calls dropg, moves gp from _Grunning to _Gwaiting and then
// calls schedule.
// Whoever wants to resume gp must call safe_ready on it afterwards.
func fast_park(gp unsafe.Pointer) {
	dropg()
	casgstatus(gp, _Grunning, _Gwaiting)
	schedule()
}
169 |
// multicore reports whether the system has multiple cores or a single core.
// It is evaluated once at package initialisation and selects between
// spinning and yielding in safe_ready and wait.
var multicore = runtime.NumCPU() > 1
172 |
173 | // call ready after ensuring the goroutine is parked
174 | func safe_ready(gp unsafe.Pointer) {
175 | // for better microprocessor branch prediction
176 | if multicore {
177 | for Readgstatus(gp)&^_Gscan != _Gwaiting {
178 | spin(20)
179 | }
180 | } else {
181 | for Readgstatus(gp)&^_Gscan != _Gwaiting {
182 | mcall(gosched_m)
183 | }
184 | }
185 | goready(gp, 1)
186 | }
187 |
188 | // simple wait
189 | func wait() {
190 | if multicore {
191 | spin(20)
192 | } else {
193 | mcall(gosched_m)
194 | }
195 | }
196 |
// waitReason identifies why a goroutine is blocked.
// NOTE(review): the list looks like a copy of the Go runtime's waitReason
// enumeration; the numeric values depend on declaration order, so confirm it
// matches the targeted Go version before relying on individual values.
type waitReason uint8

// Enumerated wait reasons; the trailing comment on each line is the
// human-readable text associated with that reason.
const (
	waitReasonZero                  waitReason = iota // ""
	waitReasonGCAssistMarking                         // "GC assist marking"
	waitReasonIOWait                                  // "IO wait"
	waitReasonChanReceiveNilChan                      // "chan receive (nil chan)"
	waitReasonChanSendNilChan                         // "chan send (nil chan)"
	waitReasonDumpingHeap                             // "dumping heap"
	waitReasonGarbageCollection                       // "garbage collection"
	waitReasonGarbageCollectionScan                   // "garbage collection scan"
	waitReasonPanicWait                               // "panicwait"
	waitReasonSelect                                  // "select"
	waitReasonSelectNoCases                           // "select (no cases)"
	waitReasonGCAssistWait                            // "GC assist wait"
	waitReasonGCSweepWait                             // "GC sweep wait"
	waitReasonGCScavengeWait                          // "GC scavenge wait"
	waitReasonChanReceive                             // "chan receive"
	waitReasonChanSend                                // "chan send"
	waitReasonFinalizerWait                           // "finalizer wait"
	waitReasonForceGCIdle                             // "force gc (idle)"
	waitReasonSemacquire                              // "semacquire"
	waitReasonSleep                                   // "sleep"
	waitReasonSyncCondWait                            // "sync.Cond.Wait"
	waitReasonTimerGoroutineIdle                      // "timer goroutine (idle)"
	waitReasonTraceReaderBlocked                      // "trace reader (blocked)"
	waitReasonWaitForGCCycle                          // "wait for GC cycle"
	waitReasonGCWorkerIdle                            // "GC worker (idle)"
	waitReasonPreempted                               // "preempted"
	waitReasonDebugCall                               // "debug call"
)
228 |
// Event types in the trace, args are given in square brackets.
// NOTE(review): these look like a copy of the Go runtime's execution-trace
// event codes; confirm they match the targeted Go version.
const (
	traceEvNone              = 0  // unused
	traceEvBatch             = 1  // start of per-P batch of events [pid, timestamp]
	traceEvFrequency         = 2  // contains tracer timer frequency [frequency (ticks per second)]
	traceEvStack             = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	traceEvGomaxprocs        = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	traceEvProcStart         = 5  // start of P [timestamp, thread id]
	traceEvProcStop          = 6  // stop of P [timestamp]
	traceEvGCStart           = 7  // GC start [timestamp, seq, stack id]
	traceEvGCDone            = 8  // GC done [timestamp]
	traceEvGCSTWStart        = 9  // GC STW start [timestamp, kind]
	traceEvGCSTWDone         = 10 // GC STW done [timestamp]
	traceEvGCSweepStart      = 11 // GC sweep start [timestamp, stack id]
	traceEvGCSweepDone       = 12 // GC sweep done [timestamp, swept, reclaimed]
	traceEvGoCreate          = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	traceEvGoStart           = 14 // goroutine starts running [timestamp, goroutine id, seq]
	traceEvGoEnd             = 15 // goroutine ends [timestamp]
	traceEvGoStop            = 16 // goroutine stops (like in select{}) [timestamp, stack]
	traceEvGoSched           = 17 // goroutine calls Gosched [timestamp, stack]
	traceEvGoPreempt         = 18 // goroutine is preempted [timestamp, stack]
	traceEvGoSleep           = 19 // goroutine calls Sleep [timestamp, stack]
	traceEvGoBlock           = 20 // goroutine blocks [timestamp, stack]
	traceEvGoUnblock         = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	traceEvGoBlockSend       = 22 // goroutine blocks on chan send [timestamp, stack]
	traceEvGoBlockRecv       = 23 // goroutine blocks on chan recv [timestamp, stack]
	traceEvGoBlockSelect     = 24 // goroutine blocks on select [timestamp, stack]
	traceEvGoBlockSync       = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	traceEvGoBlockCond       = 26 // goroutine blocks on Cond [timestamp, stack]
	traceEvGoBlockNet        = 27 // goroutine blocks on network [timestamp, stack]
	traceEvGoSysCall         = 28 // syscall enter [timestamp, stack]
	traceEvGoSysExit         = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	traceEvGoSysBlock        = 30 // syscall blocks [timestamp]
	traceEvGoWaiting         = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	traceEvGoInSyscall       = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	traceEvHeapAlloc         = 33 // gcController.heapLive change [timestamp, heap_alloc]
	traceEvHeapGoal          = 34 // gcController.heapGoal (formerly next_gc) change [timestamp, heap goal in bytes]
	traceEvTimerGoroutine    = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
	traceEvFutileWakeup      = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
	traceEvString            = 37 // string dictionary entry [ID, length, string]
	traceEvGoStartLocal      = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	traceEvGoUnblockLocal    = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	traceEvGoSysExitLocal    = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	traceEvGoStartLabel      = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	traceEvGoBlockGC         = 42 // goroutine blocks on GC assist [timestamp, stack]
	traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
	traceEvGCMarkAssistDone  = 44 // GC mark assist done [timestamp]
	traceEvUserTaskCreate    = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string]
	traceEvUserTaskEnd       = 46 // end of a task [timestamp, internal task id, stack]
	traceEvUserRegion        = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string]
	traceEvUserLog           = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string]
	traceEvCount             = 49 // number of event types defined above
	// Byte is used but only 6 bits are available for event type.
	// The remaining 2 bits are used to specify the number of arguments.
	// That means, the max event type value is 63.
)
285 |
// Goroutine status constants.
// Within this package fast_park moves a goroutine from _Grunning to _Gwaiting,
// and safe_ready compares Readgstatus(gp)&^_Gscan against _Gwaiting.
const (
	// G status
	//
	// Beyond indicating the general state of a G, the G status
	// acts like a lock on the goroutine's stack (and hence its
	// ability to execute user code).
	//
	// If you add to this list, add to the list
	// of "okay during garbage collection" status
	// in mgcmark.go too.
	//
	// TODO(austin): The _Gscan bit could be much lighter-weight.
	// For example, we could choose not to run _Gscanrunnable
	// goroutines found in the run queue, rather than CAS-looping
	// until they become _Grunnable. And transitions like
	// _Gscanwaiting -> _Gscanrunnable are actually okay because
	// they don't affect stack ownership.

	// _Gidle means this goroutine was just allocated and has not
	// yet been initialized.
	_Gidle = iota // 0

	// _Grunnable means this goroutine is on a run queue. It is
	// not currently executing user code. The stack is not owned.
	_Grunnable // 1

	// _Grunning means this goroutine may execute user code. The
	// stack is owned by this goroutine. It is not on a run queue.
	// It is assigned an M and a P (g.m and g.m.p are valid).
	_Grunning // 2

	// _Gsyscall means this goroutine is executing a system call.
	// It is not executing user code. The stack is owned by this
	// goroutine. It is not on a run queue. It is assigned an M.
	_Gsyscall // 3

	// _Gwaiting means this goroutine is blocked in the runtime.
	// It is not executing user code. It is not on a run queue,
	// but should be recorded somewhere (e.g., a channel wait
	// queue) so it can be ready()d when necessary. The stack is
	// not owned *except* that a channel operation may read or
	// write parts of the stack under the appropriate channel
	// lock. Otherwise, it is not safe to access the stack after a
	// goroutine enters _Gwaiting (e.g., it may get moved).
	_Gwaiting // 4

	// _Gmoribund_unused is currently unused, but hardcoded in gdb
	// scripts.
	_Gmoribund_unused // 5

	// _Gdead means this goroutine is currently unused. It may be
	// just exited, on a free list, or just being initialized. It
	// is not executing user code. It may or may not have a stack
	// allocated. The G and its stack (if any) are owned by the M
	// that is exiting the G or that obtained the G from the free
	// list.
	_Gdead // 6

	// _Genqueue_unused is currently unused.
	_Genqueue_unused // 7

	// _Gcopystack means this goroutine's stack is being moved. It
	// is not executing user code and is not on a run queue. The
	// stack is owned by the goroutine that put it in _Gcopystack.
	_Gcopystack // 8

	// _Gpreempted means this goroutine stopped itself for a
	// suspendG preemption. It is like _Gwaiting, but nothing is
	// yet responsible for ready()ing it. Some suspendG must CAS
	// the status to _Gwaiting to take responsibility for
	// ready()ing this G.
	_Gpreempted // 9

	// _Gscan combined with one of the above states other than
	// _Grunning indicates that GC is scanning the stack. The
	// goroutine is not executing user code and the stack is owned
	// by the goroutine that set the _Gscan bit.
	//
	// _Gscanrunning is different: it is used to briefly block
	// state transitions while GC signals the G to scan its own
	// stack. This is otherwise like _Grunning.
	//
	// atomicstatus&^_Gscan gives the state the goroutine will
	// return to when the scan completes.
	_Gscan          = 0x1000
	_Gscanrunnable  = _Gscan + _Grunnable  // 0x1001
	_Gscanrunning   = _Gscan + _Grunning   // 0x1002
	_Gscansyscall   = _Gscan + _Gsyscall   // 0x1003
	_Gscanwaiting   = _Gscan + _Gwaiting   // 0x1004
	_Gscanpreempted = _Gscan + _Gpreempted // 0x1009
)
378 |
--------------------------------------------------------------------------------
/select_list.go:
--------------------------------------------------------------------------------
1 | package zenq
2 |
3 | import (
4 | "sync"
5 | "sync/atomic"
6 | "unsafe"
7 | )
8 |
// Global memory pool for storing and leasing node objects.
// nodeGet and nodePut are method values bound to nodePool, so every List in
// the package draws its nodes from (and returns them to) the same pool.
var (
	nodePool = sync.Pool{New: func() any { return new(node) }}
	nodeGet  = nodePool.Get
	nodePut  = nodePool.Put
)
15 |
// List is a lock-free linked list (FIFO queue).
// theory -> https://www.cs.rochester.edu/u/scott/papers/1996_PODC_queues.pdf
// pseudocode -> https://www.cs.rochester.edu/research/synchronization/pseudocode/queues.html
//
// head always points at a dummy node; the first real entry is head.next.
// tail points at the last node, or temporarily at its predecessor.
type List struct {
	head atomic.Pointer[node]
	tail atomic.Pointer[node]
}
23 |
24 | // NewList returns a new list
25 | func NewList() List {
26 | n := nodeGet().(*node)
27 | n.threadPtr, n.dataOut = nil, nil
28 | n.next.Store(nil)
29 | var ptr atomic.Pointer[node]
30 | ptr.Store(n)
31 | return List{head: ptr, tail: ptr}
32 | }
33 |
// node is a single entry in the linked list.
type node struct {
	// next links to the following node; nil marks the end of the list
	next atomic.Pointer[node]
	// threadPtr and dataOut are the two values handed to Enqueue and handed
	// back by Dequeue (a selector's goroutine reference and its output location)
	threadPtr *unsafe.Pointer
	dataOut   *any
}
40 |
// Enqueue appends the pair (threadPtr, dataOut) to the end of the list.
// The pair is stored in a node leased from the node pool. The loop retries
// until the node has been linked behind the current last node.
func (l *List) Enqueue(threadPtr *unsafe.Pointer, dataOut *any) {
	var (
		n          = nodeGet().(*node)
		tail, next *node
	)
	n.threadPtr, n.dataOut = threadPtr, dataOut
	for {
		tail = l.tail.Load()
		next = tail.next.Load()
		if tail == l.tail.Load() { // are tail and next consistent?
			if next == nil {
				// tail really is the last node: try to link the new node behind it
				if tail.next.CompareAndSwap(next, n) {
					// Enqueue is done. Try to swing tail to the inserted node;
					// if this CAS fails another goroutine has already advanced tail
					l.tail.CompareAndSwap(tail, n)
					return
				}
			} else { // tail was not pointing to the last node
				// try to swing tail to the next node
				l.tail.CompareAndSwap(tail, next)
			}
		}
	}
}
64 |
// Dequeue removes the entry at the head of the list and returns its
// (threadPtr, dataOut) pair. The node that is unlinked is wiped and returned
// to the node pool.
// It returns (nil, nil) if the list is empty.
// NOTE(review): nodes are recycled through a shared sync.Pool while other
// goroutines may still hold references to them — confirm that reuse cannot
// cause an ABA problem on the CAS operations here and in Enqueue.
func (l *List) Dequeue() (threadPtr *unsafe.Pointer, dataOut *any) {
	var head, tail, next *node
	for {
		head = l.head.Load()
		tail = l.tail.Load()
		next = head.next.Load()
		if head == l.head.Load() { // are head, tail, and next consistent?
			if head == tail { // is list empty or tail falling behind?
				if next == nil { // is list empty?
					return nil, nil
				}
				// tail is falling behind. try to advance it
				l.tail.CompareAndSwap(tail, next)
			} else {
				// read value before CAS_node otherwise another dequeue might free the next node
				threadPtr, dataOut = next.threadPtr, next.dataOut
				if l.head.CompareAndSwap(head, next) {
					// next is now the dummy head; the old dummy is wiped and recycled
					// sysFreeOS(unsafe.Pointer(head), nodeSize)
					head.threadPtr, head.dataOut = nil, nil
					head.next.Store(nil)
					nodePut(head)
					return // Dequeue is done. return
				}
			}
		}
	}
}
94 |
--------------------------------------------------------------------------------
/selector.go:
--------------------------------------------------------------------------------
1 | package zenq
2 |
3 | import (
4 | "sync/atomic"
5 | "unsafe"
6 | )
7 |
// Selectable is the interface a stream must implement to take part in Select.
type Selectable interface {
	// IsClosed reports whether the stream should be skipped by Select.
	IsClosed() bool
	// EnqueueSelector registers a selector; Select passes the address of its
	// goroutine reference and the address of its result variable.
	EnqueueSelector(*unsafe.Pointer, *any)
	// ReadFromBackLog returns a pending value, or nil when there is none.
	ReadFromBackLog() (data any)
	// Signal asks the stream to contest for the selection; Select sums the
	// returned values to count how many streams were signalled.
	Signal() uint8
}
15 |
16 | // Select selects a single element out of multiple ZenQs
17 | // A maximum of 127 ZenQs can be selected from at a time owing to the size of int8 type
18 | // `nil` is returned if all streams are closed or if a stream gets closed during the selection process
19 | func Select(streams ...Selectable) (data any) {
20 | numStreams := int8(len(streams) - 1)
21 | filter:
22 | for idx := int8(0); idx < numStreams; idx++ {
23 | if streams[idx] == nil || streams[idx].IsClosed() {
24 | for ; numStreams >= 0 && (streams[numStreams] == nil || streams[numStreams].IsClosed()); numStreams-- {
25 | }
26 | if idx >= numStreams {
27 | break filter
28 | }
29 | streams[idx], streams[numStreams] = streams[numStreams], streams[idx]
30 | numStreams--
31 | }
32 | }
33 | if numStreams < 0 {
34 | data = nil
35 | return
36 | }
37 |
38 | for idx := int8(0); idx <= numStreams; idx++ {
39 | if data = streams[idx].ReadFromBackLog(); data != nil {
40 | return
41 | }
42 | }
43 |
44 | g, numSignals, iter := GetG(), uint8(0), int8(0)
45 |
46 | for idx := int8(0); idx <= numStreams; idx++ {
47 | streams[idx].EnqueueSelector(&g, &data)
48 | }
49 |
50 | retry:
51 | for idx := int8(0); idx <= numStreams; idx++ {
52 | numSignals += streams[idx].Signal()
53 | }
54 |
55 | // might cause deadlock without this case
56 | if numSignals == 0 && atomic.LoadPointer(&g) != nil {
57 | // wait for some ZenQ to acquire this selector's thread
58 | if runtime_canSpin(int(iter)) {
59 | iter++
60 | spin(30)
61 | } else {
62 | mcall(gosched_m)
63 | }
64 | goto retry
65 | }
66 |
67 | // park and wait for notification
68 | mcall(fast_park)
69 | return
70 | }
71 |
--------------------------------------------------------------------------------
/thread_parker.go:
--------------------------------------------------------------------------------
1 | package zenq
2 |
3 | import (
4 | "sync/atomic"
5 | "unsafe"
6 | )
7 |
// ThreadParker is a data structure used for putting goroutines to sleep and waking them up on user call.
// It is useful for saving resources by parking excess goroutines and waking them when required with minimal latency overhead.
// It uses the same lock-free linked list algorithm as List in `select_list.go`:
// head points at a dummy spot and the first parked goroutine is head.next.
type ThreadParker[T any] struct {
	head atomic.Pointer[parkSpot[T]]
	tail atomic.Pointer[parkSpot[T]]
}
15 |
16 | // NewThreadParker returns a new thread parker.
17 | func NewThreadParker[T any](spot *parkSpot[T]) *ThreadParker[T] {
18 | var ptr atomic.Pointer[parkSpot[T]]
19 | ptr.Store(spot)
20 | return &ThreadParker[T]{head: ptr, tail: ptr}
21 | }
22 |
// parkSpot represents a single parked goroutine.
type parkSpot[T any] struct {
	// next links to the spot parked after this one; nil marks the end
	next atomic.Pointer[parkSpot[T]]
	// threadPtr is the goroutine reference handed to safe_ready by Ready
	threadPtr unsafe.Pointer
	// value is the payload Ready hands back to its caller
	value T
}
29 |
// Park appends nextNode to the queue of parked goroutines.
// It only links the spot; the caller is responsible for filling nextNode
// beforehand and for actually parking afterwards (Write does so via
// mcall(fast_park)).
// A spot is woken through goready() by Ready, one goroutine per call, which
// avoids a thundering herd https://en.wikipedia.org/wiki/Thundering_herd_problem
func (tp *ThreadParker[T]) Park(nextNode *parkSpot[T]) {
	var tail, next *parkSpot[T]
	for {
		tail = tp.tail.Load()
		next = tail.next.Load()
		if tail == tp.tail.Load() { // are tail and next consistent?
			if next == nil {
				// tail is the last spot: try to link the new spot behind it
				if tail.next.CompareAndSwap(next, nextNode) {
					// linked; try to advance tail (failure means someone else already did)
					tp.tail.CompareAndSwap(tail, nextNode)
					return
				}
			} else {
				// tail is lagging behind, help to advance it
				tp.tail.CompareAndSwap(tail, next)
			}
		}
	}
}
51 |
52 | // Ready calls one parked goroutine from the queue if available
53 | func (tp *ThreadParker[T]) Ready() (data T, ok bool, freeable *parkSpot[T]) {
54 | var head, tail, next *parkSpot[T]
55 | for {
56 | head = tp.head.Load()
57 | tail = tp.tail.Load()
58 | next = head.next.Load()
59 | if head == tp.head.Load() {
60 | if head == tail {
61 | if next == nil {
62 | return
63 | }
64 | tp.tail.CompareAndSwap(tail, next)
65 | } else {
66 | safe_ready(next.threadPtr)
67 | data, ok = next.value, true
68 | if tp.head.CompareAndSwap(head, next) {
69 | freeable = head
70 | freeable.threadPtr = nil
71 | freeable.next.Store(nil)
72 | return
73 | }
74 | }
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/zenq.go:
--------------------------------------------------------------------------------
1 | // A minimalist thread-safe queue implemented using a lock-free ringbuffer which is faster
2 | // and has lower memory allocations than golang's native channels
3 | // Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html
4 |
5 | // Known Limitations:-
6 | //
7 | // 1. Max queue_size = 2^16
8 | // 2. The queue_size is a power of 2, in case a different size is provided then queue_size is rounded up to the next greater power of 2 upto a max of 2^16
9 |
10 | // Suggestions:-
11 | //
12 | // 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for lowest latency provided you have > 1 cpu cores
13 |
14 | package zenq
15 |
16 | import (
17 | "fmt"
18 | "math"
19 | "sync"
20 | "sync/atomic"
21 | "unsafe"
22 |
23 | "github.com/alphadose/zenq/v2/constants"
24 | )
25 |
// ZenQ global state enums, stored in metaQ.globalState
const (
	// Both reads and writes are possible
	StateOpen = iota
	// No further writes can be performed and you can only read up to the last committed write in this state
	StateClosedForWrites
	// Neither reads nor writes are possible, queue is fully exhausted
	StateFullyClosed
)

// ZenQ selector state enums, stored in selectFactory.selectionState
const (
	// Open for being selected
	SelectionOpen = iota
	// Running state: the auxiliary thread has been signalled and is serving a selection
	SelectionRunning
)

// ZenQ Slot state enums, stored in the atomic.Uint32 embedded in each slot
const (
	// The slot holds no value and can be claimed by a writer
	SlotEmpty = iota
	// A reader or writer currently owns the slot
	SlotBusy
	// The slot holds a value which can be claimed by a reader
	SlotCommitted
	// The slot carries the closing marker written by Close
	SlotClosed
)
51 |
type (
	// a single slot in the queue
	slot[T any] struct {
		// queue of writers parked on this slot while it still held a committed value
		writeParker *ThreadParker[T]
		// slot state (SlotEmpty, SlotBusy, SlotCommitted or SlotClosed)
		atomic.Uint32
		// the stored value, valid while the state is SlotCommitted
		item T
	}

	// metadata of the queue
	metaQ struct {
		// one of StateOpen, StateClosedForWrites, StateFullyClosed; accessed via Load8/Store8
		globalState uint8
		// NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs
		// with known data types instead of generic type
		// using variables with lower sizes decreases memory bandwidth consumption and increases speed

		// size in bytes of one slot, used to compute slot addresses
		strideLength uint16
		// queue size - 1; ANDed with an index to wrap it around the ring
		indexMask uint16
		// address of the first slot of the ring
		contents unsafe.Pointer
		// memory pool refs for storing and leasing parking spots for goroutines
		alloc func() any
		free  func(any)
	}

	// container for the selection events among multiple queues
	selectFactory[T any] struct {
		// SelectionOpen or SelectionRunning
		selectionState atomic.Uint32
		// goroutine reference of the auxiliary selectSender goroutine
		auxThread unsafe.Pointer
		// value read by selectSender which no selector picked up
		backlog atomic.Pointer[T]
		// selectors waiting on this queue
		waitList List
	}

	// ZenQ is the CPU cache optimized ringbuffer implementation
	ZenQ[T any] struct {
		// The padding members 0 to 4 below are here to ensure each item is on a separate cache line.
		// This prevents false sharing and hence improves performance.
		_           cacheLinePadding
		writerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		readerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		metaQ
		_ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte
		selectFactory[T]
		_ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte
	}
)
97 |
// nextGreaterPowerOf2 returns the smallest power of 2 that is >= val,
// clamped to the range [1, 2^16].
// The result is computed with exact integer doubling (at most 16 iterations)
// instead of a floating point logarithm, so it does not depend on the
// precision of any log2 approximation.
func nextGreaterPowerOf2(val uint32) uint32 {
	const maxQueueSize uint32 = 1 << 16
	size := uint32(1)
	for size < val && size < maxQueueSize {
		size <<= 1
	}
	return size
}
102 |
103 | // New returns a new queue given its payload type passed as a generic parameter
104 | func New[T any](size uint32) *ZenQ[T] {
105 | var (
106 | queueSize = nextGreaterPowerOf2(size)
107 | contents = make([]slot[T], queueSize, queueSize)
108 | parkPool = sync.Pool{New: func() any { return new(parkSpot[T]) }}
109 | )
110 | for idx := uint32(0); idx < queueSize; idx++ {
111 | spot := parkPool.Get().(*parkSpot[T])
112 | spot.threadPtr = nil
113 | contents[idx].writeParker = NewThreadParker(spot)
114 | }
115 | zenq := &ZenQ[T]{
116 | metaQ: metaQ{
117 | strideLength: uint16(unsafe.Sizeof(slot[T]{})),
118 | contents: unsafe.Pointer(&contents[0]),
119 | alloc: parkPool.Get,
120 | free: parkPool.Put,
121 | indexMask: uint16(queueSize - 1),
122 | },
123 | selectFactory: selectFactory[T]{waitList: NewList()},
124 | }
125 | go zenq.selectSender()
126 | // allow the above auxillary thread to manifest
127 | mcall(gosched_m)
128 | return zenq
129 | }
130 |
131 | // Size returns the number of items in the queue at any given time
132 | func (self *ZenQ[T]) Size() uint32 {
133 | var (
134 | readerIndex uint32 = self.readerIndex.Load() & uint32(self.indexMask)
135 | writerIndex uint32 = self.writerIndex.Load() & uint32(self.indexMask)
136 | )
137 | if readerIndex > writerIndex {
138 | return uint32(self.indexMask) + 2 - (readerIndex - writerIndex)
139 | } else if writerIndex > readerIndex {
140 | return writerIndex - readerIndex + 1
141 | } else {
142 | return 0
143 | }
144 | }
145 |
146 | // Write writes a value to the queue
147 | // It returns whether the queue is currently open for writes or not
148 | // If not then it might be still open for reads, which can be checked by calling zenq.IsClosed()
149 | func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) {
150 | if Load8(&self.globalState) != StateOpen {
151 | queueClosedForWrites = true
152 | return
153 | }
154 |
155 | // Try to send directly to selector when possible or else just dequeue unselected references
156 | // in order to reduce the burden on the auxillary thread and save cpu time
157 | direct_send:
158 | if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil {
159 | if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil {
160 | // direct send to selector
161 | *dataOut = value
162 | // notify selector
163 | safe_ready(selThread)
164 | return
165 | }
166 | goto direct_send
167 | }
168 |
169 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))
170 |
171 | // CAS -> change slot_state to busy if slot_state == empty
172 | for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
173 | switch slot.Load() {
174 | case SlotBusy:
175 | wait()
176 | case SlotCommitted:
177 | n := self.alloc().(*parkSpot[T])
178 | n.threadPtr, n.value = GetG(), value
179 | n.next.Store(nil)
180 | slot.writeParker.Park(n)
181 | mcall(fast_park)
182 | return
183 | case SlotEmpty:
184 | continue
185 | case SlotClosed:
186 | return
187 | }
188 | }
189 | slot.item = value
190 | slot.Store(SlotCommitted)
191 | return
192 | }
193 |
// Read reads a value from the queue; each written value can be read only once.
// It claims the next reader index and then waits until the matching slot
// yields a value.
// queueOpen is true when data holds a value, and false when the queue turned
// out to be closed (data is then not meaningful).
func (self *ZenQ[T]) Read() (data T, queueOpen bool) {
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == committed
	for !slot.CompareAndSwap(SlotCommitted, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			wait()
		case SlotEmpty:
			// nothing committed in the slot: try to take the value of a writer parked on it
			var freeable *parkSpot[T]
			if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen {
				// hand the unlinked parking spot back to the memory pool
				self.free(freeable)
				return
			} else if Load8(&self.globalState) != StateFullyClosed {
				// queue still in use but nothing to read yet, yield and retry
				mcall(gosched_m)
			} else {
				// queue is closed, decrement the reader index by 1
				self.readerIndex.Add(math.MaxUint32)
				queueOpen = false
				return
			}
		case SlotClosed:
			// closing marker reached: the reader which clears it marks the queue as fully closed
			if slot.CompareAndSwap(SlotClosed, SlotEmpty) {
				Store8(&self.globalState, StateFullyClosed)
			}
			queueOpen = false
			return
		case SlotCommitted:
			continue
		}
	}
	// the slot is owned by this reader now: copy the item out and release the slot
	data, queueOpen = slot.item, true
	slot.Store(SlotEmpty)
	return
}
230 |
231 | // Close closes the ZenQ for further writes
232 | // You can only read uptill the last committed write after closing
233 | // This function will be blocking in case the queue is full
234 | // ZenQ is closed from a writer goroutine by design, hence it should always be called
235 | // from a writer goroutine and never from a reader goroutine which might cause the reader to get blocked and hence deadlock
236 | // It returns if the queue was already closed for writes or not
237 | func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) {
238 | // This ensures a ZenQ is closed only once even if this function is called multiple times making this operation safe
239 | if Load8(&self.globalState) != StateOpen {
240 | alreadyClosedForWrites = true
241 | return
242 | }
243 | Store8(&self.globalState, StateClosedForWrites)
244 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))
245 |
246 | // CAS -> change slot_state to busy if slot_state == empty
247 | for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
248 | switch slot.Load() {
249 | case SlotBusy, SlotCommitted:
250 | mcall(gosched_m)
251 | case SlotEmpty:
252 | continue
253 | case SlotClosed:
254 | return
255 | }
256 | }
257 | // Closing commit
258 | slot.Store(SlotClosed)
259 | return
260 | }
261 |
// CloseAsync closes the queue asynchronously by running Close in a new goroutine.
// Useful when a user wants to close the queue from the reader end without blocking the calling goroutine.
// The result of Close is discarded.
func (self *ZenQ[T]) CloseAsync() {
	go self.Close()
}
267 |
268 | // The following 4 functions below implement the Selectable interface
269 |
270 | // ReadFromBackLog tries to read a data from backlog if available
271 | func (self *ZenQ[T]) ReadFromBackLog() (data any) {
272 | if d := self.backlog.Swap(nil); d != nil {
273 | data = *((*T)(d))
274 | }
275 | return
276 | }
277 |
278 | // Signal is the mechanism by which a selector notifies this ZenQ's auxillary thread to contest for the selection
279 | func (self *ZenQ[T]) Signal() uint8 {
280 | if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) {
281 | return 0
282 | } else {
283 | safe_ready(self.auxThread)
284 | return 1
285 | }
286 | }
287 |
// EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist.
// threadPtr is the address of the selector's goroutine reference and dataOut
// the address where the selected value is to be written.
func (self *ZenQ[T]) EnqueueSelector(threadPtr *unsafe.Pointer, dataOut *any) {
	self.waitList.Enqueue(threadPtr, dataOut)
}
292 |
// IsClosed returns whether the zenq is closed for both reads and writes,
// i.e. whether its global state is StateFullyClosed.
func (self *ZenQ[T]) IsClosed() bool {
	return Load8(&self.globalState) == StateFullyClosed
}
297 |
298 | // Reset resets the queue state
299 | // This also releases all parked goroutines if any and drains all committed writes
300 | func (self *ZenQ[T]) Reset() {
301 | // Close() is blocking when queue is full hence execute it asynchronously
302 | self.CloseAsync()
303 | // drain entire queue
304 | for open := true; open; _, open = self.Read() {
305 | }
306 | Store8(&self.globalState, StateOpen)
307 | }
308 |
309 | // Dump dumps the current queue state
310 | // Unsafe to be called from multiple goroutines
311 | func (self *ZenQ[T]) Dump() {
312 | fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex, self.readerIndex)
313 | for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ {
314 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{})))
315 | fmt.Printf("Slot -> %#v\n", *slot)
316 | }
317 | }
318 |
// selectSender is an auxiliary goroutine which remains parked by default.
// Only when a selector sends a signal is it woken up, after which it tries to send a value back to a selector.
// If it fails, it parks again and waits for another signal from another selection process.
// Since it is parked most of the time, it consumes minimal cpu time, making the selection process efficient.
// The loop never terminates.
func (self *ZenQ[T]) selectSender() {
	// publish this goroutine's reference so that Signal() can wake it
	atomic.StorePointer(&self.auxThread, GetG())
	var (
		data                 T
		threadPtr            unsafe.Pointer
		readState, queueOpen bool = false, true
		selectorThread       *unsafe.Pointer
		dataOut              *any
	)

	for {
		// park by default and wait for Signal() notification from a selection process
		mcall(fast_park)
		// readState tells whether data already holds a value which no selector has received yet
		if !readState {
			data, queueOpen = self.Read()
			readState = true
		}

	selector_dequeue:
		for {
			// keep dequeuing selectors from waitlist and try to acquire one
			// if acquired write to selector, ready it and go back to parking state
			if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil {
				// swapping in nil claims the selector; a nil result means someone else already served it
				if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil {
					// implementation of sending from closed channel to selector mechanism
					if queueOpen {
						// write to the selector
						*dataOut = data
					} else {
						// send nil from closed channel
						*dataOut = nil
					}
					// notify selector
					safe_ready(threadPtr)
					readState = false
					break selector_dequeue
				} else {
					continue
				}
			} else {
				// waitlist exhausted without acquiring a selector
				break selector_dequeue
			}
		}
		// if not selected by any selector, commit data to backlog and wait for next signal
		// saves a lot of cpu time
		// NOTE(review): readState stays true after the value is placed in the backlog, so the
		// same value is offered again on the next signal — confirm it cannot be delivered twice
		if readState && queueOpen {
			var i T = data
			self.backlog.Store(&i)
		}
		// allow the next Signal() to wake this goroutine again
		self.selectionState.Store(SelectionOpen)
	}
}
375 |
--------------------------------------------------------------------------------