├── LICENSE ├── README.md ├── asm_386.s ├── asm_amd64.s ├── asm_arm.s ├── asm_arm64.s ├── asm_mips.s ├── asm_mips64.s ├── asm_ppc64.s ├── asm_s390x.s ├── bench_reports ├── darwin_arm64_m1 │ ├── 1.3.0.txt │ ├── 1.4.0.txt │ ├── 1.5.0.txt │ ├── 2.0.0.txt │ ├── 2.1.0.txt │ ├── 2.2.0.txt │ ├── 2.2.1.txt │ ├── 2.3.0.txt │ ├── 2.4.0.txt │ ├── 2.4.0_alternate.txt │ ├── 2.4.0_on_battery.txt │ ├── 2.5.0.txt │ ├── 2.5.1.txt │ ├── 2.5.2.txt │ ├── 2.7.0.txt │ └── 2.7.1.txt ├── raspian_arm32.txt ├── ubuntu_amd64_16core.txt ├── ubuntu_intel_xeon.txt └── windows_amd64_16core.txt ├── benchmarks ├── cgo_test │ └── cgobench.go ├── e2e │ ├── benchmark_test.go │ └── benchsuite_test.go ├── selector │ └── main.go └── simple │ ├── main.go │ └── main_test.go ├── constants ├── constants_386.go ├── constants_amd64.go ├── constants_arm.go ├── constants_arm64.go ├── constants_mips.go ├── constants_mips64.go ├── constants_mips64le.go ├── constants_mipsle.go ├── constants_ppc64x.go ├── constants_riscv64.go ├── constants_s390x.go └── constants_wasm.go ├── examples ├── selector │ └── main.go └── simple │ └── main.go ├── go.mod ├── go.sum ├── lib_runtime_fastrand.go ├── lib_runtime_fastrand_1.22.go ├── lib_runtime_linkage.go ├── select_list.go ├── selector.go ├── thread_parker.go └── zenq.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Anish Mukherjee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or 
substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ZenQ 2 | 3 | > A low-latency thread-safe queue in golang implemented using a lock-free ringbuffer and runtime internals 4 | 5 | Based on the [LMAX Disruptor Pattern](https://lmax-exchange.github.io/disruptor/disruptor.html) 6 | 7 | ## Features 8 | 9 | * Much faster than native channels in both SPSC (single-producer-single-consumer) and MPSC (multi-producer-single-consumer) modes in terms of `time/op` 10 | * More resource efficient in terms of `memory_allocation/op` and `num_allocations/op` evident while benchmarking large batch size inputs 11 | * Handles the case where NUM_WRITER_GOROUTINES > NUM_CPU_CORES much better than native channels 12 | * Selection from multiple ZenQs just like golang's `select{}` ensuring fair selection and no starvation 13 | * Closing a ZenQ 14 | 15 | Benchmarks to support the above claims [here](#benchmarks) 16 | 17 | ## Installation 18 | 19 | You need Golang [1.19.x](https://go.dev/dl/) or above 20 | 21 | ```bash 22 | $ go get github.com/alphadose/zenq/v2 23 | ``` 24 | 25 | ## Usage 26 | 27 | 1. 
Simple Read/Write 28 | ```go 29 | package main 30 | 31 | import ( 32 | "fmt" 33 | 34 | "github.com/alphadose/zenq/v2" 35 | ) 36 | 37 | type payload struct { 38 | alpha int 39 | beta string 40 | } 41 | 42 | func main() { 43 | zq := zenq.New[payload](10) 44 | 45 | for j := 0; j < 5; j++ { 46 | go func() { 47 | for i := 0; i < 20; i++ { 48 | zq.Write(payload{ 49 | alpha: i, 50 | beta: fmt.Sprint(i), 51 | }) 52 | } 53 | }() 54 | } 55 | 56 | for i := 0; i < 100; i++ { 57 | if data, queueOpen := zq.Read(); queueOpen { 58 | fmt.Printf("%+v\n", data) 59 | } 60 | } 61 | } 62 | ``` 63 | 64 | 2. **Selection** from multiple ZenQs just like golang's native `select{}`. The selection process is fair i.e no single ZenQ gets starved 65 | ```go 66 | package main 67 | 68 | import ( 69 | "fmt" 70 | 71 | "github.com/alphadose/zenq/v2" 72 | ) 73 | 74 | type custom1 struct { 75 | alpha int 76 | beta string 77 | } 78 | 79 | type custom2 struct { 80 | gamma int 81 | } 82 | 83 | const size = 100 84 | 85 | var ( 86 | zq1 = zenq.New[int](size) 87 | zq2 = zenq.New[string](size) 88 | zq3 = zenq.New[custom1](size) 89 | zq4 = zenq.New[*custom2](size) 90 | ) 91 | 92 | func main() { 93 | go looper(intProducer) 94 | go looper(stringProducer) 95 | go looper(custom1Producer) 96 | go looper(custom2Producer) 97 | 98 | for i := 0; i < 40; i++ { 99 | 100 | // Selection occurs here 101 | if data := zenq.Select(zq1, zq2, zq3, zq4); data != nil { 102 | switch data.(type) { 103 | case int: 104 | fmt.Printf("Received int %d\n", data) 105 | case string: 106 | fmt.Printf("Received string %s\n", data) 107 | case custom1: 108 | fmt.Printf("Received custom data type number 1 %#v\n", data) 109 | case *custom2: 110 | fmt.Printf("Received pointer %#v\n", data) 111 | } 112 | } 113 | } 114 | } 115 | 116 | func intProducer(ctr int) { zq1.Write(ctr) } 117 | 118 | func stringProducer(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) } 119 | 120 | func custom1Producer(ctr int) { zq3.Write(custom1{alpha: ctr, beta: 
fmt.Sprint(ctr)}) } 121 | 122 | func custom2Producer(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) } 123 | 124 | func looper(producer func(ctr int)) { 125 | for i := 0; i < 10; i++ { 126 | producer(i) 127 | } 128 | } 129 | ``` 130 | 131 | ## Benchmarks 132 | 133 | Benchmarking code available [here](./benchmarks) 134 | 135 | Note that if you run the benchmarks with `--race` flag then ZenQ will perform slower because the `--race` flag slows 136 | down the atomic operations in golang. Under normal circumstances, ZenQ will outperform golang native channels. 137 | 138 | ### Hardware Specs 139 | 140 | ``` 141 | ❯ neofetch 142 | 'c. alphadose@ReiEki.local 143 | ,xNMM. ---------------------- 144 | .OMMMMo OS: macOS 12.3 21E230 arm64 145 | OMMM0, Host: MacBookAir10,1 146 | .;loddo:' loolloddol;. Kernel: 21.4.0 147 | cKMMMMMMMMMMNWMMMMMMMMMM0: Uptime: 6 hours, 41 mins 148 | .KMMMMMMMMMMMMMMMMMMMMMMMWd. Packages: 86 (brew) 149 | XMMMMMMMMMMMMMMMMMMMMMMMX. Shell: zsh 5.8 150 | ;MMMMMMMMMMMMMMMMMMMMMMMM: Resolution: 1440x900 151 | :MMMMMMMMMMMMMMMMMMMMMMMM: DE: Aqua 152 | .MMMMMMMMMMMMMMMMMMMMMMMMX. WM: Rectangle 153 | kMMMMMMMMMMMMMMMMMMMMMMMMWd. Terminal: iTerm2 154 | .XMMMMMMMMMMMMMMMMMMMMMMMMMMk Terminal Font: FiraCodeNerdFontComplete-Medium 16 (normal) 155 | .XMMMMMMMMMMMMMMMMMMMMMMMMK. CPU: Apple M1 156 | kMMMMMMMMMMMMMMMMMMMMMMd GPU: Apple M1 157 | ;KMMMMMMMWXXWMMMMMMMk. Memory: 1370MiB / 8192MiB 158 | .cooc,. .,coo:. 159 | 160 | ``` 161 | 162 | ### Terminology 163 | 164 | * NUM_WRITERS -> The number of goroutines concurrently writing to ZenQ/Channel 165 | * INPUT_SIZE -> The number of input payloads to be passed through ZenQ/Channel from producers to consumer 166 | 167 | ```bash 168 | Computed from benchstat of 30 benchmarks each via go test -benchmem -bench=. 
benchmarks/simple/*.go 169 | 170 | name time/op 171 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1% 172 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1% 173 | _Chan_NumWriters3_InputSize60000-8 5.27ms ± 3% 174 | _ZenQ_NumWriters3_InputSize60000-8 2.36ms ± 2% 175 | _Chan_NumWriters8_InputSize6000000-8 671ms ± 2% 176 | _ZenQ_NumWriters8_InputSize6000000-8 234ms ± 6% 177 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4% 178 | _ZenQ_NumWriters100_InputSize6000000-8 309ms ± 2% 179 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 180 | _ZenQ_NumWriters1000_InputSize7000000-8 389ms ± 4% 181 | _Chan_Million_Blocking_Writers-8 10.4s ± 2% 182 | _ZenQ_Million_Blocking_Writers-8 2.32s ±21% 183 | 184 | name alloc/op 185 | _Chan_NumWriters1_InputSize600-8 0.00B 186 | _ZenQ_NumWriters1_InputSize600-8 0.00B 187 | _Chan_NumWriters3_InputSize60000-8 109B ±68% 188 | _ZenQ_NumWriters3_InputSize60000-8 24.6B ±107% 189 | _Chan_NumWriters8_InputSize6000000-8 802B ±241% 190 | _ZenQ_NumWriters8_InputSize6000000-8 1.18kB ±100% 191 | _Chan_NumWriters100_InputSize6000000-8 44.2kB ±41% 192 | _ZenQ_NumWriters100_InputSize6000000-8 10.7kB ±38% 193 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 8% 194 | _ZenQ_NumWriters1000_InputSize7000000-8 90.6kB ±10% 195 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 196 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3% 197 | 198 | name allocs/op 199 | _Chan_NumWriters1_InputSize600-8 0.00 200 | _ZenQ_NumWriters1_InputSize600-8 0.00 201 | _Chan_NumWriters3_InputSize60000-8 0.00 202 | _ZenQ_NumWriters3_InputSize60000-8 0.00 203 | _Chan_NumWriters8_InputSize6000000-8 2.76 ±190% 204 | _ZenQ_NumWriters8_InputSize6000000-8 5.47 ±83% 205 | _Chan_NumWriters100_InputSize6000000-8 159 ±26% 206 | _ZenQ_NumWriters100_InputSize6000000-8 25.1 ±39% 207 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 6% 208 | _ZenQ_NumWriters1000_InputSize7000000-8 47.3 ±31% 209 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 210 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 211 | ``` 212 
| 213 | The above results show that ZenQ is more efficient than channels in all 3 metrics i.e `time/op`, `mem_alloc/op` and `num_allocs/op` for the following tested cases:- 214 | 215 | 1. SPSC 216 | 2. MPSC with NUM_WRITER_GOROUTINES < NUM_CPU_CORES 217 | 3. MPSC with NUM_WRITER_GOROUTINES > NUM_CPU_CORES 218 | 219 | 220 | ## Cherry on the Cake 221 | 222 | In SPSC mode ZenQ is faster than channels by **92 seconds** in case of input size of 6 * 10<sup>8</sup> elements 223 | 224 | ```bash 225 | ❯ go run benchmarks/simple/main.go 226 | 227 | With Input Batch Size: 60 and Num Concurrent Writers: 1 228 | 229 | Native Channel Runner completed transfer in: 26.916µs 230 | ZenQ Runner completed transfer in: 20.292µs 231 | ==================================================================== 232 | 233 | With Input Batch Size: 600 and Num Concurrent Writers: 1 234 | 235 | Native Channel Runner completed transfer in: 135.75µs 236 | ZenQ Runner completed transfer in: 105.792µs 237 | ==================================================================== 238 | 239 | With Input Batch Size: 6000 and Num Concurrent Writers: 1 240 | 241 | Native Channel Runner completed transfer in: 2.100209ms 242 | ZenQ Runner completed transfer in: 510.792µs 243 | ==================================================================== 244 | 245 | With Input Batch Size: 6000000 and Num Concurrent Writers: 1 246 | 247 | Native Channel Runner completed transfer in: 1.241481917s 248 | ZenQ Runner completed transfer in: 226.068209ms 249 | ==================================================================== 250 | 251 | With Input Batch Size: 600000000 and Num Concurrent Writers: 1 252 | 253 | Native Channel Runner completed transfer in: 1m55.074638875s 254 | ZenQ Runner completed transfer in: 22.582667917s 255 | ==================================================================== 256 | ``` 257 | -------------------------------------------------------------------------------- /asm_386.s: 
-------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define get_tls(r) MOVL TLS, r 5 | #define g(r) 0(r)(TLS*1) 6 | // GetG loads the g pointer from its thread-local slot (see g(r) above) and stores it in the result at 0(FP). 7 | TEXT ·GetG(SB),NOSPLIT,$0-4 8 | get_tls(CX) 9 | MOVL g(CX), AX 10 | MOVL AX, gp+0(FP) 11 | RET 12 | -------------------------------------------------------------------------------- /asm_amd64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define get_tls(r) MOVQ TLS, r 5 | #define g(r) 0(r)(TLS*1) 6 | // GetG loads the g pointer from its thread-local slot (see g(r) above) and stores it in the result at 0(FP). 7 | TEXT ·GetG(SB),NOSPLIT,$0-8 8 | get_tls(CX) 9 | MOVQ g(CX), AX 10 | MOVQ AX, gp+0(FP) 11 | RET 12 | -------------------------------------------------------------------------------- /asm_arm.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define get_tls(r) MOVW g, r 5 | // GetG copies the g register into the result at 0(FP). 6 | TEXT ·GetG(SB),NOSPLIT,$0-4 7 | get_tls(R1) 8 | MOVW R1, gp+0(FP) 9 | RET 10 | -------------------------------------------------------------------------------- /asm_arm64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define get_tls(r) MOVD g, r 5 | // GetG copies the g register into the result at 0(FP). 6 | TEXT ·GetG(SB),NOSPLIT,$0-8 7 | get_tls(R1) 8 | MOVD R1, gp+0(FP) 9 | RET 10 | -------------------------------------------------------------------------------- /asm_mips.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define get_tls(r) MOVW g, r 5 | // GetG copies the g register into the result at 0(FP). MOVW is the 32-bit integer move on mips; MOVD is the float64 move and is not valid between general-purpose registers. 6 | TEXT ·GetG(SB),NOSPLIT,$0-4 7 | get_tls(R1) 8 | MOVW R1, gp+0(FP) 9 | RET 10 | -------------------------------------------------------------------------------- /asm_mips64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | 4 | #define 
get_tls(r) MOVV g, r 5 | // GetG copies the g register into the result at 0(FP). MOVV is the 64-bit integer move on mips64; MOVD is the float64 move and is not valid between general-purpose registers. 6 | TEXT ·GetG(SB),NOSPLIT,$0-8 7 | get_tls(R1) 8 | MOVV R1, gp+0(FP) 9 | RET 10 | -------------------------------------------------------------------------------- /asm_ppc64.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | // GetG copies the g register into the result at 0(FP). 4 | TEXT ·GetG(SB), NOSPLIT, $0-8 5 | MOVD g, R8 6 | MOVD R8, ret+0(FP) 7 | RET 8 | -------------------------------------------------------------------------------- /asm_s390x.s: -------------------------------------------------------------------------------- 1 | #include "textflag.h" 2 | #include "go_asm.h" 3 | // GetG copies the g register into the result at 0(FP). 4 | TEXT ·GetG(SB), NOSPLIT, $0-8 5 | MOVD g, R8 6 | MOVD R8, ret+0(FP) 7 | RET 8 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/1.3.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 24.6µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 16.5µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 6.21ms ± 2% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.85ms ± 0% 6 | _Chan_NumWriters8_InputSize6000000-8 735ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 417ms ± 0% 8 | _Chan_NumWriters100_InputSize6000000-8 1.61s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 741ms ± 3% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 0% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 1.05s ± 1% 12 | _Chan_Million_Blocking_Writers-8 10.0s ±13% 13 | _ZenQ_Million_Blocking_Writers-8 7.01s ±44% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 106B ±88% 19 | _ZenQ_NumWriters3_InputSize60000-8 28.9B ±111% 20 | _Chan_NumWriters8_InputSize6000000-8 946B ±267% 21 | _ZenQ_NumWriters8_InputSize6000000-8 885B ±163% 22 | _Chan_NumWriters100_InputSize6000000-8 46.7kB ±25% 23 | _ZenQ_NumWriters100_InputSize6000000-8 
16.2kB ±66% 24 | _Chan_NumWriters1000_InputSize7000000-8 484kB ±10% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 62.4kB ±82% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 95.9MB ± 0% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 3.07 ±193% 35 | _ZenQ_NumWriters8_InputSize6000000-8 2.07 ±142% 36 | _Chan_NumWriters100_InputSize6000000-8 166 ±15% 37 | _ZenQ_NumWriters100_InputSize6000000-8 53.5 ±50% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.74k ± 7% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 525 ±39% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/1.4.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 3% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.67ms ± 6% 6 | _Chan_NumWriters8_InputSize6000000-8 679ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 313ms ± 5% 8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 516ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 445ms ± 9% 12 | _Chan_Million_Blocking_Writers-8 10.8s ± 1% 13 | _ZenQ_Million_Blocking_Writers-8 11.0s ± 4% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 95.0B ±65% 19 | _ZenQ_NumWriters3_InputSize60000-8 34.8B ±127% 20 | _Chan_NumWriters8_InputSize6000000-8 878B ±272% 21 | 
_ZenQ_NumWriters8_InputSize6000000-8 671B ±222% 22 | _Chan_NumWriters100_InputSize6000000-8 46.0kB ±31% 23 | _ZenQ_NumWriters100_InputSize6000000-8 13.3kB ±100% 24 | _Chan_NumWriters1000_InputSize7000000-8 488kB ± 8% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 2.37kB ±210% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 95.5MB ± 0% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.77 ±225% 35 | _ZenQ_NumWriters8_InputSize6000000-8 1.40 ±186% 36 | _Chan_NumWriters100_InputSize6000000-8 164 ±20% 37 | _ZenQ_NumWriters100_InputSize6000000-8 31.8 ±100% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.79k ± 3% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 5.50 ±227% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 995k ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/1.5.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 18.1µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.52ms ± 3% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.67ms ± 6% 6 | _Chan_NumWriters8_InputSize6000000-8 680ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 308ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.56s ± 6% 9 | _ZenQ_NumWriters100_InputSize6000000-8 519ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 441ms ±11% 12 | _Chan_Million_Blocking_Writers-8 10.4s ± 3% 13 | _ZenQ_Million_Blocking_Writers-8 8.56s ±24% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 
110B ±68% 19 | _ZenQ_NumWriters3_InputSize60000-8 23.6B ±107% 20 | _Chan_NumWriters8_InputSize6000000-8 585B ±234% 21 | _ZenQ_NumWriters8_InputSize6000000-8 411B ±299% 22 | _Chan_NumWriters100_InputSize6000000-8 44.7kB ±35% 23 | _ZenQ_NumWriters100_InputSize6000000-8 19.7kB ±78% 24 | _Chan_NumWriters1000_InputSize7000000-8 483kB ±10% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 1.13kB ±602% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 95.5MB ± 0% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.20 ±218% 35 | _ZenQ_NumWriters8_InputSize6000000-8 0.90 ±344% 36 | _Chan_NumWriters100_InputSize6000000-8 163 ±18% 37 | _ZenQ_NumWriters100_InputSize6000000-8 47.0 ±79% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.79k ± 6% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 2.00 ±550% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 995k ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.0.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 2% 4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 2% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.85ms ± 2% 6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 2% 7 | _ZenQ_NumWriters8_InputSize6000000-8 180ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4% 9 | _ZenQ_NumWriters100_InputSize6000000-8 206ms ± 6% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 300ms ± 7% 12 | _Chan_Million_Blocking_Writers-8 10.8s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 11.8s ± 5% 14 | 15 | name alloc/op 16 | 
_Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 114B ±53% 19 | _ZenQ_NumWriters3_InputSize60000-8 29.7B ±105% 20 | _Chan_NumWriters8_InputSize6000000-8 547B ±362% 21 | _ZenQ_NumWriters8_InputSize6000000-8 941B ±114% 22 | _Chan_NumWriters100_InputSize6000000-8 45.8kB ±33% 23 | _ZenQ_NumWriters100_InputSize6000000-8 6.86kB ±73% 24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 5% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 42.6kB ±26% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 48.0MB ±12% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 1.63 ±268% 35 | _ZenQ_NumWriters8_InputSize6000000-8 3.87 ±107% 36 | _Chan_NumWriters100_InputSize6000000-8 162 ±29% 37 | _ZenQ_NumWriters100_InputSize6000000-8 17.3 ±74% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 3% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 28.1 ±46% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.1.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.6µs ± 0% 4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 4% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 1% 6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 2% 7 | _ZenQ_NumWriters8_InputSize6000000-8 150ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 4% 9 | _ZenQ_NumWriters100_InputSize6000000-8 162ms ± 9% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 313ms ±19% 12 | 
_Chan_Million_Blocking_Writers-8 10.5s ± 3% 13 | _ZenQ_Million_Blocking_Writers-8 10.4s ± 6% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 104B ±54% 19 | _ZenQ_NumWriters3_InputSize60000-8 22.2B ±91% 20 | _Chan_NumWriters8_InputSize6000000-8 813B ±307% 21 | _ZenQ_NumWriters8_InputSize6000000-8 690B ±115% 22 | _Chan_NumWriters100_InputSize6000000-8 42.6kB ±36% 23 | _ZenQ_NumWriters100_InputSize6000000-8 5.92kB ±118% 24 | _Chan_NumWriters1000_InputSize7000000-8 475kB ±11% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 41.6kB ±34% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 47.4MB ± 9% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.77 ±225% 35 | _ZenQ_NumWriters8_InputSize6000000-8 2.63 ±52% 36 | _Chan_NumWriters100_InputSize6000000-8 157 ±17% 37 | _ZenQ_NumWriters100_InputSize6000000-8 14.3 ±116% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 30.3 ±42% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.2.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.50ms ± 1% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 4% 6 | _Chan_NumWriters8_InputSize6000000-8 686ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 153ms ± 3% 8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 166ms ± 7% 
10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 318ms ±12% 12 | _Chan_Million_Blocking_Writers-8 10.8s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 10.3s ± 5% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 97.2B ±60% 19 | _ZenQ_NumWriters3_InputSize60000-8 28.5B ±121% 20 | _Chan_NumWriters8_InputSize6000000-8 922B ±297% 21 | _ZenQ_NumWriters8_InputSize6000000-8 860B ±87% 22 | _Chan_NumWriters100_InputSize6000000-8 43.8kB ±39% 23 | _ZenQ_NumWriters100_InputSize6000000-8 6.18kB ±69% 24 | _Chan_NumWriters1000_InputSize7000000-8 472kB ±11% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 38.9kB ±47% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 46.3MB ±10% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.83 ±182% 35 | _ZenQ_NumWriters8_InputSize6000000-8 3.25 ±54% 36 | _Chan_NumWriters100_InputSize6000000-8 161 ±24% 37 | _ZenQ_NumWriters100_InputSize6000000-8 15.0 ±67% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 26.2 ±37% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.2.1.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 38.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 1% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 1% 6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 
254ms ± 3% 8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 298ms ± 1% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 409ms ± 1% 12 | _Chan_Million_Blocking_Writers-8 10.5s ± 1% 13 | _ZenQ_Million_Blocking_Writers-8 1.99s ±16% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 17.5B ±163% 19 | _ZenQ_NumWriters3_InputSize60000-8 13.4B ±348% 20 | _Chan_NumWriters8_InputSize6000000-8 123B ±148% 21 | _ZenQ_NumWriters8_InputSize6000000-8 545B ±56% 22 | _Chan_NumWriters100_InputSize6000000-8 36.1kB ±49% 23 | _ZenQ_NumWriters100_InputSize6000000-8 9.32kB ±32% 24 | _Chan_NumWriters1000_InputSize7000000-8 479kB ± 8% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 89.3kB ± 5% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 1.10 ±173% 35 | _ZenQ_NumWriters8_InputSize6000000-8 3.19 ±57% 36 | _Chan_NumWriters100_InputSize6000000-8 140 ±32% 37 | _ZenQ_NumWriters100_InputSize6000000-8 21.8 ±33% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 46.5 ±27% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.3.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 38.3µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.54ms ± 6% 5 | 
_ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 2% 6 | _Chan_NumWriters8_InputSize6000000-8 680ms ± 3% 7 | _ZenQ_NumWriters8_InputSize6000000-8 254ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 5% 9 | _ZenQ_NumWriters100_InputSize6000000-8 292ms ± 3% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 408ms ± 3% 12 | _Chan_Million_Blocking_Writers-8 10.6s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 1.98s ±26% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 17.4B ±267% 19 | _ZenQ_NumWriters3_InputSize60000-8 15.6B ±291% 20 | _Chan_NumWriters8_InputSize6000000-8 132B ±118% 21 | _ZenQ_NumWriters8_InputSize6000000-8 248B ±227% 22 | _Chan_NumWriters100_InputSize6000000-8 35.7kB ±45% 23 | _ZenQ_NumWriters100_InputSize6000000-8 2.74kB ±181% 24 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 7% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 949B ±265% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 5% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 1.30 ±131% 35 | _ZenQ_NumWriters8_InputSize6000000-8 1.57 ±219% 36 | _Chan_NumWriters100_InputSize6000000-8 139 ±33% 37 | _ZenQ_NumWriters100_InputSize6000000-8 6.14 ±193% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 2.70 ±344% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.4.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | 
_Chan_NumWriters1_InputSize600-8 23.4µs ± 2% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.53ms ± 5% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.61ms ± 3% 6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 247ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 1% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 410ms ± 2% 12 | _Chan_Million_Blocking_Writers-8 10.8s ± 1% 13 | _ZenQ_Million_Blocking_Writers-8 2.54s ±11% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 119B ±58% 19 | _ZenQ_NumWriters3_InputSize60000-8 29.3B ±100% 20 | _Chan_NumWriters8_InputSize6000000-8 647B ±306% 21 | _ZenQ_NumWriters8_InputSize6000000-8 467B ±309% 22 | _Chan_NumWriters100_InputSize6000000-8 42.1kB ±31% 23 | _ZenQ_NumWriters100_InputSize6000000-8 2.19kB ±213% 24 | _Chan_NumWriters1000_InputSize7000000-8 482kB ± 7% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 1.42kB ±252% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.13 ±181% 35 | _ZenQ_NumWriters8_InputSize6000000-8 0.87 ±362% 36 | _Chan_NumWriters100_InputSize6000000-8 157 ±29% 37 | _ZenQ_NumWriters100_InputSize6000000-8 5.47 ±284% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.78k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 3.27 ±267% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- 
/bench_reports/darwin_arm64_m1/2.4.0_alternate.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.8µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.48ms ± 2% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.65ms ± 5% 6 | _Chan_NumWriters8_InputSize6000000-8 684ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 251ms ± 5% 8 | _Chan_NumWriters100_InputSize6000000-8 1.57s ± 6% 9 | _ZenQ_NumWriters100_InputSize6000000-8 300ms ± 4% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 411ms ± 3% 12 | _Chan_Million_Blocking_Writers-8 10.6s ± 1% 13 | _ZenQ_Million_Blocking_Writers-8 1.99s ±19% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 108B ±64% 19 | _ZenQ_NumWriters3_InputSize60000-8 27.5B ±111% 20 | _Chan_NumWriters8_InputSize6000000-8 818B ±248% 21 | _ZenQ_NumWriters8_InputSize6000000-8 545B ±175% 22 | _Chan_NumWriters100_InputSize6000000-8 44.9kB ±34% 23 | _ZenQ_NumWriters100_InputSize6000000-8 2.15kB ±210% 24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 8% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 771B ±354% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.37 ±238% 35 | _ZenQ_NumWriters8_InputSize6000000-8 1.66 ±202% 36 | _Chan_NumWriters100_InputSize6000000-8 162 ±19% 37 | _ZenQ_NumWriters100_InputSize6000000-8 4.87 ±229% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 5% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 1.73 ±362% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | 
_ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.4.0_on_battery.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 0% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.42ms ± 7% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.60ms ± 1% 6 | _Chan_NumWriters8_InputSize6000000-8 687ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 243ms ± 5% 8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 2% 9 | _ZenQ_NumWriters100_InputSize6000000-8 295ms ± 3% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 404ms ± 3% 12 | _Chan_Million_Blocking_Writers-8 8.02s ±33% 13 | _ZenQ_Million_Blocking_Writers-8 1.71s ±17% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 101B ±65% 19 | _ZenQ_NumWriters3_InputSize60000-8 28.1B ±113% 20 | _Chan_NumWriters8_InputSize6000000-8 891B ±191% 21 | _ZenQ_NumWriters8_InputSize6000000-8 664B ±163% 22 | _Chan_NumWriters100_InputSize6000000-8 43.1kB ±40% 23 | _ZenQ_NumWriters100_InputSize6000000-8 2.75kB ±154% 24 | _Chan_NumWriters1000_InputSize7000000-8 483kB ± 4% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 554B ±626% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 124MB ± 3% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.83 ±147% 35 | _ZenQ_NumWriters8_InputSize6000000-8 1.48 ±237% 36 | _Chan_NumWriters100_InputSize6000000-8 156 ±27% 37 | _ZenQ_NumWriters100_InputSize6000000-8 6.23 ±157% 38 | 
_Chan_NumWriters1000_InputSize7000000-8 1.78k ± 4% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 1.54 ±550% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.5.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.3µs ± 2% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.52ms ± 3% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.64ms ± 2% 6 | _Chan_NumWriters8_InputSize6000000-8 686ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 244ms ± 5% 8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 2% 9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.98s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 405ms ± 4% 12 | _Chan_Million_Blocking_Writers-8 10.6s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 1.92s ±20% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 101B ±75% 19 | _ZenQ_NumWriters3_InputSize60000-8 19.0B ±132% 20 | _Chan_NumWriters8_InputSize6000000-8 672B ±317% 21 | _ZenQ_NumWriters8_InputSize6000000-8 1.05kB ±94% 22 | _Chan_NumWriters100_InputSize6000000-8 43.3kB ±30% 23 | _ZenQ_NumWriters100_InputSize6000000-8 11.7kB ±41% 24 | _Chan_NumWriters1000_InputSize7000000-8 475kB ± 8% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 88.8kB ± 2% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 4% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.37 ±196% 35 | _ZenQ_NumWriters8_InputSize6000000-8 4.77 
±89% 36 | _Chan_NumWriters100_InputSize6000000-8 162 ±19% 37 | _ZenQ_NumWriters100_InputSize6000000-8 27.6 ±41% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 4% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 45.0 ±18% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.5.1.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 0% 4 | _Chan_NumWriters3_InputSize60000-8 5.54ms ± 5% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.63ms ± 2% 6 | _Chan_NumWriters8_InputSize6000000-8 687ms ± 2% 7 | _ZenQ_NumWriters8_InputSize6000000-8 243ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4% 9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 409ms ± 2% 12 | _Chan_Million_Blocking_Writers-8 10.4s ± 4% 13 | _ZenQ_Million_Blocking_Writers-8 1.83s ±10% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 117B ±63% 19 | _ZenQ_NumWriters3_InputSize60000-8 22.1B ±122% 20 | _Chan_NumWriters8_InputSize6000000-8 1.01kB ±196% 21 | _ZenQ_NumWriters8_InputSize6000000-8 1.12kB ±89% 22 | _Chan_NumWriters100_InputSize6000000-8 42.6kB ±37% 23 | _ZenQ_NumWriters100_InputSize6000000-8 11.3kB ±28% 24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 7% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 90.5kB ± 6% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 4% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | 
_ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 3.43 ±162% 35 | _ZenQ_NumWriters8_InputSize6000000-8 5.23 ±53% 36 | _Chan_NumWriters100_InputSize6000000-8 158 ±20% 37 | _ZenQ_NumWriters100_InputSize6000000-8 26.3 ±29% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 2% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 48.3 ±28% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.5.2.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 17.7µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.49ms ± 3% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.62ms ± 2% 6 | _Chan_NumWriters8_InputSize6000000-8 685ms ± 1% 7 | _ZenQ_NumWriters8_InputSize6000000-8 244ms ± 4% 8 | _Chan_NumWriters100_InputSize6000000-8 1.60s ± 1% 9 | _ZenQ_NumWriters100_InputSize6000000-8 296ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 408ms ± 1% 12 | _Chan_Million_Blocking_Writers-8 10.5s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 1.94s ±12% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 106B ±71% 19 | _ZenQ_NumWriters3_InputSize60000-8 26.5B ±119% 20 | _Chan_NumWriters8_InputSize6000000-8 634B ±339% 21 | _ZenQ_NumWriters8_InputSize6000000-8 1.10kB ±81% 22 | _Chan_NumWriters100_InputSize6000000-8 43.3kB ±42% 23 | _ZenQ_NumWriters100_InputSize6000000-8 10.4kB ±43% 24 | _Chan_NumWriters1000_InputSize7000000-8 481kB ± 6% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 89.3kB ± 4% 26 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 123MB ± 3% 28 | 29 | name allocs/op 30 | 
_Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.03 ±195% 35 | _ZenQ_NumWriters8_InputSize6000000-8 5.17 ±74% 36 | _Chan_NumWriters100_InputSize6000000-8 157 ±27% 37 | _ZenQ_NumWriters100_InputSize6000000-8 24.1 ±45% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.77k ± 4% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 45.2 ±17% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.7.0.txt: -------------------------------------------------------------------------------- 1 | name time/op 2 | _Chan_NumWriters1_InputSize600-8 23.4µs ± 1% 3 | _ZenQ_NumWriters1_InputSize600-8 18.0µs ± 1% 4 | _Chan_NumWriters3_InputSize60000-8 5.35ms ± 3% 5 | _ZenQ_NumWriters3_InputSize60000-8 2.39ms ± 5% 6 | _Chan_NumWriters8_InputSize6000000-8 674ms ± 2% 7 | _ZenQ_NumWriters8_InputSize6000000-8 236ms ± 2% 8 | _Chan_NumWriters100_InputSize6000000-8 1.58s ± 6% 9 | _ZenQ_NumWriters100_InputSize6000000-8 312ms ± 2% 10 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 1% 11 | _ZenQ_NumWriters1000_InputSize7000000-8 397ms ± 4% 12 | _Chan_Million_Blocking_Writers-8 11.0s ± 2% 13 | _ZenQ_Million_Blocking_Writers-8 2.59s ±10% 14 | 15 | name alloc/op 16 | _Chan_NumWriters1_InputSize600-8 0.00B 17 | _ZenQ_NumWriters1_InputSize600-8 0.00B 18 | _Chan_NumWriters3_InputSize60000-8 114B ±82% 19 | _ZenQ_NumWriters3_InputSize60000-8 23.6B ±112% 20 | _Chan_NumWriters8_InputSize6000000-8 733B ±260% 21 | _ZenQ_NumWriters8_InputSize6000000-8 1.02kB ±121% 22 | _Chan_NumWriters100_InputSize6000000-8 43.7kB ±40% 23 | _ZenQ_NumWriters100_InputSize6000000-8 11.2kB ±54% 24 | _Chan_NumWriters1000_InputSize7000000-8 474kB ± 7% 25 | _ZenQ_NumWriters1000_InputSize7000000-8 90.0kB ± 6% 26 | 
_Chan_Million_Blocking_Writers-8 553MB ± 0% 27 | _ZenQ_Million_Blocking_Writers-8 121MB ± 4% 28 | 29 | name allocs/op 30 | _Chan_NumWriters1_InputSize600-8 0.00 31 | _ZenQ_NumWriters1_InputSize600-8 0.00 32 | _Chan_NumWriters3_InputSize60000-8 0.00 33 | _ZenQ_NumWriters3_InputSize60000-8 0.00 34 | _Chan_NumWriters8_InputSize6000000-8 2.18 ±175% 35 | _ZenQ_NumWriters8_InputSize6000000-8 5.13 ±56% 36 | _Chan_NumWriters100_InputSize6000000-8 157 ±30% 37 | _ZenQ_NumWriters100_InputSize6000000-8 26.3 ±56% 38 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 4% 39 | _ZenQ_NumWriters1000_InputSize7000000-8 47.1 ±29% 40 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 41 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 42 | -------------------------------------------------------------------------------- /bench_reports/darwin_arm64_m1/2.7.1.txt: -------------------------------------------------------------------------------- 1 | 'c. alphadose@ReiEki.local 2 | ,xNMM. ---------------------- 3 | .OMMMMo OS: macOS 12.3 21E230 arm64 4 | OMMM0, Host: MacBookAir10,1 5 | .;loddo:' loolloddol;. Kernel: 21.4.0 6 | cKMMMMMMMMMMNWMMMMMMMMMM0: Uptime: 6 hours, 27 mins 7 | .KMMMMMMMMMMMMMMMMMMMMMMMWd. Packages: 98 (brew) 8 | XMMMMMMMMMMMMMMMMMMMMMMMX. Shell: zsh 5.8 9 | ;MMMMMMMMMMMMMMMMMMMMMMMM: Resolution: 1440x900 10 | :MMMMMMMMMMMMMMMMMMMMMMMM: DE: Aqua 11 | .MMMMMMMMMMMMMMMMMMMMMMMMX. WM: Rectangle 12 | kMMMMMMMMMMMMMMMMMMMMMMMMWd. Terminal: iTerm2 13 | .XMMMMMMMMMMMMMMMMMMMMMMMMMMk Terminal Font: FiraCodeNerdFontComplete-Medium 16 (normal) / FiraCodeNerdFontComplete-Medium 14 (non-ascii) 14 | .XMMMMMMMMMMMMMMMMMMMMMMMMK. CPU: Apple M1 15 | kMMMMMMMMMMMMMMMMMMMMMMd GPU: Apple M1 16 | ;KMMMMMMMWXXWMMMMMMMk. Memory: 1345MiB / 8192MiB 17 | .cooc,. .,coo:. 
18 | 19 | 20 | 21 | name time/op 22 | _Chan_NumWriters1_InputSize600-8 23.2µs ± 1% 23 | _ZenQ_NumWriters1_InputSize600-8 17.9µs ± 1% 24 | _Chan_NumWriters3_InputSize60000-8 5.27ms ± 3% 25 | _ZenQ_NumWriters3_InputSize60000-8 2.36ms ± 2% 26 | _Chan_NumWriters8_InputSize6000000-8 671ms ± 2% 27 | _ZenQ_NumWriters8_InputSize6000000-8 234ms ± 6% 28 | _Chan_NumWriters100_InputSize6000000-8 1.59s ± 4% 29 | _ZenQ_NumWriters100_InputSize6000000-8 309ms ± 2% 30 | _Chan_NumWriters1000_InputSize7000000-8 1.97s ± 0% 31 | _ZenQ_NumWriters1000_InputSize7000000-8 389ms ± 4% 32 | _Chan_Million_Blocking_Writers-8 10.4s ± 2% 33 | _ZenQ_Million_Blocking_Writers-8 2.32s ±21% 34 | 35 | name alloc/op 36 | _Chan_NumWriters1_InputSize600-8 0.00B 37 | _ZenQ_NumWriters1_InputSize600-8 0.00B 38 | _Chan_NumWriters3_InputSize60000-8 109B ±68% 39 | _ZenQ_NumWriters3_InputSize60000-8 24.6B ±107% 40 | _Chan_NumWriters8_InputSize6000000-8 802B ±241% 41 | _ZenQ_NumWriters8_InputSize6000000-8 1.18kB ±100% 42 | _Chan_NumWriters100_InputSize6000000-8 44.2kB ±41% 43 | _ZenQ_NumWriters100_InputSize6000000-8 10.7kB ±38% 44 | _Chan_NumWriters1000_InputSize7000000-8 476kB ± 8% 45 | _ZenQ_NumWriters1000_InputSize7000000-8 90.6kB ±10% 46 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 47 | _ZenQ_Million_Blocking_Writers-8 122MB ± 3% 48 | 49 | name allocs/op 50 | _Chan_NumWriters1_InputSize600-8 0.00 51 | _ZenQ_NumWriters1_InputSize600-8 0.00 52 | _Chan_NumWriters3_InputSize60000-8 0.00 53 | _ZenQ_NumWriters3_InputSize60000-8 0.00 54 | _Chan_NumWriters8_InputSize6000000-8 2.76 ±190% 55 | _ZenQ_NumWriters8_InputSize6000000-8 5.47 ±83% 56 | _Chan_NumWriters100_InputSize6000000-8 159 ±26% 57 | _ZenQ_NumWriters100_InputSize6000000-8 25.1 ±39% 58 | _Chan_NumWriters1000_InputSize7000000-8 1.76k ± 6% 59 | _ZenQ_NumWriters1000_InputSize7000000-8 47.3 ±31% 60 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 61 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 62 | 
-------------------------------------------------------------------------------- /bench_reports/raspian_arm32.txt: -------------------------------------------------------------------------------- 1 | `.::///+:/-. --///+//-:`` alphadose@neverwinter 2 | `+oooooooooooo: `+oooooooooooo: ------------------- 3 | /oooo++//ooooo: ooooo+//+ooooo. OS: Raspbian GNU/Linux 11 (bullseye) armv7l 4 | `+ooooooo:-:oo- +o+::/ooooooo: Host: Raspberry Pi 4 Model B Rev 1.5 5 | `:oooooooo+`` `.oooooooo+- Kernel: 5.15.32-v7l+ 6 | `:++ooo/. :+ooo+/.` Uptime: 1 hour, 58 mins 7 | ...` `.----.` ``.. Packages: 569 (dpkg) 8 | .::::-``:::::::::.`-:::-` Shell: bash 5.1.4 9 | -:::-` .:::::::-` `-:::- Terminal: /dev/pts/0 10 | `::. `.--.` `` `.---.``.::` CPU: BCM2711 (4) @ 1.800GHz 11 | .::::::::` -::::::::` ` Memory: 68MiB / 3838MiB 12 | .::` .:::::::::- `::::::::::``::. 13 | -:::` ::::::::::. ::::::::::.`:::- 14 | :::: -::::::::. `-:::::::: :::: 15 | -::- .-:::-.``....``.-::-. -::- 16 | .. `` .::::::::. `..`.. 17 | -:::-` -::::::::::` .:::::` 18 | :::::::` -::::::::::` :::::::. 19 | .::::::: -::::::::. :::::::: 20 | `-:::::` ..--.` ::::::. 
21 | `...` `...--..` `...` 22 | .:::::::::: 23 | `.-::::-` 24 | 25 | 26 | goos: linux 27 | goarch: arm 28 | name time/op 29 | _Chan_NumWriters1_InputSize600-4 230µs ± 4% 30 | _ZenQ_NumWriters1_InputSize600-4 186µs ± 5% 31 | _Chan_NumWriters3_InputSize60000-4 28.2ms ± 3% 32 | _ZenQ_NumWriters3_InputSize60000-4 12.8ms ± 0% 33 | _Chan_NumWriters8_InputSize6000000-4 4.14s ±10% 34 | _ZenQ_NumWriters8_InputSize6000000-4 1.32s ± 1% 35 | _Chan_NumWriters100_InputSize6000000-4 5.97s ± 5% 36 | _ZenQ_NumWriters100_InputSize6000000-4 1.48s ± 5% 37 | _Chan_NumWriters1000_InputSize7000000-4 7.23s ± 6% 38 | _ZenQ_NumWriters1000_InputSize7000000-4 2.09s ± 4% 39 | _Chan_Million_Blocking_Writers-4 20.3s ± 2% 40 | _ZenQ_Million_Blocking_Writers-4 6.96s ± 4% 41 | 42 | name alloc/op 43 | _Chan_NumWriters1_InputSize600-4 0.00B 44 | _ZenQ_NumWriters1_InputSize600-4 0.00B 45 | _Chan_NumWriters3_InputSize60000-4 227B ±27% 46 | _ZenQ_NumWriters3_InputSize60000-4 77.9B ±91% 47 | _Chan_NumWriters8_InputSize6000000-4 499B ±189% 48 | _ZenQ_NumWriters8_InputSize6000000-4 1.49kB ± 4% 49 | _Chan_NumWriters100_InputSize6000000-4 27.5kB ±19% 50 | _ZenQ_NumWriters100_InputSize6000000-4 27.7kB ±42% 51 | _Chan_NumWriters1000_InputSize7000000-4 290kB ± 5% 52 | _ZenQ_NumWriters1000_InputSize7000000-4 135kB ± 8% 53 | _Chan_Million_Blocking_Writers-4 325MB ± 0% 54 | _ZenQ_Million_Blocking_Writers-4 76.2MB ± 3% 55 | 56 | name allocs/op 57 | _Chan_NumWriters1_InputSize600-4 0.00 58 | _ZenQ_NumWriters1_InputSize600-4 0.00 59 | _Chan_NumWriters3_InputSize60000-4 1.00 ± 0% 60 | _ZenQ_NumWriters3_InputSize60000-4 0.00 61 | _Chan_NumWriters8_InputSize6000000-4 4.30 ±109% 62 | _ZenQ_NumWriters8_InputSize6000000-4 19.2 ± 9% 63 | _Chan_NumWriters100_InputSize6000000-4 171 ±13% 64 | _ZenQ_NumWriters100_InputSize6000000-4 194 ±25% 65 | _Chan_NumWriters1000_InputSize7000000-4 1.84k ± 3% 66 | _ZenQ_NumWriters1000_InputSize7000000-4 1.09k ± 4% 67 | _Chan_Million_Blocking_Writers-4 2.00M ± 0% 68 | 
_ZenQ_Million_Blocking_Writers-4 1.00M ± 0% 69 | -------------------------------------------------------------------------------- /bench_reports/ubuntu_amd64_16core.txt: -------------------------------------------------------------------------------- 1 | goos: linux 2 | goarch: amd64 3 | cpu: AMD Ryzen 7 5800H with Radeon Graphics 4 | 5 | name time/op 6 | _Chan_NumWriters1_InputSize600-16 23.4µs ± 4% 7 | _ZenQ_NumWriters1_InputSize600-16 33.1µs ± 4% 8 | _Chan_NumWriters3_InputSize60000-16 2.59ms ± 3% 9 | _ZenQ_NumWriters3_InputSize60000-16 1.79ms ± 1% 10 | _Chan_NumWriters8_InputSize6000000-16 334ms ± 6% 11 | _ZenQ_NumWriters8_InputSize6000000-16 162ms ± 4% 12 | _Chan_NumWriters100_InputSize6000000-16 515ms ± 6% 13 | _ZenQ_NumWriters100_InputSize6000000-16 170ms ± 3% 14 | _Chan_NumWriters1000_InputSize7000000-16 1.76s ± 3% 15 | _ZenQ_NumWriters1000_InputSize7000000-16 273ms ± 2% 16 | _Chan_Million_Blocking_Writers-16 4.52s ± 5% 17 | _ZenQ_Million_Blocking_Writers-16 1.27s ±14% 18 | 19 | name alloc/op 20 | _Chan_NumWriters1_InputSize600-16 0.00B 21 | _ZenQ_NumWriters1_InputSize600-16 0.00B 22 | _Chan_NumWriters3_InputSize60000-16 91.7B ±51% 23 | _ZenQ_NumWriters3_InputSize60000-16 4.00B ± 0% 24 | _Chan_NumWriters8_InputSize6000000-16 487B ±275% 25 | _ZenQ_NumWriters8_InputSize6000000-16 879B ±111% 26 | _Chan_NumWriters100_InputSize6000000-16 30.0kB ±47% 27 | _ZenQ_NumWriters100_InputSize6000000-16 23.2kB ±54% 28 | _Chan_NumWriters1000_InputSize7000000-16 463kB ±11% 29 | _ZenQ_NumWriters1000_InputSize7000000-16 129kB ±10% 30 | _Chan_Million_Blocking_Writers-16 553MB ± 0% 31 | _ZenQ_Million_Blocking_Writers-16 124MB ± 3% 32 | 33 | name allocs/op 34 | _Chan_NumWriters1_InputSize600-16 0.00 35 | _ZenQ_NumWriters1_InputSize600-16 0.00 36 | _Chan_NumWriters3_InputSize60000-16 0.00 37 | _ZenQ_NumWriters3_InputSize60000-16 0.00 38 | _Chan_NumWriters8_InputSize6000000-16 1.57 ±219% 39 | _ZenQ_NumWriters8_InputSize6000000-16 3.48 ±44% 40 | 
_Chan_NumWriters100_InputSize6000000-16 87.8 ±40% 41 | _ZenQ_NumWriters100_InputSize6000000-16 54.3 ±54% 42 | _Chan_NumWriters1000_InputSize7000000-16 1.67k ± 9% 43 | _ZenQ_NumWriters1000_InputSize7000000-16 63.5 ±10% 44 | _Chan_Million_Blocking_Writers-16 2.00M ± 0% 45 | _ZenQ_Million_Blocking_Writers-16 1.00M ± 0% 46 | -------------------------------------------------------------------------------- /bench_reports/ubuntu_intel_xeon.txt: -------------------------------------------------------------------------------- 1 | .-/+oossssoo+/-. manas@dell-Precision-Tower-5810 2 | `:+ssssssssssssssssss+:` ------------------------------- 3 | -+ssssssssssssssssssyyssss+- OS: Ubuntu 20.04.3 LTS x86_64 4 | .ossssssssssssssssssdMMMNysssso. Host: Precision Tower 5810 5 | /ssssssssssshdmmNNmmyNMMMMhssssss/ Kernel: 5.11.0-27-generic 6 | +ssssssssshmydMMMMMMMNddddyssssssss+ Uptime: 2 hours, 49 mins 7 | /sssssssshNMMMyhhyyyyhmNMMMNhssssssss/ Packages: 3061 (dpkg), 9 (snap) 8 | .ssssssssdMMMNhsssssssssshNMMMdssssssss. Shell: zsh 5.8 9 | +sssshhhyNMMNyssssssssssssyNMMMysssssss+ Resolution: 1920x1080 10 | ossyNMMMNyMMhsssssssssssssshmmmhssssssso DE: Plasma 11 | ossyNMMMNyMMhsssssssssssssshmmmhssssssso WM: KWin 12 | +sssshhhyNMMNyssssssssssssyNMMMysssssss+ Theme: Breeze [Plasma], Breeze [GTK2/3] 13 | .ssssssssdMMMNhsssssssssshNMMMdssssssss. Icons: breeze [Plasma], breeze [GTK2/3] 14 | /sssssssshNMMMyhhyyyyhdNMMMNhssssssss/ Terminal: terminator 15 | +sssssssssdmydMMMMMMMMddddyssssssss+ CPU: Intel Xeon E5-1620 v4 (8) @ 3.800GHz 16 | /ssssssssssshdmNNNNmyNMMMMhssssss/ GPU: NVIDIA GeForce GTX 1080 17 | .ossssssssssssssssssdMMMNysssso. Memory: 2875MiB / 64244MiB 18 | -+sssssssssssssssssyyyssss+- 19 | `:+ssssssssssssssssss+:` 20 | .-/+oossssoo+/-. 
21 | 22 | name time/op 23 | _Chan_NumWriters1_InputSize600-8 90.8µs ± 7% 24 | _ZenQ_NumWriters1_InputSize600-8 52.8µs ±15% 25 | _Chan_NumWriters3_InputSize60000-8 12.2ms ± 5% 26 | _ZenQ_NumWriters3_InputSize60000-8 5.14ms ± 3% 27 | _Chan_NumWriters8_InputSize6000000-8 1.48s ± 9% 28 | _ZenQ_NumWriters8_InputSize6000000-8 429ms ± 3% 29 | _Chan_NumWriters100_InputSize6000000-8 1.78s ± 7% 30 | _ZenQ_NumWriters100_InputSize6000000-8 453ms ± 4% 31 | _Chan_NumWriters1000_InputSize7000000-8 3.95s ± 5% 32 | _ZenQ_NumWriters1000_InputSize7000000-8 545ms ± 4% 33 | _Chan_Million_Blocking_Writers-8 7.49s ± 1% 34 | _ZenQ_Million_Blocking_Writers-8 2.04s ± 5% 35 | 36 | name alloc/op 37 | _Chan_NumWriters1_InputSize600-8 0.00B 38 | _ZenQ_NumWriters1_InputSize600-8 0.00B 39 | _Chan_NumWriters3_InputSize60000-8 202B ±81% 40 | _ZenQ_NumWriters3_InputSize60000-8 94.4B ±64% 41 | _Chan_NumWriters8_InputSize6000000-8 333B ±104% 42 | _ZenQ_NumWriters8_InputSize6000000-8 1.62kB ±124% 43 | _Chan_NumWriters100_InputSize6000000-8 41.6kB ±28% 44 | _ZenQ_NumWriters100_InputSize6000000-8 15.4kB ±46% 45 | _Chan_NumWriters1000_InputSize7000000-8 485kB ± 8% 46 | _ZenQ_NumWriters1000_InputSize7000000-8 136kB ± 8% 47 | _Chan_Million_Blocking_Writers-8 553MB ± 0% 48 | _ZenQ_Million_Blocking_Writers-8 123MB ± 3% 49 | 50 | name allocs/op 51 | _Chan_NumWriters1_InputSize600-8 0.00 52 | _ZenQ_NumWriters1_InputSize600-8 0.00 53 | _Chan_NumWriters3_InputSize60000-8 0.00 54 | _ZenQ_NumWriters3_InputSize60000-8 0.00 55 | _Chan_NumWriters8_InputSize6000000-8 3.59 ±123% 56 | _ZenQ_NumWriters8_InputSize6000000-8 8.24 ±46% 57 | _Chan_NumWriters100_InputSize6000000-8 156 ±19% 58 | _ZenQ_NumWriters100_InputSize6000000-8 36.2 ±46% 59 | _Chan_NumWriters1000_InputSize7000000-8 1.80k ± 4% 60 | _ZenQ_NumWriters1000_InputSize7000000-8 76.4 ±31% 61 | _Chan_Million_Blocking_Writers-8 2.00M ± 0% 62 | _ZenQ_Million_Blocking_Writers-8 1.00M ± 0% 63 | 
-------------------------------------------------------------------------------- /bench_reports/windows_amd64_16core.txt: -------------------------------------------------------------------------------- 1 | goos: windows 2 | goarch: amd64 3 | cpu: AMD Ryzen 7 5800H with Radeon Graphics 4 | 5 | name time/op 6 | _Chan_NumWriters1_InputSize600-16 24.5µs ± 5% 7 | _ZenQ_NumWriters1_InputSize600-16 17.7µs ± 2% 8 | _Chan_NumWriters3_InputSize60000-16 4.75ms ± 3% 9 | _ZenQ_NumWriters3_InputSize60000-16 1.86ms ± 1% 10 | _Chan_NumWriters8_InputSize6000000-16 800ms ± 5% 11 | _ZenQ_NumWriters8_InputSize6000000-16 150ms ± 1% 12 | _Chan_NumWriters100_InputSize6000000-16 1.66s ± 1% 13 | _ZenQ_NumWriters100_InputSize6000000-16 160ms ± 1% 14 | _Chan_NumWriters1000_InputSize7000000-16 1.95s ± 1% 15 | _ZenQ_NumWriters1000_InputSize7000000-16 269ms ± 1% 16 | _Chan_Million_Blocking_Writers-16 5.79s ± 2% 17 | _ZenQ_Million_Blocking_Writers-16 1.37s ± 6% 18 | 19 | name alloc/op 20 | _Chan_NumWriters1_InputSize600-16 0.00B 21 | _ZenQ_NumWriters1_InputSize600-16 0.00B 22 | _Chan_NumWriters3_InputSize60000-16 150B ±57% 23 | _ZenQ_NumWriters3_InputSize60000-16 20.6B ±201% 24 | _Chan_NumWriters8_InputSize6000000-16 472B ±283% 25 | _ZenQ_NumWriters8_InputSize6000000-16 1.05kB ±58% 26 | _Chan_NumWriters100_InputSize6000000-16 43.7kB ±38% 27 | _ZenQ_NumWriters100_InputSize6000000-16 29.7kB ±17% 28 | _Chan_NumWriters1000_InputSize7000000-16 484kB ± 7% 29 | _ZenQ_NumWriters1000_InputSize7000000-16 120kB ±14% 30 | _Chan_Million_Blocking_Writers-16 553MB ± 0% 31 | _ZenQ_Million_Blocking_Writers-16 128MB ± 4% 32 | 33 | name allocs/op 34 | _Chan_NumWriters1_InputSize600-16 0.00 35 | _ZenQ_NumWriters1_InputSize600-16 0.00 36 | _Chan_NumWriters3_InputSize60000-16 0.00 37 | _ZenQ_NumWriters3_InputSize60000-16 0.00 38 | _Chan_NumWriters8_InputSize6000000-16 2.00 ±150% 39 | _ZenQ_NumWriters8_InputSize6000000-16 3.90 ±28% 40 | _Chan_NumWriters100_InputSize6000000-16 148 ±34% 41 | 
_ZenQ_NumWriters100_InputSize6000000-16 68.3 ±24% 42 | _Chan_NumWriters1000_InputSize7000000-16 1.79k ± 5% 43 | _ZenQ_NumWriters1000_InputSize7000000-16 62.3 ±36% 44 | _Chan_Million_Blocking_Writers-16 2.00M ± 0% 45 | _ZenQ_Million_Blocking_Writers-16 1.00M ± 0% 46 | -------------------------------------------------------------------------------- /benchmarks/cgo_test/cgobench.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | "sync" 7 | "unsafe" 8 | _ "unsafe" 9 | ) 10 | 11 | /* 12 | #include 13 | */ 14 | import "C" 15 | 16 | //go:linkname noescape runtime.noescape 17 | func noescape(p unsafe.Pointer) unsafe.Pointer 18 | 19 | //go:linkname memmove runtime.memmove 20 | func memmove(to, from unsafe.Pointer, n uintptr) 21 | 22 | //go:linkname memclrNoHeapPointers runtime.memclrNoHeapPointers 23 | func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) 24 | 25 | //go:linkname mallocgc runtime.mallocgc 26 | func mallocgc(size uintptr, typ unsafe.Pointer, needzero bool) unsafe.Pointer 27 | 28 | func alloc[T any](sample T, size uintptr) unsafe.Pointer { 29 | length := unsafe.Sizeof(sample) * size 30 | return mallocgc(length, nil, true) 31 | } 32 | 33 | func getIndexAt[T any](ptr unsafe.Pointer, offset uintptr) unsafe.Pointer { 34 | return unsafe.Pointer(uintptr(ptr) + offset) 35 | } 36 | 37 | type block struct { 38 | Data int 39 | Kooky string 40 | Endy float64 41 | // Last *uint 42 | } 43 | 44 | func main() { 45 | // a := make([]int32, 0, 3) 46 | // a = append(a, 10, 20, 30) 47 | // t := unsafe.Pointer(&a[0]) 48 | // fmt.Println(*(*int32)(unsafe.Pointer(uintptr(t) + 2*unsafe.Sizeof(int32(0))))) 49 | // return 50 | 51 | const n = uintptr(100) 52 | t := make([]block, n, n) 53 | k := unsafe.Pointer(&t[0]) 54 | // k := alloc(block{Data: 1, Kooky: "2", Endy: 3.2, Last: new(uint)}, n) 55 | // k := C.calloc(C.ulong(unsafe.Sizeof(block{})), C.ulong(n)) 56 | // unsafe.Slice(k, n) 57 | 
// memclrNoHeapPointers(k, n) 58 | // t := (*[]block)(k) 59 | // runtime.KeepAlive((*[n]block)(k)) 60 | // for i := uintptr(0); i < n; i++ { 61 | // slot := getIndexAt[block](k, i*unsafe.Sizeof(block{})) 62 | // slot.Data = int(i) 63 | // slot.Kooky = fmt.Sprintf("wutface%d", i) 64 | // slot.Endy = float64(i) 65 | // slot.Last = new(uint) 66 | // *slot.Last = uint(i) 67 | // } 68 | // for i := uintptr(0); i < n; i++ { 69 | // fmt.Printf("%#v\n", t[i]) 70 | // } 71 | // return 72 | var wg sync.WaitGroup 73 | wg.Add(int(n)) 74 | for i := uintptr(0); i < n; i++ { 75 | slot := unsafe.Pointer(uintptr(k) + i*unsafe.Sizeof(block{})) 76 | (*block)(slot).Data = int(i) 77 | (*block)(slot).Kooky = fmt.Sprintf("wutface%d", i) 78 | (*block)(slot).Endy = float64(i) 79 | // (*block)(slot).Last = new(uint) 80 | // *(*block)(slot).Last = uint(i) 81 | } 82 | for i := uintptr(0); i < n; i++ { 83 | j := i 84 | go func() { 85 | slot := unsafe.Pointer(uintptr(k) + j*unsafe.Sizeof(block{})) 86 | // *(*block)(slot).Last++ 87 | fmt.Println(uintptr(slot), " ", *(*block)(slot)) 88 | runtime.GC() 89 | wg.Done() 90 | }() 91 | } 92 | wg.Wait() 93 | } 94 | -------------------------------------------------------------------------------- /benchmarks/e2e/benchmark_test.go: -------------------------------------------------------------------------------- 1 | package zenq_test 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | "sync" 7 | "testing" 8 | 9 | "github.com/alphadose/zenq/v2" 10 | ) 11 | 12 | const bufferSize = 1 << 12 13 | 14 | type Payload struct { 15 | first byte 16 | second int64 17 | third float64 18 | fourth string 19 | fifth complex64 20 | sixth []rune 21 | seventh bool 22 | } 23 | 24 | type test struct { 25 | writers int 26 | readers int 27 | inputSize int 28 | } 29 | 30 | var testCases = []test{ 31 | {writers: 1, readers: 1, inputSize: 1e3}, 32 | {writers: 3, readers: 3, inputSize: 3e3}, 33 | {writers: 8, readers: 8, inputSize: 8e3}, 34 | {writers: bufferSize * 2, readers: 1, inputSize: 
bufferSize * 2 * 4}, 35 | {writers: 1, readers: bufferSize * 2, inputSize: bufferSize * 2 * 4}, 36 | {writers: 100, readers: 100, inputSize: 6e6}, 37 | {writers: 1e3, readers: 1e3, inputSize: 7e6}, 38 | } 39 | 40 | func init() { 41 | for _, t := range testCases { 42 | if t.inputSize%t.writers != 0 { 43 | panic(fmt.Sprintf("input size %d should be dividable by writers %d", t.inputSize, t.writers)) 44 | } 45 | if t.inputSize%t.readers != 0 { 46 | panic(fmt.Sprintf("input size %d should be dividable by readers %d", t.inputSize, t.readers)) 47 | } 48 | } 49 | } 50 | 51 | func BenchmarkChan_ProduceConsume(b *testing.B) { 52 | for _, t := range testCases { 53 | t := t 54 | b.Run(fmt.Sprintf("W%d/R%d/Size%d", t.writers, t.readers, t.inputSize), func(b *testing.B) { 55 | for i := 0; i < b.N; i++ { 56 | benchmarkProduceConsumeChan(b, t) 57 | } 58 | }) 59 | } 60 | } 61 | 62 | func benchmarkProduceConsumeChan(b *testing.B, t test) { 63 | q := make(chan Payload, bufferSize) 64 | defer runtime.KeepAlive(q) 65 | 66 | writesPerProducer := t.inputSize / t.writers 67 | readsPerConsumer := t.inputSize / t.readers 68 | 69 | var wg sync.WaitGroup 70 | wg.Add(t.writers) 71 | 72 | // b.ResetTimer() 73 | 74 | for writer := 0; writer < t.writers; writer++ { 75 | go func() { 76 | defer wg.Done() 77 | for i := 0; i < writesPerProducer; i++ { 78 | q <- Payload{} 79 | } 80 | }() 81 | } 82 | 83 | wg.Add(t.readers) 84 | for reader := 0; reader < t.readers; reader++ { 85 | go func() { 86 | defer wg.Done() 87 | for i := 0; i < readsPerConsumer; i++ { 88 | <-q 89 | } 90 | }() 91 | } 92 | 93 | wg.Wait() 94 | } 95 | 96 | func BenchmarkZenQ_ProduceConsume(b *testing.B) { 97 | for _, t := range testCases { 98 | t := t 99 | b.Run(fmt.Sprintf("W%d/R%d/Size%d", t.writers, t.readers, t.inputSize), func(b *testing.B) { 100 | for i := 0; i < b.N; i++ { 101 | benchmarkProduceConsumeZenQ(b, t) 102 | } 103 | }) 104 | } 105 | } 106 | 107 | func benchmarkProduceConsumeZenQ(b *testing.B, t test) { 108 | q := 
zenq.New[Payload](bufferSize) 109 | defer runtime.KeepAlive(q) 110 | 111 | writesPerProducer := t.inputSize / t.writers 112 | readsPerConsumer := t.inputSize / t.readers 113 | 114 | var wg sync.WaitGroup 115 | wg.Add(t.writers) 116 | 117 | // b.ResetTimer() 118 | 119 | for writer := 0; writer < t.writers; writer++ { 120 | go func() { 121 | defer wg.Done() 122 | for i := 0; i < writesPerProducer; i++ { 123 | q.Write(Payload{}) 124 | } 125 | }() 126 | } 127 | 128 | wg.Add(t.readers) 129 | for reader := 0; reader < t.readers; reader++ { 130 | go func() { 131 | defer wg.Done() 132 | for i := 0; i < readsPerConsumer; i++ { 133 | q.Read() 134 | } 135 | }() 136 | } 137 | 138 | wg.Wait() 139 | } 140 | 141 | func BenchmarkChan_New(b *testing.B) { 142 | b.Run("struct{}", func(b *testing.B) { 143 | b.ReportAllocs() 144 | for i := 0; i < b.N; i++ { 145 | ch := make(chan struct{}, bufferSize) 146 | runtime.KeepAlive(ch) 147 | } 148 | }) 149 | b.Run("byte", func(b *testing.B) { 150 | b.ReportAllocs() 151 | for i := 0; i < b.N; i++ { 152 | ch := make(chan byte, bufferSize) 153 | runtime.KeepAlive(ch) 154 | } 155 | }) 156 | b.Run("int64", func(b *testing.B) { 157 | b.ReportAllocs() 158 | for i := 0; i < b.N; i++ { 159 | ch := make(chan int64, bufferSize) 160 | runtime.KeepAlive(ch) 161 | } 162 | }) 163 | } 164 | 165 | func BenchmarkZenQ_New(b *testing.B) { 166 | b.Run("struct{}", func(b *testing.B) { 167 | b.ReportAllocs() 168 | for i := 0; i < b.N; i++ { 169 | zq := zenq.New[struct{}](bufferSize) 170 | runtime.KeepAlive(zq) 171 | } 172 | }) 173 | b.Run("byte", func(b *testing.B) { 174 | b.ReportAllocs() 175 | for i := 0; i < b.N; i++ { 176 | zq := zenq.New[byte](bufferSize) 177 | runtime.KeepAlive(zq) 178 | } 179 | }) 180 | b.Run("int64", func(b *testing.B) { 181 | b.ReportAllocs() 182 | for i := 0; i < b.N; i++ { 183 | zq := zenq.New[int64](bufferSize) 184 | runtime.KeepAlive(zq) 185 | } 186 | }) 187 | } 188 | 189 | func BenchmarkZenQ_BackgroundSelectWait(b *testing.B) { 190 | 
const N = 1e4 191 | q := zenq.New[struct{}](bufferSize) 192 | 193 | // create background waiters 194 | for i := 0; i < N; i++ { 195 | go func() { 196 | alt := zenq.New[struct{}](bufferSize) 197 | zenq.Select(q, alt) 198 | }() 199 | } 200 | 201 | b.ResetTimer() 202 | 203 | a := zenq.New[int](bufferSize) 204 | for i := 0; i < b.N; i++ { 205 | a.Write(i) 206 | runtime.Gosched() 207 | a.Read() 208 | } 209 | 210 | // release background waiters 211 | for i := 0; i < N; i++ { 212 | q.Write(struct{}{}) 213 | } 214 | } 215 | 216 | func BenchmarkChan_BackgroundSelectWait(b *testing.B) { 217 | const N = 1e4 218 | q := make(chan struct{}) 219 | 220 | // create background waiters 221 | for i := 0; i < N; i++ { 222 | go func() { 223 | x := make(chan struct{}) 224 | select { 225 | case <-q: 226 | case <-x: 227 | } 228 | }() 229 | } 230 | 231 | b.ResetTimer() 232 | 233 | a := make(chan int, bufferSize) 234 | for i := 0; i < b.N; i++ { 235 | a <- i 236 | runtime.Gosched() 237 | <-a 238 | } 239 | 240 | // release background waiters 241 | for i := 0; i < N; i++ { 242 | q <- struct{}{} 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /benchmarks/e2e/benchsuite_test.go: -------------------------------------------------------------------------------- 1 | package zenq_test 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | 7 | "github.com/alphadose/zenq/v2" 8 | ) 9 | 10 | // wrapper for chan to have exactly the same api as zenq. 
// Chan wraps a native channel so that it exposes the same Read/Write method
// names as a ZenQ, letting the benchmark suites share one shape.
type Chan[T any] struct {
	ch chan T
}

// Read receives the next value from the wrapped channel and returns it.
func (c Chan[T]) Read() T {
	v := <-c.ch
	return v
}

// Write sends v on the wrapped channel.
func (c Chan[T]) Write(v T) {
	c.ch <- v
}
*testing.B) { 114 | q := ctor() 115 | b.ResetTimer() 116 | var wg sync.WaitGroup 117 | wg.Add(2) 118 | 119 | go func() { 120 | b.RunParallel(func(pb *testing.PB) { 121 | for pb.Next() { 122 | for i := 0; i < 100; i++ { 123 | q.Write(0) 124 | work() 125 | } 126 | } 127 | }) 128 | wg.Done() 129 | }() 130 | 131 | go func() { 132 | b.RunParallel(func(pb *testing.PB) { 133 | for pb.Next() { 134 | for i := 0; i < 100; i++ { 135 | _ = q.Read() 136 | work() 137 | } 138 | } 139 | }) 140 | wg.Done() 141 | }() 142 | wg.Wait() 143 | }) 144 | 145 | b.Run("PingPong/x1", func(b *testing.B) { 146 | q1 := ctor() 147 | q2 := ctor() 148 | b.ResetTimer() 149 | var wg sync.WaitGroup 150 | wg.Add(2) 151 | 152 | go func() { 153 | for i := 0; i < b.N; i++ { 154 | var v int 155 | q1.Write(v) 156 | work() 157 | _ = q2.Read() 158 | } 159 | wg.Done() 160 | }() 161 | 162 | go func() { 163 | for i := 0; i < b.N; i++ { 164 | var v int 165 | _ = q1.Read() 166 | work() 167 | q2.Write(v) 168 | } 169 | wg.Done() 170 | }() 171 | wg.Wait() 172 | }) 173 | } 174 | 175 | func BenchmarkZenq_Suite(b *testing.B) { 176 | type Queue = zenq.ZenQ[int] 177 | ctor := zenq.New[int] 178 | 179 | b.Run("Single", func(b *testing.B) { 180 | q := ctor(bufferSize) 181 | b.ResetTimer() 182 | for i := 0; i < b.N; i++ { 183 | q.Write(i) 184 | _, _ = q.Read() 185 | } 186 | }) 187 | 188 | b.Run("Uncontended/x100", func(b *testing.B) { 189 | b.RunParallel(func(pb *testing.PB) { 190 | q := ctor(bufferSize) 191 | for pb.Next() { 192 | for i := 0; i < 100; i++ { 193 | q.Write(i) 194 | _, _ = q.Read() 195 | } 196 | } 197 | }) 198 | }) 199 | 200 | b.Run("Contended/x100", func(b *testing.B) { 201 | q := ctor(bufferSize) 202 | b.RunParallel(func(pb *testing.PB) { 203 | for pb.Next() { 204 | for i := 0; i < 100; i++ { 205 | q.Write(i) 206 | _, _ = q.Read() 207 | } 208 | } 209 | }) 210 | }) 211 | 212 | b.Run("Multiple/x100", func(b *testing.B) { 213 | const P = 1000 214 | qs := [P]*Queue{} 215 | for i := range qs { 216 | qs[i] = 
ctor(bufferSize) 217 | } 218 | 219 | b.ResetTimer() 220 | 221 | var wg sync.WaitGroup 222 | wg.Add(P * 2) 223 | for i := 0; i < P; i++ { 224 | go func(q *Queue) { 225 | defer wg.Done() 226 | for i := 0; i < b.N; i++ { 227 | var v int 228 | q.Write(v) 229 | } 230 | }(qs[i]) 231 | go func(q *Queue) { 232 | defer wg.Done() 233 | for i := 0; i < b.N; i++ { 234 | _, _ = q.Read() 235 | } 236 | 237 | }(qs[i]) 238 | } 239 | wg.Wait() 240 | }) 241 | 242 | b.Run("ProducerConsumer/x1", func(b *testing.B) { 243 | q := ctor(bufferSize) 244 | b.ResetTimer() 245 | var wg sync.WaitGroup 246 | wg.Add(2) 247 | go func() { 248 | defer wg.Done() 249 | for i := 0; i < b.N; i++ { 250 | var v int 251 | q.Write(v) 252 | work() 253 | } 254 | }() 255 | 256 | go func() { 257 | defer wg.Done() 258 | for i := 0; i < b.N; i++ { 259 | _, _ = q.Read() 260 | work() 261 | } 262 | }() 263 | wg.Wait() 264 | }) 265 | 266 | b.Run("ProducerConsumer/x100", func(b *testing.B) { 267 | q := ctor(bufferSize) 268 | b.ResetTimer() 269 | var wg sync.WaitGroup 270 | wg.Add(2) 271 | 272 | go func() { 273 | b.RunParallel(func(pb *testing.PB) { 274 | for pb.Next() { 275 | for i := 0; i < 100; i++ { 276 | q.Write(0) 277 | work() 278 | } 279 | } 280 | }) 281 | wg.Done() 282 | }() 283 | 284 | go func() { 285 | b.RunParallel(func(pb *testing.PB) { 286 | for pb.Next() { 287 | for i := 0; i < 100; i++ { 288 | _, _ = q.Read() 289 | work() 290 | } 291 | } 292 | }) 293 | wg.Done() 294 | }() 295 | wg.Wait() 296 | }) 297 | 298 | b.Run("PingPong/x1", func(b *testing.B) { 299 | q1 := ctor(bufferSize) 300 | q2 := ctor(bufferSize) 301 | b.ResetTimer() 302 | var wg sync.WaitGroup 303 | wg.Add(2) 304 | 305 | go func() { 306 | for i := 0; i < b.N; i++ { 307 | var v int 308 | q1.Write(v) 309 | work() 310 | _, _ = q2.Read() 311 | } 312 | wg.Done() 313 | }() 314 | 315 | go func() { 316 | for i := 0; i < b.N; i++ { 317 | var v int 318 | _, _ = q1.Read() 319 | work() 320 | q2.Write(v) 321 | } 322 | wg.Done() 323 | }() 324 | wg.Wait() 325 
| }) 326 | } 327 | 328 | //go:noinline 329 | func work() { 330 | // really tiny amount of work 331 | } 332 | -------------------------------------------------------------------------------- /benchmarks/selector/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/alphadose/zenq/v2" 8 | ) 9 | 10 | type custom1 struct { 11 | alpha int 12 | beta string 13 | } 14 | 15 | type custom2 struct { 16 | gamma int 17 | } 18 | 19 | const ( 20 | bufferSize = 8 21 | 22 | numProducers = 4 23 | ) 24 | 25 | var ( 26 | throughput int 27 | 28 | // input batch size 29 | testcases = []int{60, 600, 6e3, 6e5} 30 | 31 | zq1 = zenq.New[int](bufferSize) 32 | zq2 = zenq.New[string](bufferSize) 33 | zq3 = zenq.New[custom1](bufferSize) 34 | zq4 = zenq.New[*custom2](bufferSize) 35 | 36 | ch1 = make(chan int, bufferSize) 37 | ch2 = make(chan string, bufferSize) 38 | ch3 = make(chan custom1, bufferSize) 39 | ch4 = make(chan *custom2, bufferSize) 40 | ) 41 | 42 | func zenqSelector() { 43 | go looper(intProducer1) 44 | go looper(stringProducer1) 45 | go looper(custom1Producer1) 46 | go looper(custom2Producer1) 47 | 48 | var ctr = 0 49 | 50 | var startTime time.Time = time.Now() 51 | for i := 0; i < throughput; i++ { 52 | if d := zenq.Select(zq1, zq2, zq3, zq4); d != nil { 53 | ctr++ 54 | } 55 | } 56 | 57 | if ctr != throughput { 58 | panic("Data Loss") 59 | } 60 | fmt.Printf("ZenQ Select Runner completed transfer in: %v\n", time.Since(startTime)) 61 | } 62 | 63 | func chanSelector() { 64 | go looper(intProducer2) 65 | go looper(stringProducer2) 66 | go looper(custom1Producer2) 67 | go looper(custom2Producer2) 68 | 69 | var ctr = 0 70 | 71 | var startTime time.Time = time.Now() 72 | for i := 0; i < throughput; i++ { 73 | select { 74 | case <-ch1: 75 | ctr++ 76 | case <-ch2: 77 | ctr++ 78 | case <-ch3: 79 | ctr++ 80 | case <-ch4: 81 | ctr++ 82 | } 83 | 84 | } 85 | 86 | if ctr != throughput { 
87 | panic("Data Loss") 88 | } 89 | fmt.Printf("Chan Select Runner completed transfer in: %v\n", time.Since(startTime)) 90 | } 91 | 92 | func main() { 93 | for _, tput := range testcases { 94 | throughput = tput 95 | fmt.Printf("With Input Batch Size: %d and Num Concurrent Writers: %d\n", throughput, numProducers) 96 | fmt.Print("\n") 97 | 98 | // Run tests 99 | chanSelector() 100 | zenqSelector() 101 | fmt.Print("====================================================================\n\n") 102 | } 103 | } 104 | 105 | func intProducer1(ctr int) { zq1.Write(ctr) } 106 | 107 | func stringProducer1(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) } 108 | 109 | func custom1Producer1(ctr int) { zq3.Write(custom1{alpha: ctr, beta: fmt.Sprint(ctr)}) } 110 | 111 | func custom2Producer1(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) } 112 | 113 | func intProducer2(ctr int) { ch1 <- ctr } 114 | 115 | func stringProducer2(ctr int) { ch2 <- fmt.Sprint(ctr * 10) } 116 | 117 | func custom1Producer2(ctr int) { ch3 <- custom1{alpha: ctr, beta: fmt.Sprint(ctr)} } 118 | 119 | func custom2Producer2(ctr int) { ch4 <- &custom2{gamma: 1 << ctr} } 120 | 121 | func looper(producer func(ctr int)) { 122 | for i := 0; i < throughput/numProducers; i++ { 123 | producer(i) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /benchmarks/simple/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/alphadose/zenq/v2" 8 | ) 9 | 10 | // Example item which we will be writing to and reading from the queue 11 | type Payload struct { 12 | first byte 13 | second int64 14 | third float64 15 | fourth string 16 | fifth complex64 17 | sixth []rune 18 | seventh bool 19 | } 20 | 21 | func NewPayload() *Payload { 22 | return &Payload{ 23 | first: 1, 24 | second: 2, 25 | third: 3.0, 26 | fourth: "4", 27 | fifth: 3 + 4i, 28 | sixth: []rune("🐈⚔️👍🌏💥🦖"), 29 | } 30 
// measureTime runs callback once and prints how long it took, labelled with
// runnerName.
func measureTime(callback func(), runnerName string) {
	begin := time.Now()
	callback()
	elapsed := time.Since(begin)
	fmt.Printf("%s Runner completed transfer in: %v\n", runnerName, elapsed)
}
channel and zenQ 110 | func cleanup() { 111 | for len(ch) > 0 { 112 | <-ch 113 | } 114 | zq.Reset() 115 | } 116 | 117 | func main() { 118 | cleanup() 119 | for _, tput := range throughput { 120 | currSize = tput 121 | fmt.Printf("With Input Batch Size: %d and Num Concurrent Writers: %d\n", currSize, numConcurrentWriters) 122 | fmt.Print("\n") 123 | 124 | // Run tests 125 | measureTime(chanRunner, "Native Channel") 126 | measureTime(zenqRunner, "ZenQ") 127 | fmt.Print("====================================================================\n\n") 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /benchmarks/simple/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func zenqTestRunner(numWriters uint64, size uint64, b *testing.B) { 8 | currSize = size 9 | numConcurrentWriters = numWriters 10 | 11 | cleanup() 12 | b.ResetTimer() 13 | for n := 0; n < b.N; n++ { 14 | zenqRunner() 15 | } 16 | } 17 | 18 | func chanTestRunner(numWriters uint64, size uint64, b *testing.B) { 19 | currSize = size 20 | numConcurrentWriters = numWriters 21 | 22 | cleanup() 23 | b.ResetTimer() 24 | for n := 0; n < b.N; n++ { 25 | chanRunner() 26 | } 27 | } 28 | 29 | func Benchmark_Chan_NumWriters1_InputSize600(b *testing.B) { chanTestRunner(1, 6e2, b) } 30 | 31 | func Benchmark_ZenQ_NumWriters1_InputSize600(b *testing.B) { zenqTestRunner(1, 6e2, b) } 32 | 33 | func Benchmark_Chan_NumWriters3_InputSize60000(b *testing.B) { chanTestRunner(3, 6e4, b) } 34 | 35 | func Benchmark_ZenQ_NumWriters3_InputSize60000(b *testing.B) { zenqTestRunner(3, 6e4, b) } 36 | 37 | func Benchmark_Chan_NumWriters8_InputSize6000000(b *testing.B) { chanTestRunner(8, 6e6, b) } 38 | 39 | func Benchmark_ZenQ_NumWriters8_InputSize6000000(b *testing.B) { zenqTestRunner(8, 6e6, b) } 40 | 41 | func Benchmark_Chan_NumWriters100_InputSize6000000(b *testing.B) { 
chanTestRunner(100, 6e6, b) } 42 | 43 | func Benchmark_ZenQ_NumWriters100_InputSize6000000(b *testing.B) { zenqTestRunner(100, 6e6, b) } 44 | 45 | func Benchmark_Chan_NumWriters1000_InputSize7000000(b *testing.B) { chanTestRunner(1e3, 7e6, b) } 46 | 47 | func Benchmark_ZenQ_NumWriters1000_InputSize7000000(b *testing.B) { zenqTestRunner(1e3, 7e6, b) } 48 | 49 | func Benchmark_Chan_Million_Blocking_Writers(b *testing.B) { chanTestRunner(1e6, 1e7, b) } 50 | 51 | func Benchmark_ZenQ_Million_Blocking_Writers(b *testing.B) { zenqTestRunner(1e6, 1e7, b) } 52 | -------------------------------------------------------------------------------- /constants/constants_386.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 64 4 | -------------------------------------------------------------------------------- /constants/constants_amd64.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 64 4 | -------------------------------------------------------------------------------- /constants/constants_arm.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_arm64.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 64 4 | -------------------------------------------------------------------------------- /constants/constants_mips.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_mips64.go: 
-------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_mips64le.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_mipsle.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_ppc64x.go: -------------------------------------------------------------------------------- 1 | //go:build ppc64 || ppc64le 2 | 3 | package constants 4 | 5 | const CacheLinePadSize = 128 6 | -------------------------------------------------------------------------------- /constants/constants_riscv64.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 32 4 | -------------------------------------------------------------------------------- /constants/constants_s390x.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 256 4 | -------------------------------------------------------------------------------- /constants/constants_wasm.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const CacheLinePadSize = 64 4 | -------------------------------------------------------------------------------- /examples/selector/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/alphadose/zenq/v2" 7 | ) 8 | 
9 | type custom1 struct { 10 | alpha int 11 | beta string 12 | } 13 | 14 | type custom2 struct { 15 | gamma int 16 | } 17 | 18 | const size = 100 19 | 20 | var ( 21 | zq1 = zenq.New[int](size) 22 | zq2 = zenq.New[string](size) 23 | zq3 = zenq.New[custom1](size) 24 | zq4 = zenq.New[*custom2](size) 25 | ) 26 | 27 | func main() { 28 | go looper(intProducer) 29 | go looper(stringProducer) 30 | go looper(custom1Producer) 31 | go looper(custom2Producer) 32 | 33 | for i := 0; i < 40; i++ { 34 | 35 | // Selection occurs here 36 | if data := zenq.Select(zq1, zq2, zq3, zq4); data != nil { 37 | switch data.(type) { 38 | case int: 39 | fmt.Printf("Received int %d\n", data) 40 | case string: 41 | fmt.Printf("Received string %s\n", data) 42 | case custom1: 43 | fmt.Printf("Received custom data type number 1 %#v\n", data) 44 | case *custom2: 45 | fmt.Printf("Received pointer %#v\n", data) 46 | } 47 | } 48 | } 49 | } 50 | 51 | func intProducer(ctr int) { zq1.Write(ctr) } 52 | 53 | func stringProducer(ctr int) { zq2.Write(fmt.Sprint(ctr * 10)) } 54 | 55 | func custom1Producer(ctr int) { zq3.Write(custom1{alpha: ctr, beta: fmt.Sprint(ctr)}) } 56 | 57 | func custom2Producer(ctr int) { zq4.Write(&custom2{gamma: 1 << ctr}) } 58 | 59 | func looper(producer func(ctr int)) { 60 | for i := 0; i < 10; i++ { 61 | producer(i) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /examples/simple/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/alphadose/zenq/v2" 8 | ) 9 | 10 | type payload struct { 11 | alpha int 12 | beta string 13 | } 14 | 15 | func main() { 16 | zq := zenq.New[payload](10) 17 | 18 | for j := 0; j < 5; j++ { 19 | go func() { 20 | for i := 0; i < 20; i++ { 21 | zq.Write(payload{ 22 | alpha: i, 23 | beta: fmt.Sprint(i), 24 | }) 25 | } 26 | }() 27 | } 28 | 29 | // For lowest latency and best performance, allocate 
the ZenQ.Read() calling goroutine an entire OS thread 30 | // by calling runtime.LockOSThread() 31 | // Note:- If you have a single core then doing this will cause a deadlock 32 | runtime.LockOSThread() 33 | defer runtime.UnlockOSThread() 34 | 35 | for i := 0; i < 100; i++ { 36 | if data, queueOpen := zq.Read(); queueOpen { 37 | fmt.Printf("%+v\n", data) 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/alphadose/zenq/v2 2 | 3 | go 1.19 4 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alphadose/ZenQ/271950c9b7fa1f6b907dca7ffcdf91022ddaac00/go.sum -------------------------------------------------------------------------------- /lib_runtime_fastrand.go: -------------------------------------------------------------------------------- 1 | //go:build !go1.22 2 | 3 | package zenq 4 | 5 | import ( 6 | _ "unsafe" 7 | ) 8 | 9 | //go:linkname Fastrand runtime.fastrand 10 | func Fastrand() uint32 11 | -------------------------------------------------------------------------------- /lib_runtime_fastrand_1.22.go: -------------------------------------------------------------------------------- 1 | //go:build go1.22 2 | 3 | package zenq 4 | 5 | import ( 6 | _ "unsafe" 7 | ) 8 | 9 | //go:linkname Fastrand runtime.cheaprand 10 | func Fastrand() uint32 11 | -------------------------------------------------------------------------------- /lib_runtime_linkage.go: -------------------------------------------------------------------------------- 1 | package zenq 2 | 3 | import ( 4 | "runtime" 5 | "unsafe" 6 | _ "unsafe" 7 | 8 | "github.com/alphadose/zenq/v2/constants" 9 | ) 10 | 11 | type cacheLinePadding struct { 12 | _ [constants.CacheLinePadSize]byte 13 | } 14 | 
15 | // Linking ZenQ with golang internal runtime library to allow usage of scheduling primitives 16 | // like goready(), mcall() etc to allow low-level scheduling of goroutines 17 | 18 | type mutex struct { 19 | // Futex-based impl treats it as uint32 key, 20 | // while sema-based impl as M* waitm. 21 | // Used to be a union, but unions break precise GC. 22 | key uintptr 23 | } 24 | 25 | // The functions below are used for scheduling goroutines with exclusive control 26 | // Shifting to the below flow will remove the spinning and mutex lock implementations 27 | 28 | //go:linkname lock runtime.lock 29 | func lock(l *mutex) 30 | 31 | //go:linkname nanotime runtime.nanotime 32 | func nanotime() int64 33 | 34 | //go:linkname unlock runtime.unlock 35 | func unlock(l *mutex) 36 | 37 | //go:linkname goparkunlock runtime.goparkunlock 38 | func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) 39 | 40 | // GetG returns the pointer to the current goroutine 41 | // defined in the asm files 42 | func GetG() unsafe.Pointer 43 | 44 | //go:linkname Fastlog2 runtime.fastlog2 45 | func Fastlog2(x float64) float64 46 | 47 | //go:linkname goready runtime.goready 48 | func goready(goroutinePtr unsafe.Pointer, traceskip int) 49 | 50 | //go:linkname gopark runtime.gopark 51 | func gopark(unlockf func(unsafe.Pointer, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) 52 | 53 | // Active spinning runtime support. 54 | // runtime_canSpin reports whether spinning makes sense at the moment. 55 | //go:linkname runtime_canSpin sync.runtime_canSpin 56 | func runtime_canSpin(i int) bool 57 | 58 | // runtime_doSpin does active spinning. 
59 | // //go:linkname runtime_doSpin sync.runtime_doSpin 60 | // func runtime_doSpin() 61 | 62 | func runtime_doSpin() { 63 | spin(30) 64 | } 65 | 66 | //go:linkname osyield runtime.osyield 67 | func osyield() 68 | 69 | //go:linkname runtime_nanotime sync.runtime_nanotime 70 | func runtime_nanotime() int64 71 | 72 | // Semacquire waits until *s > 0 and then atomically decrements it. 73 | // It is intended as a simple sleep primitive for use by the synchronization 74 | // library and should not be used directly. 75 | //go:linkname runtime_Semacquire sync.runtime_Semacquire 76 | func runtime_Semacquire(s *uint32) 77 | 78 | // SemacquireMutex is like Semacquire, but for profiling contended Mutexes. 79 | // If lifo is true, queue waiter at the head of wait queue. 80 | // skipframes is the number of frames to omit during tracing, counting from 81 | // runtime_SemacquireMutex's caller. 82 | //go:linkname runtime_SemacquireMutex sync.runtime_SemacquireMutex 83 | func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int) 84 | 85 | // Semrelease atomically increments *s and notifies a waiting goroutine 86 | // if one is blocked in Semacquire. 87 | // It is intended as a simple wakeup primitive for use by the synchronization 88 | // library and should not be used directly. 89 | // If handoff is true, pass count directly to the first waiter. 90 | // skipframes is the number of frames to omit during tracing, counting from 91 | // runtime_Semrelease's caller. 
92 | //go:linkname runtime_Semrelease sync.runtime_Semrelease 93 | func runtime_Semrelease(s *uint32, handoff bool, skipframes int) 94 | 95 | //go:linkname goyield runtime.goyield 96 | func goyield() 97 | 98 | //go:linkname mcall runtime.mcall 99 | func mcall(fn func(unsafe.Pointer)) 100 | 101 | //go:linkname park_m runtime.park_m 102 | func park_m(gp unsafe.Pointer) 103 | 104 | //go:linkname fastrandn runtime.fastrandn 105 | func fastrandn(n uint32) uint32 106 | 107 | //go:linkname throw runtime.throw 108 | func throw(s string) 109 | 110 | //go:linkname Readgstatus runtime.readgstatus 111 | func Readgstatus(gp unsafe.Pointer) uint32 112 | 113 | //go:linkname casgstatus runtime.casgstatus 114 | func casgstatus(gp unsafe.Pointer, oldval, newval uint32) 115 | 116 | //go:linkname dropg runtime.dropg 117 | func dropg() 118 | 119 | //go:linkname schedule runtime.schedule 120 | func schedule() 121 | 122 | //go:linkname mallocgc runtime.mallocgc 123 | func mallocgc(size uintptr, typ unsafe.Pointer, needzero bool) unsafe.Pointer 124 | 125 | //go:linkname sysFree runtime.sysFree 126 | func sysFree(v unsafe.Pointer, n uintptr, sysStat unsafe.Pointer) 127 | 128 | //go:linkname sysFreeOS runtime.sysFreeOS 129 | func sysFreeOS(v unsafe.Pointer, n uintptr) 130 | 131 | //go:linkname gosched_m runtime.gosched_m 132 | func gosched_m(gp unsafe.Pointer) 133 | 134 | //go:linkname spin runtime.procyield 135 | func spin(cycles uint32) 136 | 137 | //go:linkname noescape runtime.noescape 138 | func noescape(p unsafe.Pointer) unsafe.Pointer 139 | 140 | // ProcPin and ProcUnpin disable pre-emption for any calling goroutine 141 | // can be used to guarantee consistent latency 142 | //go:linkname ProcPin runtime.procPin 143 | func ProcPin() int 144 | 145 | //go:linkname ProcUnpin runtime.procUnpin 146 | func ProcUnpin() 147 | 148 | //go:linkname memequal runtime.memequal 149 | func memequal(a, b unsafe.Pointer, size uintptr) bool 150 | 151 | //go:linkname Load8 runtime/internal/atomic.Load8 
152 | func Load8(ptr *uint8) uint8 153 | 154 | //go:linkname And8 runtime/internal/atomic.And8 155 | func And8(ptr *uint8, val uint8) 156 | 157 | //go:linkname Or8 runtime/internal/atomic.Or8 158 | func Or8(ptr *uint8, val uint8) 159 | 160 | //go:linkname Store8 runtime/internal/atomic.Store8 161 | func Store8(ptr *uint8, val uint8) 162 | 163 | // custom parking function 164 | func fast_park(gp unsafe.Pointer) { 165 | dropg() 166 | casgstatus(gp, _Grunning, _Gwaiting) 167 | schedule() 168 | } 169 | 170 | // whether the system has multiple cores or a single core 171 | var multicore = runtime.NumCPU() > 1 172 | 173 | // call ready after ensuring the goroutine is parked 174 | func safe_ready(gp unsafe.Pointer) { 175 | // for better microprocessor branch prediction 176 | if multicore { 177 | for Readgstatus(gp)&^_Gscan != _Gwaiting { 178 | spin(20) 179 | } 180 | } else { 181 | for Readgstatus(gp)&^_Gscan != _Gwaiting { 182 | mcall(gosched_m) 183 | } 184 | } 185 | goready(gp, 1) 186 | } 187 | 188 | // simple wait 189 | func wait() { 190 | if multicore { 191 | spin(20) 192 | } else { 193 | mcall(gosched_m) 194 | } 195 | } 196 | 197 | type waitReason uint8 198 | 199 | const ( 200 | waitReasonZero waitReason = iota // "" 201 | waitReasonGCAssistMarking // "GC assist marking" 202 | waitReasonIOWait // "IO wait" 203 | waitReasonChanReceiveNilChan // "chan receive (nil chan)" 204 | waitReasonChanSendNilChan // "chan send (nil chan)" 205 | waitReasonDumpingHeap // "dumping heap" 206 | waitReasonGarbageCollection // "garbage collection" 207 | waitReasonGarbageCollectionScan // "garbage collection scan" 208 | waitReasonPanicWait // "panicwait" 209 | waitReasonSelect // "select" 210 | waitReasonSelectNoCases // "select (no cases)" 211 | waitReasonGCAssistWait // "GC assist wait" 212 | waitReasonGCSweepWait // "GC sweep wait" 213 | waitReasonGCScavengeWait // "GC scavenge wait" 214 | waitReasonChanReceive // "chan receive" 215 | waitReasonChanSend // "chan send" 216 | 
waitReasonFinalizerWait // "finalizer wait" 217 | waitReasonForceGCIdle // "force gc (idle)" 218 | waitReasonSemacquire // "semacquire" 219 | waitReasonSleep // "sleep" 220 | waitReasonSyncCondWait // "sync.Cond.Wait" 221 | waitReasonTimerGoroutineIdle // "timer goroutine (idle)" 222 | waitReasonTraceReaderBlocked // "trace reader (blocked)" 223 | waitReasonWaitForGCCycle // "wait for GC cycle" 224 | waitReasonGCWorkerIdle // "GC worker (idle)" 225 | waitReasonPreempted // "preempted" 226 | waitReasonDebugCall // "debug call" 227 | ) 228 | 229 | // Event types in the trace, args are given in square brackets. 230 | const ( 231 | traceEvNone = 0 // unused 232 | traceEvBatch = 1 // start of per-P batch of events [pid, timestamp] 233 | traceEvFrequency = 2 // contains tracer timer frequency [frequency (ticks per second)] 234 | traceEvStack = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}] 235 | traceEvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] 236 | traceEvProcStart = 5 // start of P [timestamp, thread id] 237 | traceEvProcStop = 6 // stop of P [timestamp] 238 | traceEvGCStart = 7 // GC start [timestamp, seq, stack id] 239 | traceEvGCDone = 8 // GC done [timestamp] 240 | traceEvGCSTWStart = 9 // GC STW start [timestamp, kind] 241 | traceEvGCSTWDone = 10 // GC STW done [timestamp] 242 | traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id] 243 | traceEvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] 244 | traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] 245 | traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq] 246 | traceEvGoEnd = 15 // goroutine ends [timestamp] 247 | traceEvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack] 248 | traceEvGoSched = 17 // goroutine calls Gosched [timestamp, stack] 249 | traceEvGoPreempt = 18 // goroutine is preempted [timestamp, stack] 250 | 
traceEvGoSleep = 19 // goroutine calls Sleep [timestamp, stack] 251 | traceEvGoBlock = 20 // goroutine blocks [timestamp, stack] 252 | traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] 253 | traceEvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack] 254 | traceEvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack] 255 | traceEvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack] 256 | traceEvGoBlockSync = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack] 257 | traceEvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack] 258 | traceEvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack] 259 | traceEvGoSysCall = 28 // syscall enter [timestamp, stack] 260 | traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] 261 | traceEvGoSysBlock = 30 // syscall blocks [timestamp] 262 | traceEvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] 263 | traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] 264 | traceEvHeapAlloc = 33 // gcController.heapLive change [timestamp, heap_alloc] 265 | traceEvHeapGoal = 34 // gcController.heapGoal (formerly next_gc) change [timestamp, heap goal in bytes] 266 | traceEvTimerGoroutine = 35 // not currently used; previously denoted timer goroutine [timer goroutine id] 267 | traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] 268 | traceEvString = 37 // string dictionary entry [ID, length, string] 269 | traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] 270 | traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] 271 | traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] 
272 | traceEvGoStartLabel = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] 273 | traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] 274 | traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] 275 | traceEvGCMarkAssistDone = 44 // GC mark assist done [timestamp] 276 | traceEvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string] 277 | traceEvUserTaskEnd = 46 // end of a task [timestamp, internal task id, stack] 278 | traceEvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string] 279 | traceEvUserLog = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string] 280 | traceEvCount = 49 281 | // Byte is used but only 6 bits are available for event type. 282 | // The remaining 2 bits are used to specify the number of arguments. 283 | // That means, the max event type value is 63. 284 | ) 285 | 286 | // defined constants 287 | const ( 288 | // G status 289 | // 290 | // Beyond indicating the general state of a G, the G status 291 | // acts like a lock on the goroutine's stack (and hence its 292 | // ability to execute user code). 293 | // 294 | // If you add to this list, add to the list 295 | // of "okay during garbage collection" status 296 | // in mgcmark.go too. 297 | // 298 | // TODO(austin): The _Gscan bit could be much lighter-weight. 299 | // For example, we could choose not to run _Gscanrunnable 300 | // goroutines found in the run queue, rather than CAS-looping 301 | // until they become _Grunnable. And transitions like 302 | // _Gscanwaiting -> _Gscanrunnable are actually okay because 303 | // they don't affect stack ownership. 304 | 305 | // _Gidle means this goroutine was just allocated and has not 306 | // yet been initialized. 307 | _Gidle = iota // 0 308 | 309 | // _Grunnable means this goroutine is on a run queue. 
It is 310 | // not currently executing user code. The stack is not owned. 311 | _Grunnable // 1 312 | 313 | // _Grunning means this goroutine may execute user code. The 314 | // stack is owned by this goroutine. It is not on a run queue. 315 | // It is assigned an M and a P (g.m and g.m.p are valid). 316 | _Grunning // 2 317 | 318 | // _Gsyscall means this goroutine is executing a system call. 319 | // It is not executing user code. The stack is owned by this 320 | // goroutine. It is not on a run queue. It is assigned an M. 321 | _Gsyscall // 3 322 | 323 | // _Gwaiting means this goroutine is blocked in the runtime. 324 | // It is not executing user code. It is not on a run queue, 325 | // but should be recorded somewhere (e.g., a channel wait 326 | // queue) so it can be ready()d when necessary. The stack is 327 | // not owned *except* that a channel operation may read or 328 | // write parts of the stack under the appropriate channel 329 | // lock. Otherwise, it is not safe to access the stack after a 330 | // goroutine enters _Gwaiting (e.g., it may get moved). 331 | _Gwaiting // 4 332 | 333 | // _Gmoribund_unused is currently unused, but hardcoded in gdb 334 | // scripts. 335 | _Gmoribund_unused // 5 336 | 337 | // _Gdead means this goroutine is currently unused. It may be 338 | // just exited, on a free list, or just being initialized. It 339 | // is not executing user code. It may or may not have a stack 340 | // allocated. The G and its stack (if any) are owned by the M 341 | // that is exiting the G or that obtained the G from the free 342 | // list. 343 | _Gdead // 6 344 | 345 | // _Genqueue_unused is currently unused. 346 | _Genqueue_unused // 7 347 | 348 | // _Gcopystack means this goroutine's stack is being moved. It 349 | // is not executing user code and is not on a run queue. The 350 | // stack is owned by the goroutine that put it in _Gcopystack. 
351 | _Gcopystack // 8 352 | 353 | // _Gpreempted means this goroutine stopped itself for a 354 | // suspendG preemption. It is like _Gwaiting, but nothing is 355 | // yet responsible for ready()ing it. Some suspendG must CAS 356 | // the status to _Gwaiting to take responsibility for 357 | // ready()ing this G. 358 | _Gpreempted // 9 359 | 360 | // _Gscan combined with one of the above states other than 361 | // _Grunning indicates that GC is scanning the stack. The 362 | // goroutine is not executing user code and the stack is owned 363 | // by the goroutine that set the _Gscan bit. 364 | // 365 | // _Gscanrunning is different: it is used to briefly block 366 | // state transitions while GC signals the G to scan its own 367 | // stack. This is otherwise like _Grunning. 368 | // 369 | // atomicstatus&~Gscan gives the state the goroutine will 370 | // return to when the scan completes. 371 | _Gscan = 0x1000 372 | _Gscanrunnable = _Gscan + _Grunnable // 0x1001 373 | _Gscanrunning = _Gscan + _Grunning // 0x1002 374 | _Gscansyscall = _Gscan + _Gsyscall // 0x1003 375 | _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 376 | _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 377 | ) 378 | -------------------------------------------------------------------------------- /select_list.go: -------------------------------------------------------------------------------- 1 | package zenq 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "unsafe" 7 | ) 8 | 9 | // global memory pool for storing and leasing node objects 10 | var ( 11 | nodePool = sync.Pool{New: func() any { return new(node) }} 12 | nodeGet = nodePool.Get 13 | nodePut = nodePool.Put 14 | ) 15 | 16 | // List is a lock-free linked list 17 | // theory -> https://www.cs.rochester.edu/u/scott/papers/1996_PODC_queues.pdf 18 | // pseudocode -> https://www.cs.rochester.edu/research/synchronization/pseudocode/queues.html 19 | type List struct { 20 | head atomic.Pointer[node] 21 | tail atomic.Pointer[node] 22 | } 23 | 24 | // 
NewList returns a new list 25 | func NewList() List { 26 | n := nodeGet().(*node) 27 | n.threadPtr, n.dataOut = nil, nil 28 | n.next.Store(nil) 29 | var ptr atomic.Pointer[node] 30 | ptr.Store(n) 31 | return List{head: ptr, tail: ptr} 32 | } 33 | 34 | // a single node in the linked list 35 | type node struct { 36 | next atomic.Pointer[node] 37 | threadPtr *unsafe.Pointer 38 | dataOut *any 39 | } 40 | 41 | // Enqueue inserts a value into the list 42 | func (l *List) Enqueue(threadPtr *unsafe.Pointer, dataOut *any) { 43 | var ( 44 | n = nodeGet().(*node) 45 | tail, next *node 46 | ) 47 | n.threadPtr, n.dataOut = threadPtr, dataOut 48 | for { 49 | tail = l.tail.Load() 50 | next = tail.next.Load() 51 | if tail == l.tail.Load() { // are tail and next consistent? 52 | if next == nil { 53 | if tail.next.CompareAndSwap(next, n) { 54 | l.tail.CompareAndSwap(tail, n) // Enqueue is done. try to swing tail to the inserted node 55 | return 56 | } 57 | } else { // tail was not pointing to the last node 58 | // try to swing Tail to the next node 59 | l.tail.CompareAndSwap(tail, next) 60 | } 61 | } 62 | } 63 | } 64 | 65 | // Dequeue removes and returns the value at the head of the queue to the memory pool 66 | // It returns nil if the list is empty 67 | func (l *List) Dequeue() (threadPtr *unsafe.Pointer, dataOut *any) { 68 | var head, tail, next *node 69 | for { 70 | head = l.head.Load() 71 | tail = l.tail.Load() 72 | next = head.next.Load() 73 | if head == l.head.Load() { // are head, tail, and next consistent? 74 | if head == tail { // is list empty or tail falling behind? 75 | if next == nil { // is list empty? 76 | return nil, nil 77 | } 78 | // tail is falling behind. 
try to advance it 79 | l.tail.CompareAndSwap(tail, next) 80 | } else { 81 | // read value before CAS_node otherwise another dequeue might free the next node 82 | threadPtr, dataOut = next.threadPtr, next.dataOut 83 | if l.head.CompareAndSwap(head, next) { 84 | // sysFreeOS(unsafe.Pointer(head), nodeSize) 85 | head.threadPtr, head.dataOut = nil, nil 86 | head.next.Store(nil) 87 | nodePut(head) 88 | return // Dequeue is done. return 89 | } 90 | } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /selector.go: -------------------------------------------------------------------------------- 1 | package zenq 2 | 3 | import ( 4 | "sync/atomic" 5 | "unsafe" 6 | ) 7 | 8 | // Selectable is an interface for getting selected among many others 9 | type Selectable interface { 10 | IsClosed() bool 11 | EnqueueSelector(*unsafe.Pointer, *any) 12 | ReadFromBackLog() (data any) 13 | Signal() uint8 14 | } 15 | 16 | // Select selects a single element out of multiple ZenQs 17 | // A maximum of 127 ZenQs can be selected from at a time owing to the size of int8 type 18 | // `nil` is returned if all streams are closed or if a stream gets closed during the selection process 19 | func Select(streams ...Selectable) (data any) { 20 | numStreams := int8(len(streams) - 1) 21 | filter: 22 | for idx := int8(0); idx < numStreams; idx++ { 23 | if streams[idx] == nil || streams[idx].IsClosed() { 24 | for ; numStreams >= 0 && (streams[numStreams] == nil || streams[numStreams].IsClosed()); numStreams-- { 25 | } 26 | if idx >= numStreams { 27 | break filter 28 | } 29 | streams[idx], streams[numStreams] = streams[numStreams], streams[idx] 30 | numStreams-- 31 | } 32 | } 33 | if numStreams < 0 { 34 | data = nil 35 | return 36 | } 37 | 38 | for idx := int8(0); idx <= numStreams; idx++ { 39 | if data = streams[idx].ReadFromBackLog(); data != nil { 40 | return 41 | } 42 | } 43 | 44 | g, numSignals, iter := GetG(), uint8(0), int8(0) 45 | 46 | for idx 
:= int8(0); idx <= numStreams; idx++ { 47 | streams[idx].EnqueueSelector(&g, &data) 48 | } 49 | 50 | retry: 51 | for idx := int8(0); idx <= numStreams; idx++ { 52 | numSignals += streams[idx].Signal() 53 | } 54 | 55 | // might cause deadlock without this case 56 | if numSignals == 0 && atomic.LoadPointer(&g) != nil { 57 | // wait for some ZenQ to acquire this selector's thread 58 | if runtime_canSpin(int(iter)) { 59 | iter++ 60 | spin(30) 61 | } else { 62 | mcall(gosched_m) 63 | } 64 | goto retry 65 | } 66 | 67 | // park and wait for notification 68 | mcall(fast_park) 69 | return 70 | } 71 | -------------------------------------------------------------------------------- /thread_parker.go: -------------------------------------------------------------------------------- 1 | package zenq 2 | 3 | import ( 4 | "sync/atomic" 5 | "unsafe" 6 | ) 7 | 8 | // ThreadParker is a data-structure used for sleeping and waking up goroutines on user call 9 | // useful for saving up resources by parking excess goroutines and pre-empt them when required with minimal latency overhead 10 | // Uses the same lock-free linked list implementation as in `list.go` 11 | type ThreadParker[T any] struct { 12 | head atomic.Pointer[parkSpot[T]] 13 | tail atomic.Pointer[parkSpot[T]] 14 | } 15 | 16 | // NewThreadParker returns a new thread parker. 
17 | func NewThreadParker[T any](spot *parkSpot[T]) *ThreadParker[T] { 18 | var ptr atomic.Pointer[parkSpot[T]] 19 | ptr.Store(spot) 20 | return &ThreadParker[T]{head: ptr, tail: ptr} 21 | } 22 | 23 | // a single parked goroutine 24 | type parkSpot[T any] struct { 25 | next atomic.Pointer[parkSpot[T]] 26 | threadPtr unsafe.Pointer 27 | value T 28 | } 29 | 30 | // Park parks the current calling goroutine 31 | // This keeps only one parked goroutine in state at all times 32 | // the parked goroutine is called with minimal overhead via goready() due to both being in userland 33 | // This ensures there is no thundering herd https://en.wikipedia.org/wiki/Thundering_herd_problem 34 | func (tp *ThreadParker[T]) Park(nextNode *parkSpot[T]) { 35 | var tail, next *parkSpot[T] 36 | for { 37 | tail = tp.tail.Load() 38 | next = tail.next.Load() 39 | if tail == tp.tail.Load() { 40 | if next == nil { 41 | if tail.next.CompareAndSwap(next, nextNode) { 42 | tp.tail.CompareAndSwap(tail, nextNode) 43 | return 44 | } 45 | } else { 46 | tp.tail.CompareAndSwap(tail, next) 47 | } 48 | } 49 | } 50 | } 51 | 52 | // Ready calls one parked goroutine from the queue if available 53 | func (tp *ThreadParker[T]) Ready() (data T, ok bool, freeable *parkSpot[T]) { 54 | var head, tail, next *parkSpot[T] 55 | for { 56 | head = tp.head.Load() 57 | tail = tp.tail.Load() 58 | next = head.next.Load() 59 | if head == tp.head.Load() { 60 | if head == tail { 61 | if next == nil { 62 | return 63 | } 64 | tp.tail.CompareAndSwap(tail, next) 65 | } else { 66 | safe_ready(next.threadPtr) 67 | data, ok = next.value, true 68 | if tp.head.CompareAndSwap(head, next) { 69 | freeable = head 70 | freeable.threadPtr = nil 71 | freeable.next.Store(nil) 72 | return 73 | } 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /zenq.go: -------------------------------------------------------------------------------- 1 | // A minimalist thread-safe queue 
implemented using a lock-free ringbuffer which is faster 2 | // and has lower memory allocations than golang's native channels 3 | // Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html 4 | 5 | // Known Limitations:- 6 | // 7 | // 1. Max queue_size = 2^16 8 | // 2. The queue_size is a power of 2, in case a different size is provided then queue_size is rounded up to the next greater power of 2 upto a max of 2^16 9 | 10 | // Suggestions:- 11 | // 12 | // 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for lowest latency provided you have > 1 cpu cores 13 | 14 | package zenq 15 | 16 | import ( 17 | "fmt" 18 | "math" 19 | "sync" 20 | "sync/atomic" 21 | "unsafe" 22 | 23 | "github.com/alphadose/zenq/v2/constants" 24 | ) 25 | 26 | // ZenQ global state enums 27 | const ( 28 | // Both reads and writes are possible 29 | StateOpen = iota 30 | // No further writes can be performed and you can only read upto the last committed write in this state 31 | StateClosedForWrites 32 | // Neither reads nor writes are possible, queue is fully exhausted 33 | StateFullyClosed 34 | ) 35 | 36 | // ZenQ selector state enums 37 | const ( 38 | // Open for being selected 39 | SelectionOpen = iota 40 | // Running state 41 | SelectionRunning 42 | ) 43 | 44 | // ZenQ Slot state enums 45 | const ( 46 | SlotEmpty = iota 47 | SlotBusy 48 | SlotCommitted 49 | SlotClosed 50 | ) 51 | 52 | type ( 53 | // a single slot in the queue 54 | slot[T any] struct { 55 | writeParker *ThreadParker[T] 56 | atomic.Uint32 57 | item T 58 | } 59 | 60 | // metadata of the queue 61 | metaQ struct { 62 | globalState uint8 63 | // NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs 64 | // with known data types instead of generic type 65 | // using variables with lower sizes decreases memory bandwidth consumption and increases speed 66 | strideLength uint16 67 | indexMask uint16 68 | contents unsafe.Pointer 69 | // memory 
pool refs for storing and leasing parking spots for goroutines 70 | alloc func() any 71 | free func(any) 72 | } 73 | 74 | // container for the selection events among multiple queues 75 | selectFactory[T any] struct { 76 | selectionState atomic.Uint32 77 | auxThread unsafe.Pointer 78 | backlog atomic.Pointer[T] 79 | waitList List 80 | } 81 | 82 | // ZenQ is the CPU cache optimized ringbuffer implementation 83 | ZenQ[T any] struct { 84 | // The padding members 0 to 4 below are here to ensure each item is on a separate cache line. 85 | // This prevents false sharing and hence improves performance. 86 | _ cacheLinePadding 87 | writerIndex atomic.Uint32 88 | _ [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte 89 | readerIndex atomic.Uint32 90 | _ [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte 91 | metaQ 92 | _ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte 93 | selectFactory[T] 94 | _ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte 95 | } 96 | ) 97 | 98 | // returns the next greater power of 2 relative to val 99 | func nextGreaterPowerOf2(val uint32) uint32 { 100 | return 1 << uint32(math.Min(math.Ceil(Fastlog2(math.Max(float64(val), 1))), 16)) 101 | } 102 | 103 | // New returns a new queue given its payload type passed as a generic parameter 104 | func New[T any](size uint32) *ZenQ[T] { 105 | var ( 106 | queueSize = nextGreaterPowerOf2(size) 107 | contents = make([]slot[T], queueSize, queueSize) 108 | parkPool = sync.Pool{New: func() any { return new(parkSpot[T]) }} 109 | ) 110 | for idx := uint32(0); idx < queueSize; idx++ { 111 | spot := parkPool.Get().(*parkSpot[T]) 112 | spot.threadPtr = nil 113 | contents[idx].writeParker = NewThreadParker(spot) 114 | } 115 | zenq := &ZenQ[T]{ 116 | metaQ: metaQ{ 117 | strideLength: uint16(unsafe.Sizeof(slot[T]{})), 118 | contents: unsafe.Pointer(&contents[0]), 119 | alloc: parkPool.Get, 120 | free: parkPool.Put, 121 | indexMask: uint16(queueSize - 1), 122 | }, 123 
| selectFactory: selectFactory[T]{waitList: NewList()}, 124 | } 125 | go zenq.selectSender() 126 | // allow the above auxillary thread to manifest 127 | mcall(gosched_m) 128 | return zenq 129 | } 130 | 131 | // Size returns the number of items in the queue at any given time 132 | func (self *ZenQ[T]) Size() uint32 { 133 | var ( 134 | readerIndex uint32 = self.readerIndex.Load() & uint32(self.indexMask) 135 | writerIndex uint32 = self.writerIndex.Load() & uint32(self.indexMask) 136 | ) 137 | if readerIndex > writerIndex { 138 | return uint32(self.indexMask) + 2 - (readerIndex - writerIndex) 139 | } else if writerIndex > readerIndex { 140 | return writerIndex - readerIndex + 1 141 | } else { 142 | return 0 143 | } 144 | } 145 | 146 | // Write writes a value to the queue 147 | // It returns whether the queue is currently open for writes or not 148 | // If not then it might be still open for reads, which can be checked by calling zenq.IsClosed() 149 | func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) { 150 | if Load8(&self.globalState) != StateOpen { 151 | queueClosedForWrites = true 152 | return 153 | } 154 | 155 | // Try to send directly to selector when possible or else just dequeue unselected references 156 | // in order to reduce the burden on the auxillary thread and save cpu time 157 | direct_send: 158 | if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil { 159 | if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil { 160 | // direct send to selector 161 | *dataOut = value 162 | // notify selector 163 | safe_ready(selThread) 164 | return 165 | } 166 | goto direct_send 167 | } 168 | 169 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents))) 170 | 171 | // CAS -> change slot_state to busy if slot_state == empty 172 | for !slot.CompareAndSwap(SlotEmpty, SlotBusy) { 173 | switch slot.Load() { 174 | case SlotBusy: 175 | wait() 176 
| case SlotCommitted: 177 | n := self.alloc().(*parkSpot[T]) 178 | n.threadPtr, n.value = GetG(), value 179 | n.next.Store(nil) 180 | slot.writeParker.Park(n) 181 | mcall(fast_park) 182 | return 183 | case SlotEmpty: 184 | continue 185 | case SlotClosed: 186 | return 187 | } 188 | } 189 | slot.item = value 190 | slot.Store(SlotCommitted) 191 | return 192 | } 193 | 194 | // Read reads a value from the queue, you can once read once per object 195 | func (self *ZenQ[T]) Read() (data T, queueOpen bool) { 196 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents))) 197 | 198 | // CAS -> change slot_state to busy if slot_state == committed 199 | for !slot.CompareAndSwap(SlotCommitted, SlotBusy) { 200 | switch slot.Load() { 201 | case SlotBusy: 202 | wait() 203 | case SlotEmpty: 204 | var freeable *parkSpot[T] 205 | if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen { 206 | self.free(freeable) 207 | return 208 | } else if Load8(&self.globalState) != StateFullyClosed { 209 | mcall(gosched_m) 210 | } else { 211 | // queue is closed, decrement the reader index by 1 212 | self.readerIndex.Add(math.MaxUint32) 213 | queueOpen = false 214 | return 215 | } 216 | case SlotClosed: 217 | if slot.CompareAndSwap(SlotClosed, SlotEmpty) { 218 | Store8(&self.globalState, StateFullyClosed) 219 | } 220 | queueOpen = false 221 | return 222 | case SlotCommitted: 223 | continue 224 | } 225 | } 226 | data, queueOpen = slot.item, true 227 | slot.Store(SlotEmpty) 228 | return 229 | } 230 | 231 | // Close closes the ZenQ for further writes 232 | // You can only read uptill the last committed write after closing 233 | // This function will be blocking in case the queue is full 234 | // ZenQ is closed from a writer goroutine by design, hence it should always be called 235 | // from a writer goroutine and never from a reader goroutine which might cause the reader to get blocked and hence 
deadlock 236 | // It returns if the queue was already closed for writes or not 237 | func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) { 238 | // This ensures a ZenQ is closed only once even if this function is called multiple times making this operation safe 239 | if Load8(&self.globalState) != StateOpen { 240 | alreadyClosedForWrites = true 241 | return 242 | } 243 | Store8(&self.globalState, StateClosedForWrites) 244 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents))) 245 | 246 | // CAS -> change slot_state to busy if slot_state == empty 247 | for !slot.CompareAndSwap(SlotEmpty, SlotBusy) { 248 | switch slot.Load() { 249 | case SlotBusy, SlotCommitted: 250 | mcall(gosched_m) 251 | case SlotEmpty: 252 | continue 253 | case SlotClosed: 254 | return 255 | } 256 | } 257 | // Closing commit 258 | slot.Store(SlotClosed) 259 | return 260 | } 261 | 262 | // CloseAsync closes the channel asynchronously 263 | // Useful when an user wants to close the channel from a reader end without blocking the thread 264 | func (self *ZenQ[T]) CloseAsync() { 265 | go self.Close() 266 | } 267 | 268 | // The following 4 functions below implement the Selectable interface 269 | 270 | // ReadFromBackLog tries to read a data from backlog if available 271 | func (self *ZenQ[T]) ReadFromBackLog() (data any) { 272 | if d := self.backlog.Swap(nil); d != nil { 273 | data = *((*T)(d)) 274 | } 275 | return 276 | } 277 | 278 | // Signal is the mechanism by which a selector notifies this ZenQ's auxillary thread to contest for the selection 279 | func (self *ZenQ[T]) Signal() uint8 { 280 | if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) { 281 | return 0 282 | } else { 283 | safe_ready(self.auxThread) 284 | return 1 285 | } 286 | } 287 | 288 | // EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist 289 | func (self *ZenQ[T]) EnqueueSelector(threadPtr 
*unsafe.Pointer, dataOut *any) { 290 | self.waitList.Enqueue(threadPtr, dataOut) 291 | } 292 | 293 | // IsClosed returns whether the zenq is closed for both reads and writes 294 | func (self *ZenQ[T]) IsClosed() bool { 295 | return Load8(&self.globalState) == StateFullyClosed 296 | } 297 | 298 | // Reset resets the queue state 299 | // This also releases all parked goroutines if any and drains all committed writes 300 | func (self *ZenQ[T]) Reset() { 301 | // Close() is blocking when queue is full hence execute it asynchronously 302 | self.CloseAsync() 303 | // drain entire queue 304 | for open := true; open; _, open = self.Read() { 305 | } 306 | Store8(&self.globalState, StateOpen) 307 | } 308 | 309 | // Dump dumps the current queue state 310 | // Unsafe to be called from multiple goroutines 311 | func (self *ZenQ[T]) Dump() { 312 | fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex, self.readerIndex) 313 | for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ { 314 | slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{}))) 315 | fmt.Printf("Slot -> %#v\n", *slot) 316 | } 317 | } 318 | 319 | // selectSender is an auxillary thread which remains parked by default 320 | // only when a selector sends a signal, it is notified and tries to send back to the selector 321 | // if it fails, then it parks again and waits for another signal from another selection process 322 | // since it is parked most of the times, it consumes minimal cpu time making the selection process efficient 323 | func (self *ZenQ[T]) selectSender() { 324 | atomic.StorePointer(&self.auxThread, GetG()) 325 | var ( 326 | data T 327 | threadPtr unsafe.Pointer 328 | readState, queueOpen bool = false, true 329 | selectorThread *unsafe.Pointer 330 | dataOut *any 331 | ) 332 | 333 | for { 334 | // park by default and wait for Signal() notification from a selection process 335 | mcall(fast_park) 336 | if !readState { 337 | data, 
queueOpen = self.Read() 338 | readState = true 339 | } 340 | 341 | selector_dequeue: 342 | for { 343 | // keep dequeuing selectors from waitlist and try to acquire one 344 | // if acquired write to selector, ready it and go back to parking state 345 | if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil { 346 | if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil { 347 | // implementaion of sending from closed channel to selector mechanism 348 | if queueOpen { 349 | // write to the selector 350 | *dataOut = data 351 | } else { 352 | // send nil from closed channel 353 | *dataOut = nil 354 | } 355 | // notify selector 356 | safe_ready(threadPtr) 357 | readState = false 358 | break selector_dequeue 359 | } else { 360 | continue 361 | } 362 | } else { 363 | break selector_dequeue 364 | } 365 | } 366 | // if not selected by any selector, commit data to backlog and wait for next signal 367 | // saves a lot of cpu time 368 | if readState && queueOpen { 369 | var i T = data 370 | self.backlog.Store(&i) 371 | } 372 | self.selectionState.Store(SelectionOpen) 373 | } 374 | } 375 | --------------------------------------------------------------------------------