├── .editorconfig
├── .github
│   └── workflows
│       └── run-tests.yml
├── .gitignore
├── .gitmodules
├── Cargo.toml
├── LICENSE
├── README.md
├── Taskfile.yml
├── asan
├── benches
│   └── bench.rs
├── lsan
├── rustfmt.toml
├── src
│   └── lib.rs
└── tsan

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*]
indent_style = space
indent_size = 2
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.sql]
insert_final_newline = false

--------------------------------------------------------------------------------
/.github/workflows/run-tests.yml:
--------------------------------------------------------------------------------
name: CI

on:
  pull_request:
  push:
    branches:
      - master
      - 'feature/**'

jobs:
  test-tsan:
    name: Test with ThreadSanitizer on ${{ matrix.target }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        target:
          # 64-bit Linux (kernel 2.6.32+, glibc 2.11+)
          - x86_64-unknown-linux-gnu
        rust: [nightly]
    steps:
      - uses: actions/checkout@v2
      - uses: hecrj/setup-rust-action@v1
        with:
          rust-version: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - run: rustup component add rust-src --toolchain ${{ matrix.rust }}-${{ matrix.target }}
      - run: TSAN_OPTIONS="suppressions=$(pwd)/tsan" RUSTFLAGS="-Z sanitizer=thread" cargo test -Z build-std --target=${{ matrix.target }}

  test-lsan:
    name: Test with LeakSanitizer on ${{ matrix.target }}
    runs-on: macos-latest
    strategy:
      fail-fast: false
      matrix:
        target:
          # 64-bit macOS (10.7+, Lion+)
          - x86_64-apple-darwin
        rust: [nightly]
    steps:
      - uses: actions/checkout@v2
      - uses: hecrj/setup-rust-action@v1
        with:
          rust-version: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - run: rustup component add rust-src --toolchain ${{ matrix.rust }}-${{ matrix.target }}
      - run: LSAN_OPTIONS="suppressions=$(pwd)/lsan" RUSTFLAGS="-Z sanitizer=leak" cargo test -Z build-std --target=${{ matrix.target }}

  test-miri:
    name: Test with Miri on ${{ matrix.target }}
    runs-on: ubuntu-latest
    env:
      MIRIFLAGS: -Zmiri-ignore-leaks
    strategy:
      fail-fast: false
      matrix:
        target:
          # 64-bit Linux (kernel 2.6.32+, glibc 2.11+)
          - x86_64-unknown-linux-gnu
        rust: [nightly]
    steps:
      - uses: actions/checkout@v2
      - uses: hecrj/setup-rust-action@v1
        with:
          rust-version: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - run: rustup component add miri
      - run: cargo miri test -Z build-std --target=${{ matrix.target }}

  test-loom:
    name: Test with Loom on ${{ matrix.target }}
    runs-on: ubuntu-latest
    env:
      LOOM_MAX_PREEMPTIONS: 10
    strategy:
      fail-fast: false
      matrix:
        target:
          # 64-bit Linux (kernel 2.6.32+, glibc 2.11+)
          - x86_64-unknown-linux-gnu
        rust: [nightly]
    steps:
      - uses: actions/checkout@v2
      - uses: hecrj/setup-rust-action@v1
        with:
          rust-version: ${{ matrix.rust }}
          targets: ${{ matrix.target }}
      - run: RUSTFLAGS="--cfg loom" cargo test --target=${{ matrix.target }}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# These are backup files generated by rustfmt
**/*.rs.bk

.env

.DS_Store

# Added by cargo
#
# already existing elements were commented out

/target
Cargo.lock

.task

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "benchmarks"]
	path = target/criterion
	url = git@github.com:Bajix/swap-queue-benchmarks.git
	branch = master

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
authors = ["Thomas Sieverding"]
edition = "2021"
name = "swap-queue"
version = "1.1.0"
description = "A lock-free thread-owned queue whereby tasks are taken by stealers in entirety via buffer swapping"
readme = "./README.md"
license = "MIT"
repository = "https://github.com/Bajix/swap-queue-rs/"
exclude = ["/target/criterion/*"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
crossbeam-epoch = "0.9.8"
crossbeam-utils = "0.8.8"
futures = "0.3.21"
tokio = { version = "1", features = ["sync", "rt", "macros"] }

[target.'cfg(loom)'.dependencies]
loom = "0.5.6"

[lib]
doctest = false
bench = false

[dev-dependencies]
criterion = { version = "0.3.5", features = ["async_tokio", "html_reports"] }
crossbeam-deque = "0.8.1"
flume = "0.10.13"
jemallocator = "0.5.0"

[[bench]]
name = "benchmarks"
path = "benches/bench.rs"
harness = false
doc = false

[profile.bench]
lto = "fat"
opt-level = 3
codegen-units = 1

[profile.release]
lto = "fat"
opt-level = 3
codegen-units = 1
panic = "abort"

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Thomas Sieverding

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Swap Queue

![License](https://img.shields.io/badge/license-MIT-green.svg)
[![Cargo](https://img.shields.io/crates/v/swap-queue.svg)](https://crates.io/crates/swap-queue)
[![Documentation](https://docs.rs/swap-queue/badge.svg)](https://docs.rs/swap-queue)
[![CI](https://github.com/Bajix/swap-queue-rs/actions/workflows/run-tests.yml/badge.svg)](https://github.com/Bajix/swap-queue-rs/actions/workflows/run-tests.yml)

A lock-free thread-owned queue whereby tasks are taken by stealers in entirety via buffer swapping. For batching use cases, this has the advantage that all tasks can be taken as a single batch in constant time, regardless of batch size, whereas alternatives built on [`crossbeam_deque::Worker`](https://docs.rs/crossbeam-deque/0.8.1/crossbeam_deque/struct.Worker.html) and [`tokio::sync::mpsc`](https://docs.rs/tokio/1.14.0/tokio/sync/mpsc/index.html) must collect each task separately and can lack a clear cutoff point. This design ensures that if you are waiting on a resource, such as a connection becoming available, the pending batch can be processed without further delay the moment that resource is ready. While pushing alone is slower than [`crossbeam_deque::Worker`](https://docs.rs/crossbeam-deque/0.8.1/crossbeam_deque/struct.Worker.html) and faster than [`tokio::sync::mpsc`](https://docs.rs/tokio/1.14.0/tokio/sync/mpsc/index.html), overall batching performance on ARM is around 11-19% faster than [`crossbeam_deque::Worker`](https://docs.rs/crossbeam-deque/0.8.1/crossbeam_deque/struct.Worker.html) and 28-45% faster than [`tokio::sync::mpsc`](https://docs.rs/tokio/1.14.0/tokio/sync/mpsc/index.html), and there is never a slow cutoff between batches.

## Example

```rust
use swap_queue::Worker;
use tokio::{
  runtime::Handle,
  sync::oneshot::{channel, Sender},
};

// Jemalloc makes this library substantially faster
#[global_allocator]
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;

// Worker needs to be thread local because it is !Sync
thread_local! {
  static QUEUE: Worker<(u64, Sender<u64>)> = Worker::new();
}

// This mechanism batches optimally and without overhead within an async context, because the spawn happens after tasks that are already scheduled
async fn push_echo(i: u64) -> u64 {
  {
    let (tx, rx) = channel();

    QUEUE.with(|queue| {
      // A new stealer is returned whenever the buffer is new or was empty
      if let Some(stealer) = queue.push((i, tx)) {
        Handle::current().spawn(async move {
          // Take the underlying buffer in entirety; the next push will return a new Stealer
          let batch = stealer.take().await;

          // Some sort of batched operation, such as a database query

          batch.into_iter().for_each(|(i, tx)| {
            tx.send(i).ok();
          });
        });
      }
    });

    rx
  }
  .await
  .unwrap()
}
```
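
For fixed-size batches there is also `Worker::auto_batched`, which hands off the buffer every time it fills to capacity. The snippet below is an illustrative sketch rather than an excerpt from the crate's documentation; it assumes only the `auto_batched` constructor and `take_blocking` as defined in `src/lib.rs` (the batch size must be a power of two, 64 or greater):

```rust
use swap_queue::{Stealer, Worker};

// Buffers are fixed at 64 slots; each full buffer is handed off as a single batch
let queue: Worker<u64> = Worker::auto_batched(64);

let mut stealers: Vec<Stealer<u64>> = Vec::new();

for i in 0..128 {
  // `push` returns a Stealer whenever it starts a new buffer or completes a full one
  if let Some(stealer) = queue.push(i) {
    stealers.push(stealer);
  }
}

// Each Stealer yields exactly one fixed-size batch; `take_blocking` never blocks
// when called from the Worker's own thread
for stealer in stealers {
  let batch: Vec<u64> = stealer.take_blocking();
  assert_eq!(batch.len(), 64);
}
```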

## Benchmarks

Benchmarks were run on a t4g.medium instance using ami-06391d741144b83c2.

### Async Batching

*(benchmark charts omitted; titles below)*

- Benchmarks, 64 tasks
- Benchmarks, 128 tasks
- Benchmarks, 256 tasks
- Benchmarks, 512 tasks
- Benchmarks, 1024 tasks

### Push

- Benchmarks, 1024 tasks

### Batch collecting

- Benchmarks, 1024 tasks

CI runs under ThreadSanitizer, LeakSanitizer, Miri and Loom.

--------------------------------------------------------------------------------
/Taskfile.yml:
--------------------------------------------------------------------------------
version: "3"

env:
  TARGET: x86_64-apple-darwin

sources:
  - src/**/*.rs

tasks:
  default:
    cmds:
      - task: test-loom

  bench:
    env:
      RUSTFLAGS: "-C target-cpu=native"
    cmds:
      - cargo bench --profile release

  update-benchmarks:
    cmds:
      - task: bench
      - git add target/criterion --force

  test:
    cmds:
      - task: test-doc
      - task: test-miri
      - task: test-loom
      - task: test-sanitizer

  test-doc:
    cmds:
      - cargo test --doc -- --nocapture
    sources:
      - src/**/*.rs

  test-miri:
    cmds:
      - cargo miri test -Z build-std --target $TARGET -- --nocapture
    sources:
      - src/**/*.rs
    env:
      MIRIFLAGS: -Zmiri-ignore-leaks

  test-loom:
    cmds:
      - cargo test -Z build-std --target $TARGET -- --nocapture
    sources:
      - src/**/*.rs
    env:
      RUSTFLAGS: --cfg loom
      LOOM_MAX_PREEMPTIONS: 10

  test-sanitizer:
    cmds:
      - task: test-address-sanitizer
      - task: test-thread-sanitizer
      - task: test-leak-sanitizer

  test-address-sanitizer:
    cmds:
      - cargo test -Z build-std --target $TARGET -- --nocapture
    sources:
      - src/**/*.rs
    env:
      RUSTFLAGS: -Z sanitizer=address
      ASAN_OPTIONS: suppressions=asan

  test-thread-sanitizer:
    cmds:
      - cargo test -Z build-std --target $TARGET -- --nocapture
    sources:
      - src/**/*.rs
    env:
      RUSTFLAGS: -Z sanitizer=thread
      TSAN_OPTIONS: suppressions=tsan

  test-leak-sanitizer:
    cmds:
      - cargo test -Z build-std --target $TARGET -- --nocapture
    sources:
      - src/**/*.rs
    env:
      RUSTFLAGS: -Z sanitizer=leak
      LSAN_OPTIONS: suppressions=lsan

--------------------------------------------------------------------------------
/asan:
--------------------------------------------------------------------------------
race:crossbeam_epoch
leak:crossbeam_epoch

--------------------------------------------------------------------------------
/benches/bench.rs:
--------------------------------------------------------------------------------
use std::time::Duration;

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use tokio::runtime::Builder;

#[global_allocator]
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;

mod bench_swap_queue {
  use futures::future::join_all;
  use swap_queue::Worker;
  use tokio::{
    runtime::Handle,
    sync::oneshot::{channel, Sender},
  };

  thread_local! {
    static QUEUE: Worker<(u64, Sender<u64>)> = Worker::new();
  }

  async fn push_echo(i: u64) -> u64 {
    {
      let (tx, rx) = channel();

      QUEUE.with(|queue| {
        if let Some(stealer) = queue.push((i, tx)) {
          Handle::current().spawn(async move {
            let batch = stealer.take().await;

            batch.into_iter().for_each(|(i, tx)| {
              tx.send(i).ok();
            });
          });
        }
      });

      rx
    }
    .await
    .unwrap()
  }

  pub async fn bench_batching(batch_size: &u64) {
    let batch: Vec<u64> = join_all((0..*batch_size).map(|i| push_echo(i))).await;

    assert_eq!(batch, (0..*batch_size).collect::<Vec<u64>>())
  }
}

mod bench_crossbeam {
  use crossbeam_deque::{Steal, Worker};
  use futures::future::join_all;
  use tokio::{
    runtime::Handle,
    sync::oneshot::{channel, Sender},
  };

  thread_local! {
    static QUEUE: Worker<(u64, Sender<u64>)> = Worker::new_fifo();
  }

  async fn push_echo(i: u64) -> u64 {
    let (tx, rx) = channel();

    QUEUE.with(|queue| {
      // crossbeam_deque::Worker could be patched to return the slot written, so we give this the benefit of that potential optimization
      if i.eq(&0) {
        let stealer = queue.stealer();

        Handle::current().spawn(async move {
          let batch: Vec<(u64, Sender<u64>)> = std::iter::from_fn(|| loop {
            match stealer.steal() {
              Steal::Success(task) => break Some(task),
              Steal::Retry => continue,
              Steal::Empty => break None,
            }
          })
          .collect();

          batch.into_iter().for_each(|(i, tx)| {
            tx.send(i).ok();
          });
        });
      }

      queue.push((i, tx));
    });

    rx.await.unwrap()
  }

  pub async fn bench_batching(batch_size: &u64) {
    let batch: Vec<u64> = join_all((0..*batch_size).map(|i| push_echo(i))).await;

    assert_eq!(batch, (0..*batch_size).collect::<Vec<u64>>())
  }
}

mod bench_tokio {
  use futures::future::join_all;
  use tokio::{
    runtime::Handle,
    sync::{mpsc, oneshot},
  };

  fn make_reactor() -> mpsc::UnboundedSender<(u64, oneshot::Sender<u64>)> {
    let (tx, mut rx) = mpsc::unbounded_channel();

    Handle::current().spawn(async move {
      loop {
        if let Some(task) = rx.recv().await {
          let batch: Vec<(u64, oneshot::Sender<u64>)> = std::iter::once(task)
            .chain(std::iter::from_fn(|| rx.try_recv().ok()))
            .collect();

          batch.into_iter().for_each(|(i, tx)| {
            tx.send(i).ok();
          });
        }
      }
    });

    tx
  }

  async fn push_echo(i: u64) -> u64 {
    thread_local! {
      static QUEUE: mpsc::UnboundedSender<(u64, oneshot::Sender<u64>)> = make_reactor();
    }

    let (tx, rx) = oneshot::channel();

    QUEUE.with(|queue_tx| {
      queue_tx.send((i, tx)).ok();
    });

    rx.await.unwrap()
  }

  pub async fn bench_batching(batch_size: &u64) {
    let batch: Vec<u64> = join_all((0..*batch_size).map(|i| push_echo(i))).await;

    assert_eq!(batch, (0..*batch_size).collect::<Vec<u64>>())
  }
}

mod bench_flume {
  use flume::{self, Sender};
  use futures::future::join_all;
  use tokio::{runtime::Handle, sync::oneshot};

  fn make_reactor() -> Sender<(u64, oneshot::Sender<u64>)> {
    let (tx, rx) = flume::unbounded();

    Handle::current().spawn(async move {
      loop {
        if let Ok(task) = rx.recv_async().await {
          let batch: Vec<(u64, oneshot::Sender<u64>)> = std::iter::once(task)
            .chain(std::iter::from_fn(|| rx.try_recv().ok()))
            .collect();

          batch.into_iter().for_each(|(i, tx)| {
            tx.send(i).ok();
          });
        }
      }
    });

    tx
  }

  async fn push_echo(i: u64) -> u64 {
    thread_local! {
      static QUEUE: Sender<(u64, oneshot::Sender<u64>)> = make_reactor();
    }

    let (tx, rx) = oneshot::channel();

    QUEUE.with(|queue_tx| {
      queue_tx.send((i, tx)).ok();
    });

    rx.await.unwrap()
  }

  pub async fn bench_batching(batch_size: &u64) {
    let batch: Vec<u64> = join_all((0..*batch_size).map(|i| push_echo(i))).await;

    assert_eq!(batch, (0..*batch_size).collect::<Vec<u64>>())
  }
}

fn criterion_benchmark(c: &mut Criterion) {
  let rt = Builder::new_current_thread().build().unwrap();

  let mut push_tests = c.benchmark_group("Push");
  push_tests.warm_up_time(Duration::from_millis(10));
  push_tests.measurement_time(Duration::from_secs(1));
  push_tests.sample_size(50);

  for n in 0..=12 {
    let batch_size: u64 = 1 << n;
    push_tests.bench_with_input(
      BenchmarkId::new("swap-queue", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || swap_queue::Worker::new(),
          |queue| {
            for i in 0..*batch_size {
              queue.push(i);
            }
          },
          BatchSize::PerIteration,
        )
      },
    );

    push_tests.bench_with_input(
      BenchmarkId::new("crossbeam", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || crossbeam_deque::Worker::new_fifo(),
          |queue| {
            for i in 0..*batch_size {
              queue.push(i);
            }
          },
          BatchSize::PerIteration,
        )
      },
    );

    push_tests.bench_with_input(
      BenchmarkId::new("flume", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || flume::unbounded(),
          |(tx, _rx)| {
            for i in 0..*batch_size {
              tx.send(i).ok();
            }
          },
          BatchSize::PerIteration,
        )
      },
    );

    push_tests.bench_with_input(
      BenchmarkId::new("tokio::mpsc", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || tokio::sync::mpsc::unbounded_channel(),
          |(tx, _rx)| {
            for i in 0..*batch_size {
              tx.send(i).ok();
            }
          },
          BatchSize::PerIteration,
        )
      },
    );
  }

  push_tests.finish();

  let mut take_tests = c.benchmark_group("Take");
  take_tests.warm_up_time(Duration::from_millis(10));
  take_tests.measurement_time(Duration::from_secs(1));
  take_tests.sample_size(50);

  for n in 0..=12 {
    let batch_size: u64 = 1 << n;
    take_tests.bench_with_input(
      BenchmarkId::new("swap-queue", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || {
            let worker = swap_queue::Worker::new();
            let stealer = worker.push(0).unwrap();
            for i in 1..*batch_size {
              worker.push(i);
            }

            stealer
          },
          |stealer| stealer.take_blocking(),
          BatchSize::PerIteration,
        );
      },
    );

    take_tests.bench_with_input(
      BenchmarkId::new("crossbeam", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || {
            let worker = crossbeam_deque::Worker::new_fifo();
            let stealer = worker.stealer();
            for i in 1..*batch_size {
              worker.push(i);
            }

            stealer
          },
          |stealer| {
            let _: Vec<u64> = std::iter::from_fn(|| loop {
              match stealer.steal() {
                crossbeam_deque::Steal::Success(task) => break Some(task),
                crossbeam_deque::Steal::Retry => continue,
                crossbeam_deque::Steal::Empty => break None,
              }
            })
            .collect();
          },
          BatchSize::PerIteration,
        );
      },
    );

    take_tests.bench_with_input(
      BenchmarkId::new("flume", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || {
            let (tx, rx) = flume::unbounded();
            for i in 1..*batch_size {
              tx.send(i).ok();
            }
            rx
          },
          |rx| {
            let _: Vec<u64> = rx.try_iter().collect();
          },
          BatchSize::PerIteration,
        );
      },
    );

    take_tests.bench_with_input(
      BenchmarkId::new("tokio::mpsc", batch_size),
      &batch_size,
      |b, batch_size| {
        b.iter_batched(
          || {
            let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
            for i in 1..*batch_size {
              tx.send(i).ok();
            }
            rx
          },
          |mut rx| {
            let _: Vec<u64> = std::iter::from_fn(|| rx.try_recv().ok()).collect();
          },
          BatchSize::PerIteration,
        );
      },
    );
  }

  take_tests.finish();

  let mut async_batching_tests = c.benchmark_group("Batching");
  async_batching_tests.warm_up_time(Duration::from_millis(10));
  async_batching_tests.measurement_time(Duration::from_secs(1));
  async_batching_tests.sample_size(50);

  for n in 0..=12 {
    let batch_size: u64 = 1 << n;

    async_batching_tests.bench_with_input(
      BenchmarkId::new("swap-queue", batch_size),
      &batch_size,
      |b, batch_size| {
        b.to_async(&rt)
          .iter(|| bench_swap_queue::bench_batching(batch_size))
      },
    );

    async_batching_tests.bench_with_input(
      BenchmarkId::new("crossbeam", batch_size),
      &batch_size,
      |b, batch_size| {
        b.to_async(&rt)
          .iter(|| bench_crossbeam::bench_batching(batch_size))
      },
    );

    async_batching_tests.bench_with_input(
      BenchmarkId::new("flume", batch_size),
      &batch_size,
      |b, batch_size| {
        b.to_async(&rt)
          .iter(|| bench_flume::bench_batching(batch_size))
      },
    );

    async_batching_tests.bench_with_input(
      BenchmarkId::new("tokio::mpsc", batch_size),
      &batch_size,
      |b, batch_size| {
        b.to_async(&rt)
          .iter(|| bench_tokio::bench_batching(batch_size))
      },
    );
  }

  async_batching_tests.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

--------------------------------------------------------------------------------
/lsan:
--------------------------------------------------------------------------------
leak:crossbeam_epoch

--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
imports_granularity = "Crate"
newline_style = "Unix"
tab_spaces = 2

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//!
//! A lock-free thread-owned queue whereby tasks are taken by stealers in entirety via buffer swapping. This is meant to be used as a [`thread_local`] queue paired with [`tokio::task::spawn`] as a constant-time take-all batching mechanism that outperforms [`crossbeam_deque::Worker`](https://docs.rs/crossbeam-deque/0.8.1/crossbeam_deque/struct.Worker.html) and [`tokio::sync::mpsc`] for batching.
//!
//! ## Example
//!
//! ```
//! use swap_queue::Worker;
//! use tokio::{
//!   runtime::Handle,
//!   sync::oneshot::{channel, Sender},
//! };
//!
//! // Jemalloc makes this library substantially faster
//! #[global_allocator]
//! static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
//!
//! // Worker needs to be thread local because it is !Sync
//! thread_local! {
//!   static QUEUE: Worker<(u64, Sender<u64>)> = Worker::new();
//! }
//!
//! // This mechanism batches optimally and without overhead within an async context, because the spawn happens after tasks that are already scheduled
//! async fn push_echo(i: u64) -> u64 {
//!   {
//!     let (tx, rx) = channel();
//!
//!     QUEUE.with(|queue| {
//!       // A new stealer is returned whenever the buffer is new or was empty
//!       if let Some(stealer) = queue.push((i, tx)) {
//!         Handle::current().spawn(async move {
//!           // Take the underlying buffer in entirety; the next push will return a new Stealer
//!           let batch = stealer.take().await;
//!
//!           // Some sort of batched operation, such as a database query
//!
//!           batch.into_iter().for_each(|(i, tx)| {
//!             tx.send(i).ok();
//!           });
//!         });
//!       }
//!     });
//!
//!     rx
//!   }
//!   .await
//!   .unwrap()
//! }
//! ```

use crossbeam_epoch::{self as epoch, Atomic, Owned};
use crossbeam_utils::CachePadded;

use futures::executor::block_on;
use std::{cell::Cell, fmt, marker::PhantomData, mem, ptr, sync::Arc};
use tokio::sync::oneshot::{channel, Receiver, Sender};

#[cfg(loom)]
use loom::sync::atomic::{AtomicUsize, Ordering};

#[cfg(not(loom))]
use std::sync::atomic::{AtomicUsize, Ordering};

// Current buffer index
const BUFFER_IDX: usize = 1 << 0;

// Designates that a write is in progress
const WRITE_IN_PROGRESS: usize = 1 << 1;

// Designates how many bits are set aside for flags
const FLAGS_SHIFT: usize = 1;

// Slot increments both for reads and writes, therefore we shift slot an extra bit to extract length
const LENGTH_SHIFT: usize = FLAGS_SHIFT + 1;

// Minimum buffer capacity.
const MIN_CAP: usize = 64;
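
// Illustration (added commentary, not part of the original source): the packed
// `slot` word is laid out as follows. Every push performs two
// `fetch_add(1 << FLAGS_SHIFT)` calls - one to begin the write and one to
// confirm it - so each completed write advances `slot` by 4:
//
//   bit 0    BUFFER_IDX        which of the two buffers is current
//   bit 1    WRITE_IN_PROGRESS set by the first fetch_add, cleared by the second
//   bits 2.. slot >> LENGTH_SHIFT = number of completed writes
//
// e.g. after three completed pushes into buffer 0, slot = 0b1100, and
// 0b1100 >> LENGTH_SHIFT = 3.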

/// A buffer that holds tasks in a worker queue.
///
/// This is just a pointer to the buffer and its length - dropping an instance of this struct will
/// *not* deallocate the buffer.
struct Buffer<T> {
  /// Slot that represents the index offset and buffer idx
  slot: usize,

  /// Pointer to the allocated memory.
  ptr: *mut T,

  /// Capacity of the buffer. Always a power of two.
  cap: usize,
}

unsafe impl<T> Send for Buffer<T> {}
unsafe impl<T> Sync for Buffer<T> {}

impl<T> Buffer<T> {
  /// Allocates a new buffer with the specified capacity.
  fn alloc(slot: usize, cap: usize) -> Buffer<T> {
    debug_assert_eq!(cap, cap.next_power_of_two());

    let mut v = Vec::with_capacity(cap);
    let ptr = v.as_mut_ptr();
    mem::forget(v);

    Buffer { slot, ptr, cap }
  }

  /// Deallocates the buffer.
  unsafe fn dealloc(self) {
    drop(Vec::from_raw_parts(self.ptr, 0, self.cap));
  }

  /// Returns a pointer to the task at the specified `index`.
  unsafe fn at(&self, index: usize) -> *mut T {
    // `self.cap` is always a power of two.
    self.ptr.offset((index & (self.cap - 1)) as isize)
  }

  /// Writes `task` into the specified `index`.
  unsafe fn write(&self, index: usize, task: T) {
    ptr::write_volatile(self.at(index), task)
  }

  unsafe fn to_vec(self, length: usize) -> Vec<T> {
    let Buffer { ptr, cap, .. } = self;
    Vec::from_raw_parts(ptr, length, cap)
  }
}

impl<T> Clone for Buffer<T> {
  fn clone(&self) -> Buffer<T> {
    Buffer {
      slot: self.slot,
      ptr: self.ptr,
      cap: self.cap,
    }
  }
}

impl<T> Copy for Buffer<T> {}

fn slot_delta(a: usize, b: usize) -> usize {
  if a < b {
    ((usize::MAX - b) >> LENGTH_SHIFT) + (a >> LENGTH_SHIFT)
  } else {
    (a >> LENGTH_SHIFT) - (b >> LENGTH_SHIFT)
  }
}
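
// Worked example (added commentary, not part of the original source): with
// LENGTH_SHIFT = 2, slot_delta(24, 8) = (24 >> 2) - (8 >> 2) = 6 - 2 = 4, i.e.
// four completed writes separate the two slot values. The `a < b` branch
// handles the monotonically increasing slot counter wrapping around
// usize::MAX, which the `slot_wraps_around` test below exercises.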

struct Inner<T> {
  slot: AtomicUsize,
  buffers: (
    CachePadded<Atomic<Buffer<T>>>,
    CachePadded<Atomic<Buffer<T>>>,
  ),
}

impl<T> Inner<T> {
  fn get_buffer(&self, slot: usize) -> &CachePadded<Atomic<Buffer<T>>> {
    if slot & BUFFER_IDX == 0 {
      &self.buffers.0
    } else {
      &self.buffers.1
    }
  }
}

/// A thread-owned worker queue that writes to a swappable buffer using atomic slotting
///
/// # Examples
///
/// ```
/// use swap_queue::Worker;
///
/// let w = Worker::new();
/// let s = w.push(1).unwrap();
/// w.push(2);
/// w.push(3);
/// // this is non-blocking because it's called on the same thread as the Worker; a write in progress is not possible
/// assert_eq!(s.take_blocking(), vec![1, 2, 3]);
///
/// let s = w.push(4).unwrap();
/// w.push(5);
/// w.push(6);
/// // this is identical to [`Stealer::take_blocking`]
/// let batch: Vec<_> = s.into();
/// assert_eq!(batch, vec![4, 5, 6]);
/// ```

enum Flavor {
  Unbounded,
  AutoBatched { batch_size: usize },
}

pub struct Worker<T> {
  flavor: Flavor,
  /// A reference to the inner representation of the queue.
  inner: Arc<CachePadded<Inner<T>>>,
  /// A copy of `inner.buffer` for quick access.
  buffer: Cell<Buffer<T>>,
  /// Send handle corresponding to the current Stealer
  tx: Cell<Option<Sender<Vec<T>>>>,
  /// Indicates that the worker cannot be shared among threads.
  _marker: PhantomData<*mut ()>,
}

unsafe impl<T: Send> Send for Worker<T> {}

impl<T> Worker<T> {
  /// Creates a new Worker queue.
  ///
  /// # Examples
  ///
  /// ```
  /// use swap_queue::Worker;
  ///
  /// let w = Worker::<usize>::new();
  /// ```
  pub fn new() -> Worker<T> {
    // Placeholder buffer to force the initial buffer swap
    let buffer = Buffer {
      slot: BUFFER_IDX,
      ptr: std::ptr::null_mut(),
      cap: MIN_CAP,
    };

    let inner = Arc::new(CachePadded::new(Inner {
      slot: AtomicUsize::new(0),
      buffers: (
        CachePadded::new(Atomic::new(buffer)),
        CachePadded::new(Atomic::null()),
      ),
    }));

    Worker {
      flavor: Flavor::Unbounded,
      inner,
      buffer: Cell::new(buffer),
      tx: Cell::new(None),
      _marker: PhantomData,
    }
  }

  /// Creates an auto-batched Worker queue with fixed-length buffers. At capacity, the buffer is swapped out and ownership taken by the returned Stealer. The batch size must be a power of 2
  ///
  /// # Examples
  ///
  /// ```
  /// use swap_queue::Worker;
  ///
  /// let w = Worker::<usize>::auto_batched(64);
  /// ```
  pub fn auto_batched(batch_size: usize) -> Worker<T> {
    debug_assert!(batch_size.ge(&64), "batch_size must be at least 64");
    debug_assert_eq!(
      batch_size,
      batch_size.next_power_of_two(),
      "batch_size must be a power of 2"
    );

    // Placeholder buffer to force the initial buffer swap
    let buffer = Buffer {
      slot: BUFFER_IDX,
      ptr: std::ptr::null_mut(),
      cap: MIN_CAP,
    };

    let inner = Arc::new(CachePadded::new(Inner {
      slot: AtomicUsize::new(0),
      buffers: (
        CachePadded::new(Atomic::new(buffer)),
        CachePadded::new(Atomic::null()),
      ),
    }));

    Worker {
      flavor: Flavor::AutoBatched { batch_size },
      inner,
      buffer: Cell::new(buffer),
      tx: Cell::new(None),
      _marker: PhantomData,
    }
  }

  /// Doubles the capacity of the internal buffer, copying over any existing entries.
  unsafe fn resize(&self, buffer: &mut Buffer<T>, slot: usize) {
    let length = slot_delta(slot, buffer.slot);

    // Allocate a new buffer and copy data from the old buffer to the new one.
    let new = Buffer::alloc(buffer.slot, buffer.cap * 2);

    ptr::copy_nonoverlapping(buffer.at(0), new.at(0), length);

    self.buffer.set(new);

    let old = std::mem::replace(buffer, new);

    self
      .inner
      .get_buffer(slot)
      .store(Owned::new(new), Ordering::Release);

    old.dealloc();
  }

  fn replace_buffer(&self, buffer: &mut Buffer<T>, slot: usize, cap: usize) -> Buffer<T> {
    let new = Buffer::alloc(slot.to_owned(), cap);

    self
      .inner
      .get_buffer(slot)
      .store(Owned::new(new), Ordering::Release);

    self.buffer.set(new);

    std::mem::replace(buffer, new)
  }
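
  // Handshake sketch (added commentary, not part of the original source), for
  // the case where a steal lands in the middle of a write:
  //
  //   Worker                              Stealer
  //   ------                              -------
  //   fetch_add: begin write
  //   write task into slot
  //                                       fetch_xor: flip BUFFER_IDX to claim buffer
  //   fetch_add: confirm write
  //   sees the BUFFER_IDX bit changed,
  //   sends the buffer via oneshot  --->  receives the buffer as a Vec
  //
  // If no steal intervenes, the confirming fetch_add observes an unchanged
  // BUFFER_IDX bit and `push` returns None; if no write is in progress, the
  // Stealer instead swaps the buffer out directly without waiting.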

  /// Writes to the next slot, swapping buffers as necessary and returning a Stealer at the start of a new batch
  pub fn push(&self, task: T) -> Option<Stealer<T>> {
    let slot = self
      .inner
      .slot
      .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

    let mut buffer = self.buffer.get();

    // The BUFFER_IDX bit changed, therefore the buffer was stolen
    if ((slot ^ buffer.slot) & BUFFER_IDX).eq(&BUFFER_IDX) {
      buffer = Buffer::alloc(slot, buffer.cap);

      self
        .inner
        .get_buffer(slot)
        .store(Owned::new(buffer), Ordering::Release);

      self.buffer.set(buffer);

      unsafe {
        buffer.write(0, task);
      }

      // There can be no stealer at this point, so there is no need to check the IDX XOR
      self
        .inner
        .slot
        .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

      let (tx, rx) = channel();
      self.tx.set(Some(tx));

      Some(Stealer::Taker(StealHandle {
        rx,
        inner: self.inner.clone(),
      }))
    } else {
      let index = slot_delta(slot, buffer.slot);

      match &self.flavor {
        Flavor::Unbounded if index.eq(&buffer.cap) => {
          unsafe {
            self.resize(&mut buffer, slot);
            buffer.write(index, task);
          }

          let slot = self
            .inner
            .slot
            .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

          // The Stealer expressed its intention to take the buffer by changing the buffer index, and is waiting on the Worker to send the buffer upon completion of the current write in progress
          if ((slot ^ buffer.slot) & BUFFER_IDX).eq(&BUFFER_IDX) {
            let (tx, rx) = channel();
            let tx = self.tx.replace(Some(tx)).unwrap();

            // Send the buffer as a vec to the receiver
            tx.send(unsafe { buffer.to_vec(index) }).ok();

            Some(Stealer::Taker(StealHandle {
              rx,
              inner: self.inner.clone(),
            }))
          } else {
            None
          }
        }
        Flavor::AutoBatched { batch_size } if index.eq(batch_size) => {
          let old = self.replace_buffer(&mut buffer, slot, *batch_size);
          let batch = unsafe { old.to_vec(*batch_size) };

          unsafe {
            buffer.write(0, task);
          }

          let slot = self
            .inner
            .slot
            .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

          if ((slot ^ buffer.slot) & BUFFER_IDX).eq(&BUFFER_IDX) {
            let (tx, rx) = channel();
            let tx = self.tx.replace(Some(tx)).unwrap();

            tx.send(batch).ok();

            Some(Stealer::Taker(StealHandle {
              rx,
              inner: self.inner.clone(),
            }))
          } else {
            Some(Stealer::Owner(batch))
          }
        }
        _ => {
          unsafe {
            buffer.write(index, task);
          }

          let slot = self
            .inner
            .slot
            .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

          if ((slot ^ buffer.slot) & BUFFER_IDX).eq(&BUFFER_IDX) {
            let (tx, rx) = channel();
            let tx = self.tx.replace(Some(tx)).unwrap();

            // Send the buffer as a vec to the receiver
            tx.send(unsafe { buffer.to_vec(index) }).ok();

            Some(Stealer::Taker(StealHandle {
              rx,
              inner: self.inner.clone(),
            }))
          } else {
            None
          }
        }
      }
    }
  }
}

impl<T> Default for Worker<T> {
  fn default() -> Self {
    Self::new()
  }
}

impl<T> fmt::Debug for Worker<T> {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    f.pad("Worker { .. }")
  }
}

impl<T> Drop for Worker<T> {
  fn drop(&mut self) {
    // By leaving this as indefinitely write in progress, the Stealer will always receive from the oneshot::Sender
    let slot = self
      .inner
      .slot
      .fetch_add(1 << FLAGS_SHIFT, Ordering::Relaxed);

    let buffer = self.buffer.get();

    // Is the buffer still current? (If not, the Stealer has already taken it)
    if slot & BUFFER_IDX == buffer.slot & BUFFER_IDX {
      let length = slot_delta(slot, buffer.slot);

      // Send to the Stealer if able
      if let Some(tx) = self.tx.replace(None) {
        if let Err(queue) = tx.send(unsafe { buffer.to_vec(length) }) {
          drop(queue);
        }
      } else {
        // Otherwise deallocate everything
        unsafe {
          // Go through the buffer from front to back and drop all tasks in the queue.
          for i in 0..length {
            buffer.at(i).drop_in_place();
          }

          // Free the memory allocated by the buffer.
          buffer.dealloc();
        }
      }
    }
  }
}

#[doc(hidden)]
pub struct StealHandle<T> {
  /// Buffer receiver to be used when waiting on writes
  rx: Receiver<Vec<T>>,
  /// A reference to the inner representation of the queue.
  inner: Arc<CachePadded<Inner<T>>>,
}

/// Stealers swap out and take ownership of buffers in entirety from Workers
pub enum Stealer<T> {
  /// The Stealer was created with an owned batch that can simply be unwrapped
  Owner(Vec<T>),
  /// A StealHandle swaps buffers, either by taking the buffer directly or by awaiting the Worker to send it on write completion
  Taker(StealHandle<T>),
}

unsafe impl<T: Send> Send for Stealer<T> {}
unsafe impl<T: Send> Sync for Stealer<T> {}

impl<T> Stealer<T> {
  /// Takes the entire queue by swapping the underlying buffer and converting back into a `Vec<T>`, or by waiting to receive the buffer from the Worker if a write was in progress.
  pub async fn take(self) -> Vec<T> {
    match self {
      Stealer::Owner(batch) => batch,
      Stealer::Taker(StealHandle { rx, inner }) => {
        let slot = inner.slot.fetch_xor(BUFFER_IDX, Ordering::Relaxed);

        // The Worker will see that the buffer has swapped when confirming the length increment
        if slot & WRITE_IN_PROGRESS == WRITE_IN_PROGRESS {
          // The Worker can never be dropped mid-write, therefore RecvError cannot occur
          rx.await.unwrap()
        } else {
          let guard = &epoch::pin();

          let buffer = inner.get_buffer(slot).load_consume(guard);

          unsafe {
            let buffer = *buffer.into_owned();
            buffer.to_vec(slot_delta(slot, buffer.slot))
          }
        }
      }
    }
  }

  /// Takes the entire queue by swapping the underlying buffer and converting into a `Vec<T>`, or by blocking to receive the buffer from the Worker if a write was in progress. This is always non-blocking when called from the same thread as the Worker.
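  ///
  /// # Example
  ///
  /// A minimal sketch (added here for illustration; it mirrors the
  /// `stealer_takes_blocking` test at the bottom of this file):
  ///
  /// ```
  /// use swap_queue::Worker;
  ///
  /// let w = Worker::new();
  /// let s = w.push(1).unwrap();
  /// w.push(2);
  ///
  /// // `Stealer` is Send, so the batch can be taken from another thread
  /// let batch = std::thread::spawn(move || s.take_blocking()).join().unwrap();
  /// assert_eq!(batch, vec![1, 2]);
  /// ```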
  pub fn take_blocking(self) -> Vec<T> {
    match self {
      Stealer::Owner(batch) => batch,
      Stealer::Taker(StealHandle { rx, inner }) => {
        let slot = inner.slot.fetch_xor(BUFFER_IDX, Ordering::Relaxed);

        // The Worker will see that the buffer has swapped when confirming the length increment.
        // It's not possible for this to be a write in progress when called from the same thread as the queue
        if slot & WRITE_IN_PROGRESS == WRITE_IN_PROGRESS {
          // The Worker can never be dropped mid-write, therefore RecvError cannot occur
          block_on(rx).unwrap()
        } else {
          let guard = &epoch::pin();

          let buffer = inner.get_buffer(slot).load_consume(guard);

          unsafe {
            let buffer = *buffer.into_owned();
            buffer.to_vec(slot_delta(slot, buffer.slot))
          }
        }
      }
    }
  }
}

/// Uses [`Stealer::take_blocking`]; non-blocking when called on the same thread as the Worker
impl<T> From<Stealer<T>> for Vec<T> {
  fn from(stealer: Stealer<T>) -> Self {
    stealer.take_blocking()
  }
}

impl<T> fmt::Debug for Stealer<T> {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    f.pad("Stealer { .. }")
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  #[cfg(loom)]
  use loom::thread;

  #[cfg(not(loom))]
  use std::thread;

  macro_rules! model {
    ($test:block) => {
      #[cfg(loom)]
      loom::model(|| $test);

      #[cfg(not(loom))]
      $test
    };
  }

  #[test]
  fn slot_wraps_around() {
    let delta = slot_delta(1 << LENGTH_SHIFT, usize::MAX);

    assert_eq!(delta, 1);
  }

  #[test]
  fn it_resizes() {
    model!({
      let queue = Worker::new();
      let stealer = queue.push(0).unwrap();

      for i in 1..128 {
        queue.push(i);
      }

      let batch = stealer.take_blocking();
      let expected = (0..128).collect::<Vec<_>>();

      assert_eq!(batch, expected);
    });
  }

  #[test]
  fn it_makes_new_stealer_per_batch() {
    model!({
      let queue = Worker::new();
      let stealer = queue.push(0).unwrap();

      queue.push(1);
      queue.push(2);

      assert_eq!(stealer.take_blocking(), vec![0, 1, 2]);

      let stealer = queue.push(3).unwrap();
      queue.push(4);
      queue.push(5);

      assert_eq!(stealer.take_blocking(), vec![3, 4, 5]);
    });
  }

  #[test]
  fn it_auto_batches() {
    model!({
      let queue = Worker::auto_batched(64);
      let mut stealers: Vec<Stealer<i32>> = vec![];

      for i in 0..128 {
        if let Some(stealer) = queue.push(i) {
          stealers.push(stealer);
        }
      }

      let batch: Vec<i32> = stealers
        .into_iter()
        .rev()
        .flat_map(|stealer| stealer.take_blocking())
        .collect();

      let expected = (0..128).collect::<Vec<_>>();

      assert_eq!(batch, expected);
    });
  }

  #[cfg(not(loom))]
  #[tokio::test]
  async fn stealer_takes() {
    let queue = Worker::new();
    let stealer = queue.push(0).unwrap();

    for i in 1..1024 {
      queue.push(i);
    }

    let batch = stealer.take().await;
    let expected = (0..1024).collect::<Vec<_>>();

    assert_eq!(batch, expected);
  }
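
  // Added example (not in the original suite): a minimal sketch showing the
  // `From<Stealer<T>> for Vec<T>` conversion, which is equivalent to calling
  // `take_blocking` directly.
  #[test]
  fn stealer_converts_into_vec() {
    model!({
      let queue = Worker::new();
      let stealer = queue.push(0).unwrap();

      queue.push(1);
      queue.push(2);

      let batch: Vec<i32> = stealer.into();

      assert_eq!(batch, vec![0, 1, 2]);
    });
  }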

  #[test]
  fn stealer_takes_blocking() {
    model!({
      let queue = Worker::new();
      let stealer = queue.push(0).unwrap();

      for i in 1..128 {
        queue.push(i);
      }

      thread::spawn(move || {
        stealer.take_blocking();
      })
      .join()
      .unwrap();
    });
  }

  #[cfg(not(loom))]
  #[tokio::test]
  async fn worker_drops() {
    let queue = Worker::new();
    let stealer = queue.push(0).unwrap();

    for i in 1..128 {
      queue.push(i);
    }

    drop(queue);

    let batch = stealer.take().await;
    let expected = (0..128).collect::<Vec<_>>();

    assert_eq!(batch, expected);
  }

  #[cfg(loom)]
  #[tokio::test]
  async fn worker_drops() {
    loom::model(|| {
      let queue = Worker::new();
      let stealer = queue.push(0).unwrap();

      for i in 1..128 {
        queue.push(i);
      }

      drop(queue);

      let batch = stealer.take_blocking();
      let expected = (0..128).collect::<Vec<_>>();

      assert_eq!(batch, expected);
    });
  }
}

--------------------------------------------------------------------------------
/tsan:
--------------------------------------------------------------------------------
# TSAN suppressions file for swap_queue

# The epoch-based GC uses fences. See https://github.com/crossbeam-rs/crossbeam/issues/589
race:crossbeam_epoch
--------------------------------------------------------------------------------