├── .gitignore
├── .travis.yml
├── Cargo.lock
├── Cargo.toml
├── README.md
└── src
    ├── atomics
    │   ├── mod.rs
    │   └── x86.rs
    ├── crq.rs
    ├── flag_and_u63.rs
    ├── lcrq.rs
    ├── lib.rs
    └── node.rs

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: rust

rust:
  - nightly

--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
[[package]]
name = "concurrent_queue"
version = "0.1.0"

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "concurrent_queue"
version = "0.1.0"
authors = ["Johannes Hoff "]

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Non-blocking concurrent queue [![Build Status](https://travis-ci.org/johshoff/concurrent_queue.svg?branch=master)](https://travis-ci.org/johshoff/concurrent_queue)
-----------------------------

**This is currently leaking memory. Please don't actually use it.** (Fixing the
leak should be straightforward, but hasn't been prioritized.)

Based on the paper [Fast Concurrent Queues for x86
Processors](http://www.cs.technion.ac.il/~mad/publications/ppopp2013-x86queues.pdf)
by Adam Morrison and Yehuda Afek.

Only runs on *nightly* since I'm using `asm!`, which is needed for the atomic
primitives on x86_64. The structs used as `CMPXCHG16B` operands get their
required 16-byte alignment from `#[repr(align(16))]`.

To run tests:

    cargo test

Performance
-----------

Initial performance numbers are quite promising. On my 4-core laptop, sending
10,000,000 numbers from each of two threads to be consumed by another takes 1.1
seconds, while the same operation takes 2.4 seconds with `mpsc::channel`.
Using 4 producer threads and 1 consumer thread takes 2.0 seconds with the
concurrent queue and 4.9 seconds with `mpsc::channel`.

So it seems to be roughly a 2x speedup.

See the
[concurrent-speed-test](https://github.com/johshoff/concurrent_speed_test) repo
for the test code.

TODO
----

- don't leak memory in LCRQ (by e.g. using hazard pointers). I tried using
  [crossbeam](https://github.com/aturon/crossbeam) for this, but it doesn't
  seem to fit the use case exactly. (A teardown-only `Drop` sketch is included
  in `src/lcrq.rs`.)
- use compiler intrinsic versions of `compare_and_swap`, `compare_and_swap_2`,
  `test_and_set` and `fetch_and_add` if possible (see the sketch of
  `std::sync::atomic` equivalents at the bottom of `src/atomics/x86.rs`)
- see `TODO`s in source code
- store pointers instead of `u64`s
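
Example
-------

A minimal usage sketch, mirroring the tests in `src/lcrq.rs` (values are plain
`u64`s for now; see the TODO list above):

    use concurrent_queue::lcrq::LCRQ;
    use std::sync::Arc;
    use std::thread::spawn;

    fn main() {
        let queue = Arc::new(LCRQ::new());
        let producer_queue = queue.clone();

        let producer = spawn(move || {
            for i in 0..1000 {
                producer_queue.enqueue(i);
            }
        });

        let mut received = 0;
        while received < 1000 {
            // `None` just means the queue is empty right now, so spin and retry
            if queue.dequeue().is_some() {
                received += 1;
            }
        }

        producer.join().unwrap();
    }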

--------------------------------------------------------------------------------
/src/atomics/mod.rs:
--------------------------------------------------------------------------------
pub mod x86;

--------------------------------------------------------------------------------
/src/atomics/x86.rs:
--------------------------------------------------------------------------------

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn compare_and_swap(destination: &u64, expected: u64, new_value: u64) -> bool {
    let value_at_dest : u64;
    unsafe {
        asm!("LOCK CMPXCHG qword ptr [RCX], RBX"
             : "={rax}"(value_at_dest)        // output

             : "{rbx}"(new_value),
               "{rcx}"(destination),          // input
               "{rax}"(expected)

             : "rax", "memory"                // clobbers

             : "intel"                        // options
        );
    }

    // this information is also available through the zero flag, but it's
    // impossible (?) to use that information without doing some sort of
    // secondary compare outside of the asm! block
    value_at_dest == expected
}
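
// Untested sketch answering the comment above: the zero flag can be captured
// inside the asm! block itself with SETZ, so no secondary compare is needed.
// This assumes the old asm! syntax accepts a byte-register constraint like
// "={dl}"; `compare_and_swap_zf` is a hypothetical name, not used elsewhere.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn compare_and_swap_zf(destination: &u64, expected: u64, new_value: u64) -> bool {
    let swapped : u8;
    unsafe {
        asm!("LOCK CMPXCHG qword ptr [RCX], RBX
              SETZ DL"
             : "={dl}"(swapped)               // output: 1 iff the swap happened

             : "{rbx}"(new_value),
               "{rcx}"(destination),          // input
               "{rax}"(expected)

             : "rax", "rdx", "memory"         // clobbers

             : "intel"                        // options
        );
    }

    swapped != 0
}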

// NOTE: `high` is the first (lower-addressed) qword and `low` the second;
// CMPXCHG16B compares them against RAX and RDX respectively.
#[repr(align(16))]
#[derive(Debug)]
pub struct DoubleU64 {
    high: u64,
    low: u64,
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn compare_and_swap_2(destination: &DoubleU64, expected: &DoubleU64, new_value: &DoubleU64) -> bool { // TODO: return Result to pass back values?
    let value_at_dest_high : u64;
    let value_at_dest_low : u64;

    unsafe {
        asm!("LOCK CMPXCHG16B [R8]"
             : "={rax}"(value_at_dest_high),  // output
               "={rdx}"(value_at_dest_low)

             : "{rbx}"(new_value.high),       // input
               "{rcx}"(new_value.low),
               "{r8}"(destination),
               "{rax}"(expected.high),
               "{rdx}"(expected.low)

             : "rax", "rdx", "memory"         // clobbers

             : "intel"                        // options
        );
    }

    // this information is also available through the zero flag, but it's
    // impossible (?) to use that information without doing some sort of
    // secondary compare outside of the asm! block
    value_at_dest_high == expected.high && value_at_dest_low == expected.low
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn fetch_and_add(destination: &u64, addend: u64) -> u64 {
    let value_at_dest : u64;
    unsafe {
        asm!("LOCK XADD qword ptr [RCX], RBX"
             : "={rbx}"(value_at_dest)        // output

             : "{rbx}"(addend),               // input
               "{rcx}"(destination)

             : "rbx", "memory"                // clobbers

             : "intel"                        // options
        );
    }

    value_at_dest
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn test_and_set(destination: &u64) {
    unsafe {
        asm!("LOCK BTS qword ptr [RCX], 63"
             :                                // output
             : "{rcx}"(destination)           // input
             : "memory"                       // clobbers (no registers are written)
             : "intel"                        // options
        );
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_fetch_and_add_single_thread() {
        let mut x = 5;
        assert_eq!(fetch_and_add(&mut x, 1), 5);
        assert_eq!(fetch_and_add(&mut x, 5), 6);
        assert_eq!(x, 11);
    }

    #[test]
    fn test_compare_and_swap_single_thread() {
        let mut x = 42;
        assert!(compare_and_swap(&mut x, 42, 10));
        assert_eq!(x, 10);

        assert!(!compare_and_swap(&mut x, 42, 11));
        assert_eq!(x, 10);
    }

    #[test]
    fn test_compare_and_swap_2_single_thread() {
        let mut x = DoubleU64 { high: 1, low: 2 };
        assert!(compare_and_swap_2(&mut x, &DoubleU64 { high: 1, low: 2 }, &DoubleU64 { high: 2, low: 3 }));
        assert_eq!(x.high, 2);
        assert_eq!(x.low,  3);

        assert!(!compare_and_swap_2(&mut x, &DoubleU64 { high: 1, low: 2 }, &DoubleU64 { high: 3, low: 2 }));
        assert_eq!(x.high, 2);
        assert_eq!(x.low,  3);
    }

    #[test]
    fn test_test_and_set() {
        let mut x = 5;
        test_and_set(&mut x);
        assert_eq!(x, 0x8000000000000005u64);
    }
}
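
// Sketch for the README TODO about using compiler intrinsics: the single-word
// operations above map directly onto `std::sync::atomic` (the 128-bit CAS has
// no stable std equivalent). Using these would mean storing `AtomicU64` fields
// instead of plain `u64`s; the `_std` names are illustrative only.
//
//     use std::sync::atomic::{AtomicU64, Ordering};
//
//     pub fn compare_and_swap_std(destination: &AtomicU64, expected: u64, new_value: u64) -> bool {
//         destination.compare_exchange(expected, new_value, Ordering::SeqCst, Ordering::SeqCst).is_ok()
//     }
//
//     pub fn fetch_and_add_std(destination: &AtomicU64, addend: u64) -> u64 {
//         destination.fetch_add(addend, Ordering::SeqCst)
//     }
//
//     pub fn test_and_set_std(destination: &AtomicU64) {
//         destination.fetch_or(1 << 63, Ordering::SeqCst);
//     }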

--------------------------------------------------------------------------------
/src/crq.rs:
--------------------------------------------------------------------------------
/// Concurrent ring queue

use std::ptr;
use std::mem;
use std::marker::{Sync, Send};

use flag_and_u63::FlagAndU63;
use node::{ Node, NODE_VALUE_EMPTY };
use atomics::x86::*;

fn compare_and_swap_nodes(node: &Node, expected: &Node, new_value: &Node) -> bool {
    // A Node is exactly 16 bytes and 16-byte aligned (see the tests in
    // node.rs), so it can be reinterpreted as a DoubleU64 for CMPXCHG16B.
    let mem_current   : &DoubleU64 = unsafe { mem::transmute(node) };
    let mem_expected  : &DoubleU64 = unsafe { mem::transmute(expected) };
    let mem_new_value : &DoubleU64 = unsafe { mem::transmute(new_value) };

    compare_and_swap_2(mem_current, mem_expected, mem_new_value)
}

pub const RING_SIZE: usize = 256;

// fields are padded to get them on their very own cache lines.
// This assumes that usize is 64 bits, and a cache line is 64 bytes.
pub struct CRQ {
    head: u64,                    // read location
    _pad_head: [usize; 7],
    tail_and_closed: FlagAndU63,  // tail (u63, write location), closed queue (1 bit flag)
    _pad_tail: [usize; 7],
    pub next: *const CRQ,
    _pad_next: [usize; 7],
    ring: [Node; RING_SIZE]
}

unsafe impl Send for CRQ {} // TODO: remove need for this
unsafe impl Sync for CRQ {}

pub struct QueueClosed;

impl CRQ {
    pub fn new() -> CRQ {
        /*
        TODO: It would be nice to do this without unsafe. I can create a vector:

            let ring = (0..RING_SIZE).map(|i| Node::new(i as u64, NODE_VALUE_EMPTY, true)).collect::<Vec<Node>>();

        But how do I get the underlying array? Getting a slice `&ring[..]` gets
        me a [Node], which is almost there, but it's unsized.
        */
        let ring = unsafe {
            let mut ring: [Node; RING_SIZE] = mem::uninitialized();

            for (i, element) in ring.iter_mut().enumerate() {
                let value = Node::new(i as u64, NODE_VALUE_EMPTY, true);
                ptr::write(element, value);
            }

            ring
        };

        CRQ { head: 0, tail_and_closed: FlagAndU63::new(false, 0), next: ptr::null(), ring: ring,
              _pad_head: [0; 7], _pad_tail: [0; 7], _pad_next: [0; 7] }
    }
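    // Sketch for the TODO above: on much newer compilers (Rust 1.63+) the
    // array can be built safely, with no uninitialized memory at all:
    //
    //     let ring: [Node; RING_SIZE] =
    //         std::array::from_fn(|i| Node::new(i as u64, NODE_VALUE_EMPTY, true));
    //
    // That API doesn't exist on the nightly this crate targets, so the
    // `mem::uninitialized` + `ptr::write` loop stays for now.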

    pub fn enqueue(&self, new_value: u64) -> Result<(), QueueClosed> {
        loop {
            let current_tail_and_closed = FlagAndU63::from_repr(fetch_and_add(self.tail_and_closed.ref_combined(), 1));
            let (closed, tail) = current_tail_and_closed.flag_and_value();

            if closed {
                return Err(QueueClosed);
            }

            {
                let node = &self.ring[tail as usize % RING_SIZE]; // TODO: is this doing a bounds check? It isn't needed
                let value = node.value();

                if value == NODE_VALUE_EMPTY {
                    let (is_safe, index) = node.safe_and_index();
                    if index <= tail &&
                       (is_safe || self.head <= tail) &&
                       compare_and_swap_nodes(node, &Node::new(index, NODE_VALUE_EMPTY, is_safe), &Node::new(tail, new_value, true)) {
                        return Ok(());
                    }
                }
            }

            // NOTE: Checking `head < tail` is necessary to avoid underflow in `tail - head`, since
            // head can advance beyond tail
            let head = self.head;
            if (head < tail && (tail - head) as usize >= RING_SIZE) || self.is_starving() {
                test_and_set(self.tail_and_closed.ref_combined());
                return Err(QueueClosed);
            }
        }
    }

    pub fn dequeue(&self) -> Option<u64> {
        loop {
            let head = fetch_and_add(&self.head, 1);
            {
                let node = &self.ring[head as usize % RING_SIZE]; // TODO: is this doing a bounds check? It isn't needed

                loop {
                    let value = node.value();
                    let (is_safe, index) = node.safe_and_index();

                    if index > head {
                        break;
                    }

                    if value != NODE_VALUE_EMPTY {
                        if index == head {
                            if compare_and_swap_nodes(node, &Node::new(head, value, is_safe), &Node::new(head + RING_SIZE as u64, NODE_VALUE_EMPTY, is_safe)) {
                                return Some(value)
                            }
                        } else {
                            if compare_and_swap_nodes(node, &Node::new(index, value, is_safe), &Node::new(index, value, false)) {
                                break;
                            }
                        }
                    } else {
                        if compare_and_swap_nodes(node, &Node::new(index, NODE_VALUE_EMPTY, is_safe), &Node::new(head + RING_SIZE as u64, NODE_VALUE_EMPTY, is_safe)) {
                            break;
                        }
                    }
                }
            }
            let tail = self.tail_and_closed.value();
            if tail <= head + 1 {
                self.fix_state();
                return None;
            }
        }
    }

    fn is_starving(&self) -> bool {
        // TODO: IMPLEMENT
        false
    }

    fn fix_state(&self) {
        loop {
            // fetch_and_add with 0 is used here as an atomic read
            let tail_repr = fetch_and_add(self.tail_and_closed.ref_combined(), 0);
            let head = fetch_and_add(&self.head, 0);

            if self.tail_and_closed.combined() != tail_repr {
                continue;
            }

            if head <= tail_repr {
                return; // nothing to do
            }

            // jh: Since tail_repr < head at this point, tail_repr does not have its highest bit set (the CLOSED bit).
            // Alternatively, it means that head has the highest bit set, and I guess that'll just close the queue?

            if compare_and_swap(self.tail_and_closed.ref_combined(), tail_repr, head) {
                return;
            }
        }
    }
}

#[cfg(test)]
mod test {
    use std::sync::Arc;
    use std::ptr;
    use std::thread::spawn;
    use super::*;
    use node::NODE_VALUE_EMPTY;

    #[test]
    fn new_crq() {
        let crq = CRQ::new();
        assert_eq!(crq.head, 0);
        assert_eq!(crq.tail_and_closed.value(), 0);
        assert!(!crq.tail_and_closed.is_flag_set());
        assert_eq!(crq.next, ptr::null());
        assert_eq!(crq.ring.len(), RING_SIZE);

        for (i, element) in crq.ring.iter().enumerate() {
            assert!(element.is_safe());
            assert_eq!(element.index(), i as u64);
            assert_eq!(element.value(), NODE_VALUE_EMPTY);
        }
    }

    #[test]
    fn test_full_queue() {
        let crq = CRQ::new();
        for _ in 0..RING_SIZE {
            assert!(crq.enqueue(100).is_ok());
        }
        assert!(crq.enqueue(100).is_err());
    }

    #[test]
    fn test_deque_empty() {
        let crq = CRQ::new();
        assert!(crq.dequeue() == None);
    }

    #[test]
    fn test_enqueue_and_deque() {
        let crq = CRQ::new();
        for i in 0..RING_SIZE {
            assert!(crq.enqueue(100 + i as u64).is_ok());
        }

        for i in 0..RING_SIZE {
            assert!(crq.dequeue() == Some(100 + i as u64));
        }
    }

    #[test]
    fn test_enqueue_and_deque_laps() {
        let crq = CRQ::new();
        for i in 0..RING_SIZE*10 {
            assert!(crq.enqueue(100 + i as u64).is_ok());
            assert!(crq.dequeue() == Some(100 + i as u64));
        }
    }

    #[test]
    fn test_enqueue_and_deque_multithreaded() {
        let crq = Arc::new(CRQ::new());

        let prod_crq = crq.clone();
        let cons_crq = crq.clone();

        let producer = spawn(move || {
            for i in 0..RING_SIZE {
                loop {
                    match prod_crq.enqueue(100 + i as u64) {
                        Ok(()) => { break; },
                        Err(QueueClosed) => { panic!("Queue closed"); },
                    }
                }
            }
        });

        let consumer = spawn(move || {
            for i in 0..RING_SIZE {
                loop {
                    match cons_crq.dequeue() {
                        Some(number) => { assert_eq!(number, 100 + i as u64); break },
                        None => { /* spin */ },
                    }
                }
            }
        });

        assert!(producer.join().is_ok());
        assert!(consumer.join().is_ok());
    }
}

--------------------------------------------------------------------------------
/src/flag_and_u63.rs:
--------------------------------------------------------------------------------

pub struct FlagAndU63 {
    combined: u64, // highest bit is the boolean flag, remaining 63 bits are the u63 value
}

const FLAG_VALUE: u64 = 1 << 63;

impl FlagAndU63 {
    pub fn new(flag: bool, value: u64) -> FlagAndU63 {
        if flag { FlagAndU63 { combined: value | FLAG_VALUE } }
        else    { FlagAndU63 { combined: value } }
    }

    /// Create a FlagAndU63 from the internal representation of one
    pub fn from_repr(repr: u64) -> FlagAndU63 {
        FlagAndU63 { combined: repr }
    }

    pub fn is_flag_set(&self) -> bool {
        self.combined & FLAG_VALUE > 0
    }

    pub fn value(&self) -> u64 {
        self.combined & !FLAG_VALUE
    }

    /// Get both values in (hopefully) one memory read
    pub fn flag_and_value(&self) -> (bool, u64) {
        let current_combined = self.combined;

        (current_combined & FLAG_VALUE > 0, current_combined & !FLAG_VALUE)
    }

    pub fn set_flag(&mut self) {
        self.combined |= FLAG_VALUE;
    }

    pub fn unset_flag(&mut self) {
        self.combined &= !FLAG_VALUE;
    }

    /// Return a reference to the combined representation of flag+u63
    pub fn ref_combined(&self) -> &u64 {
        &self.combined
    }

    /// Return the internal combined representation of flag+u63
    pub fn combined(&self) -> u64 {
        self.combined
    }
}
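
// Added test sketch (this module had no tests of its own): exercises the
// flag/value packing described above.
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_flag_and_value_roundtrip() {
        let both = FlagAndU63::new(true, 42);
        assert!(both.is_flag_set());
        assert_eq!(both.value(), 42);
        assert_eq!(both.flag_and_value(), (true, 42));
        assert_eq!(both.combined(), (1 << 63) | 42);

        let mut value_only = FlagAndU63::new(false, 42);
        assert_eq!(value_only.flag_and_value(), (false, 42));
        value_only.set_flag();
        assert!(value_only.is_flag_set());
        value_only.unset_flag();
        assert_eq!(value_only.combined(), 42);
    }
}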

--------------------------------------------------------------------------------
/src/lcrq.rs:
--------------------------------------------------------------------------------
/// Linked concurrent ring queue

use std::ptr;
use std::mem;
use std::marker::{Sync, Send};

use crq::CRQ;
use atomics::x86::compare_and_swap;

// `head` and `tail` are padded to get them on their very own cache lines.
// This assumes that usize is 64 bits, and a cache line is 64 bytes.
pub struct LCRQ {
    tail: *const CRQ,
    _pad_tail: [usize; 7],
    head: *const CRQ,
    _pad_head: [usize; 7],
}

unsafe impl Send for LCRQ {} // TODO: remove need for this
unsafe impl Sync for LCRQ {}

fn compare_and_swap_crq_ptr(destination: &*const CRQ, expected: *const CRQ, new_value: *const CRQ) -> bool {
    unsafe {
        compare_and_swap(mem::transmute(destination), mem::transmute(expected), mem::transmute(new_value))
    }
}

// Box the CRQ and leak it; this is the leak mentioned in the README, since
// nothing ever frees these allocations.
fn untracked_pointer(crq: CRQ) -> *const CRQ {
    unsafe { mem::transmute(Box::new(crq)) }
}

impl LCRQ {
    pub fn new() -> LCRQ {
        let crq = untracked_pointer(CRQ::new());
        LCRQ { tail: crq, head: crq, _pad_tail: [0; 7], _pad_head: [0; 7] }
    }

    pub fn dequeue(&self) -> Option<u64> {
        loop {
            let crq : &CRQ = unsafe { mem::transmute(self.head) };
            match crq.dequeue() {
                Some(value) => { return Some(value); }
                None => {
                    if crq.next == ptr::null() {
                        return None;
                    }
                    // values may have arrived between the two attempts, so try
                    // once more before swinging `head` past this CRQ
                    match crq.dequeue() {
                        Some(value) => { return Some(value); }
                        None => {
                            compare_and_swap_crq_ptr(&self.head, unsafe { mem::transmute(crq) }, crq.next);
                        }
                    }
                }
            }
        }
    }

    pub fn enqueue(&self, value: u64) {
        loop {
            let crq : &CRQ = unsafe { mem::transmute(self.tail) };

            if crq.next != ptr::null() {
                // tail is lagging behind; help it along and retry
                compare_and_swap_crq_ptr(&self.tail, crq, crq.next);
                continue;
            }

            match crq.enqueue(value) {
                Ok(_) => return,
                Err(_) => { // queue closed
                    let new_crq = CRQ::new();
                    new_crq.enqueue(value).ok().expect("Enqueue expected to always work on an empty queue");
                    let new_crq_ptr = untracked_pointer(new_crq);
                    if compare_and_swap_crq_ptr(&crq.next, ptr::null(), new_crq_ptr) {
                        compare_and_swap_crq_ptr(&self.tail, crq, new_crq_ptr);
                        return;
                    }
                }
            }
        }
    }
}
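
// Sketch toward the leak TODO in the README: freeing the remaining chain at
// teardown is straightforward, since no other thread can hold references once
// the LCRQ is being dropped. This does NOT reclaim CRQs retired while the
// queue is live (ones that `head` was swung past); those still need hazard
// pointers or epoch-based reclamation.
impl Drop for LCRQ {
    fn drop(&mut self) {
        let mut crq = self.head;
        while !crq.is_null() {
            // re-box the pointer created by `untracked_pointer` so it is freed
            let boxed: Box<CRQ> = unsafe { mem::transmute(crq) };
            crq = boxed.next;
        }
    }
}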

#[cfg(test)]
mod test {
    use std::thread::{ spawn, JoinHandle };
    use std::sync::Arc;
    use super::*;
    use crq::RING_SIZE;

    #[test]
    fn test_enqueue_ring_plus_one() {
        let lcrq = LCRQ::new();
        for i in 0..RING_SIZE+1 {
            lcrq.enqueue(100 + i as u64);
        }
    }

    #[test]
    fn test_enqueue_front_load() {
        let lcrq = LCRQ::new();
        for i in 0..RING_SIZE*10 {
            lcrq.enqueue(100 + i as u64);
        }
        for i in 0..RING_SIZE*10 {
            assert!(lcrq.dequeue() == Some(100 + i as u64));
        }
    }

    #[test]
    fn test_enqueue_and_deque_multithreaded() {
        let lcrq = Arc::new(LCRQ::new());

        let prod_lcrq = lcrq.clone();
        let cons_lcrq = lcrq.clone();

        let producer = spawn(move || {
            for i in 0..RING_SIZE*100 {
                prod_lcrq.enqueue(100 + i as u64);
            }
        });

        let consumer = spawn(move || {
            for i in 0..RING_SIZE*100 {
                loop {
                    match cons_lcrq.dequeue() {
                        Some(number) => { assert_eq!(number, 100 + i as u64); break },
                        None => { /* spin */ },
                    }
                }
            }
        });

        assert!(producer.join().is_ok());
        assert!(consumer.join().is_ok());
    }

    #[test]
    fn multi_producer_single_consumer() {
        let lcrq = Arc::new(LCRQ::new());

        let producer_1 = start_producer(lcrq.clone(), 100000, 100100);
        let producer_2 = start_producer(lcrq.clone(), 100100, 100200);

        let cons_lcrq = lcrq.clone();
        let consumer = spawn(move || {
            for _ in 0..200 {
                loop {
                    match cons_lcrq.dequeue() {
                        Some(number) => { assert!(number >= 100000); assert!(number < 100200); break },
                        None => { /* spin */ },
                    }
                }
            }
        });

        assert!(producer_1.join().is_ok());
        assert!(producer_2.join().is_ok());
        assert!(consumer.join().is_ok());
    }

    fn start_producer(queue: Arc<LCRQ>, start: u64, end: u64) -> JoinHandle<()> {
        spawn(move || {
            for i in start..end {
                queue.enqueue(i);
            }
        })
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
#![feature(asm)]

pub mod crq;
pub mod lcrq;
pub mod flag_and_u63; // TODO: Using `pub` only to suppress unused warnings
pub mod node;         // TODO: Using `pub` only to suppress unused warnings
mod atomics;

--------------------------------------------------------------------------------
/src/node.rs:
--------------------------------------------------------------------------------
use flag_and_u63::FlagAndU63;
use std::u64;

// TODO: abstract away
pub const NODE_VALUE_EMPTY: u64 = u64::MAX;

#[repr(align(16))]
pub struct Node {
    index_and_safe: FlagAndU63, // highest bit: safe, remaining 63 bits: index
    value: u64,
    // TODO: pad to cache line size? Assume the L2 cache line size? (Note that
    // the CAS operand must stay the first 16 bytes, and the size test below
    // would need updating.)
}

impl Node {
    pub fn new(index: u64, value: u64, safe: bool) -> Node {
        Node { index_and_safe: FlagAndU63::new(safe, index), value: value }
    }

    pub fn is_safe(&self) -> bool {
        self.index_and_safe.is_flag_set()
    }

    pub fn index(&self) -> u64 {
        self.index_and_safe.value()
    }

    pub fn safe_and_index(&self) -> (bool, u64) {
        self.index_and_safe.flag_and_value()
    }

    pub fn value(&self) -> u64 {
        self.value
    }

    pub fn set_safe(&mut self) {
        self.index_and_safe.set_flag();
    }

    pub fn set_unsafe(&mut self) {
        self.index_and_safe.unset_flag();
    }
}


#[cfg(test)]
mod test {
    use std::mem;
    use super::*;

    #[test]
    fn test_node_value() {
        assert_eq!(Node::new(0, 1, true).value(), 1);
        assert_eq!(Node::new(5, 9, true).value(), 9);
        assert_eq!(Node::new(8, 2, true).value(), 2);
        assert_eq!(Node::new(0, 1, false).value(), 1);
        assert_eq!(Node::new(5, 9, false).value(), 9);
        assert_eq!(Node::new(8, 2, false).value(), 2);
    }

    #[test]
    fn test_node_index() {
        assert_eq!(Node::new(0, 1, true).index(), 0);
        assert_eq!(Node::new(5, 9, true).index(), 5);
        assert_eq!(Node::new(8, 2, true).index(), 8);
        assert_eq!(Node::new(0, 1, false).index(), 0);
        assert_eq!(Node::new(5, 9, false).index(), 5);
        assert_eq!(Node::new(8, 2, false).index(), 8);
    }

    #[test]
    fn test_node_safe() {
        let node = Node::new(0, 0, true);
        assert!(node.is_safe());

        let node = Node::new(0, 0, false);
        assert!(!node.is_safe());

        let mut node = Node::new(1, 2, true);
        assert!(node.is_safe());
        assert_eq!(node.index(), 1);
        assert_eq!(node.value(), 2);

        node.set_unsafe();
        assert!(!node.is_safe());
        assert_eq!(node.index(), 1);
        assert_eq!(node.value(), 2);

        node.set_safe();
        assert!(node.is_safe());
        assert_eq!(node.index(), 1);
        assert_eq!(node.value(), 2);
    }

    #[test]
    fn test_alignment() {
        // necessary for compare_and_swap_2
        assert_eq!(mem::align_of::<Node>(), 16);
    }

    #[test]
    fn test_size() {
        // necessary for compare_and_swap_2
        assert_eq!(mem::size_of::<Node>(), 16);
    }
}
--------------------------------------------------------------------------------