├── Makefile
├── README.md
├── queue_atomic.h
├── queue_std_mutex.h
├── rdtsc.h
└── test_queue.cc

/Makefile:
--------------------------------------------------------------------------------
all: test_queue

clean:
	rm -f test_queue

test_queue: test_queue.cc queue_atomic.h queue_std_mutex.h rdtsc.h
	c++ -pthread -O3 -std=c++11 $< -o $@
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# queue_atomic

Multiple producer multiple consumer queue template using C++11 atomics.

Solves the ABA problem and implements 2-phase ordered updates by packing a monotonically increasing version number into the queue front and back offsets. The contended case is detected by checking that the expected version counter is visible in the packed front or back offset.

During an update the version counter is checked against the version packed in the offset. If the offset is up-to-date, the version counter is atomically incremented, data is stored (push_back) or retrieved (pop_front), and in a final phase the front or back offset is atomically updated with a new version and offset. Data only becomes visible to another thread when the version counter matches the version packed into the offsets. The front and back offsets always increase in the common case and buffer offsets are calculated modulo the queue size.

- queue_atomic is completely lockless in the single producer single consumer case
- queue_atomic can be used in multiple producer multiple consumer mode, however it will spin calling std::this_thread::yield() when there is contention
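
A minimal usage sketch (not part of the repository; queue_atomic.h expects the standard headers and a log_debug declaration to be visible before it is included, as test_queue.cc does):

````
#include <atomic>
#include <thread>
#include <cassert>
#include <cstdio>
#include <cstdarg>
#include <cstdint>

extern void log_debug(const char* fmt, ...);

#include "rdtsc.h"
#include "queue_atomic.h"

/* stub, normally provided by test_queue.cc */
void log_debug(const char* fmt, ...) {}

int main()
{
    queue_atomic<void*> q(1024);        /* size_limit must be a power of two */
    bool ok = q.push_back((void*)42);   /* returns false when the queue is full */
    void *v = q.pop_front();            /* returns T(0) when the queue is empty */
    return (ok && v == (void*)42) ? 0 : 1;
}
````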

## Notes

### queue_std_mutex

- std::mutex wrapper around std::queue

### queue_atomic

- uses 4 atomic variables: counter_back, version_back, counter_front and version_front
- push_back reads 3 atomics (counter_back, version_back and version_front)
  and writes 2 atomics (counter_back and version_back)
- pop_front reads 3 atomics (counter_front, version_back and version_front)
  and writes 2 atomics (counter_front and version_front)
- uses two separate monotonically increasing version counters and 2-phase ordered updates
- completely lockless in the single producer single consumer case
- back version counter and back offset are packed into version_back
- front version counter and front offset are packed into version_front
- NOTE: limited to 140737488355328 (2^47) items

````
queue_atomic::is_lock_free = 1
queue_atomic::atomic_bits = 64
queue_atomic::offset_bits = 48
queue_atomic::version_bits = 16
queue_atomic::offset_shift = 0
queue_atomic::version_shift = 48
queue_atomic::size_max = 0x0000800000000000 (140737488355328)
queue_atomic::offset_limit = 0x0001000000000000 (281474976710656)
queue_atomic::version_limit = 0x0000000000010000 (65536)
queue_atomic::offset_mask = 0x0000ffffffffffff
queue_atomic::version_mask = 0x000000000000ffff
````
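
For example, with the default 48/16 split, packing version 5 with offset 1024 produces a single 64-bit word:

````
pack_offset(5, 1024) = (5 << 48) | (1024 << 0) = 0x0005000000000400
````

An update becomes visible once the 16-bit version in this word equals the low 16 bits of the corresponding counter.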

## Timings

- -O3, OS X 10.10, Apple LLVM version 7.0.0, 22nm Ivy Bridge 2.7 GHz Intel Core i7

````
queue_implementation threads   iterations items/thread time(µs)  ops       op_time(µs)
queue_atomic         8         10         1024         4711      81920     0.057507
queue_atomic         8         10         65536        190221    5242880   0.036282
queue_atomic         8         64         65536        1225404   33554432  0.036520
queue_atomic         8         16         262144       1151575   33554432  0.034320
queue_std_mutex      8         10         1024         752439    81920     9.185046
````

- -O3, Linux 4.2.0-amd64, GCC 5.2.1, 45nm Bloomfield 3.33 GHz Intel Core i7 975

````
queue_implementation threads   iterations items/thread time(µs)  ops       op_time(µs)
queue_atomic         8         10         1024         8022      81920     0.097925
queue_atomic         8         10         65536        505085    5242880   0.096337
queue_atomic         8         64         65536        3182992   33554432  0.094861
queue_atomic         8         16         262144       3259350   33554432  0.097136
queue_std_mutex      8         10         1024         25139     81920     0.306873
````
--------------------------------------------------------------------------------
/queue_atomic.h:
--------------------------------------------------------------------------------
//
//  queue_atomic.h
//

#ifndef queue_atomic_h
#define queue_atomic_h

/*
 * queue_atomic
 *
 * Multiple producer multiple consumer queue template using C++11 atomics.
 *
 * Completely lockless in the single producer single consumer case.
 *
 * - uses 4 atomic variables: counter_back, version_back, counter_front and version_front
 *
 * - push_back reads 3 atomics: counter_back, version_back and version_front
 *   writes 2 atomics: counter_back and version_back
 *
 * - pop_front reads 3 atomics: counter_front, version_back and version_front
 *   writes 2 atomics: counter_front and version_front
 *
 * - back version and front version are packed into version_back and version_front
 *
 * - version is used for conflict detection during ordered writes
 */
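
/*
 * packed word layout with the defaults (ATOMIC_UINT = uint64_t,
 * OFFSET_BITS = 48, VERSION_BITS = 16):
 *
 *    63           48 47                            0
 *   +---------------+------------------------------+
 *   |  version:16   |          offset:48           |
 *   +---------------+------------------------------+
 */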

#if defined(_MSC_VER)
#define ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__)
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#define ALIGNED(x)
#endif

template <typename T,
          typename ATOMIC_UINT = uint64_t,
          const int OFFSET_BITS = 48,
          const int VERSION_BITS = 16,
          const bool debug_contention = false,
          std::memory_order relaxed_memory_order = std::memory_order_relaxed,
          std::memory_order acquire_memory_order = std::memory_order_acquire,
          std::memory_order release_memory_order = std::memory_order_release>
struct queue_atomic
{
    /* queue atomic type */

    typedef ATOMIC_UINT atomic_uint_t;
    typedef std::atomic<T> atomic_item_t;


    /* queue constants */

    static const int tight_spin_limit = 8;
    static const int spin_limit = 1 << 24;
    static const bool debug_spin = true;
    static const int atomic_bits = sizeof(atomic_uint_t) << 3;
    static const int offset_bits = OFFSET_BITS;
    static const int version_bits = VERSION_BITS;
    static const int offset_shift = 0;
    static const int version_shift = offset_bits;
    static const atomic_uint_t size_max = (1ULL << (offset_bits - 1));
    static const atomic_uint_t offset_limit = (1ULL << offset_bits);
    static const atomic_uint_t version_limit = (1ULL << version_bits);
    static const atomic_uint_t offset_mask = (1ULL << offset_bits) - 1;
    static const atomic_uint_t version_mask = (1ULL << version_bits) - 1;


    /* queue storage */

    ALIGNED(64) atomic_item_t *vec;
    const atomic_uint_t size_limit;
    ALIGNED(64) std::atomic<atomic_uint_t> counter_back;
    std::atomic<atomic_uint_t> version_back;
    ALIGNED(64) std::atomic<atomic_uint_t> counter_front;
    std::atomic<atomic_uint_t> version_front;


    /* queue helpers */

    static inline bool ispow2(size_t val) { return val && !(val & (val - 1)); }

    /*
     * pack a version number and an offset into an unsigned atomic integer
     */
    static inline atomic_uint_t pack_offset(const atomic_uint_t version, const atomic_uint_t offset)
    {
        assert(version < version_limit);
        assert(offset < offset_limit);
        return (version << version_shift) | (offset << offset_shift);
    }

    /*
     * unpack a version number and offset and compare the version to a counter value
     * returns true if the version in the counter matches the version packed in the offset
     */
    static inline bool unpack_offsets(const atomic_uint_t counter, const atomic_uint_t pack,
                                      atomic_uint_t &offset)
    {
        if (((pack >> version_shift) & version_mask) == (counter & version_mask)) {
            offset = (pack >> offset_shift) & offset_mask;
            return true;
        }
        return false;
    }

    /* queue implementation */

    atomic_uint_t _back_version()  { return (version_back >> version_shift) & version_mask; }
    atomic_uint_t _front_version() { return (version_front >> version_shift) & version_mask; }
    atomic_uint_t _back()          { return (version_back >> offset_shift) & offset_mask; }
    atomic_uint_t _front()         { return (version_front >> offset_shift) & offset_mask; }
    size_t capacity()              { return size_limit; }


    queue_atomic(size_t size_limit) :
        size_limit(size_limit),
        counter_back(0),
        version_back(pack_offset(0, 0)),
        counter_front(0),
        version_front(pack_offset(0, size_limit))
    {
        static_assert(version_bits + offset_bits <= atomic_bits,
                      "version_bits + offset_bits must fit into atomic integer type");
        assert(size_limit > 0);
        assert(size_limit <= size_max);
        assert(ispow2(size_limit));
        vec = new atomic_item_t[size_limit]();
        assert(vec != nullptr);
    }

    virtual ~queue_atomic()
    {
        delete [] vec;
    }

    bool empty()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return true if queue is empty */
        return (front - back == size_limit);
    }

    bool full()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return true if queue is full */
        return (front == back);
    }

    size_t size()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return queue size */
        return size_limit - front + back;
    }

    bool push_back(T elem)
    {
        atomic_uint_t back;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        int spin_count = 0;
        do {
            /*
             * if the packed version equals counter_back then attempt push back
             *
             * this is where we detect if another thread is in the push back
             * critical section and we only proceed if the versions are consistent:
             *
             *   i.e. counter_back == version_back >> version_shift & version_mask
             */
            atomic_uint_t _counter_back = counter_back.load(relaxed_memory_order);
            atomic_uint_t _version_back = version_back.load(relaxed_memory_order);
            if (unpack_offsets(_counter_back, _version_back, back))
            {
                /* if (full) return false; */
                if (front == back) return false;

                /* create new back version */
                atomic_uint_t new_back_version = (_counter_back + 1) & version_mask;

                /* calculate store offset and update back */
                size_t offset = back++ & (size_limit - 1);

                /* pack new back version and back offset */
                atomic_uint_t pack = pack_offset(new_back_version, back & (offset_limit - 1));

                /*
                 * compare_exchange_weak and attempt to update the counter with the new version
                 *
                 * this is where we enter the critical section:
                 *
                 *   i.e. counter_back != version_back >> version_shift & version_mask
                 *   for a brief number of instructions until we write the new version_back
                 *
                 * if successful other threads will spin until the new version_back is visible
                 * if successful we write the value followed by writing a new version_back
                 * to leave the critical section
                 */
                if (counter_back.compare_exchange_weak(_counter_back, new_back_version, std::memory_order_acq_rel))
                {
                    vec[offset].store(elem, release_memory_order);

                    /*
                     * exit the critical section and reveal the new back offset to other threads
                     *
                     *   i.e. counter_back == version_back >> version_shift & version_mask
                     */
                    version_back.store(pack, release_memory_order);
                    return true;

                } else if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 2 contention",
                              __func__, _counter_back, _tsc, spin_count, std::this_thread::get_id());
                }
            } else {
                if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 1 contention",
                              __func__, _counter_back, _tsc, spin_count, std::this_thread::get_id());
                }
            }

            /*
             * if we reach here then we detected an inconsistent version in phase 1 prepare
             * or failed to update the counter to enter the critical section in phase 2
             */

            /* after a short tight spin, yield the thread before retrying */
            if (spin_count > tight_spin_limit) {
                std::this_thread::yield();
            }

        } while (++spin_count < spin_limit);

        if (debug_spin) {
            log_debug("%s thread:%p failed: reached spin limit", __func__, std::this_thread::get_id());
        }

        return false;
    }
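
    /*
     * note: pop_front signals an empty queue (or a reached spin limit) by
     * returning T(0), so T must be a pointer or integer type for which zero
     * is never a valid queued value
     */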
    T pop_front()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front;

        int spin_count = 0;
        do {
            /*
             * if the packed version equals counter_front then attempt pop front
             *
             * this is where we detect if another thread is in the pop front
             * critical section and we only proceed if the versions are consistent:
             *
             *   i.e. counter_front == version_front >> version_shift & version_mask
             */
            atomic_uint_t _counter_front = counter_front.load(relaxed_memory_order);
            atomic_uint_t _version_front = version_front.load(relaxed_memory_order);
            if (unpack_offsets(_counter_front, _version_front, front))
            {
                /* if (empty) return T(0); */
                if (front - back == size_limit) return T(0);

                /* create new front version */
                atomic_uint_t new_front_version = (_counter_front + 1) & version_mask;

                /* calculate offset and update front */
                size_t offset = front++ & (size_limit - 1);

                /* pack new front version and front offset */
                atomic_uint_t pack = pack_offset(new_front_version, front & (offset_limit - 1));

                /*
                 * compare_exchange_weak and attempt to update the counter with the new version
                 *
                 * this is where we enter the critical section:
                 *
                 *   i.e. counter_front != version_front >> version_shift & version_mask
                 *   for a brief number of instructions until we write the new version_front
                 *
                 * if successful other threads will spin until the new version_front is visible
                 * if successful we read the value followed by writing a new version_front
                 * to leave the critical section
                 */
                if (counter_front.compare_exchange_weak(_counter_front, new_front_version, std::memory_order_acq_rel))
                {
                    T val = vec[offset].load(acquire_memory_order);

                    /*
                     * exit the critical section and reveal the new front offset to other threads
                     *
                     *   i.e. counter_front == version_front >> version_shift & version_mask
                     */
                    version_front.store(pack, release_memory_order);
                    return val;

                } else if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 2 contention",
                              __func__, _counter_front, _tsc, spin_count, std::this_thread::get_id());
                }
            } else {
                if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 1 contention",
                              __func__, _counter_front, _tsc, spin_count, std::this_thread::get_id());
                }
            }

            /*
             * if we reach here then we detected an inconsistent version in phase 1 prepare
             * or failed to update the counter to enter the critical section in phase 2
             */

            /* after a short tight spin, yield the thread before retrying */
            if (spin_count > tight_spin_limit) {
                std::this_thread::yield();
            }

        } while (++spin_count < spin_limit);

        if (debug_spin) {
            log_debug("%s thread:%p failed: reached spin limit", __func__, std::this_thread::get_id());
        }

        return T(0);
    }
};

#endif
--------------------------------------------------------------------------------
/queue_std_mutex.h:
--------------------------------------------------------------------------------
//
//  queue_std_mutex.h
//

#ifndef queue_std_mutex_h
#define queue_std_mutex_h

/*
 * queue_std_mutex
 *
 * - uses std::mutex protection around std::queue
 */

template <typename T>
struct queue_std_mutex
{
    typedef std::queue<T> queue_type;
    typedef std::atomic<T> atomic_item_t;

    queue_type queue;
    std::mutex queue_mutex;

    queue_std_mutex(size_t size_limit) {}

    size_t size()
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        return queue.size();
    }

    bool push_back(T elem)
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        queue.push(elem);
        return true;
    }

    T pop_front()
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        T result(0);
        if (queue.size() > 0) {
            result = queue.front();
            queue.pop();
        }
        return result;
    }
};

#endif
--------------------------------------------------------------------------------
/rdtsc.h:
--------------------------------------------------------------------------------
//
//  rdtsc.h
//

#ifndef rdtsc_h
#define rdtsc_h
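
/*
 * rdtsc() reads the x86 time-stamp counter; the queue uses it only to
 * attach coarse relative timestamps to the contention debug logging
 */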

#ifdef _MSC_VER

#ifdef _M_IX86

/* 32-bit MSVC: cpuid serializes, then rdtsc loads the counter into edx:eax */
inline uint64_t rdtsc()
{
    uint64_t c;
    __asm {
        cpuid
        rdtsc
        mov dword ptr [c + 0], eax
        mov dword ptr [c + 4], edx
    }
    return c;
}

#elif defined(_M_X64)

extern "C" unsigned __int64 __rdtsc();
#pragma intrinsic(__rdtsc)
inline uint64_t rdtsc()
{
    return __rdtsc();
}

#endif

#elif defined(__GNUC__)

#if defined(__i386__)

static __inline__ unsigned long long rdtsc(void)
{
    unsigned long long int x;
    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
    return x;
}

#elif defined(__x86_64__)

static __inline__ unsigned long long rdtsc(void)
{
    unsigned hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
}

#endif

#endif

#endif /* rdtsc_h */
--------------------------------------------------------------------------------
/test_queue.cc:
--------------------------------------------------------------------------------
//
//  test_queue.cc
//

#include <cstdio>
#include <cstdarg>
#include <cstdint>
#include <cstring>
#include <cassert>
#include <vector>
#include <queue>
#include <set>
#include <memory>
#include <atomic>
#include <mutex>
#include <thread>
#include <chrono>

extern void log_debug(const char* fmt, ...);

#include "rdtsc.h"
#include "queue_atomic.h"
#include "queue_std_mutex.h"

using namespace std::chrono;

typedef unsigned long long u64;


void log_prefix(const char* prefix, const char* fmt, va_list arg)
{
    std::vector<char> buf(1024);

    /* va_copy so the argument list can be consumed again if the buffer needs to grow */
    va_list arg2;
    va_copy(arg2, arg);

    int len = vsnprintf(buf.data(), buf.capacity(), fmt, arg);

    if (len >= (int)buf.capacity()) {
        buf.resize(len + 1);
        vsnprintf(buf.data(), buf.capacity(), fmt, arg2);
    }
    va_end(arg2);

    fprintf(stderr, "%s: %s\n", prefix, buf.data());
}

void log_debug(const char* fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    log_prefix("debug", fmt, ap);
    va_end(ap);
}


/* test_push_pop_worker */

template <typename queue_type, typename item_type>
struct test_push_pop_worker
{
    typedef std::vector<item_type> vec_type;

    vec_type vec;
    queue_type &queue;
    const size_t items_per_thread;
    std::thread thread;

    test_push_pop_worker(queue_type &queue, const size_t items_per_thread)
        : queue(queue), items_per_thread(items_per_thread), thread(&test_push_pop_worker::mainloop, this) {}

    void mainloop()
    {
        // transfer items from the queue to the vector
        for (size_t i = 0; i < items_per_thread; i++) {
            item_type v = queue.pop_front();
            if (v) {
                vec.push_back(v);
            } else {
                log_debug("%p queue.pop_front() returned null item", std::this_thread::get_id());
            }
        }
        // transfer items from the vector back to the queue
        for (auto v : vec) {
            if (!queue.push_back(v)) {
                log_debug("%p queue.push_back() returned false", std::this_thread::get_id());
            }
        }
    }
};

/* test_push_pop_threads */
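/*
 * the queue is pre-populated with num_threads * items_per_thread items so
 * that, in aggregate, every pop_front can find an item; each worker pops
 * its share and pushes it back, and after all iterations the queue is
 * drained into a set and checked against the original items 1..num_items
 */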
template <typename queue_type, typename item_type>
void test_push_pop_threads(const char* queue_type_name, const size_t num_threads, const size_t iterations, const size_t items_per_thread)
{
    const size_t num_items = num_threads * items_per_thread;
    const size_t num_ops = num_items * iterations;

    typedef test_push_pop_worker<queue_type,item_type> worker_type;
    typedef std::shared_ptr<worker_type> worker_ptr;
    typedef std::vector<worker_ptr> worker_list;
    typedef std::set<item_type> set_type;

    queue_type queue(num_items);

    // populate queue
    assert(queue.size() == 0);
    for (size_t i = 1; i <= num_items; i++) {
        queue.push_back(item_type(i));
    }
    assert(queue.size() == num_items);

    // run test iterations
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (size_t iter = 0; iter < iterations; iter++)
    {
        // start worker threads
        worker_list workers;
        for (size_t i = 0; i < num_threads; i++) {
            workers.push_back(std::make_shared<worker_type>(queue, items_per_thread));
        }

        // join worker threads
        for (auto worker : workers) {
            worker->thread.join();
        }
        assert(queue.size() == num_items);
    }
    const auto t2 = std::chrono::high_resolution_clock::now();
    uint64_t work_time_us = duration_cast<microseconds>(t2 - t1).count();

    // transfer items to a set
    set_type check_set;
    for (size_t i = 1; i <= num_items; i++) {
        item_type v = queue.pop_front();
        if (v) {
            check_set.insert(v);
        } else {
            log_debug("queue.pop_front() returned null item");
        }
    }
    assert(queue.size() == 0);

    // check items in set
    size_t check_count = 0;
    for (size_t i = 1; i <= num_items; i++) {
        if (check_set.find(item_type(i)) != check_set.end()) {
            check_count++;
        }
    }
    assert(check_count == num_items);

    printf("%-20s %-9zu %-9zu %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_threads, iterations, items_per_thread,
           (u64)work_time_us, (u64)num_ops, (double)work_time_us / (double)num_ops);
}

static void heading_multi()
{
    printf("%-20s %-9s %-9s %-9s %-9s %-9s %-9s\n",
           "name", "nthreads", "iters", "items",
           "time(us)", "op_count", "op(us)");
}
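
/* test_push_pop_single */

/*
 * measures uncontended single-threaded cost: the first printed row is the
 * push_back pass and the second row is the pop_front pass
 */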
template <typename queue_type, typename item_type>
void test_push_pop_single(const char* queue_type_name, const size_t num_items)
{
    queue_type queue(num_items);

    assert(queue.size() == 0);

    // populate queue
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (size_t i = 1; i <= num_items; i++) {
        queue.push_back(item_type(i));
    }
    const auto t2 = std::chrono::high_resolution_clock::now();

    assert(queue.size() == num_items);

    // empty queue
    for (size_t i = 1; i <= num_items; i++) {
        queue.pop_front();
    }
    const auto t3 = std::chrono::high_resolution_clock::now();

    assert(queue.size() == 0);

    uint64_t push_work_time_us = duration_cast<microseconds>(t2 - t1).count();
    uint64_t pop_work_time_us = duration_cast<microseconds>(t3 - t2).count();

    printf("%-20s %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_items, (u64)push_work_time_us, (u64)num_items,
           (double)push_work_time_us / (double)num_items);
    printf("%-20s %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_items, (u64)pop_work_time_us, (u64)num_items,
           (double)pop_work_time_us / (double)num_items);
}

static void heading_single()
{
    printf("%-20s %-9s %-9s %-9s %-9s\n",
           "name", "items", "time(us)", "op_count", "op(us)");
}

/* test_queue */

struct test_queue
{
    void test_queue_constants()
    {
        const size_t qsize = 1024;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        printf("queue_atomic::is_lock_free = %u\n", q.counter_back.is_lock_free());
        printf("queue_atomic::atomic_bits = %u\n", qtype::atomic_bits);
        printf("queue_atomic::offset_bits = %u\n", qtype::offset_bits);
        printf("queue_atomic::version_bits = %u\n", qtype::version_bits);
        printf("queue_atomic::offset_shift = %u\n", qtype::offset_shift);
        printf("queue_atomic::version_shift = %u\n", qtype::version_shift);
        printf("queue_atomic::size_max = 0x%016llx (%llu)\n", (u64)qtype::size_max, (u64)qtype::size_max);
        printf("queue_atomic::offset_limit = 0x%016llx (%llu)\n", (u64)qtype::offset_limit, (u64)qtype::offset_limit);
        printf("queue_atomic::version_limit = 0x%016llx (%llu)\n", (u64)qtype::version_limit, (u64)qtype::version_limit);
        printf("queue_atomic::offset_mask = 0x%016llx\n", (u64)qtype::offset_mask);
        printf("queue_atomic::version_mask = 0x%016llx\n", (u64)qtype::version_mask);

        assert(qtype::atomic_bits == 64);
        assert(qtype::offset_bits == 48);
        assert(qtype::version_bits == 16);
        assert(qtype::offset_shift == 0);
        assert(qtype::version_shift == 48);
        assert(qtype::size_max == 140737488355328ULL);
        assert(qtype::offset_limit == 281474976710656ULL);
        assert(qtype::version_limit == 65536);
        assert(qtype::offset_mask == 0x0000ffffffffffffULL);
        assert(qtype::version_mask == 0x000000000000ffffULL);
    }

    void test_empty_invariants()
    {
        const size_t qsize = 1024;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        assert(q.capacity() == 1024);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
        assert(q.size_limit == 1024);
        assert(q._back_version() == 0);
        assert(q._front_version() == 0);
        assert(q._back() == 0);
        assert(q._front() == 1024);
    }

    void test_push_pop()
    {
        const size_t qsize = 4;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        // check initial invariants
        assert(q.capacity() == qsize);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
        assert(q.size_limit == qsize);
        assert(q._back_version() == 0);
        assert(q._front_version() == 0);
        assert(q._back() == 0);
        assert(q._front() == qsize);

        // push_back 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.push_back((void*)i) == true);
            assert(q._back_version() == i);
            assert(q._front_version() == 0);
            assert(q._back() == i);
            assert(q._front() == qsize);
            assert(q.size() == i);
            assert(q.empty() == false);
            assert(q.full() == (i < 4 ? false : true));
        }

        // push_back overflow test
        assert(q.push_back((void*)5) == false);
        assert(q._back_version() == 4);
        assert(q._front_version() == 0);
        assert(q._back() == 4);
        assert(q._front() == qsize);
        assert(q.size() == 4);
        assert(q.empty() == false);
        assert(q.full() == true);

        // pop_front 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.pop_front() == (void*)i);
            assert(q._back_version() == 4);
            assert(q._front_version() == i);
            assert(q._back() == 4);
            assert(q._front() == 4 + i);
            assert(q.size() == 4 - i);
            assert(q.empty() == (i > 3 ? true : false));
            assert(q.full() == false);
        }

        // pop_front underflow test
        assert(q.pop_front() == (void*)0);
        assert(q._back_version() == 4);
        assert(q._front_version() == 4);
        assert(q._back() == 4);
        assert(q._front() == 8);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);

        // push_back 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.push_back((void*)i) == true);
            assert(q._back_version() == 4 + i);
            assert(q._front_version() == 4);
            assert(q._back() == 4 + i);
            assert(q._front() == 8);
            assert(q.size() == i);
            assert(q.empty() == false);
            assert(q.full() == (i < 4 ? false : true));
        }

        // push_back overflow test
        assert(q.push_back((void*)5) == false);
        assert(q._back_version() == 8);
        assert(q._front_version() == 4);
        assert(q._back() == 8);
        assert(q._front() == 8);
        assert(q.size() == 4);
        assert(q.empty() == false);
        assert(q.full() == true);

        // pop_front 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.pop_front() == (void*)i);
            assert(q._back_version() == 8);
            assert(q._front_version() == 4 + i);
            assert(q._back() == 8);
            assert(q._front() == 8 + i);
            assert(q.size() == 4 - i);
            assert(q.empty() == (i > 3 ? true : false));
            assert(q.full() == false);
        }

        // pop_front underflow test
        assert(q.pop_front() == (void*)0);
        assert(q._back_version() == 8);
        assert(q._front_version() == 8);
        assert(q._back() == 8);
        assert(q._front() == 12);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
    }

    void test_push_pop_single_queue_mutex()
    {
        test_push_pop_single<queue_std_mutex<void*>,void*>("queue_std_mutex", 8388608);
    }

    void test_push_pop_single_queue_atomic()
    {
        test_push_pop_single<queue_atomic<void*>,void*>("queue_atomic", 8388608);
    }

    void test_push_pop_threads_queue_mutex()
    {
        test_push_pop_threads<queue_std_mutex<void*>,void*>("queue_std_mutex", 8, 10, 1024);
    }

    void test_push_pop_threads_queue_atomic()
    {
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 1024);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 1024);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 64, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 64, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 16, 262144);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 16, 262144);
    }

    void test_push_pop_threads_queue_atomic_contention()
    {
        /* debug_contention=true instantiations log phase 1 / phase 2 contention events */
        test_push_pop_threads<queue_atomic<void*,uint64_t,48,16,true>,void*>("queue_atomic:contention", 1, 10, 65536);
        test_push_pop_threads<queue_atomic<void*,uint64_t,48,16,true>,void*>("queue_atomic:contention", 8, 1, 256);
    }
};

int main(int argc, const char * argv[])
{
    test_queue tq;
    printf("# unit-tests\n");
    tq.test_queue_constants();
    tq.test_empty_invariants();
    tq.test_push_pop();
    printf("# single-thread\n");
    heading_single();
    tq.test_push_pop_single_queue_mutex();
    tq.test_push_pop_single_queue_atomic();
    printf("# multi-thread\n");
    heading_multi();
    tq.test_push_pop_threads_queue_mutex();
    tq.test_push_pop_threads_queue_atomic();
    printf("# contention tests\n");
    heading_multi();
    tq.test_push_pop_threads_queue_atomic_contention();
}
--------------------------------------------------------------------------------