├── Makefile
├── README.md
├── queue_atomic.h
├── queue_std_mutex.h
├── rdtsc.h
└── test_queue.cc

/Makefile:
--------------------------------------------------------------------------------
all: test_queue

clean:
	rm -f test_queue

test_queue: test_queue.cc queue_atomic.h queue_std_mutex.h rdtsc.h
	c++ -pthread -O3 -std=c++11 $< -o $@
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# queue_atomic

Multiple producer multiple consumer queue template using C++11 atomics.

Solves the ABA problem and implements 2-phase ordered updates by packing a monotonically increasing version number into the queue front and back offsets. The contended case is detected by checking that the expected version counter is visible in the packed front or back offset.

During an update the version counter is checked against the version packed in the offset. If the offset is up-to-date, the version counter is atomically incremented, data is stored (push_back) or retrieved (pop_front), and in a final phase the front or back offset is atomically updated with a new version and offset. Data only becomes visible to another thread when the version counter matches the version packed into the offsets. The front and back offsets always increase in the common case and buffer offsets are calculated modulo the queue size.

- queue_atomic is completely lockless in the single producer single consumer case
- queue_atomic can be used in multiple producer multiple consumer mode, however it will spin calling std::this_thread::yield() when there is contention
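
A minimal usage sketch (not part of the repository; queue_atomic.h expects the standard headers and a log_debug declaration to be visible before it is included, as test_queue.cc does):

````
#include <atomic>
#include <thread>
#include <cassert>
#include <cstdio>
#include <cstdarg>
#include <cstdint>

extern void log_debug(const char* fmt, ...);

#include "rdtsc.h"
#include "queue_atomic.h"

/* stub, normally provided by test_queue.cc */
void log_debug(const char* fmt, ...) {}

int main()
{
    queue_atomic<void*> q(1024);        /* size_limit must be a power of two */
    bool ok = q.push_back((void*)42);   /* returns false when the queue is full */
    void *v = q.pop_front();            /* returns T(0) when the queue is empty */
    return (ok && v == (void*)42) ? 0 : 1;
}
````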

## Notes

### queue_std_mutex

- std::mutex wrapper around std::queue

### queue_atomic

- uses 4 atomic variables: counter_back, version_back, counter_front and version_front
- push_back reads 3 atomics (counter_back, version_back and version_front)
  and writes 2 atomics (counter_back and version_back)
- pop_front reads 3 atomics (counter_front, version_back and version_front)
  and writes 2 atomics (counter_front and version_front)
- uses two separate monotonically increasing version counters and 2-phase ordered updates
- completely lockless in the single producer single consumer case
- back version counter and back offset are packed into version_back
- front version counter and front offset are packed into version_front
- NOTE: limited to 140737488355328 (2^47) items

````
queue_atomic::is_lock_free = 1
queue_atomic::atomic_bits = 64
queue_atomic::offset_bits = 48
queue_atomic::version_bits = 16
queue_atomic::offset_shift = 0
queue_atomic::version_shift = 48
queue_atomic::size_max = 0x0000800000000000 (140737488355328)
queue_atomic::offset_limit = 0x0001000000000000 (281474976710656)
queue_atomic::version_limit = 0x0000000000010000 (65536)
queue_atomic::offset_mask = 0x0000ffffffffffff
queue_atomic::version_mask = 0x000000000000ffff
````
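
For example, with the default 48/16 split, packing version 5 with offset 1024 produces a single 64-bit word:

````
pack_offset(5, 1024) = (5 << 48) | (1024 << 0) = 0x0005000000000400
````

An update becomes visible once the 16-bit version in this word equals the low 16 bits of the corresponding counter.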

## Timings

- -O3, OS X 10.10, Apple LLVM version 7.0.0, 22nm Ivy Bridge 2.7 GHz Intel Core i7

````
queue_implementation threads   iterations items/thread time(µs)  ops       op_time(µs)
queue_atomic         8         10         1024         4711      81920     0.057507
queue_atomic         8         10         65536        190221    5242880   0.036282
queue_atomic         8         64         65536        1225404   33554432  0.036520
queue_atomic         8         16         262144       1151575   33554432  0.034320
queue_std_mutex      8         10         1024         752439    81920     9.185046
````

- -O3, Linux 4.2.0-amd64, GCC 5.2.1, 45nm Bloomfield 3.33 GHz Intel Core i7 975

````
queue_implementation threads   iterations items/thread time(µs)  ops       op_time(µs)
queue_atomic         8         10         1024         8022      81920     0.097925
queue_atomic         8         10         65536        505085    5242880   0.096337
queue_atomic         8         64         65536        3182992   33554432  0.094861
queue_atomic         8         16         262144       3259350   33554432  0.097136
queue_std_mutex      8         10         1024         25139     81920     0.306873
````
--------------------------------------------------------------------------------
/queue_atomic.h:
--------------------------------------------------------------------------------
//
//  queue_atomic.h
//

#ifndef queue_atomic_h
#define queue_atomic_h

/*
 * queue_atomic
 *
 * Multiple producer multiple consumer queue template using C++11 atomics.
 *
 * Completely lockless in the single producer single consumer case.
 *
 * - uses 4 atomic variables: counter_back, version_back, counter_front and version_front
 *
 * - push_back reads 3 atomics: counter_back, version_back and version_front
 *   writes 2 atomics: counter_back and version_back
 *
 * - pop_front reads 3 atomics: counter_front, version_back and version_front
 *   writes 2 atomics: counter_front and version_front
 *
 * - back version and front version are packed into version_back and version_front
 *
 * - version is used for conflict detection during ordered writes
 */
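
/*
 * packed word layout with the defaults (ATOMIC_UINT = uint64_t,
 * OFFSET_BITS = 48, VERSION_BITS = 16):
 *
 *    63           48 47                            0
 *   +---------------+------------------------------+
 *   |  version:16   |          offset:48           |
 *   +---------------+------------------------------+
 */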

#if defined(_MSC_VER)
#define ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__)
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#define ALIGNED(x)
#endif

template <typename T,
          typename ATOMIC_UINT = uint64_t,
          const int OFFSET_BITS = 48,
          const int VERSION_BITS = 16,
          const bool debug_contention = false,
          std::memory_order relaxed_memory_order = std::memory_order_relaxed,
          std::memory_order acquire_memory_order = std::memory_order_acquire,
          std::memory_order release_memory_order = std::memory_order_release>
struct queue_atomic
{
    /* queue atomic type */

    typedef ATOMIC_UINT atomic_uint_t;
    typedef std::atomic<T> atomic_item_t;


    /* queue constants */

    static const int tight_spin_limit = 8;
    static const int spin_limit = 1 << 24;
    static const bool debug_spin = true;
    static const int atomic_bits = sizeof(atomic_uint_t) << 3;
    static const int offset_bits = OFFSET_BITS;
    static const int version_bits = VERSION_BITS;
    static const int offset_shift = 0;
    static const int version_shift = offset_bits;
    static const atomic_uint_t size_max = (1ULL << (offset_bits - 1));
    static const atomic_uint_t offset_limit = (1ULL << offset_bits);
    static const atomic_uint_t version_limit = (1ULL << version_bits);
    static const atomic_uint_t offset_mask = (1ULL << offset_bits) - 1;
    static const atomic_uint_t version_mask = (1ULL << version_bits) - 1;


    /* queue storage */

    ALIGNED(64) atomic_item_t *vec;
    const atomic_uint_t size_limit;
    ALIGNED(64) std::atomic<atomic_uint_t> counter_back;
    std::atomic<atomic_uint_t> version_back;
    ALIGNED(64) std::atomic<atomic_uint_t> counter_front;
    std::atomic<atomic_uint_t> version_front;


    /* queue helpers */

    static inline bool ispow2(size_t val) { return val && !(val & (val - 1)); }

    /*
     * pack a version number and an offset into an unsigned atomic integer
     */
    static inline atomic_uint_t pack_offset(const atomic_uint_t version, const atomic_uint_t offset)
    {
        assert(version < version_limit);
        assert(offset < offset_limit);
        return (version << version_shift) | (offset << offset_shift);
    }

    /*
     * unpack a version number and offset and compare the version to a counter value
     * returns true if the version in the counter matches the version packed in the offset
     */
    static inline bool unpack_offsets(const atomic_uint_t counter, const atomic_uint_t pack,
                                      atomic_uint_t &offset)
    {
        if (((pack >> version_shift) & version_mask) == (counter & version_mask)) {
            offset = (pack >> offset_shift) & offset_mask;
            return true;
        }
        return false;
    }

    /* queue implementation */

    atomic_uint_t _back_version()  { return (version_back >> version_shift) & version_mask; }
    atomic_uint_t _front_version() { return (version_front >> version_shift) & version_mask; }
    atomic_uint_t _back()          { return (version_back >> offset_shift) & offset_mask; }
    atomic_uint_t _front()         { return (version_front >> offset_shift) & offset_mask; }
    size_t capacity()              { return size_limit; }


    queue_atomic(size_t size_limit) :
        size_limit(size_limit),
        counter_back(0),
        version_back(pack_offset(0, 0)),
        counter_front(0),
        version_front(pack_offset(0, size_limit))
    {
        static_assert(version_bits + offset_bits <= atomic_bits,
                      "version_bits + offset_bits must fit into atomic integer type");
        assert(size_limit > 0);
        assert(size_limit <= size_max);
        assert(ispow2(size_limit));
        vec = new atomic_item_t[size_limit]();
        assert(vec != nullptr);
    }

    virtual ~queue_atomic()
    {
        delete [] vec;
    }

    bool empty()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return true if queue is empty */
        return (front - back == size_limit);
    }

    bool full()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return true if queue is full */
        return (front == back);
    }

    size_t size()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        /* return queue size */
        return size_limit - front + back;
    }

    bool push_back(T elem)
    {
        atomic_uint_t back;
        atomic_uint_t front = (version_front >> offset_shift) & offset_mask;

        int spin_count = 0;
        do {
            /*
             * if the packed version equals counter_back then attempt push back
             *
             * this is where we detect if another thread is in the push back
             * critical section and we only proceed if the versions are consistent:
             *
             *   i.e. counter_back == version_back >> version_shift & version_mask
             */
            atomic_uint_t _counter_back = counter_back.load(relaxed_memory_order);
            atomic_uint_t _version_back = version_back.load(relaxed_memory_order);
            if (unpack_offsets(_counter_back, _version_back, back))
            {
                /* if (full) return false; */
                if (front == back) return false;

                /* create new back version */
                atomic_uint_t new_back_version = (_counter_back + 1) & version_mask;

                /* calculate store offset and update back */
                size_t offset = back++ & (size_limit - 1);

                /* pack new back version and back offset */
                atomic_uint_t pack = pack_offset(new_back_version, back & (offset_limit - 1));

                /*
                 * compare_exchange_weak and attempt to update the counter with the new version
                 *
                 * this is where we enter the critical section:
                 *
                 *   i.e. counter_back != version_back >> version_shift & version_mask
                 *   for a brief number of instructions until we write the new version_back
                 *
                 * if successful other threads will spin until the new version_back is visible
                 * if successful we write the value followed by writing a new version_back
                 * to leave the critical section
                 */
                if (counter_back.compare_exchange_weak(_counter_back, new_back_version, std::memory_order_acq_rel))
                {
                    vec[offset].store(elem, release_memory_order);

                    /*
                     * exit the critical section and reveal the new back offset to other threads
                     *
                     *   i.e. counter_back == version_back >> version_shift & version_mask
                     */
                    version_back.store(pack, release_memory_order);
                    return true;

                } else if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 2 contention",
                              __func__, _counter_back, _tsc, spin_count, std::this_thread::get_id());
                }
            } else {
                if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 1 contention",
                              __func__, _counter_back, _tsc, spin_count, std::this_thread::get_id());
                }
            }

            /*
             * if we reach here then we detected an inconsistent version in phase 1 prepare
             * or failed to update the counter to enter the critical section in phase 2
             */

            /* after a short tight spin, yield the thread before retrying */
            if (spin_count > tight_spin_limit) {
                std::this_thread::yield();
            }

        } while (++spin_count < spin_limit);

        if (debug_spin) {
            log_debug("%s thread:%p failed: reached spin limit", __func__, std::this_thread::get_id());
        }

        return false;
    }
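
    /*
     * note: pop_front signals an empty queue (or a reached spin limit) by
     * returning T(0), so T must be a pointer or integer type for which zero
     * is never a valid queued value
     */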
    T pop_front()
    {
        atomic_uint_t back = (version_back >> offset_shift) & offset_mask;
        atomic_uint_t front;

        int spin_count = 0;
        do {
            /*
             * if the packed version equals counter_front then attempt pop front
             *
             * this is where we detect if another thread is in the pop front
             * critical section and we only proceed if the versions are consistent:
             *
             *   i.e. counter_front == version_front >> version_shift & version_mask
             */
            atomic_uint_t _counter_front = counter_front.load(relaxed_memory_order);
            atomic_uint_t _version_front = version_front.load(relaxed_memory_order);
            if (unpack_offsets(_counter_front, _version_front, front))
            {
                /* if (empty) return T(0); */
                if (front - back == size_limit) return T(0);

                /* create new front version */
                atomic_uint_t new_front_version = (_counter_front + 1) & version_mask;

                /* calculate offset and update front */
                size_t offset = front++ & (size_limit - 1);

                /* pack new front version and front offset */
                atomic_uint_t pack = pack_offset(new_front_version, front & (offset_limit - 1));

                /*
                 * compare_exchange_weak and attempt to update the counter with the new version
                 *
                 * this is where we enter the critical section:
                 *
                 *   i.e. counter_front != version_front >> version_shift & version_mask
                 *   for a brief number of instructions until we write the new version_front
                 *
                 * if successful other threads will spin until the new version_front is visible
                 * if successful we read the value followed by writing a new version_front
                 * to leave the critical section
                 */
                if (counter_front.compare_exchange_weak(_counter_front, new_front_version, std::memory_order_acq_rel))
                {
                    T val = vec[offset].load(acquire_memory_order);

                    /*
                     * exit the critical section and reveal the new front offset to other threads
                     *
                     *   i.e. counter_front == version_front >> version_shift & version_mask
                     */
                    version_front.store(pack, release_memory_order);
                    return val;

                } else if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 2 contention",
                              __func__, _counter_front, _tsc, spin_count, std::this_thread::get_id());
                }
            } else {
                if (debug_contention) {
                    uint64_t _tsc = rdtsc();
                    log_debug("%s version=%llu time=%llu spin_count=%d thread:%p phase 1 contention",
                              __func__, _counter_front, _tsc, spin_count, std::this_thread::get_id());
                }
            }

            /*
             * if we reach here then we detected an inconsistent version in phase 1 prepare
             * or failed to update the counter to enter the critical section in phase 2
             */

            /* after a short tight spin, yield the thread before retrying */
            if (spin_count > tight_spin_limit) {
                std::this_thread::yield();
            }

        } while (++spin_count < spin_limit);

        if (debug_spin) {
            log_debug("%s thread:%p failed: reached spin limit", __func__, std::this_thread::get_id());
        }

        return T(0);
    }
};

#endif
--------------------------------------------------------------------------------
/queue_std_mutex.h:
--------------------------------------------------------------------------------
//
//  queue_std_mutex.h
//

#ifndef queue_std_mutex_h
#define queue_std_mutex_h

/*
 * queue_std_mutex
 *
 * - uses std::mutex protection around std::queue
 */

template <typename T>
struct queue_std_mutex
{
    typedef std::queue<T> queue_type;
    typedef std::atomic<T> atomic_item_t;

    queue_type queue;
    std::mutex queue_mutex;

    queue_std_mutex(size_t size_limit) {}

    size_t size()
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        return queue.size();
    }

    bool push_back(T elem)
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        queue.push(elem);
        return true;
    }

    T pop_front()
    {
        std::lock_guard<std::mutex> lock(queue_mutex);
        T result(0);
        if (queue.size() > 0) {
            result = queue.front();
            queue.pop();
        }
        return result;
    }
};

#endif
--------------------------------------------------------------------------------
/rdtsc.h:
--------------------------------------------------------------------------------
//
//  rdtsc.h
//

#ifndef rdtsc_h
#define rdtsc_h
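
/*
 * rdtsc() reads the x86 time-stamp counter; the queue uses it only to
 * attach coarse relative timestamps to the contention debug logging
 */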

#ifdef _MSC_VER

#ifdef _M_IX86

/* 32-bit MSVC: cpuid serializes, then rdtsc loads the counter into edx:eax */
inline uint64_t rdtsc()
{
    uint64_t c;
    __asm {
        cpuid
        rdtsc
        mov dword ptr [c + 0], eax
        mov dword ptr [c + 4], edx
    }
    return c;
}

#elif defined(_M_X64)

extern "C" unsigned __int64 __rdtsc();
#pragma intrinsic(__rdtsc)
inline uint64_t rdtsc()
{
    return __rdtsc();
}

#endif

#elif defined(__GNUC__)

#if defined(__i386__)

static __inline__ unsigned long long rdtsc(void)
{
    unsigned long long int x;
    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
    return x;
}

#elif defined(__x86_64__)

static __inline__ unsigned long long rdtsc(void)
{
    unsigned hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
}

#endif

#endif

#endif /* rdtsc_h */
--------------------------------------------------------------------------------
/test_queue.cc:
--------------------------------------------------------------------------------
//
//  test_queue.cc
//

#include <cstdio>
#include <cstdarg>
#include <cstdint>
#include <cstring>
#include <cassert>
#include <vector>
#include <queue>
#include <set>
#include <memory>
#include <atomic>
#include <mutex>
#include <thread>
#include <chrono>

extern void log_debug(const char* fmt, ...);

#include "rdtsc.h"
#include "queue_atomic.h"
#include "queue_std_mutex.h"

using namespace std::chrono;

typedef unsigned long long u64;


void log_prefix(const char* prefix, const char* fmt, va_list arg)
{
    std::vector<char> buf(1024);

    /* va_copy so the argument list can be consumed again if the buffer needs to grow */
    va_list arg2;
    va_copy(arg2, arg);

    int len = vsnprintf(buf.data(), buf.capacity(), fmt, arg);

    if (len >= (int)buf.capacity()) {
        buf.resize(len + 1);
        vsnprintf(buf.data(), buf.capacity(), fmt, arg2);
    }
    va_end(arg2);

    fprintf(stderr, "%s: %s\n", prefix, buf.data());
}

void log_debug(const char* fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    log_prefix("debug", fmt, ap);
    va_end(ap);
}


/* test_push_pop_worker */

template <typename queue_type, typename item_type>
struct test_push_pop_worker
{
    typedef std::vector<item_type> vec_type;

    vec_type vec;
    queue_type &queue;
    const size_t items_per_thread;
    std::thread thread;

    test_push_pop_worker(queue_type &queue, const size_t items_per_thread)
        : queue(queue), items_per_thread(items_per_thread), thread(&test_push_pop_worker::mainloop, this) {}

    void mainloop()
    {
        // transfer items from the queue to the vector
        for (size_t i = 0; i < items_per_thread; i++) {
            item_type v = queue.pop_front();
            if (v) {
                vec.push_back(v);
            } else {
                log_debug("%p queue.pop_front() returned null item", std::this_thread::get_id());
            }
        }
        // transfer items from the vector back to the queue
        for (auto v : vec) {
            if (!queue.push_back(v)) {
                log_debug("%p queue.push_back() returned false", std::this_thread::get_id());
            }
        }
    }
};

/* test_push_pop_threads */
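/*
 * the queue is pre-populated with num_threads * items_per_thread items so
 * that, in aggregate, every pop_front can find an item; each worker pops
 * its share and pushes it back, and after all iterations the queue is
 * drained into a set and checked against the original items 1..num_items
 */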
template <typename queue_type, typename item_type>
void test_push_pop_threads(const char* queue_type_name, const size_t num_threads, const size_t iterations, const size_t items_per_thread)
{
    const size_t num_items = num_threads * items_per_thread;
    const size_t num_ops = num_items * iterations;

    typedef test_push_pop_worker<queue_type,item_type> worker_type;
    typedef std::shared_ptr<worker_type> worker_ptr;
    typedef std::vector<worker_ptr> worker_list;
    typedef std::set<item_type> set_type;

    queue_type queue(num_items);

    // populate queue
    assert(queue.size() == 0);
    for (size_t i = 1; i <= num_items; i++) {
        queue.push_back(item_type(i));
    }
    assert(queue.size() == num_items);

    // run test iterations
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (size_t iter = 0; iter < iterations; iter++)
    {
        // start worker threads
        worker_list workers;
        for (size_t i = 0; i < num_threads; i++) {
            workers.push_back(std::make_shared<worker_type>(queue, items_per_thread));
        }

        // join worker threads
        for (auto worker : workers) {
            worker->thread.join();
        }
        assert(queue.size() == num_items);
    }
    const auto t2 = std::chrono::high_resolution_clock::now();
    uint64_t work_time_us = duration_cast<microseconds>(t2 - t1).count();

    // transfer items to a set
    set_type check_set;
    for (size_t i = 1; i <= num_items; i++) {
        item_type v = queue.pop_front();
        if (v) {
            check_set.insert(v);
        } else {
            log_debug("queue.pop_front() returned null item");
        }
    }
    assert(queue.size() == 0);

    // check items in set
    size_t check_count = 0;
    for (size_t i = 1; i <= num_items; i++) {
        if (check_set.find(item_type(i)) != check_set.end()) {
            check_count++;
        }
    }
    assert(check_count == num_items);

    printf("%-20s %-9zu %-9zu %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_threads, iterations, items_per_thread,
           (u64)work_time_us, (u64)num_ops, (double)work_time_us / (double)num_ops);
}

static void heading_multi()
{
    printf("%-20s %-9s %-9s %-9s %-9s %-9s %-9s\n",
           "name", "nthreads", "iters", "items",
           "time(us)", "op_count", "op(us)");
}
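
/* test_push_pop_single */

/*
 * measures uncontended single-threaded cost: the first printed row is the
 * push_back pass and the second row is the pop_front pass
 */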
template <typename queue_type, typename item_type>
void test_push_pop_single(const char* queue_type_name, const size_t num_items)
{
    queue_type queue(num_items);

    assert(queue.size() == 0);

    // populate queue
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (size_t i = 1; i <= num_items; i++) {
        queue.push_back(item_type(i));
    }
    const auto t2 = std::chrono::high_resolution_clock::now();

    assert(queue.size() == num_items);

    // empty queue
    for (size_t i = 1; i <= num_items; i++) {
        queue.pop_front();
    }
    const auto t3 = std::chrono::high_resolution_clock::now();

    assert(queue.size() == 0);

    uint64_t push_work_time_us = duration_cast<microseconds>(t2 - t1).count();
    uint64_t pop_work_time_us = duration_cast<microseconds>(t3 - t2).count();

    printf("%-20s %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_items, (u64)push_work_time_us, (u64)num_items,
           (double)push_work_time_us / (double)num_items);
    printf("%-20s %-9zu %-9llu %-9llu %-9.6lf\n",
           queue_type_name, num_items, (u64)pop_work_time_us, (u64)num_items,
           (double)pop_work_time_us / (double)num_items);
}

static void heading_single()
{
    printf("%-20s %-9s %-9s %-9s %-9s\n",
           "name", "items", "time(us)", "op_count", "op(us)");
}

/* test_queue */

struct test_queue
{
    void test_queue_constants()
    {
        const size_t qsize = 1024;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        printf("queue_atomic::is_lock_free = %u\n", q.counter_back.is_lock_free());
        printf("queue_atomic::atomic_bits = %u\n", qtype::atomic_bits);
        printf("queue_atomic::offset_bits = %u\n", qtype::offset_bits);
        printf("queue_atomic::version_bits = %u\n", qtype::version_bits);
        printf("queue_atomic::offset_shift = %u\n", qtype::offset_shift);
        printf("queue_atomic::version_shift = %u\n", qtype::version_shift);
        printf("queue_atomic::size_max = 0x%016llx (%llu)\n", (u64)qtype::size_max, (u64)qtype::size_max);
        printf("queue_atomic::offset_limit = 0x%016llx (%llu)\n", (u64)qtype::offset_limit, (u64)qtype::offset_limit);
        printf("queue_atomic::version_limit = 0x%016llx (%llu)\n", (u64)qtype::version_limit, (u64)qtype::version_limit);
        printf("queue_atomic::offset_mask = 0x%016llx\n", (u64)qtype::offset_mask);
        printf("queue_atomic::version_mask = 0x%016llx\n", (u64)qtype::version_mask);

        assert(qtype::atomic_bits == 64);
        assert(qtype::offset_bits == 48);
        assert(qtype::version_bits == 16);
        assert(qtype::offset_shift == 0);
        assert(qtype::version_shift == 48);
        assert(qtype::size_max == 140737488355328ULL);
        assert(qtype::offset_limit == 281474976710656ULL);
        assert(qtype::version_limit == 65536);
        assert(qtype::offset_mask == 0x0000ffffffffffffULL);
        assert(qtype::version_mask == 0x000000000000ffffULL);
    }

    void test_empty_invariants()
    {
        const size_t qsize = 1024;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        assert(q.capacity() == 1024);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
        assert(q.size_limit == 1024);
        assert(q._back_version() == 0);
        assert(q._front_version() == 0);
        assert(q._back() == 0);
        assert(q._front() == 1024);
    }

    void test_push_pop()
    {
        const size_t qsize = 4;
        typedef queue_atomic<void*> qtype;
        qtype q(qsize);

        // check initial invariants
        assert(q.capacity() == qsize);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
        assert(q.size_limit == qsize);
        assert(q._back_version() == 0);
        assert(q._front_version() == 0);
        assert(q._back() == 0);
        assert(q._front() == qsize);

        // push_back 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.push_back((void*)i) == true);
            assert(q._back_version() == i);
            assert(q._front_version() == 0);
            assert(q._back() == i);
            assert(q._front() == qsize);
            assert(q.size() == i);
            assert(q.empty() == false);
            assert(q.full() == (i < 4 ? false : true));
        }

        // push_back overflow test
        assert(q.push_back((void*)5) == false);
        assert(q._back_version() == 4);
        assert(q._front_version() == 0);
        assert(q._back() == 4);
        assert(q._front() == qsize);
        assert(q.size() == 4);
        assert(q.empty() == false);
        assert(q.full() == true);

        // pop_front 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.pop_front() == (void*)i);
            assert(q._back_version() == 4);
            assert(q._front_version() == i);
            assert(q._back() == 4);
            assert(q._front() == 4 + i);
            assert(q.size() == 4 - i);
            assert(q.empty() == (i > 3 ? true : false));
            assert(q.full() == false);
        }

        // pop_front underflow test
        assert(q.pop_front() == (void*)0);
        assert(q._back_version() == 4);
        assert(q._front_version() == 4);
        assert(q._back() == 4);
        assert(q._front() == 8);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);

        // push_back 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.push_back((void*)i) == true);
            assert(q._back_version() == 4 + i);
            assert(q._front_version() == 4);
            assert(q._back() == 4 + i);
            assert(q._front() == 8);
            assert(q.size() == i);
            assert(q.empty() == false);
            assert(q.full() == (i < 4 ? false : true));
        }

        // push_back overflow test
        assert(q.push_back((void*)5) == false);
        assert(q._back_version() == 8);
        assert(q._front_version() == 4);
        assert(q._back() == 8);
        assert(q._front() == 8);
        assert(q.size() == 4);
        assert(q.empty() == false);
        assert(q.full() == true);

        // pop_front 4 items
        for (size_t i = 1; i <= 4; i++) {
            assert(q.pop_front() == (void*)i);
            assert(q._back_version() == 8);
            assert(q._front_version() == 4 + i);
            assert(q._back() == 8);
            assert(q._front() == 8 + i);
            assert(q.size() == 4 - i);
            assert(q.empty() == (i > 3 ? true : false));
            assert(q.full() == false);
        }

        // pop_front underflow test
        assert(q.pop_front() == (void*)0);
        assert(q._back_version() == 8);
        assert(q._front_version() == 8);
        assert(q._back() == 8);
        assert(q._front() == 12);
        assert(q.size() == 0);
        assert(q.empty() == true);
        assert(q.full() == false);
    }

    void test_push_pop_single_queue_mutex()
    {
        test_push_pop_single<queue_std_mutex<void*>,void*>("queue_std_mutex", 8388608);
    }

    void test_push_pop_single_queue_atomic()
    {
        test_push_pop_single<queue_atomic<void*>,void*>("queue_atomic", 8388608);
    }

    void test_push_pop_threads_queue_mutex()
    {
        test_push_pop_threads<queue_std_mutex<void*>,void*>("queue_std_mutex", 8, 10, 1024);
    }

    void test_push_pop_threads_queue_atomic()
    {
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 1024);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 1024);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 10, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 64, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 64, 65536);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 16, 262144);
        test_push_pop_threads<queue_atomic<void*>,void*>("queue_atomic", 8, 16, 262144);
    }

    void test_push_pop_threads_queue_atomic_contention()
    {
        /* debug_contention=true instantiations log phase 1 / phase 2 contention events */
        test_push_pop_threads<queue_atomic<void*,uint64_t,48,16,true>,void*>("queue_atomic:contention", 1, 10, 65536);
        test_push_pop_threads<queue_atomic<void*,uint64_t,48,16,true>,void*>("queue_atomic:contention", 8, 1, 256);
    }
};

int main(int argc, const char * argv[])
{
    test_queue tq;
    printf("# unit-tests\n");
    tq.test_queue_constants();
    tq.test_empty_invariants();
    tq.test_push_pop();
    printf("# single-thread\n");
    heading_single();
    tq.test_push_pop_single_queue_mutex();
    tq.test_push_pop_single_queue_atomic();
    printf("# multi-thread\n");
    heading_multi();
    tq.test_push_pop_threads_queue_mutex();
    tq.test_push_pop_threads_queue_atomic();
    printf("# contention tests\n");
    heading_multi();
    tq.test_push_pop_threads_queue_atomic_contention();
}
--------------------------------------------------------------------------------