├── .github └── workflows │ ├── mac_arm64.yml │ ├── mac_boost_arm64.yml │ ├── mac_boost_x64.yml │ ├── mac_x64.yml │ ├── ubuntu_arm64.yml │ ├── ubuntu_boost_x64.yml │ ├── ubuntu_x64.yml │ ├── win_arm64.yml │ ├── win_boost_x64.yml │ └── win_x64.yml ├── .gitignore ├── CMakeLists.txt ├── FastQueue.h ├── FastQueueASM.h ├── FastQueueCompare.cpp ├── FastQueueIntegrityTest.cpp ├── LICENSE ├── PinToCPU.h ├── README.md ├── SPSCQueue.h ├── apple.toolchain.cmake ├── deaod_spsc ├── LICENSE └── spsc_queue.hpp ├── fastqueue_arm64.asm ├── fastqueue_x86_64.asm ├── fastqueuesmall.png ├── ubuntu.toolchain.cmake └── windows.toolchain.cmake /.github/workflows/mac_arm64.yml: -------------------------------------------------------------------------------- 1 | name: mac_arm64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: CMake set-up 17 | run: cmake . -G Xcode -DCMAKE_TOOLCHAIN_FILE=./apple.toolchain.cmake -DPLATFORM=MAC_ARM64 18 | - name: make 19 | run: cmake --build . --config Release 20 | -------------------------------------------------------------------------------- /.github/workflows/mac_boost_arm64.yml: -------------------------------------------------------------------------------- 1 | name: mac_boost_arm64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Install BOOST 17 | run: brew install boost 18 | - name: CMake set-up 19 | run: cmake . -G Xcode -DCMAKE_TOOLCHAIN_FILE=./apple.toolchain.cmake -DPLATFORM=MAC_ARM64 -DUSE_BOOST=ON 20 | - name: make 21 | run: cmake --build . 
--config Release 22 | -------------------------------------------------------------------------------- /.github/workflows/mac_boost_x64.yml: -------------------------------------------------------------------------------- 1 | name: mac_boost_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Install BOOST and NASM 17 | run: brew install boost nasm 18 | - name: CMake set-up 19 | run: cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_APPLE_SILICON_PROCESSOR=x86_64 -DUSE_BOOST=ON . 20 | - name: make 21 | run: cmake --build . 22 | -------------------------------------------------------------------------------- /.github/workflows/mac_x64.yml: -------------------------------------------------------------------------------- 1 | name: mac_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: CMake set-up 17 | run: cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_APPLE_SILICON_PROCESSOR=x86_64 . 18 | - name: make 19 | run: cmake --build . 20 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu_arm64.yml: -------------------------------------------------------------------------------- 1 | name: ubuntu_arm64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Prepare crosscompile 17 | run: | 18 | sudo apt update 19 | sudo apt -y install crossbuild-essential-arm64 20 | - name: CMake set-up 21 | run: cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=./ubuntu.toolchain.cmake . 22 | - name: make 23 | run: cmake --build . 
24 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu_boost_x64.yml: -------------------------------------------------------------------------------- 1 | name: ubuntu_boost_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Install BOOST and NASM 17 | run: | 18 | sudo apt update 19 | sudo apt -y install libboost-all-dev nasm 20 | - name: CMake set-up 21 | run: cmake -DCMAKE_BUILD_TYPE=Release -DUSE_BOOST=ON . 22 | - name: make 23 | run: cmake --build . 24 | 25 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu_x64.yml: -------------------------------------------------------------------------------- 1 | name: ubuntu_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: CMake set-up 17 | run: cmake -DCMAKE_BUILD_TYPE=Release . 18 | - name: make 19 | run: cmake --build . 20 | 21 | -------------------------------------------------------------------------------- /.github/workflows/win_arm64.yml: -------------------------------------------------------------------------------- 1 | name: win_arm64 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | jobs: 8 | 9 | build-and-test-windows: 10 | name: Build Windows arm64 11 | runs-on: windows-latest 12 | 13 | steps: 14 | - name: Setup Developer Command Prompt 15 | uses: ilammy/msvc-dev-cmd@v1 16 | with: 17 | arch: amd64_arm64 18 | - name: Check Out Source Code 19 | uses: actions/checkout@v3 20 | - name: CMake 21 | run: cmake -G "Visual Studio 17 2022" -A ARM64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=".\windows.toolchain.cmake" . 
22 | - name: build 23 | run: cmake --build . --config Release 24 | 25 | -------------------------------------------------------------------------------- /.github/workflows/win_boost_x64.yml: -------------------------------------------------------------------------------- 1 | name: win_boost_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: windows-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Install BOOST and NASM 17 | run: | 18 | vcpkg install boost-system:x64-windows 19 | vcpkg install boost-thread:x64-windows 20 | vcpkg install boost-lockfree:x64-windows 21 | vcpkg integrate install 22 | choco install nasm 23 | - name: CMake set-up 24 | run: cmake -S . `-D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake -DUSE_BOOST=ON` 25 | - name: make 26 | run: cmake --build . --config Release 27 | -------------------------------------------------------------------------------- /.github/workflows/win_x64.yml: -------------------------------------------------------------------------------- 1 | name: win_x64 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: windows-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: CMake set-up 17 | run: cmake -S . `-D CMAKE_BUILD_TYPE=Release` 18 | - name: make 19 | run: cmake --build . 
--config Release
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
cmake-build*
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
project(fast_queue)
set(CMAKE_CXX_STANDARD 17)
#set(CMAKE_OSX_ARCHITECTURES "x86_64") #for testing x86_64 on my mac
message(STATUS "Building for architecture: ${CMAKE_SYSTEM_PROCESSOR}")

find_package(Threads REQUIRED)

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/deaod_spsc/)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
    if (CMAKE_GENERATOR MATCHES "Visual Studio")
        # Visual Studio specific
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
    else()
        # Other compilers
        #SET(CMAKE_CXX_FLAGS "-mavx2")
    endif()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)")
else ()
    message(FATAL_ERROR "Unsupported architecture: only x86_64 and arm64 are supported.")
endif ()

add_executable(fast_queue_integrity_test FastQueueIntegrityTest.cpp)
target_link_libraries(fast_queue_integrity_test PRIVATE Threads::Threads)

#cmake -DUSE_BOOST=ON ..
#to compile the code comparing against boost::lockfree::spsc_queue and rigtorp
if(USE_BOOST)
    find_package(Boost REQUIRED)
    include_directories(${Boost_INCLUDE_DIR})

    if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" OR CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")

        message(STATUS "Building x86_64")
        enable_language(ASM_NASM)

        # The leading space keeps these options separated from any
        # pre-existing content of CMAKE_ASM_NASM_FLAGS; appending
        # "--prefix _" directly would glue it onto the previous flag.
        if(APPLE)
            string(APPEND CMAKE_ASM_NASM_FLAGS " --prefix _")
        endif()

        if(WIN32)
            string(APPEND CMAKE_ASM_NASM_FLAGS " -dWIN32=1")
        endif()

        add_executable(fast_queue_compare FastQueueCompare.cpp fastqueue_x86_64.asm)

    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)")

        message(STATUS "Building ARM64")
        enable_language(ASM)

        add_executable(fast_queue_compare FastQueueCompare.cpp fastqueue_arm64.asm)

    else ()
        message(FATAL_ERROR "Unsupported architecture: only x86_64 and arm64 are supported.")
    endif ()

    # Quote the expansion: with the unquoted form, an empty/undefined
    # Boost_LIBRARIES collapses the command to `if(STREQUAL "")`, which is
    # a CMake syntax error instead of a normal false branch.
    if ("${Boost_LIBRARIES}" STREQUAL "")
        message(STATUS "Boost_LIBRARIES string is empty. Will try to compile anyway.")
        #comment contains an alternative method if BOOST not found by CMake
        #get_filename_component(FQBoostRoot "${Boost_INCLUDE_DIR}" DIRECTORY)
        #message("${FQBoostRoot}/lib")
        #link_directories(${FQBoostRoot}/lib)
        #target_link_libraries(fast_queue_compare boost_system Threads::Threads)
        target_link_libraries(fast_queue_compare PRIVATE Threads::Threads)
    else()
        target_link_libraries(fast_queue_compare PRIVATE Threads::Threads ${Boost_LIBRARIES})
    endif()
endif(USE_BOOST)
--------------------------------------------------------------------------------
/FastQueue.h:
--------------------------------------------------------------------------------
//
// Created by Anders Cedronius
//

// Usage

// Create the queue
// auto queue = FastQueue
// Type of data
// Size of queue as a contiguous bitmask from LSB example 0b1111
// The ring buffer is acting as a rubber band between the
// producer/consumer to avoid unnecessary stalls when pushing new data.
// L1-Cache size typically 64 bytes

// queue.push is blocking if queue is full
// queue.stopQueue() or a popped entry will release the spinlock only.
// queue.push(object/pointer)

// queue.pop is blocking if the queue is empty
// queue.stopQueue() or a pushed entry will release the spinlock only.
// auto result = queue.pop();
// if result is {} this signals all objects are popped and the consumer should
// not pop any more data

// use tryPush and/or tryPop if you want to avoid spinlock CPU hogging for low
// frequency data transfer tryPush should be followed by pushAfterTry if used
// and tryPop should be followed by popAfterTry.

// Call queue.stopQueue() from any thread to signal end of transaction
// the user may drop the queue or pop the queue until {} is returned.
31 | 32 | // Call queue.isQueueStopped() to see the status of the queue. 33 | // May be used to manage the life cycle of the thread pushing data for example. 34 | 35 | 36 | #pragma once 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #if __x86_64__ || _M_X64 46 | #include 47 | #ifdef _MSC_VER 48 | #include 49 | #endif 50 | #elif __aarch64__ || _M_ARM64 51 | #ifdef _MSC_VER 52 | #include 53 | #endif 54 | #else 55 | #error Arhitecture not supported 56 | #endif 57 | 58 | template 59 | class FastQueue { 60 | public: 61 | 62 | enum class FastQueueMessages : uint64_t { 63 | END_OF_SERVICE, 64 | READY_TO_POP, 65 | NOT_READY_TO_POP, 66 | READY_TO_PUSH, 67 | NOT_READY_TO_PUSH, 68 | }; 69 | 70 | explicit FastQueue() { 71 | uint64_t lSource = RING_BUFFER_SIZE; 72 | uint64_t lContiguousBits = 0; 73 | while (true) { 74 | if (!(lSource & 1)) break; 75 | lSource = lSource >> 1; 76 | lContiguousBits++; 77 | } 78 | 79 | uint64_t lBitsSetTotal = std::bitset<64>(RING_BUFFER_SIZE).count(); 80 | if (lContiguousBits != lBitsSetTotal || !lContiguousBits) { 81 | throw std::runtime_error( 82 | "Buffer size must be a number of contiguous bits set from LSB. 
Example: 0b00001111 not 0b01001111"); 83 | } 84 | if ((uint64_t) &mWritePositionPush % 8 || (uint64_t) &mReadPositionPop % 8) { 85 | throw std::runtime_error("Queue-pointers are misaligned in memory."); 86 | } 87 | } 88 | 89 | /////////////////////// 90 | /// Push part 91 | /////////////////////// 92 | 93 | FastQueueMessages tryPush() { 94 | if (mWritePositionPush - mReadPositionPush >= RING_BUFFER_SIZE || mExitThreadSemaphore) { 95 | return FastQueueMessages::NOT_READY_TO_PUSH; 96 | } 97 | return FastQueueMessages::READY_TO_PUSH; 98 | } 99 | 100 | void pushAfterTry(T &rItem) { 101 | mRingBuffer[mWritePositionPush & RING_BUFFER_SIZE].mObj = std::move(rItem); 102 | #if __x86_64__ || _M_X64 103 | _mm_sfence(); 104 | #elif __aarch64__ || _M_ARM64 105 | #ifdef _MSC_VER 106 | __dmb(_ARM64_BARRIER_ISHST); 107 | #else 108 | asm volatile("dmb ishst" : : : "memory"); 109 | #endif 110 | #else 111 | #error Architecture not supported 112 | #endif 113 | mWritePositionPop = ++mWritePositionPush; 114 | } 115 | 116 | void push(T &rItem) noexcept { 117 | while (mWritePositionPush - mReadPositionPush >= RING_BUFFER_SIZE) { 118 | if (mExitThreadSemaphore) { 119 | return; 120 | } 121 | } 122 | mRingBuffer[mWritePositionPush & RING_BUFFER_SIZE].mObj = std::move(rItem); 123 | #if __x86_64__ || _M_X64 124 | _mm_sfence(); 125 | #elif __aarch64__ || _M_ARM64 126 | #ifdef _MSC_VER 127 | __dmb(_ARM64_BARRIER_ISHST); 128 | #else 129 | asm volatile("dmb ishst" : : : "memory"); 130 | #endif 131 | #else 132 | #error Architecture not supported 133 | #endif 134 | mWritePositionPop = ++mWritePositionPush; 135 | } 136 | 137 | void pushRaw(T &rItem) noexcept { 138 | while (mWritePositionPush - mReadPositionPush >= RING_BUFFER_SIZE) { 139 | } 140 | mRingBuffer[mWritePositionPush & RING_BUFFER_SIZE].mObj = std::move(rItem); 141 | #if __x86_64__ || _M_X64 142 | _mm_sfence(); 143 | #elif __aarch64__ || _M_ARM64 144 | #ifdef _MSC_VER 145 | __dmb(_ARM64_BARRIER_ISHST); 146 | #else 147 | asm volatile("dmb 
ishst" : : : "memory"); 148 | #endif 149 | #else 150 | #error Architecture not supported 151 | #endif 152 | mWritePositionPop = ++mWritePositionPush; 153 | } 154 | 155 | /////////////////////// 156 | /// Pop part 157 | /////////////////////// 158 | 159 | FastQueueMessages tryPop() { 160 | if (mWritePositionPop == mReadPositionPop) { 161 | if ((mExitThread == mReadPositionPop) && mExitThreadSemaphore) { 162 | return FastQueueMessages::END_OF_SERVICE; 163 | } 164 | return FastQueueMessages::NOT_READY_TO_POP; 165 | } 166 | return FastQueueMessages::READY_TO_POP; 167 | } 168 | 169 | T popAfterTry() { 170 | T lData = std::move(mRingBuffer[mReadPositionPop & RING_BUFFER_SIZE].mObj); 171 | #if __x86_64__ || _M_X64 172 | _mm_lfence(); 173 | #elif __aarch64__ || _M_ARM64 174 | #ifdef _MSC_VER 175 | __dmb(_ARM64_BARRIER_ISHLD); 176 | #else 177 | asm volatile("dmb ishld" : : : "memory"); 178 | #endif 179 | #else 180 | #error Architecture not supported 181 | #endif 182 | mReadPositionPush = ++mReadPositionPop; 183 | return lData; 184 | } 185 | 186 | T pop() noexcept { 187 | while (mWritePositionPop == mReadPositionPop) { 188 | if ((mExitThread == mReadPositionPop) && mExitThreadSemaphore) { 189 | return {}; 190 | } 191 | } 192 | T lData = std::move(mRingBuffer[mReadPositionPop & RING_BUFFER_SIZE].mObj); 193 | #if __x86_64__ || _M_X64 194 | _mm_lfence(); 195 | #elif __aarch64__ || _M_ARM64 196 | #ifdef _MSC_VER 197 | __dmb(_ARM64_BARRIER_ISHLD); 198 | #else 199 | asm volatile("dmb ishld" : : : "memory"); 200 | #endif 201 | #else 202 | #error Architecture not supported 203 | #endif 204 | mReadPositionPush = ++mReadPositionPop; 205 | return lData; 206 | } 207 | 208 | void popRaw(T& out) noexcept { 209 | while (mWritePositionPop == mReadPositionPop) { 210 | } 211 | out = std::move(mRingBuffer[mReadPositionPop & RING_BUFFER_SIZE].mObj); 212 | #if __x86_64__ || _M_X64 213 | _mm_lfence(); 214 | #elif __aarch64__ || _M_ARM64 215 | #ifdef _MSC_VER 216 | __dmb(_ARM64_BARRIER_ISHLD); 217 
| #else 218 | asm volatile("dmb ishld" : : : "memory"); 219 | #endif 220 | #else 221 | #error Architecture not supported 222 | #endif 223 | mReadPositionPush = ++mReadPositionPop; 224 | } 225 | 226 | //Stop queue (Maybe called from any thread) 227 | void stopQueue() { 228 | mExitThread = mWritePositionPush; 229 | mExitThreadSemaphore = true; 230 | } 231 | 232 | //Is the queue stopped? 233 | bool isQueueStopped() { 234 | return mExitThreadSemaphore; 235 | } 236 | 237 | ///Delete copy and move constructors and assign operators 238 | FastQueue(FastQueue const &) = delete; // Copy construct 239 | FastQueue(FastQueue &&) = delete; // Move construct 240 | FastQueue &operator=(FastQueue const &) = delete; // Copy assign 241 | FastQueue &operator=(FastQueue &&) = delete; // Move assign 242 | private: 243 | struct alignas(L1_CACHE_LNE) mAlign { 244 | T mObj; 245 | volatile uint8_t mStuff[L1_CACHE_LNE - sizeof(T)]; 246 | }; 247 | 248 | alignas(L1_CACHE_LNE) volatile uint8_t mBorderUpp[L1_CACHE_LNE]; 249 | alignas(L1_CACHE_LNE) volatile uint64_t mWritePositionPush = 0; 250 | alignas(L1_CACHE_LNE) volatile uint64_t mReadPositionPop = 0; 251 | alignas(L1_CACHE_LNE) volatile uint64_t mWritePositionPop = 0; 252 | alignas(L1_CACHE_LNE) volatile uint64_t mReadPositionPush = 0; 253 | alignas(L1_CACHE_LNE) volatile uint64_t mExitThread = 0; 254 | alignas(L1_CACHE_LNE) volatile bool mExitThreadSemaphore = false; 255 | alignas(L1_CACHE_LNE) mAlign mRingBuffer[RING_BUFFER_SIZE + 1]; 256 | alignas(L1_CACHE_LNE) volatile uint8_t mBorderDown[L1_CACHE_LNE]; 257 | }; 258 | 259 | -------------------------------------------------------------------------------- /FastQueueASM.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Anders Cedronius 3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef _MSC_VER 12 | #include 13 | #endif 14 | 15 | //Remember to set the parameters in the ASM file as well 
if changed 16 | #define BUFFER_MASK 15 17 | #define L1_CACHE 64 18 | 19 | namespace FastQueueASM { 20 | 21 | //ASM function declarations 22 | extern "C" { 23 | //param1 = pointer to the struct FastQueueASM::DataBlock 24 | //param2 = the pointer 25 | void push_item(void *, void *); 26 | //param1 = pointer to the struct FastQueueASM::DataBlock, returns the pointer, or NULL if last item is popped 27 | void *pop_item(void *); 28 | //param1 = the mask used by the C header file, returns 0 if the ASM queue and C buffer size match 29 | uint64_t verify_mask(uint64_t); 30 | //param1 = the cache size used by the C header file, returns 0 if the ASM cache size and C cache size setting match 31 | uint64_t verify_cache_size(uint64_t); 32 | } 33 | 34 | //The data block 'template' used by the queue 35 | //Block is set to 0 after initialised 36 | struct DataBlock { 37 | struct alignas(L1_CACHE) mAlign { 38 | void *mObj; 39 | volatile uint8_t mStuff[L1_CACHE - sizeof(void *)]; 40 | }; 41 | 42 | alignas(L1_CACHE) volatile uint8_t mBorderUpp[L1_CACHE]; 43 | alignas(L1_CACHE) volatile uint64_t mWritePositionPush; //L1CACHE * 1 44 | alignas(L1_CACHE) volatile uint64_t mReadPositionPush; //L1CACHE * 2 45 | alignas(L1_CACHE) volatile uint64_t mWritePositionPop; //L1CACHE * 3 46 | alignas(L1_CACHE) volatile uint64_t mReadPositionPop; //L1CACHE * 4 47 | alignas(L1_CACHE) volatile uint64_t mExitThread; //L1CACHE * 5 48 | alignas(L1_CACHE) volatile uint64_t mExitThreadSemaphore; //L1CACHE * 6 49 | alignas(L1_CACHE) volatile mAlign mRingBuffer[BUFFER_MASK + 1]; //L1CACHE * 7 50 | alignas(L1_CACHE) volatile uint8_t mBorderDown[L1_CACHE]; 51 | }; 52 | 53 | //Allocate an new queue 54 | DataBlock *newQueue() { 55 | 56 | //Verify the compiler generated data block 57 | static_assert(sizeof(DataBlock) == ((6 * L1_CACHE) + ((BUFFER_MASK + 1) * L1_CACHE) + (L1_CACHE * 2)), 58 | "FastQueueASM::DataBlock is not matching expected size"); 59 | #ifdef _MSC_VER 60 | auto pData = (DataBlock 
*)_aligned_malloc(sizeof(DataBlock), L1_CACHE); 61 | #else 62 | auto pData = (DataBlock *)std::aligned_alloc(L1_CACHE, sizeof(DataBlock)); 63 | #endif 64 | if (pData) std::memset((void *) pData, 0, sizeof(DataBlock)); 65 | 66 | uint64_t lSource = BUFFER_MASK; 67 | uint64_t lContiguousBits = 0; 68 | while (true) { 69 | if (!(lSource & 1)) break; 70 | lSource = lSource >> 1; 71 | lContiguousBits++; 72 | } 73 | uint64_t lBitsSetTotal = std::bitset<64>(BUFFER_MASK).count(); 74 | if (lContiguousBits != lBitsSetTotal || !lContiguousBits) 75 | throw std::runtime_error( 76 | "Buffer size must be a number of contiguous bits set from LSB. Example: 0b00001111 not 0b01001111"); 77 | if (verify_mask(BUFFER_MASK)) 78 | throw std::runtime_error("the buffer size in fast queue ASM and C-header missmatch."); 79 | if (std::bitset<64>(L1_CACHE).count() != 1) throw std::runtime_error("L1_CACHE must be a 2 complement number ( 2pow(6) = 64 )"); 80 | if (verify_cache_size(L1_CACHE)) 81 | throw std::runtime_error("the cache size in fast queue ASM and C-header missmatch."); 82 | return pData; 83 | } 84 | 85 | //Free the memory of an allocated queue 86 | void deleteQueue(DataBlock *pData) { 87 | #ifdef _MSC_VER 88 | _aligned_free(pData); 89 | #else 90 | std::free(pData); 91 | #endif 92 | } 93 | 94 | //Stop queue (Maybe called from any thread) 95 | void stopQueue(DataBlock *pData) { 96 | pData->mExitThread = pData->mWritePositionPush; 97 | pData->mExitThreadSemaphore = true; 98 | } 99 | 100 | //Is the queue stopped? 101 | bool isQueueStopped(DataBlock *pData) { 102 | return pData->mExitThreadSemaphore; 103 | } 104 | 105 | } 106 | -------------------------------------------------------------------------------- /FastQueueCompare.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Anders Cedronius on 2022-10-10. 3 | // 4 | 5 | // speed-test. 6 | // FastQueue, boost::lockfree, FastQueueASM and Rigtorps SPSC queue 7 | // 1. 
Generate the data 8 | // 2. Stamp something unique 9 | // 3. Push the data through the queue 10 | // 4. The receiver pops the data in another thread 11 | // 5. Checks the data for the expected value 12 | // 6. Garbage collects the data. 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "PinToCPU.h" 19 | #include "FastQueue.h" 20 | #include "SPSCQueue.h" 21 | #include "FastQueueASM.h" 22 | #include "spsc_queue.hpp" 23 | 24 | #define QUEUE_MASK 0b1111 25 | #define L1_CACHE_LINE 64 26 | #define TEST_TIME_DURATION_SEC 20 27 | //Run the consumer on CPU 28 | #define CONSUMER_CPU 0 29 | //Run the producer on CPU 30 | #define PRODUCER_CPU 2 31 | 32 | std::atomic gActiveConsumer = 0; 33 | std::atomic gCounter = 0; 34 | bool gStartBench = false; 35 | bool gActiveProducer = true; 36 | 37 | class MyObject { 38 | public: 39 | uint64_t mIndex; 40 | }; 41 | 42 | /// ----------------------------------------------------------- 43 | /// 44 | /// Boost queue section Start 45 | /// 46 | /// ----------------------------------------------------------- 47 | 48 | void boostLockFreeProducer(boost::lockfree::spsc_queue> *pQueue, 49 | int32_t aCPU) { 50 | if (!pinThread(aCPU)) { 51 | std::cout << "Pin CPU fail. " << std::endl; 52 | return; 53 | } 54 | while (!gStartBench) { 55 | #ifdef _MSC_VER 56 | __nop(); 57 | #else 58 | asm volatile ("NOP"); 59 | #endif 60 | } 61 | uint64_t lCounter = 0; 62 | while (gActiveProducer) { 63 | auto lTheObject = new MyObject(); 64 | lTheObject->mIndex = lCounter++; 65 | while (!pQueue->push(lTheObject) && gActiveProducer) { 66 | }; 67 | } 68 | } 69 | 70 | void boostLockFreeConsumer(boost::lockfree::spsc_queue> *pQueue, 71 | int32_t aCPU) { 72 | if (!pinThread(aCPU)) { 73 | std::cout << "Pin CPU fail. 
" << std::endl; 74 | gActiveConsumer--; 75 | return; 76 | } 77 | uint64_t lCounter = 0; 78 | MyObject *lpMyObject; 79 | while (gActiveProducer) { 80 | while (pQueue->pop(lpMyObject)) { 81 | if (lpMyObject->mIndex != lCounter) { 82 | std::cout << "Queue item error" << std::endl; 83 | } 84 | delete lpMyObject; 85 | lCounter++; 86 | } 87 | } 88 | gCounter += lCounter; 89 | gActiveConsumer--; 90 | } 91 | 92 | /// ----------------------------------------------------------- 93 | /// 94 | /// Boost queue section End 95 | /// 96 | /// ----------------------------------------------------------- 97 | 98 | /// ----------------------------------------------------------- 99 | /// 100 | /// FastQueue section Start 101 | /// 102 | /// ----------------------------------------------------------- 103 | 104 | void fastQueueProducer(FastQueue *pQueue, int32_t aCPU) { 105 | if (!pinThread(aCPU)) { 106 | std::cout << "Pin CPU fail. " << std::endl; 107 | return; 108 | } 109 | while (!gStartBench) { 110 | #ifdef _MSC_VER 111 | __nop(); 112 | #else 113 | asm volatile ("NOP"); 114 | #endif 115 | } 116 | uint64_t lCounter = 0; 117 | while (gActiveProducer) { 118 | auto lTheObject = new MyObject(); 119 | lTheObject->mIndex = lCounter++; 120 | pQueue->push(lTheObject); 121 | } 122 | pQueue->stopQueue(); 123 | } 124 | 125 | void fastQueueConsumer(FastQueue *pQueue, int32_t aCPU) { 126 | if (!pinThread(aCPU)) { 127 | std::cout << "Pin CPU fail. 
" << std::endl; 128 | gActiveConsumer--; 129 | return; 130 | } 131 | uint64_t lCounter = 0; 132 | while (true) { 133 | auto lResult = pQueue->pop(); 134 | if (lResult == nullptr) { 135 | break; 136 | } 137 | if (lResult->mIndex != lCounter) { 138 | std::cout << "Queue item error" << std::endl; 139 | } 140 | lCounter++; 141 | delete lResult; 142 | } 143 | gCounter += lCounter; 144 | gActiveConsumer--; 145 | } 146 | 147 | /// ----------------------------------------------------------- 148 | /// 149 | /// FastQueue section End 150 | /// 151 | /// ----------------------------------------------------------- 152 | 153 | /// ----------------------------------------------------------- 154 | /// 155 | /// Rigtorp section Start 156 | /// 157 | /// ----------------------------------------------------------- 158 | 159 | 160 | void rigtorpQueueProducer(rigtorp::SPSCQueue *pQueue, int32_t aCPU) { 161 | if (!pinThread(aCPU)) { 162 | std::cout << "Pin CPU fail. " << std::endl; 163 | return; 164 | } 165 | while (!gStartBench) { 166 | #ifdef _MSC_VER 167 | __nop(); 168 | #else 169 | asm volatile ("NOP"); 170 | #endif 171 | } 172 | uint64_t lCounter = 0; 173 | while (gActiveProducer) { 174 | auto lTheObject = new MyObject(); 175 | lTheObject->mIndex = lCounter++; 176 | pQueue->push(lTheObject); 177 | } 178 | pQueue->push(nullptr); 179 | } 180 | 181 | void rigtorpQueueConsumer(rigtorp::SPSCQueue *pQueue, int32_t aCPU) { 182 | if (!pinThread(aCPU)) { 183 | std::cout << "Pin CPU fail. 
" << std::endl; 184 | gActiveConsumer--; 185 | return; 186 | } 187 | uint64_t lCounter = 0; 188 | while (true) { 189 | while (!pQueue->front()); 190 | auto lResult = *pQueue->front(); 191 | if (lResult == nullptr) { 192 | break; 193 | } 194 | pQueue->pop(); 195 | if (lResult->mIndex != lCounter) { 196 | std::cout << "Queue item error" << std::endl; 197 | } 198 | lCounter++; 199 | delete lResult; 200 | } 201 | gCounter += lCounter; 202 | gActiveConsumer--; 203 | } 204 | 205 | /// ----------------------------------------------------------- 206 | /// 207 | /// Rigtorp section End 208 | /// 209 | /// ----------------------------------------------------------- 210 | 211 | /// ----------------------------------------------------------- 212 | /// 213 | /// FastQueueASM section Start 214 | /// 215 | /// ----------------------------------------------------------- 216 | 217 | void fastQueueASMProducer(FastQueueASM::DataBlock *pQueue, int32_t aCPU) { 218 | if (!pinThread(aCPU)) { 219 | std::cout << "Pin CPU fail. " << std::endl; 220 | return; 221 | } 222 | while (!gStartBench) { 223 | #ifdef _MSC_VER 224 | __nop(); 225 | #else 226 | asm volatile ("NOP"); 227 | #endif 228 | } 229 | uint64_t lCounter = 0; 230 | while (gActiveProducer) { 231 | auto lTheObject = new MyObject(); 232 | lTheObject->mIndex = lCounter++; 233 | FastQueueASM::push_item(pQueue, lTheObject); 234 | } 235 | stopQueue(pQueue); 236 | } 237 | 238 | void fastQueueASMConsumer(FastQueueASM::DataBlock *pQueue, int32_t aCPU) { 239 | if (!pinThread(aCPU)) { 240 | std::cout << "Pin CPU fail. 
" << std::endl; 241 | gActiveConsumer--; 242 | return; 243 | } 244 | uint64_t lCounter = 0; 245 | while (true) { 246 | auto lResult = (MyObject *) FastQueueASM::pop_item(pQueue); 247 | if (lResult == nullptr) { 248 | break; 249 | } 250 | if (lResult->mIndex != lCounter) { 251 | std::cout << "Queue item error " << lResult->mIndex << " " << lCounter << std::endl; 252 | } 253 | delete lResult; 254 | lCounter++; 255 | } 256 | gCounter += lCounter; 257 | gActiveConsumer--; 258 | } 259 | 260 | 261 | /// ----------------------------------------------------------- 262 | /// 263 | /// FastQueueASM section End 264 | /// 265 | /// ----------------------------------------------------------- 266 | 267 | /// ----------------------------------------------------------- 268 | /// 269 | /// deaodSPSC section Start 270 | /// 271 | /// ----------------------------------------------------------- 272 | 273 | 274 | void deaodSPSCProducer(deaod::spsc_queue *pQueue, int32_t aCPU) { 275 | if (!pinThread(aCPU)) { 276 | std::cout << "Pin CPU fail. " << std::endl; 277 | return; 278 | } 279 | while (!gStartBench) { 280 | #ifdef _MSC_VER 281 | __nop(); 282 | #else 283 | asm volatile ("NOP"); 284 | #endif 285 | } 286 | uint64_t lCounter = 0; 287 | while (gActiveProducer) { 288 | auto lTheObject = new MyObject(); 289 | lTheObject->mIndex = lCounter++; 290 | bool lAbleToPush = false; 291 | while (!lAbleToPush && gActiveProducer) { 292 | lAbleToPush = pQueue->push(lTheObject); 293 | } 294 | } 295 | } 296 | 297 | void deaodSPSCConsumer(deaod::spsc_queue *pQueue, int32_t aCPU) { 298 | if (!pinThread(aCPU)) { 299 | std::cout << "Pin CPU fail. 
" << std::endl; 300 | gActiveConsumer--; 301 | return; 302 | } 303 | uint64_t lCounter = 0; 304 | while (true) { 305 | 306 | MyObject *lResult = nullptr; 307 | bool lAbleToPop = false; 308 | while (!lAbleToPop && gActiveProducer) { 309 | lAbleToPop = pQueue->pop(lResult); 310 | } 311 | if (lResult == nullptr) { 312 | break; 313 | } 314 | if (lResult->mIndex != lCounter) { 315 | std::cout << "Queue item error" << std::endl; 316 | } 317 | lCounter++; 318 | delete lResult; 319 | } 320 | gCounter += lCounter; 321 | gActiveConsumer--; 322 | } 323 | 324 | /// ----------------------------------------------------------- 325 | /// 326 | /// deaodSPSC section End 327 | /// 328 | /// ----------------------------------------------------------- 329 | 330 | /// ----------------------------------------------------------- 331 | /// 332 | /// FastQueueRaw section Start 333 | /// 334 | /// ----------------------------------------------------------- 335 | 336 | void fastQueueProducerRaw(FastQueue *pQueue, int32_t aCPU) { 337 | if (!pinThread(aCPU)) { 338 | std::cout << "Pin CPU fail. " << std::endl; 339 | return; 340 | } 341 | while (!gStartBench) { 342 | #ifdef _MSC_VER 343 | __nop(); 344 | #else 345 | asm volatile ("NOP"); 346 | #endif 347 | } 348 | uint64_t lCounter = 0; 349 | MyObject* lTheObject = nullptr; 350 | while (gActiveProducer) { 351 | lTheObject = new MyObject(); 352 | lTheObject->mIndex = lCounter++; 353 | pQueue->pushRaw(lTheObject); 354 | } 355 | lTheObject = nullptr; 356 | pQueue->pushRaw(lTheObject); 357 | } 358 | 359 | void fastQueueConsumerRaw(FastQueue *pQueue, int32_t aCPU) { 360 | if (!pinThread(aCPU)) { 361 | std::cout << "Pin CPU fail. 
" << std::endl; 362 | gActiveConsumer--; 363 | return; 364 | } 365 | uint64_t lCounter = 0; 366 | MyObject* lResult = nullptr; 367 | while (true) { 368 | pQueue->popRaw(lResult); 369 | if (lResult == nullptr) { 370 | break; 371 | } 372 | if (lResult->mIndex != lCounter) { 373 | std::cout << "Queue item error" << std::endl; 374 | } 375 | lCounter++; 376 | delete lResult; 377 | } 378 | gCounter += lCounter; 379 | gActiveConsumer--; 380 | } 381 | 382 | /// ----------------------------------------------------------- 383 | /// 384 | /// FastQueueRaw section End 385 | /// 386 | /// ----------------------------------------------------------- 387 | 388 | 389 | int main() { 390 | 391 | /// 392 | /// BoostLockfree test -> 393 | /// 394 | 395 | // Create the queue 396 | auto lBoostLockFree = new boost::lockfree::spsc_queue>; 397 | 398 | // Start the consumer(s) / Producer(s) 399 | gActiveConsumer++; 400 | std::thread([lBoostLockFree] { return boostLockFreeConsumer(lBoostLockFree, CONSUMER_CPU); }).detach(); 401 | std::thread([lBoostLockFree] { return boostLockFreeProducer(lBoostLockFree, PRODUCER_CPU); }).detach(); 402 | 403 | // Wait for the OS to actually get it done. 404 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 405 | 406 | // Start the test 407 | std::cout << "BoostLockFree pointer test started." << std::endl; 408 | gStartBench = true; 409 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 410 | 411 | // End the test 412 | gActiveProducer = false; 413 | std::cout << "BoostLockFree pointer test ended." << std::endl; 414 | 415 | // Wait for the consumers to 'join' 416 | // Why not the classic join? I prepared for a multi thread case I need this function for. 417 | while (gActiveConsumer) { 418 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 419 | } 420 | 421 | // Garbage collect the queue 422 | delete lBoostLockFree; 423 | 424 | // Print the result. 
425 | std::cout << "BoostLockFree Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 426 | 427 | // Zero the test parameters. 428 | gStartBench = false; 429 | gActiveProducer = true; 430 | gCounter = 0; 431 | gActiveConsumer = 0; 432 | 433 | /// 434 | /// FastQueue test -> 435 | /// 436 | 437 | // Create the queue 438 | auto lFastQueue = new FastQueue(); 439 | 440 | // Start the consumer(s) / Producer(s) 441 | gActiveConsumer++; 442 | std::thread([lFastQueue] { return fastQueueConsumer(lFastQueue, CONSUMER_CPU); }).detach(); 443 | std::thread([lFastQueue] { return fastQueueProducer(lFastQueue, PRODUCER_CPU); }).detach(); 444 | 445 | // Wait for the OS to actually get it done. 446 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 447 | 448 | // Start the test 449 | std::cout << "FastQueue pointer test started." << std::endl; 450 | gStartBench = true; 451 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 452 | 453 | // End the test 454 | gActiveProducer = false; 455 | std::cout << "FastQueue pointer test ended." << std::endl; 456 | 457 | // Wait for the consumers to 'join' 458 | // Why not the classic join? I prepared for a multi thread case I need this function for. 459 | while (gActiveConsumer) { 460 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 461 | } 462 | 463 | // Garbage collect the queue 464 | delete lFastQueue; 465 | 466 | // Print the result. 467 | std::cout << "FastQueue Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 468 | 469 | // Zero the test parameters. 
470 | gStartBench = false; 471 | gActiveProducer = true; 472 | gCounter = 0; 473 | gActiveConsumer = 0; 474 | 475 | /// 476 | /// Erik Rigtorp SPSC test -> 477 | /// 478 | 479 | // Create the queue 480 | auto lRigtorpSPSCQueue = new rigtorp::SPSCQueue(QUEUE_MASK); 481 | 482 | // Start the consumer(s) / Producer(s) 483 | gActiveConsumer++; 484 | std::thread([lRigtorpSPSCQueue] { return rigtorpQueueConsumer(lRigtorpSPSCQueue, CONSUMER_CPU); }).detach(); 485 | std::thread([lRigtorpSPSCQueue] { return rigtorpQueueProducer(lRigtorpSPSCQueue, PRODUCER_CPU); }).detach(); 486 | 487 | // Wait for the OS to actually get it done. 488 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 489 | 490 | // Start the test 491 | std::cout << "Rigtorp pointer test started." << std::endl; 492 | gStartBench = true; 493 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 494 | 495 | 496 | // End the test 497 | gActiveProducer = false; 498 | std::cout << "Rigtorp pointer test ended." << std::endl; 499 | 500 | // Wait for the consumers to 'join' 501 | // Why not the classic join? I prepared for a multi thread case I need this function for. 502 | while (gActiveConsumer) { 503 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 504 | } 505 | 506 | // Garbage collect the queue 507 | delete lRigtorpSPSCQueue; 508 | 509 | // Print the result. 510 | std::cout << "Rigtorp Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 511 | 512 | // Zero the test parameters. 
513 | gStartBench = false; 514 | gActiveProducer = true; 515 | gCounter = 0; 516 | gActiveConsumer = 0; 517 | 518 | /// 519 | /// FastQueueASM test -> 520 | /// 521 | 522 | // Create the queue 523 | auto pQueue = FastQueueASM::newQueue(); 524 | 525 | // Start the consumer(s) / Producer(s) 526 | gActiveConsumer++; 527 | 528 | std::thread([pQueue] { fastQueueASMConsumer(pQueue, CONSUMER_CPU); }).detach(); 529 | std::thread([pQueue] { fastQueueASMProducer(pQueue, PRODUCER_CPU); }).detach(); 530 | 531 | // Wait for the OS to actually get it done. 532 | std::this_thread::sleep_for(std::chrono::milliseconds(100)); 533 | 534 | // Start the test 535 | std::cout << "FastQueueASM pointer test started." << std::endl; 536 | gStartBench = true; 537 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 538 | 539 | // End the test 540 | gActiveProducer = false; 541 | std::cout << "FastQueueASM pointer test ended." << std::endl; 542 | 543 | // Wait for the consumers to 'join' 544 | // Why not the classic join? I prepared for a multi thread case I need this function for. 545 | while (gActiveConsumer) { 546 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 547 | } 548 | 549 | // Garbage collect the queue 550 | deleteQueue(pQueue); 551 | 552 | // Print the result. 553 | std::cout << "FastQueueASM Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 554 | 555 | // Zero the test parameters. 
556 | gStartBench = false; 557 | gActiveProducer = true; 558 | gCounter = 0; 559 | gActiveConsumer = 0; 560 | 561 | /// 562 | /// DeaodSPSC test -> 563 | /// 564 | 565 | // Create the queue 566 | auto deaodSPSC = new deaod::spsc_queue(); 567 | 568 | // Start the consumer(s) / Producer(s) 569 | gActiveConsumer++; 570 | 571 | std::thread([deaodSPSC] { deaodSPSCConsumer(deaodSPSC, CONSUMER_CPU); }).detach(); 572 | std::thread([deaodSPSC] { deaodSPSCProducer(deaodSPSC, PRODUCER_CPU); }).detach(); 573 | 574 | // Wait for the OS to actually get it done. 575 | std::this_thread::sleep_for(std::chrono::milliseconds(100)); 576 | 577 | // Start the test 578 | std::cout << "DeaodSPSC pointer test started." << std::endl; 579 | gStartBench = true; 580 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 581 | 582 | // End the test 583 | gActiveProducer = false; 584 | std::cout << "DeaodSPSC pointer test ended." << std::endl; 585 | 586 | // Wait for the consumers to 'join' 587 | // Why not the classic join? I prepared for a multi thread case I need this function for. 588 | while (gActiveConsumer) { 589 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 590 | } 591 | 592 | // Garbage collect the queue 593 | delete deaodSPSC; 594 | 595 | // Print the result. 596 | std::cout << "DeaodSPSC Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 597 | 598 | // Zero the test parameters. 
599 | gStartBench = false; 600 | gActiveProducer = true; 601 | gCounter = 0; 602 | gActiveConsumer = 0; 603 | 604 | /// 605 | /// FastQueueRaw test -> 606 | /// 607 | 608 | // Create the queue 609 | auto lFastQueueRaw = new FastQueue(); 610 | 611 | // Start the consumer(s) / Producer(s) 612 | gActiveConsumer++; 613 | std::thread([lFastQueueRaw] { return fastQueueConsumerRaw(lFastQueueRaw, CONSUMER_CPU); }).detach(); 614 | std::thread([lFastQueueRaw] { return fastQueueProducerRaw(lFastQueueRaw, PRODUCER_CPU); }).detach(); 615 | 616 | // Wait for the OS to actually get it done. 617 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 618 | 619 | // Start the test 620 | std::cout << "FastQueueRaw pointer test started." << std::endl; 621 | gStartBench = true; 622 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 623 | 624 | // End the test 625 | gActiveProducer = false; 626 | std::cout << "FastQueueRaw pointer test ended." << std::endl; 627 | 628 | // Wait for the consumers to 'join' 629 | // Why not the classic join? I prepared for a multi thread case I need this function for. 630 | while (gActiveConsumer) { 631 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 632 | } 633 | 634 | // Garbage collect the queue 635 | delete lFastQueueRaw; 636 | 637 | // Print the result. 638 | std::cout << "FastQueueRaw Transactions -> " << gCounter / TEST_TIME_DURATION_SEC << "/s" << std::endl; 639 | 640 | 641 | return EXIT_SUCCESS; 642 | } 643 | -------------------------------------------------------------------------------- /FastQueueIntegrityTest.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Anders Cedronius 3 | // 4 | 5 | // Lock-free producer (one thread) and consumer (another thread) integrity test 6 | // The test is performed by the producer producing data at an irregular rate in time 7 | // containing random data and a simple checksum + counter. 
8 | // And a consumer reading the data at an equally (same dynamic range in time) irregular rate 9 | // verifying the checksum and linearity of the counter. The queue is set shallow (2 entries) to 10 | // make the test face queue full/empty situations as often as possible. 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "PinToCPU.h" 18 | #include "FastQueue.h" 19 | 20 | #define QUEUE_MASK 0b1 21 | #define L1_CACHE_LINE 64 22 | #define TEST_TIME_DURATION_SEC 200 23 | 24 | bool gActiveProducer = true; 25 | std::atomic gActiveConsumer = 0; 26 | bool gStartBench = false; 27 | std::atomic gTransactions = 0; 28 | uint64_t gChk = 0; 29 | 30 | void producer(FastQueue>, QUEUE_MASK, L1_CACHE_LINE> *rQueue, int32_t aCPU) { 31 | std::random_device lRndDevice; 32 | std::mt19937 lMersenneEngine{lRndDevice()}; 33 | std::uniform_int_distribution lDist{1, 500}; 34 | auto lGen = [&lDist, &lMersenneEngine]() { 35 | return lDist(lMersenneEngine); 36 | }; 37 | if (!pinThread(aCPU)) { 38 | std::cout << "Pin CPU fail. 
" << std::endl; 39 | rQueue->stopQueue(); 40 | return; 41 | } 42 | while (!gStartBench) { 43 | #ifdef _MSC_VER 44 | __nop(); 45 | #else 46 | asm("NOP"); 47 | #endif 48 | } 49 | uint64_t lCounter = 0; 50 | while (gActiveProducer) { 51 | auto lpData = std::make_unique>(1000); 52 | std::generate(lpData->begin(), lpData->end(), lGen); 53 | *(uint64_t *) lpData->data() = lCounter++; 54 | uint64_t lSimpleSum = std::accumulate(lpData->begin() + 16, lpData->end(), 0); 55 | *(uint64_t *) (lpData->data() + 8) = lSimpleSum; 56 | rQueue->push(lpData); 57 | uint64_t lSleep = lDist(lMersenneEngine); 58 | std::this_thread::sleep_for(std::chrono::nanoseconds(lSleep)); 59 | } 60 | rQueue->stopQueue(); 61 | } 62 | 63 | void consumer(FastQueue>, QUEUE_MASK, L1_CACHE_LINE> *rQueue, int32_t aCPU) { 64 | uint64_t lCounter = 0; 65 | std::random_device lRndDevice; 66 | std::mt19937 lMersenneEngine{lRndDevice()}; 67 | std::uniform_int_distribution lDist{1, 500}; 68 | if (!pinThread(aCPU)) { 69 | std::cout << "Pin CPU fail. " << std::endl; 70 | gActiveConsumer--; 71 | return; 72 | } 73 | gActiveConsumer++; 74 | while (true) { 75 | auto lResult = std::move(rQueue->pop()); 76 | if (lResult == nullptr) { 77 | break; 78 | } 79 | if (lCounter != *(uint64_t *) lResult->data()) { 80 | std::cout << "Test failed.. Not linear data. " << *(uint64_t *) lResult->data() << std::endl; 81 | gActiveConsumer--; 82 | return; 83 | } 84 | uint64_t lSimpleSum = std::accumulate(lResult->begin() + 16, lResult->end(), 0); 85 | if (lSimpleSum != *(uint64_t *) (lResult->data() + 8)) { 86 | std::cout << "Test failed.. Not consistent data. 
" << lSimpleSum << " " << lCounter << " " << gChk 87 | << std::endl; 88 | gActiveConsumer--; 89 | return; 90 | } 91 | lCounter++; 92 | uint64_t lSleep = lDist(lMersenneEngine); 93 | std::this_thread::sleep_for(std::chrono::nanoseconds(lSleep)); 94 | } 95 | gTransactions = lCounter; 96 | gActiveConsumer--; 97 | } 98 | 99 | int main() { 100 | auto lQueue1 = new FastQueue>, QUEUE_MASK, L1_CACHE_LINE>(); 101 | std::thread([lQueue1] { return consumer(lQueue1, 0); }).detach(); 102 | std::thread([lQueue1] { return producer(lQueue1, 2); }).detach(); 103 | std::cout << "Producer -> Consumer (start)" << std::endl; 104 | gStartBench = true; 105 | std::this_thread::sleep_for(std::chrono::seconds(TEST_TIME_DURATION_SEC)); 106 | gActiveProducer = false; 107 | lQueue1->stopQueue(); 108 | std::cout << "Producer -> Consumer (end)" << std::endl; 109 | while (gActiveConsumer) { 110 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); 111 | } 112 | delete lQueue1; 113 | std::cout << "Test ended. Did " << gTransactions << " transactions." << std::endl; 114 | return EXIT_SUCCESS; 115 | } 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Anders Cedronius 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PinToCPU.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Anders Cedronius 3 | // 4 | 5 | #pragma once 6 | 7 | #ifdef __APPLE__ 8 | #include 9 | #ifdef TARGET_OS_MAC 10 | 11 | #include 12 | #include 13 | #import 14 | #define SYSCTL_CORE_COUNT "machdep.cpu.core_count" 15 | 16 | typedef struct cpu_set { 17 | uint32_t count; 18 | } cpu_set_t; 19 | 20 | static inline void 21 | CPU_ZERO(cpu_set_t *cs) { cs->count = 0; } 22 | 23 | static inline void 24 | CPU_SET(int num, cpu_set_t *cs) { cs->count |= (1 << num); } 25 | 26 | static inline int 27 | CPU_ISSET(int num, cpu_set_t *cs) { return (cs->count & (1 << num)); } 28 | 29 | int sched_getaffinity(pid_t pid, size_t cpu_size, cpu_set_t *cpu_set) 30 | { 31 | int32_t core_count = 0; 32 | size_t len = sizeof(core_count); 33 | int ret = sysctlbyname(SYSCTL_CORE_COUNT, &core_count, &len, 0, 0); 34 | if (ret) { 35 | return -1; 36 | } 37 | cpu_set->count = 0; 38 | for (int i = 0; i < core_count; i++) { 39 | cpu_set->count |= (1 << i); 40 | } 41 | return 0; 42 | } 43 | 44 | int pthread_setaffinity_np(pthread_t thread, size_t cpu_size, 45 | cpu_set_t *cpu_set) { 46 | thread_port_t mach_thread; 47 | int core = 0; 48 | 49 | for (core = 0; core < 8 * cpu_size; core++) { 50 | if (CPU_ISSET(core, cpu_set)) break; 51 | } 52 | thread_affinity_policy_data_t policy = { core }; 53 | mach_thread = 
pthread_mach_thread_np(thread); 54 | thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, 55 | (thread_policy_t)&policy, 1); 56 | return 0; 57 | } 58 | 59 | bool pinThread(int32_t aCpu) { 60 | if (aCpu < 0) { 61 | return false; 62 | } 63 | cpu_set_t lCpuSet; 64 | CPU_ZERO(&lCpuSet); 65 | CPU_SET(aCpu, &lCpuSet); 66 | if (pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &lCpuSet)) { 67 | return false; 68 | } 69 | return true; 70 | } 71 | 72 | #else 73 | #error Only MacOS supported 74 | #endif 75 | #elif defined _WIN64 76 | #include 77 | bool pinThread(int32_t aCpu) { 78 | if (aCpu > 64) { 79 | throw std::runtime_error("Support for more than 64 CPU's under Windows is not implemented."); 80 | } 81 | HANDLE lThread = GetCurrentThread(); 82 | DWORD_PTR lThreadAffinityMask = 1ULL << aCpu; 83 | DWORD_PTR lReturn = SetThreadAffinityMask(lThread, lThreadAffinityMask); 84 | if (lReturn) { 85 | return true; 86 | } 87 | return false; 88 | } 89 | #elif __linux 90 | bool pinThread(int32_t aCpu) { 91 | if (aCpu < 0) { 92 | return false; 93 | } 94 | cpu_set_t lCpuSet; 95 | CPU_ZERO(&lCpuSet); 96 | CPU_SET(aCpu, &lCpuSet); 97 | if (pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &lCpuSet)) { 98 | return false; 99 | } 100 | return true; 101 | } 102 | #else 103 | #error OS not supported 104 | #endif 105 | 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Logo](fastqueuesmall.png) 2 | 3 | # FastQueue 4 | 5 | Please also see the 8 byte (64 bit pointer) version -> 6 | [FastQueue2](https://github.com/andersc/fastqueue2) 7 | 8 | FastQueue is a single producer single consumer (SPSC) 'process to process queue' similar to *boost::lockfree::spsc_queue* 9 | 10 | FastQueue is slightly faster than the boost implementation (tested on a handful systems and architectures) and is not as strict as boost is when it comes to data-types it can 
carry, it's possible to transfer smart pointers for example. 11 | 12 | FastQueue is what's called a lock-free queue. However, there must always be some sort of lock to prevent race conditions when two asynchronous workers communicate. Many SPSC solutions use atomics to guard the data. FastQueue uses a memory barrier technique and limit it's usage to 64-bit platforms only for cross thread variable data consistency. 13 | 14 | FastQueue can be pictured as illustrated below: 15 | 16 | ``` 17 | FastQueue 18 | ◀───────────────────────────────────▶ 19 | .─────────. 20 | ┌────┐ '─. 21 | ,'│Data│ ───▶ ┌────┐ 22 | Core x / Thread x ╱ └────┴──────.│Data│╲ Core y / Thread y 23 | ◀───────────────▶ ╱ ,' └────┘ ╲ ◀───────────────▶ 24 | ┌───────────────┐ Push ┌────┐╱ ╲ : Pop ┌───────────────┐ 25 | │ Data producer │ ─────▶│Data│Circular buffer: │─────▶ │ Data consumer │ 26 | └───────────────┘ ├────┘ ┌────┤ └───────────────┘ 27 | : ╲ ╱│Empty 28 | ╲ ╲ ╱ └────┘ 29 | ╲ ┌────┐ ,' ╱ 30 | ╲│Empty`─────┌────┐ ╱ 31 | └────┘ │Empty' 32 | '─. ◀─── └────┘ 33 | `───────' 34 | ``` 35 | 36 | FastQueue is aiming to be among the top performing SPSC queues. When it comes to measuring performance when using templated code the compiler may optimize the final solution in ways where the queue might change its performance depending on code changes not related to the queue. If you're aiming for speed, it might be wise to benchmark FastQueue against other SPSC queues in your live implementation, match L1_CACHE (see below \*2) size to the executing CPU and tune the queue depth (see below \*1) to match the data flow avoiding for example hitting the limit too often. 37 | 38 | Various compilers, CPU's and Architectures will result in different reults. 39 | For example in my tests [Rigtorps SPSC](https://github.com/rigtorp/SPSCQueue) queue is really fast on ARM64. 40 | 41 | **Apple M1 Pro** 42 | 43 | ``` 44 | boost lock free pointer test started. 45 | boost lock free pointer test ended. 
46 | BoostLockFree Transactions -> 8437017/s 47 | FastQueue pointer test started. 48 | FastQueue pointer test ended. 49 | FastQueue Transactions -> 9886604/s 50 | Rigtorp pointer test started. 51 | Rigtorp pointer test ended. 52 | Rigtorp Transactions -> 10974382/s 53 | FastQueueASM pointer test started. 54 | FastQueueASM pointer test ended. 55 | FastQueueASM Transactions -> 9471164/s 56 | ``` 57 | 58 | However, on X64 platforms I don't see the same gain in my benchmarks. 59 | 60 | **AMD EPYC 7763 64-Core Processor** 61 | 62 | ``` 63 | BoostLockFree pointer test started. 64 | BoostLockFree pointer test ended. 65 | BoostLockFree Transactions -> 6851164/s 66 | FastQueue pointer test started. 67 | FastQueue pointer test ended. 68 | FastQueue Transactions -> 8516819/s 69 | Rigtorp pointer test started. 70 | Rigtorp pointer test ended. 71 | Rigtorp Transactions -> 8332916/s 72 | FastQueueASM pointer test started. 73 | FastQueueASM pointer test ended. 74 | FastQueueASM Transactions -> 8856282/s 75 | ``` 76 | 77 | The fastest queue I've found is Deaod's solution part of FastQueueCompare.cpp. Test with your compiler on your architecture on your CPU to see the result. 78 | 79 | ## FastQueue Info 80 | 81 | The queue is a header only template class and is implemented in a few lines of C++. 82 | 83 | The code compiles on arm64 or x86_64 CPU's running Windows, MacOS or Linux OS. 84 | 85 | FastQueue is initialised by using three parameters 86 | 87 | ```cpp 88 | auto fastQueue = FastQueue>, QUEUE_MASK, L1_CACHE_LINE>(); 89 | ``` 90 | 91 | The *first parameter* is declaring the type of data to be put on the queue (Typically a pointer). 92 | 93 | *\*1*) 94 | The *second parameter* defines the size of the ringbuffer. It must be a contiguous bitmask from LSB. For example 0b1111 (decimal 15) and is internally used as a mask for the ringbuffer boundaries. Depending on your data flow you want to set this parameter to cover for bursts of data. 
If you produce data faster than you consume or the other way around the size of the buffer is irrelevant as you will end upp full or empty anyway. For that case a combination of push/tryPop / tryPush/Pop can be used to avoid extensive spinlock on the push or pop side depending on if you drain or flood the queue. 95 | 96 | *\*2*) 97 | The *third parameter* defines the spacing in bytes between the data stored in the ring buffer. It's recommended to allign with the size of the L1 cache line size. To obtain the L1 cache line size on linux: *getconf LEVEL1_DCACHE_LINESIZE* om MacOS: *sudo sysctl -a | grep hw.cachelinesize* for more detailed information click the link to Rigtorps solution and read the **Implementation** section. 98 | 99 | 100 | There is also a pure Assembly version *FastQueueASM.h* that I've been playing around with (not 100% tested). FastQueueASM is a bit more difficult to build compared to just dropping in the FastQueue.h into your project. Just look in the CMake file for guidance if you want to test it. I have not found any way to pass parameters or use a common file during precompiling from C/C++ to MASM so the cache line size and buffer mask must be changed in both the C++ and ASM files. The constructor verifies the values so if you by mistake forget to update either value the constructor will throw. 101 | 102 | ## Build 103 | 104 | Build the integrity test by: 105 | 106 | ``` 107 | cmake -DCMAKE_BUILD_TYPE=Release . 108 | cmake --build . 109 | ``` 110 | The integrity test is sending and consuming data at an irregular rate. The data is verified for consistency when consumed. The test executes for 200 seconds and will create race conditions where the queue is full, drained and when data is consumed at the same time data is put on the queue. 111 | 112 | Build the integrity and queue benchmark by: 113 | 114 | ``` 115 | cmake -DCMAKE_BUILD_TYPE=Release -DUSE_BOOST=ON . 116 | cmake --build . 117 | ``` 118 | 119 | This requires BOOST and MASM. 
See the github actions for how to install deps. on the respective platforms. 120 | 121 | The benchmark spins up one consumer thread on a CPU and a consumer thread on another CPU. Then sends as many objects between the two as possible. The data is displayed when done for each test. 122 | 123 | For accurate benchmarks there is C-States, nice factors or running as 'rt' and so on and so on. People write about how to test stuff all the time. My benchmark is maybe indicative, or maybe not. 124 | 125 | 126 | Current build status -> 127 | 128 | **ARM64** 129 | 130 | [![mac_arm64](https://github.com/andersc/fastqueue/actions/workflows/mac_arm64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/mac_arm64.yml) 131 | 132 | [![ubuntu_arm64](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_arm64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_arm64.yml) 133 | 134 | [![win_arm64](https://github.com/andersc/fastqueue/actions/workflows/win_arm64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/win_arm64.yml) 135 | 136 | **X86_64** 137 | 138 | [![mac_x64](https://github.com/andersc/fastqueue/actions/workflows/mac_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/mac_x64.yml) 139 | 140 | [![ubuntu_x64](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_x64.yml) 141 | 142 | [![win_x64](https://github.com/andersc/fastqueue/actions/workflows/win_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/win_x64.yml) 143 | 144 | **Builds with -DUSE_BOOST=ON** 145 | 146 | Github do not provide ARM64 runners. The above ARM64 compiles are cross compiled. Cross compiling BOOST is about as booring as looking at drying paint. So I have not created actions for all systems and architectures. But here is the one I made. 
147 | 148 | **BOOST_ASM_ARM64** 149 | 150 | [![mac_boost_arm64](https://github.com/andersc/fastqueue/actions/workflows/mac_boost_arm64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/mac_boost_arm64.yml) 151 | 152 | **BOOST_ASM_X86_64** 153 | 154 | [![mac_boost_x64](https://github.com/andersc/fastqueue/actions/workflows/mac_boost_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/mac_boost_x64.yml) 155 | 156 | [![ubuntu_boost_x64](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_boost_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/ubuntu_boost_x64.yml) 157 | 158 | [![win_boost_x64](https://github.com/andersc/fastqueue/actions/workflows/win_boost_x64.yml/badge.svg)](https://github.com/andersc/fastqueue/actions/workflows/win_boost_x64.yml) 159 | 160 | 161 | ## Usage 162 | 163 | Copy the *FastQueue.h* file to your project and -> 164 | 165 | ```cpp 166 | 167 | #include "FastQueue.h" 168 | 169 | //Create the queue 170 | //In this example a unique pointer to a vector of uint8_t 171 | //QUEUE_MASK == size of queue as a contiguous bitmask from LSB example 0b1111 172 | //L1_CACHE_LINE == the size of the L1 cache, usually 64 173 | auto fastQueue = FastQueue>, QUEUE_MASK, L1_CACHE_LINE>(); 174 | 175 | //Then the producer of the data pushes the data from one thread 176 | auto dataProduced = std::make_unique>(1000); 177 | fastQueue.push(dataProduced); 178 | 179 | //And the consumer pops the data in another thread 180 | auto dataConsume = fastQueue.pop(); 181 | 182 | //When done signal that from anywhere (can be a third thread) 183 | fastQueue.stopQueue(); 184 | 185 | //The consumer/producer may stop immediately or pop the queue 186 | //until nullptr (in the above example) is received to drain all pushed items. 
187 | 188 | 189 | ``` 190 | 191 | If the producer and / or consumer irregularly consumes or produces data it might be wise to use the **tryPush** / **pushAfterTry** and **tryPop** / **popAfterTry**. This to avoid spending excessive amount of CPU time in spinlocks. Using the tryPush/Pop you may sleep or do other things while waiting for data to consume or free queue slots to put data in. 192 | 193 | 194 | For more examples see the included implementations and tests. 195 | 196 | ## Final words 197 | 198 | Please steal the PinToCPU.h header it's a cross platform CPU affinity tool. And while you're at it, add support for more than 64 CPU's on Windows platforms (I'm obviously not a Windows person ;-) ). 199 | 200 | Have fun and please let me know if you find any quirks bugs or 'ooopses'. 201 | 202 | ## License 203 | 204 | *MIT* 205 | 206 | Read *LICENCE* for details 207 | -------------------------------------------------------------------------------- /SPSCQueue.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 Erik Rigtorp 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/*
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

#pragma once

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>     // SIZE_MAX
#include <memory>      // std::allocator
#include <new>         // std::hardware_destructive_interference_size
#include <stdexcept>
#include <type_traits> // std::enable_if, std::is_*_constructible

#ifdef __has_cpp_attribute
#if __has_cpp_attribute(nodiscard)
#define RIGTORP_NODISCARD [[nodiscard]]
#endif
#endif
#ifndef RIGTORP_NODISCARD
#define RIGTORP_NODISCARD
#endif

namespace rigtorp {

// Single-producer single-consumer lock-free FIFO queue over a ring buffer.
//
// Exactly one thread may use the producer API (emplace/try_emplace/push/
// try_push) while exactly one other thread uses the consumer API (front/pop).
// The ring keeps one slack slot, so a queue constructed with `capacity` holds
// at most `capacity` elements. kPadding slots on both ends of the allocation
// keep the element storage from sharing cache lines with neighbors.
template <typename T, typename Allocator = std::allocator<T>> class SPSCQueue {

#if defined(__cpp_if_constexpr) && defined(__cpp_lib_void_t)
  // Detects whether Allocator provides the C++23 allocate_at_least() API.
  template <typename Alloc2, typename = void>
  struct has_allocate_at_least : std::false_type {};

  template <typename Alloc2>
  struct has_allocate_at_least<
      Alloc2, std::void_t<typename Alloc2::value_type,
                          decltype(std::declval<Alloc2 &>().allocate_at_least(
                              size_t{}))>> : std::true_type {};
#endif

public:
  // Creates a queue that can hold `capacity` elements (clamped to >= 1).
  explicit SPSCQueue(const size_t capacity,
                     const Allocator &allocator = Allocator())
      : capacity_(capacity), allocator_(allocator) {
    // The queue needs at least one element
    if (capacity_ < 1) {
      capacity_ = 1;
    }
    capacity_++; // Needs one slack element
    // Prevent overflowing size_t
    if (capacity_ > SIZE_MAX - 2 * kPadding) {
      capacity_ = SIZE_MAX - 2 * kPadding;
    }

#if defined(__cpp_if_constexpr) && defined(__cpp_lib_void_t)
    if constexpr (has_allocate_at_least<Allocator>::value) {
      // allocate_at_least may return extra slots; fold them into capacity.
      auto res = allocator_.allocate_at_least(capacity_ + 2 * kPadding);
      slots_ = res.ptr;
      capacity_ = res.count - 2 * kPadding;
    } else {
      slots_ = std::allocator_traits<Allocator>::allocate(
          allocator_, capacity_ + 2 * kPadding);
    }
#else
    slots_ = std::allocator_traits<Allocator>::allocate(
        allocator_, capacity_ + 2 * kPadding);
#endif

    static_assert(alignof(SPSCQueue<T>) == kCacheLineSize, "");
    static_assert(sizeof(SPSCQueue<T>) >= 3 * kCacheLineSize, "");
    assert(reinterpret_cast<char *>(&readIdx_) -
               reinterpret_cast<char *>(&writeIdx_) >=
           static_cast<std::ptrdiff_t>(kCacheLineSize));
  }

  ~SPSCQueue() {
    // Destroy any elements still queued, then release the buffer.
    while (front()) {
      pop();
    }
    std::allocator_traits<Allocator>::deallocate(allocator_, slots_,
                                                 capacity_ + 2 * kPadding);
  }

  // non-copyable and non-movable
  SPSCQueue(const SPSCQueue &) = delete;
  SPSCQueue &operator=(const SPSCQueue &) = delete;

  // Producer: constructs an element in place, blocking (spinning) while full.
  template <typename... Args>
  void emplace(Args &&...args) noexcept(
      std::is_nothrow_constructible<T, Args &&...>::value) {
    static_assert(std::is_constructible<T, Args &&...>::value,
                  "T must be constructible with Args&&...");
    auto const writeIdx = writeIdx_.load(std::memory_order_relaxed);
    auto nextWriteIdx = writeIdx + 1;
    if (nextWriteIdx == capacity_) {
      nextWriteIdx = 0;
    }
    // Spin until the consumer frees a slot; refresh the cached read index
    // only on apparent fullness to limit cache-coherency traffic.
    while (nextWriteIdx == readIdxCache_) {
      readIdxCache_ = readIdx_.load(std::memory_order_acquire);
    }
    new (&slots_[writeIdx + kPadding]) T(std::forward<Args>(args)...);
    writeIdx_.store(nextWriteIdx, std::memory_order_release);
  }

  // Producer: constructs an element in place; returns false if the queue is
  // full instead of blocking.
  template <typename... Args>
  RIGTORP_NODISCARD bool try_emplace(Args &&...args) noexcept(
      std::is_nothrow_constructible<T, Args &&...>::value) {
    static_assert(std::is_constructible<T, Args &&...>::value,
                  "T must be constructible with Args&&...");
    auto const writeIdx = writeIdx_.load(std::memory_order_relaxed);
    auto nextWriteIdx = writeIdx + 1;
    if (nextWriteIdx == capacity_) {
      nextWriteIdx = 0;
    }
    if (nextWriteIdx == readIdxCache_) {
      readIdxCache_ = readIdx_.load(std::memory_order_acquire);
      if (nextWriteIdx == readIdxCache_) {
        return false;
      }
    }
    new (&slots_[writeIdx + kPadding]) T(std::forward<Args>(args)...);
    writeIdx_.store(nextWriteIdx, std::memory_order_release);
    return true;
  }

  // Producer: copy-push, blocking while full.
  void push(const T &v) noexcept(std::is_nothrow_copy_constructible<T>::value) {
    static_assert(std::is_copy_constructible<T>::value,
                  "T must be copy constructible");
    emplace(v);
  }

  // Producer: perfect-forwarding push, blocking while full.
  template <typename P, typename = typename std::enable_if<
                            std::is_constructible<T, P &&>::value>::type>
  void push(P &&v) noexcept(std::is_nothrow_constructible<T, P &&>::value) {
    emplace(std::forward<P>(v));
  }

  // Producer: copy-push; returns false if full.
  RIGTORP_NODISCARD bool
  try_push(const T &v) noexcept(std::is_nothrow_copy_constructible<T>::value) {
    static_assert(std::is_copy_constructible<T>::value,
                  "T must be copy constructible");
    return try_emplace(v);
  }

  // Producer: perfect-forwarding push; returns false if full.
  template <typename P, typename = typename std::enable_if<
                            std::is_constructible<T, P &&>::value>::type>
  RIGTORP_NODISCARD bool
  try_push(P &&v) noexcept(std::is_nothrow_constructible<T, P &&>::value) {
    return try_emplace(std::forward<P>(v));
  }

  // Consumer: pointer to the oldest element, or nullptr if the queue is empty.
  RIGTORP_NODISCARD T *front() noexcept {
    auto const readIdx = readIdx_.load(std::memory_order_relaxed);
    if (readIdx == writeIdxCache_) {
      writeIdxCache_ = writeIdx_.load(std::memory_order_acquire);
      if (writeIdxCache_ == readIdx) {
        return nullptr;
      }
    }
    return &slots_[readIdx + kPadding];
  }

  // Consumer: destroys and removes the oldest element. Precondition: the
  // queue is non-empty (checked via assert).
  void pop() noexcept {
    static_assert(std::is_nothrow_destructible<T>::value,
                  "T must be nothrow destructible");
    auto const readIdx = readIdx_.load(std::memory_order_relaxed);
    assert(writeIdx_.load(std::memory_order_acquire) != readIdx);
    slots_[readIdx + kPadding].~T();
    auto nextReadIdx = readIdx + 1;
    if (nextReadIdx == capacity_) {
      nextReadIdx = 0;
    }
    readIdx_.store(nextReadIdx, std::memory_order_release);
  }

  // Snapshot of the number of queued elements (may be stale under
  // concurrent access).
  RIGTORP_NODISCARD size_t size() const noexcept {
    std::ptrdiff_t diff = writeIdx_.load(std::memory_order_acquire) -
                          readIdx_.load(std::memory_order_acquire);
    if (diff < 0) {
      diff += capacity_;
    }
    return static_cast<size_t>(diff);
  }

  RIGTORP_NODISCARD bool empty() const noexcept {
    return writeIdx_.load(std::memory_order_acquire) ==
           readIdx_.load(std::memory_order_acquire);
  }

  // Maximum number of elements (excludes the slack slot).
  RIGTORP_NODISCARD size_t capacity() const noexcept { return capacity_ - 1; }

private:
#ifdef __cpp_lib_hardware_interference_size
  static constexpr size_t kCacheLineSize =
      std::hardware_destructive_interference_size;
#else
  static constexpr size_t kCacheLineSize = 64;
#endif

  // Padding to avoid false sharing between slots_ and adjacent allocations
  static constexpr size_t kPadding = (kCacheLineSize - 1) / sizeof(T) + 1;

private:
  size_t capacity_;
  T *slots_;
#if defined(__has_cpp_attribute) && __has_cpp_attribute(no_unique_address)
  Allocator allocator_ [[no_unique_address]];
#else
  Allocator allocator_;
#endif

  // Align to cache line size in order to avoid false sharing
  // readIdxCache_ and writeIdxCache_ are used to reduce the amount of cache
  // coherency traffic
  alignas(kCacheLineSize) std::atomic<size_t> writeIdx_ = {0};
  alignas(kCacheLineSize) size_t readIdxCache_ = 0;
  alignas(kCacheLineSize) std::atomic<size_t> readIdx_ = {0};
  alignas(kCacheLineSize) size_t writeIdxCache_ = 0;

  // Padding to avoid adjacent allocations to share cache line with
  // writeIdxCache_
  char padding_[kCacheLineSize - sizeof(writeIdxCache_)];
};
} // namespace rigtorp
Redistributions in binary form must reproduce the above copyright 20 | # notice, this list of conditions and the following disclaimer in the 21 | # documentation and/or other materials provided with the distribution. 22 | # 23 | # 3. Neither the name of the copyright holder nor the names of its 24 | # contributors may be used to endorse or promote products derived from 25 | # this software without specific prior written permission. 26 | # 27 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 30 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 31 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 32 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 33 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 34 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 35 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 37 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 38 | # POSSIBILITY OF SUCH DAMAGE. 39 | # 40 | # This file is based off of the Platform/Darwin.cmake and 41 | # Platform/UnixPaths.cmake files which are included with CMake 2.8.4 42 | # It has been altered for iOS development. 
43 | # 44 | # Updated by Alex Stewart (alexs.mac@gmail.com) 45 | # 46 | # ***************************************************************************** 47 | # Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) 48 | # under the BSD-3-Clause license 49 | # https://github.com/leetal/ios-cmake 50 | # ***************************************************************************** 51 | # 52 | # INFORMATION / HELP 53 | # 54 | # The following options control the behaviour of this toolchain: 55 | # 56 | # PLATFORM: (default "OS64") 57 | # OS = Build for iPhoneOS. 58 | # OS64 = Build for arm64 iphoneOS. 59 | # OS64COMBINED = Build for arm64 x86_64 iphoneOS. Combined into FAT STATIC lib (supported on 3.14+ of CMakewith "-G Xcode" argument ONLY) 60 | # SIMULATOR = Build for x86 i386 iphoneOS Simulator. 61 | # SIMULATOR64 = Build for x86_64 iphoneOS Simulator. 62 | # SIMULATORARM64 = Build for arm64 iphoneOS Simulator. 63 | # TVOS = Build for arm64 tvOS. 64 | # TVOSCOMBINED = Build for arm64 x86_64 tvOS. Combined into FAT STATIC lib (supported on 3.14+ of CMake with "-G Xcode" argument ONLY) 65 | # SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. 66 | # WATCHOS = Build for armv7k arm64_32 for watchOS. 67 | # WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS. Combined into FAT STATIC lib (supported on 3.14+ of CMake with "-G Xcode" argument ONLY) 68 | # SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. 69 | # MAC = Build for x86_64 macOS. 70 | # MAC_ARM64 = Build for Apple Silicon macOS. 71 | # MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). 72 | # Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS 73 | # MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). 74 | # Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS 75 | # 76 | # CMAKE_OSX_SYSROOT: Path to the SDK to use. 
By default this is 77 | # automatically determined from PLATFORM and xcodebuild, but 78 | # can also be manually specified (although this should not be required). 79 | # 80 | # CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform 81 | # being compiled for. By default this is automatically determined from 82 | # CMAKE_OSX_SYSROOT, but can also be manually specified (although this should 83 | # not be required). 84 | # 85 | # DEPLOYMENT_TARGET: Minimum SDK version to target. Default 2.0 on watchOS and 9.0 on tvOS+iOS 86 | # 87 | # ENABLE_BITCODE: (1|0) Enables or disables bitcode support. Default 1 (true) 88 | # 89 | # ENABLE_ARC: (1|0) Enables or disables ARC support. Default 1 (true, ARC enabled by default) 90 | # 91 | # ENABLE_VISIBILITY: (1|0) Enables or disables symbol visibility support. Default 0 (false, visibility hidden by default) 92 | # 93 | # ENABLE_STRICT_TRY_COMPILE: (1|0) Enables or disables strict try_compile() on all Check* directives (will run linker 94 | # to actually check if linking is possible). Default 0 (false, will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) 95 | # 96 | # ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM 97 | # OS = armv7 armv7s arm64 (if applicable) 98 | # OS64 = arm64 (if applicable) 99 | # SIMULATOR = i386 100 | # SIMULATOR64 = x86_64 101 | # SIMULATORARM64 = arm64 102 | # TVOS = arm64 103 | # SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) 104 | # WATCHOS = armv7k arm64_32 (if applicable) 105 | # SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) 106 | # MAC = x86_64 107 | # MAC_ARM64 = arm64 108 | # MAC_CATALYST = x86_64 109 | # MAC_CATALYST_ARM64 = arm64 110 | # 111 | # This toolchain defines the following properties (available via get_property()) for use externally: 112 | # 113 | # PLATFORM: The currently targeted platform. 
114 | # XCODE_VERSION: Version number (not including Build version) of Xcode detected. 115 | # SDK_VERSION: Version of SDK being used. 116 | # OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). 117 | # APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set! 118 | # 119 | # This toolchain defines the following macros for use externally: 120 | # 121 | # set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) 122 | # A convenience macro for setting xcode specific properties on targets. 123 | # Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel 124 | # example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). 125 | # 126 | # find_host_package (PROGRAM ARGS) 127 | # A macro used to find executable programs on the host system, not within the 128 | # environment. Thanks to the android-cmake project for providing the 129 | # command. 130 | # 131 | 132 | cmake_minimum_required(VERSION 3.8.0) 133 | 134 | # CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. 135 | if(IOS_TOOLCHAIN_HAS_RUN) 136 | return() 137 | endif(IOS_TOOLCHAIN_HAS_RUN) 138 | set(IOS_TOOLCHAIN_HAS_RUN true) 139 | 140 | ############################################################################### 141 | # OPTIONS # 142 | ############################################################################### 143 | 144 | option(DROP_32_BIT "Drops the 32-bit targets universally." 
YES) 145 | 146 | ############################################################################### 147 | # END OPTIONS # 148 | ############################################################################### 149 | 150 | # List of supported platform values 151 | list(APPEND _supported_platforms 152 | "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" 153 | "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" 154 | "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" 155 | "MAC" "MAC_ARM64" 156 | "MAC_CATALYST" "MAC_CATALYST_ARM64") 157 | 158 | # Cache what generator is used 159 | set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") 160 | 161 | # Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) 162 | if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") 163 | set(MODERN_CMAKE YES) 164 | endif() 165 | 166 | # Get the Xcode version being used. 167 | # Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. 168 | # Workaround: On first run (in which cache variables are always accessible), set an intermediary environment variable. 169 | # 170 | # NOTE: This pattern is used i many places in this toolchain to speed up checks of all sorts 171 | if(DEFINED XCODE_VERSION_INT) 172 | # Environment variables are always preserved. 173 | set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") 174 | elseif(DEFINED ENV{_XCODE_VERSION_INT}) 175 | set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") 176 | elseif(NOT DEFINED XCODE_VERSION_INT) 177 | find_program(XCODEBUILD_EXECUTABLE xcodebuild) 178 | if(NOT XCODEBUILD_EXECUTABLE) 179 | message(FATAL_ERROR "xcodebuild not found. 
Please install either the standalone commandline tools or Xcode.") 180 | endif() 181 | execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version 182 | OUTPUT_VARIABLE XCODE_VERSION_INT 183 | ERROR_QUIET 184 | OUTPUT_STRIP_TRAILING_WHITESPACE) 185 | string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") 186 | string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") 187 | set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") 188 | endif() 189 | 190 | # Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) 191 | # if you don't set a deployment target it will be set the way you only get 64-bit builds 192 | if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) 193 | # Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) 194 | set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") 195 | endif() 196 | 197 | # Check if the platform variable is set 198 | if(DEFINED PLATFORM) 199 | # Environment variables are always preserved. 200 | set(ENV{_PLATFORM} "${PLATFORM}") 201 | elseif(DEFINED ENV{_PLATFORM}) 202 | set(PLATFORM "$ENV{_PLATFORM}") 203 | elseif(NOT DEFINED PLATFORM) 204 | message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") 205 | endif () 206 | 207 | # Safeguard that the platform value is set and is one of the supported values 208 | list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) 209 | if("${contains_PLATFORM}" EQUAL "-1") 210 | string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") 211 | message(FATAL_ERROR " Invalid PLATFORM specified! 
Current value: ${PLATFORM}.\n" 212 | " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") 213 | endif() 214 | 215 | # Check if Apple Silicon is supported 216 | if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") 217 | message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") 218 | endif() 219 | 220 | # Touch toolchain variable to suppress "unused variable" warning. 221 | # This happens if CMake is invoked with the same command line the second time. 222 | if(CMAKE_TOOLCHAIN_FILE) 223 | endif() 224 | 225 | # Fix for PThread library not in path 226 | set(CMAKE_THREAD_LIBS_INIT "-lpthread") 227 | set(CMAKE_HAVE_THREADS_LIBRARY 1) 228 | set(CMAKE_USE_WIN32_THREADS_INIT 0) 229 | set(CMAKE_USE_PTHREADS_INIT 1) 230 | 231 | # Specify minimum version of deployment target. 232 | if(NOT DEFINED DEPLOYMENT_TARGET) 233 | if (PLATFORM MATCHES "WATCHOS") 234 | # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). 235 | set(DEPLOYMENT_TARGET "4.0") 236 | elseif(PLATFORM STREQUAL "MAC") 237 | # Unless specified, SDK version 10.13 (High sierra) is used by default as minimum target version (macos). 238 | set(DEPLOYMENT_TARGET "10.13") 239 | elseif(PLATFORM STREQUAL "MAC_ARM64") 240 | # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version (macos on arm). 241 | set(DEPLOYMENT_TARGET "11.0") 242 | elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") 243 | # Unless specified, SDK version 13.0 is used by default as minimum target version (mac catalyst minimum requirement). 244 | set(DEPLOYMENT_TARGET "13.0") 245 | else() 246 | # Unless specified, SDK version 11.0 is used by default as minimum target version (iOS, tvOS). 
247 | set(DEPLOYMENT_TARGET "11.0") 248 | endif() 249 | message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") 250 | elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.0") 251 | message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.0!") 252 | endif() 253 | 254 | # Store the DEPLOYMENT_TARGET in the cache 255 | set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") 256 | 257 | # Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) 258 | if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) 259 | set(PLATFORM "OS64") 260 | message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") 261 | elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) 262 | set(PLATFORM "SIMULATOR64") 263 | message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") 264 | endif() 265 | 266 | set(PLATFORM_INT "${PLATFORM}") 267 | 268 | if(DEFINED ARCHS) 269 | string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") 270 | endif() 271 | 272 | # Determine the platform name and architectures for use in xcodebuild commands 273 | # from the specified PLATFORM_INT name. 
274 | if(PLATFORM_INT STREQUAL "OS") 275 | set(SDK_NAME iphoneos) 276 | if(NOT ARCHS) 277 | set(ARCHS armv7 armv7s arm64) 278 | set(APPLE_TARGET_TRIPLE_INT arm-apple-ios) 279 | endif() 280 | elseif(PLATFORM_INT STREQUAL "OS64") 281 | set(SDK_NAME iphoneos) 282 | if(NOT ARCHS) 283 | if (XCODE_VERSION_INT VERSION_GREATER 10.0) 284 | set(ARCHS arm64) # Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example 285 | else() 286 | set(ARCHS arm64) 287 | endif() 288 | set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios) 289 | else() 290 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios) 291 | endif() 292 | elseif(PLATFORM_INT STREQUAL "OS64COMBINED") 293 | set(SDK_NAME iphoneos) 294 | if(MODERN_CMAKE) 295 | if(NOT ARCHS) 296 | if (XCODE_VERSION_INT VERSION_GREATER 10.0) 297 | set(ARCHS arm64 x86_64) # Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example 298 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") 299 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") 300 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") 301 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") 302 | else() 303 | set(ARCHS arm64 x86_64) 304 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") 305 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") 306 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") 307 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") 308 | endif() 309 | set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios) 310 | else() 311 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios) 312 | endif() 313 | else() 314 | message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work") 315 | endif() 316 | elseif(PLATFORM_INT STREQUAL "SIMULATOR") 317 | set(SDK_NAME 
iphonesimulator) 318 | if(NOT ARCHS) 319 | set(ARCHS i386) 320 | set(APPLE_TARGET_TRIPLE_INT i386-apple-ios) 321 | else() 322 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios) 323 | endif() 324 | message(DEPRECATION "SIMULATOR IS DEPRECATED. Consider using SIMULATOR64 instead.") 325 | elseif(PLATFORM_INT STREQUAL "SIMULATOR64") 326 | set(SDK_NAME iphonesimulator) 327 | if(NOT ARCHS) 328 | set(ARCHS x86_64) 329 | set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios) 330 | else() 331 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios) 332 | endif() 333 | elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") 334 | set(SDK_NAME iphonesimulator) 335 | if(NOT ARCHS) 336 | set(ARCHS arm64) 337 | set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios) 338 | else() 339 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios) 340 | endif() 341 | elseif(PLATFORM_INT STREQUAL "TVOS") 342 | set(SDK_NAME appletvos) 343 | if(NOT ARCHS) 344 | set(ARCHS arm64) 345 | set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos) 346 | else() 347 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos) 348 | endif() 349 | elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") 350 | set(SDK_NAME appletvos) 351 | if(MODERN_CMAKE) 352 | if(NOT ARCHS) 353 | set(ARCHS arm64 x86_64) 354 | set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos) 355 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") 356 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") 357 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") 358 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") 359 | else() 360 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos) 361 | endif() 362 | else() 363 | message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") 364 | endif() 365 | elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") 366 | set(SDK_NAME appletvsimulator) 367 | if(NOT ARCHS) 368 | set(ARCHS x86_64) 369 | set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos) 
370 | else() 371 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos) 372 | endif() 373 | elseif(PLATFORM_INT STREQUAL "WATCHOS") 374 | set(SDK_NAME watchos) 375 | if(NOT ARCHS) 376 | if (XCODE_VERSION_INT VERSION_GREATER 10.0) 377 | set(ARCHS armv7k arm64_32) 378 | set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos) 379 | else() 380 | set(ARCHS armv7k) 381 | set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos) 382 | endif() 383 | else() 384 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos) 385 | endif() 386 | elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") 387 | set(SDK_NAME watchos) 388 | if(MODERN_CMAKE) 389 | if(NOT ARCHS) 390 | if (XCODE_VERSION_INT VERSION_GREATER 10.0) 391 | set(ARCHS armv7k arm64_32 i386) 392 | set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos) 393 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") 394 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") 395 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") 396 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") 397 | else() 398 | set(ARCHS armv7k i386) 399 | set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos) 400 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") 401 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") 402 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") 403 | set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") 404 | endif() 405 | else() 406 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos) 407 | endif() 408 | else() 409 | message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") 410 | endif() 411 | elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") 412 | set(SDK_NAME watchsimulator) 413 | if(NOT ARCHS) 414 | set(ARCHS i386) 415 | set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos) 416 | else() 417 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos) 418 | endif() 419 | 
elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") 420 | set(SDK_NAME macosx) 421 | if(NOT ARCHS) 422 | set(ARCHS x86_64) 423 | endif() 424 | string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") 425 | if(PLATFORM_INT STREQUAL "MAC") 426 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx) 427 | elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") 428 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) 429 | endif() 430 | elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") 431 | set(SDK_NAME macosx) 432 | if(NOT ARCHS) 433 | set(ARCHS arm64) 434 | endif() 435 | string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") 436 | if(PLATFORM_INT STREQUAL "MAC_ARM64") 437 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx) 438 | elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") 439 | set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) 440 | endif() 441 | else() 442 | message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") 443 | endif() 444 | 445 | if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") 446 | message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") 447 | endif() 448 | 449 | if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") 450 | set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") 451 | set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") 452 | set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst") 453 | if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) 454 | set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") 455 | else() 456 | set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") 457 | endif() 458 | elseif(CMAKE_GENERATOR MATCHES "Xcode") 459 | set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") 460 | if(NOT PLATFORM_INT MATCHES ".*COMBINED") 461 | set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS}") 462 | 
set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS}") 463 | endif() 464 | endif() 465 | 466 | # If user did not specify the SDK root to use, then query xcodebuild for it. 467 | if(DEFINED CMAKE_OSX_SYSROOT_INT) 468 | # Environment variables are always preserved. 469 | set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") 470 | elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) 471 | set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") 472 | elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) 473 | execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path 474 | OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT 475 | ERROR_QUIET 476 | OUTPUT_STRIP_TRAILING_WHITESPACE) 477 | endif() 478 | 479 | if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) 480 | message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" 481 | "is pointing to the correct path. Please run:" 482 | "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" 483 | "and see if that fixes the problem for you.") 484 | message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " 485 | "does not exist.") 486 | elseif(DEFINED CMAKE_OSX_SYSROOT_INT) 487 | set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") 488 | # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. 489 | set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") 490 | endif() 491 | 492 | # Use bitcode or not 493 | if(NOT DEFINED ENABLE_BITCODE AND NOT ARCHS MATCHES "((^|;|, )(i386|x86_64))+") 494 | # Unless specified, enable bitcode support by default 495 | message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!") 496 | set(ENABLE_BITCODE TRUE) 497 | elseif(NOT DEFINED ENABLE_BITCODE) 498 | message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. 
ENABLE_BITCODE not provided for override!") 499 | set(ENABLE_BITCODE FALSE) 500 | endif() 501 | set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL 502 | "Whether or not to enable bitcode" FORCE) 503 | # Use ARC or not 504 | if(NOT DEFINED ENABLE_ARC) 505 | # Unless specified, enable ARC support by default 506 | set(ENABLE_ARC TRUE) 507 | message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") 508 | endif() 509 | set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) 510 | # Use hidden visibility or not 511 | if(NOT DEFINED ENABLE_VISIBILITY) 512 | # Unless specified, disable symbols visibility by default 513 | set(ENABLE_VISIBILITY FALSE) 514 | message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") 515 | endif() 516 | set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) 517 | # Set strict compiler checks or not 518 | if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) 519 | # Unless specified, disable strict try_compile() 520 | set(ENABLE_STRICT_TRY_COMPILE FALSE) 521 | message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") 522 | endif() 523 | set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL 524 | "Whether or not to use strict compiler checks" FORCE) 525 | 526 | # Get the SDK version information. 527 | if(DEFINED SDK_VERSION) 528 | # Environment variables are always preserved. 
529 | set(ENV{_SDK_VERSION} "${SDK_VERSION}") 530 | elseif(DEFINED ENV{_SDK_VERSION}) 531 | set(SDK_VERSION "$ENV{_SDK_VERSION}") 532 | elseif(NOT DEFINED SDK_VERSION) 533 | execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion 534 | OUTPUT_VARIABLE SDK_VERSION 535 | ERROR_QUIET 536 | OUTPUT_STRIP_TRAILING_WHITESPACE) 537 | endif() 538 | 539 | # Find the Developer root for the specific iOS platform being compiled for 540 | # from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in 541 | # CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain 542 | # this information from xcrun or xcodebuild. 543 | if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") 544 | get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) 545 | get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) 546 | if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") 547 | message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") 548 | endif() 549 | endif() 550 | 551 | # Find the C & C++ compilers for the specified SDK. 552 | if(DEFINED CMAKE_C_COMPILER) 553 | # Environment variables are always preserved. 554 | set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") 555 | elseif(DEFINED ENV{_CMAKE_C_COMPILER}) 556 | set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") 557 | elseif(NOT DEFINED CMAKE_C_COMPILER) 558 | execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang 559 | OUTPUT_VARIABLE CMAKE_C_COMPILER 560 | ERROR_QUIET 561 | OUTPUT_STRIP_TRAILING_WHITESPACE) 562 | endif() 563 | if(DEFINED CMAKE_CXX_COMPILER) 564 | # Environment variables are always preserved. 
565 | set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") 566 | elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) 567 | set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") 568 | elseif(NOT DEFINED CMAKE_CXX_COMPILER) 569 | execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ 570 | OUTPUT_VARIABLE CMAKE_CXX_COMPILER 571 | ERROR_QUIET 572 | OUTPUT_STRIP_TRAILING_WHITESPACE) 573 | endif() 574 | # Find (Apple's) libtool. 575 | if(DEFINED BUILD_LIBTOOL) 576 | # Environment variables are always preserved. 577 | set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") 578 | elseif(DEFINED ENV{_BUILD_LIBTOOL}) 579 | set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") 580 | elseif(NOT DEFINED BUILD_LIBTOOL) 581 | execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool 582 | OUTPUT_VARIABLE BUILD_LIBTOOL 583 | ERROR_QUIET 584 | OUTPUT_STRIP_TRAILING_WHITESPACE) 585 | endif() 586 | # Find the toolchain's provided install_name_tool if none is found on the host 587 | if(DEFINED CMAKE_INSTALL_NAME_TOOL) 588 | # Environment variables are always preserved. 589 | set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") 590 | elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) 591 | set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") 592 | elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) 593 | execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool 594 | OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT 595 | ERROR_QUIET 596 | OUTPUT_STRIP_TRAILING_WHITESPACE) 597 | set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") 598 | endif() 599 | 600 | # Configure libtool to be used instead of ar + ranlib to build static libraries. 601 | # This is required on Xcode 7+, but should also work on previous versions of 602 | # Xcode. 
# Configure libtool to be used instead of ar + ranlib to build static
# libraries. This is required on Xcode 7+, but should also work on previous
# versions of Xcode.
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach(lang ${languages})
  # FIX: the rule string had lost its <TARGET> <LINK_FLAGS> <OBJECTS>
  # placeholders (extraction stripped every angle-bracketed token). Without
  # them libtool is invoked as "libtool -static -o" with no output name and
  # no object files, so every static-library link fails.
  set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
      "${BUILD_LIBTOOL} -static -o <TARGET> <LINK_FLAGS> <OBJECTS> "
      CACHE INTERNAL "")
endforeach()

# CMake 3.14+ supports building for iOS, watchOS and tvOS out of the box.
if(MODERN_CMAKE)
  if(SDK_NAME MATCHES "iphone")
    set(CMAKE_SYSTEM_NAME iOS)
  elseif(SDK_NAME MATCHES "macosx")
    set(CMAKE_SYSTEM_NAME Darwin)
  elseif(SDK_NAME MATCHES "appletv")
    set(CMAKE_SYSTEM_NAME tvOS)
  elseif(SDK_NAME MATCHES "watch")
    set(CMAKE_SYSTEM_NAME watchOS)
  endif()
  # Provide flags for a combined FAT library build on newer CMake versions
  if(PLATFORM_INT MATCHES ".*COMBINED")
    set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO")
    set(CMAKE_IOS_INSTALL_COMBINED YES)
    message(STATUS "Will combine built (static) artifacts into FAT lib...")
  endif()
elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10")
  # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified
  set(CMAKE_SYSTEM_NAME iOS)
elseif(NOT DEFINED CMAKE_SYSTEM_NAME)
  # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified
  set(CMAKE_SYSTEM_NAME Darwin)
endif()
# Standard settings.
# Record the SDK version as the target system version.
set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "")
set(UNIX TRUE CACHE BOOL "")
set(APPLE TRUE CACHE BOOL "")

# Classify the platform: plain macOS, Mac Catalyst (iOS-on-Mac), or iOS-family.
if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64")
  set(IOS FALSE CACHE BOOL "")
  set(MACOS TRUE CACHE BOOL "")
elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64")
  # Catalyst builds iOS binaries that run on macOS, so both flags are set.
  set(IOS TRUE CACHE BOOL "")
  set(MACOS TRUE CACHE BOOL "")
else()
  set(IOS TRUE CACHE BOOL "")
endif()

# Archiver/strip tools come from the host toolchain.
set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE)
set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE)

# Set the architectures for which to build.
set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "")

# Change the type of target generated for try_compile() so it'll work when
# cross-compiling (weak compiler checks): a static library needs no linking
# against platform startup code.
if(NOT ENABLE_STRICT_TRY_COMPILE_INT)
  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
endif()
# All iOS/Darwin specific settings - some may be redundant.
655 | set(CMAKE_MACOSX_BUNDLE YES) 656 | set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") 657 | set(CMAKE_SHARED_LIBRARY_PREFIX "lib") 658 | set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") 659 | set(CMAKE_SHARED_MODULE_PREFIX "lib") 660 | set(CMAKE_SHARED_MODULE_SUFFIX ".so") 661 | set(CMAKE_C_COMPILER_ABI ELF) 662 | set(CMAKE_CXX_COMPILER_ABI ELF) 663 | set(CMAKE_C_HAS_ISYSROOT 1) 664 | set(CMAKE_CXX_HAS_ISYSROOT 1) 665 | set(CMAKE_MODULE_EXISTS 1) 666 | set(CMAKE_DL_LIBS "") 667 | set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") 668 | set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") 669 | set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") 670 | set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") 671 | 672 | if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") 673 | set(CMAKE_C_SIZEOF_DATA_PTR 8) 674 | set(CMAKE_CXX_SIZEOF_DATA_PTR 8) 675 | if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") 676 | set(CMAKE_SYSTEM_PROCESSOR "aarch64") 677 | else() 678 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 679 | endif() 680 | else() 681 | set(CMAKE_C_SIZEOF_DATA_PTR 4) 682 | set(CMAKE_CXX_SIZEOF_DATA_PTR 4) 683 | set(CMAKE_SYSTEM_PROCESSOR "arm") 684 | endif() 685 | 686 | # Note that only Xcode 7+ supports the newer more specific: 687 | # -m${SDK_NAME}-version-min flags, older versions of Xcode use: 688 | # -m(ios/ios-simulator)-version-min instead. 689 | if(${CMAKE_VERSION} VERSION_LESS "3.11") 690 | if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") 691 | if(XCODE_VERSION_INT VERSION_LESS 7.0) 692 | set(SDK_NAME_VERSION_FLAGS 693 | "-mios-version-min=${DEPLOYMENT_TARGET}") 694 | else() 695 | # Xcode 7.0+ uses flags we can build directly from SDK_NAME. 
696 | set(SDK_NAME_VERSION_FLAGS 697 | "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") 698 | endif() 699 | elseif(PLATFORM_INT STREQUAL "TVOS") 700 | set(SDK_NAME_VERSION_FLAGS 701 | "-mtvos-version-min=${DEPLOYMENT_TARGET}") 702 | elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") 703 | set(SDK_NAME_VERSION_FLAGS 704 | "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") 705 | elseif(PLATFORM_INT STREQUAL "WATCHOS") 706 | set(SDK_NAME_VERSION_FLAGS 707 | "-mwatchos-version-min=${DEPLOYMENT_TARGET}") 708 | elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") 709 | set(SDK_NAME_VERSION_FLAGS 710 | "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") 711 | elseif(PLATFORM_INT STREQUAL "MAC") 712 | set(SDK_NAME_VERSION_FLAGS 713 | "-mmacosx-version-min=${DEPLOYMENT_TARGET}") 714 | else() 715 | # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. 716 | set(SDK_NAME_VERSION_FLAGS 717 | "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") 718 | endif() 719 | elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") 720 | # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets 721 | set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) 722 | endif() 723 | 724 | if(DEFINED APPLE_TARGET_TRIPLE_INT) 725 | set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") 726 | endif() 727 | 728 | if(PLATFORM_INT MATCHES "^MAC_CATALYST") 729 | set(C_TARGET_FLAGS "-target ${APPLE_TARGET_TRIPLE_INT} -isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include") 730 | endif() 731 | 732 | if(ENABLE_BITCODE_INT) 733 | set(BITCODE "-fembed-bitcode") 734 | set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") 735 | set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") 736 | else() 737 | set(BITCODE "") 738 | set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") 739 | endif() 740 | 741 | if(ENABLE_ARC_INT) 742 | set(FOBJC_ARC "-fobjc-arc") 743 | set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") 744 | else() 745 | set(FOBJC_ARC 
"-fno-objc-arc") 746 | set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") 747 | endif() 748 | 749 | if(NOT ENABLE_VISIBILITY_INT) 750 | foreach(lang ${languages}) 751 | set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") 752 | endforeach() 753 | set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") 754 | set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") 755 | else() 756 | foreach(lang ${languages}) 757 | set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") 758 | endforeach() 759 | set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") 760 | set(VISIBILITY "-fvisibility=default") 761 | endif() 762 | 763 | #Check if Xcode generator is used, since that will handle these flags automagically 764 | if(CMAKE_GENERATOR MATCHES "Xcode") 765 | message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator.") 766 | else() 767 | # Hidden visibility is required for C++ on iOS. 768 | set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} -fobjc-abi-version=2 ${FOBJC_ARC} ${CMAKE_C_FLAGS}") 769 | set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} -fobjc-abi-version=2 ${FOBJC_ARC} ${CMAKE_CXX_FLAGS}") 770 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") 771 | set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DNDEBUG -Os -ffast-math ${CMAKE_CXX_FLAGS_MINSIZEREL}") 772 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG -O2 -g -ffast-math ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") 773 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -ffast-math ${CMAKE_CXX_FLAGS_RELEASE}") 774 | set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") 775 | set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") 776 | set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp 
-arch ${CMAKE_OSX_ARCHITECTURES}") 777 | endif() 778 | 779 | ## Print status messages to inform of the current state 780 | message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") 781 | message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") 782 | message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") 783 | message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") 784 | message(STATUS "Using libtool: ${BUILD_LIBTOOL}") 785 | message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") 786 | if(DEFINED APPLE_TARGET_TRIPLE) 787 | message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") 788 | endif() 789 | message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" 790 | " (SDK version: ${SDK_VERSION})") 791 | if(MODERN_CMAKE) 792 | message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") 793 | endif() 794 | if(CMAKE_GENERATOR MATCHES "Xcode") 795 | message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") 796 | endif() 797 | message(STATUS "CMake version: ${CMAKE_VERSION}") 798 | if(DEFINED SDK_NAME_VERSION_FLAGS) 799 | message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") 800 | endif() 801 | message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") 802 | if(ENABLE_BITCODE_INT) 803 | message(STATUS "Bitcode: Enabled") 804 | else() 805 | message(STATUS "Bitcode: Disabled") 806 | endif() 807 | 808 | if(ENABLE_ARC_INT) 809 | message(STATUS "ARC: Enabled") 810 | else() 811 | message(STATUS "ARC: Disabled") 812 | endif() 813 | 814 | if(ENABLE_VISIBILITY_INT) 815 | message(STATUS "Hiding symbols: Disabled") 816 | else() 817 | message(STATUS "Hiding symbols: Enabled") 818 | endif() 819 | 820 | # Set global properties 821 | set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") 822 | set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") 823 | set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") 
set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}")
set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}")

# Export configurable variables for the try_compile() command. Without this
# list, try_compile() projects would not inherit the toolchain's settings.
set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES
  PLATFORM
  XCODE_VERSION_INT
  SDK_VERSION
  DEPLOYMENT_TARGET
  CMAKE_DEVELOPER_ROOT
  CMAKE_OSX_SYSROOT_INT
  ENABLE_BITCODE
  ENABLE_ARC
  CMAKE_C_COMPILER
  CMAKE_CXX_COMPILER
  BUILD_LIBTOOL
  CMAKE_INSTALL_NAME_TOOL
  CMAKE_C_FLAGS
  CMAKE_CXX_FLAGS
  CMAKE_CXX_FLAGS_DEBUG
  CMAKE_CXX_FLAGS_MINSIZEREL
  CMAKE_CXX_FLAGS_RELWITHDEBINFO
  CMAKE_CXX_FLAGS_RELEASE
  CMAKE_C_LINK_FLAGS
  CMAKE_CXX_LINK_FLAGS
  CMAKE_ASM_FLAGS
)

# Darwin linker conventions: install names, rpaths and dylib/bundle flags.
set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks")
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a")
set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name")

# Set the find root to the SDK developer roots.
# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds.
if(NOT PLATFORM_INT STREQUAL "MAC" AND NOT PLATFORM_INT STREQUAL "MAC_ARM64")
  # FIX: list(APPEND) has no CACHE signature. The previous
  #   list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
  # appended the literal tokens CACHE;INTERNAL;"" as bogus search-path
  # entries. Append the sysroot only, then cache the result explicitly.
  list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}")
  set(CMAKE_FIND_ROOT_PATH "${CMAKE_FIND_ROOT_PATH}" CACHE INTERNAL "")
  set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "")
endif()

# Default to searching for frameworks first.
set(CMAKE_FIND_FRAMEWORK FIRST)

# Set up the default search directories for frameworks. Mac Catalyst targets
# additionally search the iOSSupport framework tree inside the macOS SDK.
if(PLATFORM_INT MATCHES "^MAC_CATALYST")
  set(CMAKE_FRAMEWORK_PATH
    ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks
    ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks
    ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks
    ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "")
else()
  set(CMAKE_FRAMEWORK_PATH
    ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks
    ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks
    ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "")
endif()

# By default, search both the specified iOS SDK and the remainder of the host
# filesystem. Only default these modes; respect any value the user provided.
if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM)
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "")
endif()

#
# Some helper-macros below to simplify and beautify the CMakeFile
#

# set_xcode_property(<target> <property> <value> <relversion>)
# Sets an arbitrary Xcode attribute on <target>. Pass "All" as <relversion>
# to apply the value to every build variant, or a configuration name (e.g.
# "Release") to scope it with Xcode's [variant=...] attribute syntax.
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION)
  set(XCODE_RELVERSION_I "${XCODE_RELVERSION}")
  if(XCODE_RELVERSION_I STREQUAL "All")
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}")
  else()
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}")
  endif()
endmacro()

# This macro lets you find executable programs on the host system.
914 | macro(find_host_package) 915 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 916 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) 917 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) 918 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) 919 | set(_TOOLCHAIN_IOS ${IOS}) 920 | set(IOS FALSE) 921 | find_package(${ARGN}) 922 | set(IOS ${_TOOLCHAIN_IOS}) 923 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) 924 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) 925 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) 926 | set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) 927 | endmacro(find_host_package) -------------------------------------------------------------------------------- /deaod_spsc/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Lukas Bagaric 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | - Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | - Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 17 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 21 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 22 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /deaod_spsc/spsc_queue.hpp: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | Copyright (c) 2019, Lukas Bagaric 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | - Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | - Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | ******************************************************************************* 25 | 26 | This file defines a single template, spsc_queue, which implements a bounded 27 | queue with at most one producer, and one consumer at the same time. 28 | 29 | spsc_queue is intended to be used in environments, where heap-allocation must 30 | never occur. While it is possible to use spsc_queue in real-time environments, 31 | the implementation trades a worse worst-case for a significantly better 32 | average-case. 33 | 34 | spsc_queue has highest throughput under contention if: 35 | * you have small (register sized) elements OR 36 | * if the total size of the queue (size of element times number of elements) 37 | will not exceed the size of your processors fastest cache. 38 | 39 | spsc_queue takes up to three template parameters: 40 | * T: The type of a single element 41 | * queue_size: The number of slots for elements within the queue. 42 | Note: Due to implementation details, one slot is reserved and 43 | cannot be used. 44 | * align_log2: The number of bytes to align on, expressed as an exponent for 45 | two, so the actual alignment is (1 << align_log2) bytes. This 46 | number should be at least log2(alignof(size_t)). Ideal values 47 | avoid destructive hardware interference (false sharing). 48 | Default is 7. 49 | alignof(T) must not be greater than (1 << align_log2). 
50 | 51 | Interface: 52 | General: 53 | bool is_empty() const; 54 | Returns true if there is currently no object in the queue. 55 | Returns false otherwise. 56 | 57 | bool is_full() const; 58 | Returns true if no more objects can be added to the queue. 59 | Returns false otherwise. 60 | 61 | Enqueue: 62 | bool push(const T& elem); 63 | bool push(T&& elem); 64 | Tries to insert elem into the queue. Returns true if successful, false 65 | otherwise. 66 | 67 | size_type push_n(size_type count, const T& elem); 68 | Tries to insert count copies of elem into the queue. Returns the 69 | number of copies successfully inserted. 70 | 71 | template 72 | size_type write(Iterator beg, Iterator end); 73 | Tries to copy elements into the queue from beg, until end is reached. 74 | Returns the number of elements copied into the queue. 75 | 76 | template 77 | size_type write(size_type count, Iterator elems); 78 | Tries to copy count elements into the queue from elems until either the 79 | queue is full or all have been copied. 80 | Returns the number of elements copied into the queue. 81 | 82 | template 83 | bool emplace(Args&&... args); 84 | Tries to insert an object of type T constructed from args into the 85 | queue. Returns true if successful, false otherwise. 86 | 87 | template 88 | size_type emplace_n(size_type count, Args&&... args); 89 | Tries to insert count objects of type T constructed from args into 90 | the queue. Returns the number of objects successfully inserted. 91 | 92 | template 93 | bool produce(Callable&& f); 94 | Tries to insert an object into the queue by calling Callable if there is 95 | space for an object. Returns true if there was space for an object, and 96 | Callable returned true. Returns false otherwise. 97 | Callable is an invocable with one parameter of type void*, and a return 98 | type of bool. Callable is expected to place a new object of type T at 99 | the address passed to it. 
100 | 101 | template 102 | size_type produce_n(size_type count, Callable&& f); 103 | Tries to insert count objects into the queue by calling Callable as long 104 | as there is space in the queue, or until Callable returns false once. 105 | Returns the number of times Callable was invoked and returned true. 106 | Callable is an invocable with one parameter of type void*, and a return 107 | type of bool. Callable is expected to place a new object of type T at 108 | the address passed to it. 109 | 110 | Dequeue: 111 | const T* front() const; 112 | T* front(); 113 | Returns a pointer to the next object in the queue, if such an object 114 | exists. Returns nullptr if the queue is empty. 115 | 116 | void discard(); 117 | Removes the next object from the queue. This function must not be called 118 | if the queue is empty. 119 | 120 | bool pop(T& out); 121 | Tries to move the next object in the queue into out, if such an object 122 | exists. Returns true if out contains a new object. Returns false if the 123 | queue was empty. 124 | 125 | template 126 | size_type read(Iterator beg, Iterator end) 127 | Tries to move elements out of the queue to [beg .. end), until either 128 | all have been moved or the queue is empty. 129 | Returns the number of elements that were moved. 130 | 131 | template 132 | size_type read(size_type count, Iterator elems) 133 | Tries to move elements out of the queue to [elems .. elems + count), 134 | until either count elements have been moved, or the queue is empty. 135 | Returns the number of elements that were moved. 136 | 137 | template 138 | bool consume(Callable&& f); 139 | Tries to remove an object from the queue by calling Callable and passing 140 | the object to it. Returns true if there was an object in the queue and 141 | Callable returned true. Returns false otherwise. 142 | Callable is an invocable with one parameter of type T*, and a return 143 | type of bool. 
144 | 145 | template 146 | size_type consume_all(Callable&& f); 147 | Tries to remove all objects from the queue by calling Callable for each 148 | object, passing the address of each object to it, until either the queue 149 | is empty, or Callable returns false. Returns the number of times 150 | Callable was invoked and returned true. 151 | Callable is an invocable with one parameter of type T*, and a return 152 | type of bool. 153 | 154 | ******************************************************************************/ 155 | #pragma once 156 | 157 | #include // for std::copy_n 158 | #include // for std::array 159 | #include // for std::atomic and std::atomic_thread_fence 160 | #include // for std::byte 161 | #include // for std::invoke 162 | #include // for std::iterator_traits 163 | #include // for std::launder and placement-new operator 164 | #include // for std::forward, std::is_invocable_r, and 165 | // std::is_constructible 166 | 167 | namespace deaod { 168 | 169 | namespace detail { 170 | 171 | template 172 | using if_t = typename std::conditional::type; 173 | 174 | #if __cplusplus >= 201703L || __cpp_lib_byte > 0 175 | using std::byte; 176 | #else 177 | using byte = unsigned char; 178 | #endif 179 | 180 | #if __cplusplus >= 201703L || __cpp_lib_void_t > 0 181 | using std::void_t; 182 | #else 183 | 184 | template 185 | struct make_void { 186 | using type = void; 187 | }; 188 | 189 | template 190 | using void_t = typename make_void::type; 191 | 192 | #endif 193 | 194 | #if __cplusplus >= 201703L || __cpp_lib_launder > 0 195 | using std::launder; 196 | #else 197 | template 198 | constexpr T* launder(T* p) noexcept { 199 | static_assert( 200 | std::is_function::value == false && std::is_void::value == false, 201 | "launder is invalid for function pointers and pointers to cv void" 202 | ); 203 | return p; 204 | } 205 | #endif 206 | 207 | template 208 | struct is_reference_wrapper : std::false_type {}; 209 | 210 | template 211 | struct is_reference_wrapper> : 
std::true_type{}; 212 | 213 | #if __cplusplus >= 201703L || __cpp_lib_invoke > 0 214 | 215 | using std::invoke; 216 | 217 | #else 218 | 219 | struct fp_with_inst_ptr {}; 220 | struct fp_with_inst_val {}; 221 | struct fp_with_ref_wrap {}; 222 | 223 | struct dp_with_inst_ptr {}; 224 | struct dp_with_inst_val {}; 225 | struct dp_with_ref_wrap {}; 226 | 227 | template 228 | struct invoke_traits { 229 | using result_type = 230 | decltype(std::declval()(std::declval()...)); 231 | }; 232 | 233 | template 234 | struct invoke_traits { 235 | private: 236 | constexpr static bool _is_mem_func = 237 | std::is_member_function_pointer::value; 238 | constexpr static bool _is_a1_a_ptr = 239 | std::is_base_of::type>::value == false; 240 | constexpr static bool _is_a1_a_ref_wrap = is_reference_wrapper::value; 241 | 242 | public: 243 | using tag_type = if_t<_is_mem_func, 244 | if_t<_is_a1_a_ptr, fp_with_inst_ptr, 245 | if_t<_is_a1_a_ref_wrap, fp_with_ref_wrap, 246 | /* else */ fp_with_inst_val>>, 247 | /* else */ 248 | if_t<_is_a1_a_ptr, dp_with_inst_ptr, 249 | if_t<_is_a1_a_ref_wrap, dp_with_ref_wrap, 250 | /* else */ dp_with_inst_val>> 251 | >; 252 | 253 | using result_type = decltype(invoke( 254 | std::declval(), 255 | std::declval(), 256 | std::declval(), 257 | std::declval()... 258 | )); 259 | }; 260 | 261 | template 262 | auto invoke(Callable&& f, Args&& ... args) 263 | -> decltype(std::forward(f)(std::forward(args)...)) { 264 | return std::forward(f)(std::forward(args)...); 265 | } 266 | 267 | 268 | 269 | template 270 | auto invoke(fp_with_inst_ptr, Type T::* f, A1&& a1, Args&& ... args) 271 | -> decltype((*std::forward(a1).*f)(std::forward(args)...)) { 272 | return (*std::forward(a1).*f)(std::forward(args)...); 273 | } 274 | 275 | template 276 | auto invoke(fp_with_inst_val, Type T::* f, A1&& a1, Args&& ... 
args) 277 | -> decltype((std::forward(a1).*f)(std::forward(args)...)) { 278 | return (std::forward(a1).*f)(std::forward(args)...); 279 | } 280 | 281 | template 282 | auto invoke(fp_with_ref_wrap, Type T::* f, A1&& a1, Args&& ... args) 283 | -> decltype((a1.get().*f)(std::forward(args)...)) { 284 | return (a1.get().*f)(std::forward(args)...); 285 | } 286 | 287 | template 288 | auto invoke(dp_with_inst_ptr, Type T::* f, A1&& a1, Args&& ...) 289 | -> typename std::decay::type { 290 | static_assert(sizeof...(Args) == 0, 291 | "invoke on data member pointer must not provide arguments other than " 292 | "instance pointer"); 293 | return *std::forward(a1).*f; 294 | } 295 | 296 | template 297 | auto invoke(dp_with_inst_val, Type T::* f, A1&& a1, Args&& ...) 298 | -> typename std::decay::type { 299 | static_assert(sizeof...(Args) == 0, 300 | "invoke on data member pointer must not provide arguments other than " 301 | "instance pointer"); 302 | return std::forward(a1).*f; 303 | } 304 | 305 | template 306 | auto invoke(dp_with_ref_wrap, Type T::* f, A1&& a1, Args&& ...) 307 | -> typename std::decay::type { 308 | static_assert(sizeof...(Args) == 0, 309 | "invoke on data member pointer must not provide arguments other than " 310 | "instance pointer"); 311 | return (a1.get().*f); 312 | } 313 | 314 | template 315 | auto invoke(Type T::* f, A1&& a1, Args&& ... args) 316 | -> typename invoke_traits< 317 | decltype(f), 318 | decltype(a1), 319 | decltype(args)... 320 | >::result_type { 321 | typename invoke_traits< 322 | decltype(f), 323 | decltype(a1), 324 | decltype(args)... 
325 | >::tag_type tag; 326 | 327 | return invoke(tag, f, std::forward(a1), std::forward(args)...); 328 | } 329 | 330 | #endif 331 | 332 | #if __cplusplus >= 201703L || __cpp_lib_is_invocable > 0 333 | 334 | using std::is_invocable; 335 | using std::is_invocable_r; 336 | 337 | #elif __has_include() 338 | 339 | #include 340 | using boost::callable_traits::is_invocable; 341 | using boost::callable_traits::is_invocable_r; 342 | 343 | #else 344 | 345 | // Dummy implementation because these are not used for correctness, 346 | // only for better error messages 347 | template 348 | struct is_invocable : std::true_type {}; 349 | template 350 | struct is_invocable_r : std::true_type {}; 351 | 352 | #endif 353 | 354 | template 355 | struct scope_guard { 356 | scope_guard(Callable&& f) : _f(std::forward(f)) {} 357 | ~scope_guard() { 358 | if (should_call()) { 359 | _f(); 360 | } 361 | }; 362 | 363 | scope_guard(const scope_guard&) = delete; 364 | scope_guard& operator=(const scope_guard&) = delete; 365 | 366 | #if __cplusplus >= 201703L || __cpp_guaranteed_copy_elision > 0 367 | 368 | private: 369 | bool should_call() const { 370 | return true; 371 | } 372 | 373 | #else 374 | 375 | scope_guard(scope_guard&& other) : _f(std::move(other._f)) { 376 | other._ignore = true; 377 | } 378 | 379 | scope_guard& operator=(scope_guard&& other) { 380 | _ignore = false; 381 | _f = std::move(other._f); 382 | 383 | other._ignore = true; 384 | } 385 | 386 | private: 387 | bool _ignore = false; 388 | bool should_call() const { 389 | return _ignore == false; 390 | } 391 | 392 | #endif 393 | Callable _f; 394 | }; 395 | 396 | template 397 | scope_guard make_scope_guard(Callable&& f) { 398 | return scope_guard(std::forward(f)); 399 | } 400 | 401 | } // namespace detail 402 | 403 | template 404 | struct alignas((size_t)1 << align_log2) spsc_queue { // gcc bug 89683 405 | using value_type = T; 406 | using size_type = size_t; 407 | 408 | static const auto size = queue_size; 409 | static const auto 
align = size_t(1) << align_log2; 410 | 411 | static_assert( 412 | alignof(T) <= align, 413 | "Type T must not be more aligned than this queue" 414 | ); 415 | 416 | spsc_queue() = default; 417 | 418 | ~spsc_queue() { 419 | std::atomic_thread_fence(std::memory_order_seq_cst); 420 | consume_all([](T*) { return true; }); 421 | } 422 | 423 | spsc_queue(const spsc_queue& other) { 424 | auto tail = 0; 425 | 426 | auto g = detail::make_scope_guard([&, this] { 427 | tail_cache = tail; 428 | _tail.store(tail); 429 | }); 430 | 431 | auto src_tail = other._tail.load(); 432 | auto src_head = other._head.load(); 433 | 434 | while (src_head != src_tail) { 435 | new(_buffer.data() + tail * sizeof(T)) 436 | T(*detail::launder(reinterpret_cast( 437 | other._buffer.data() + src_head * sizeof(T) 438 | ))); 439 | 440 | tail += 1; 441 | src_head += 1; 442 | if (src_head == size) src_head = 0; 443 | } 444 | } 445 | 446 | spsc_queue& operator=(const spsc_queue& other) { 447 | if (this == &other) return *this; 448 | 449 | { 450 | auto head = _head.load(); 451 | auto tail = _tail.load(); 452 | 453 | auto g = detail::make_scope_guard([&, this] { 454 | head_cache = head; 455 | _head.store(head); 456 | }); 457 | 458 | while (head != tail) { 459 | auto elem = detail::launder( 460 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 461 | ); 462 | elem->~T(); 463 | 464 | head += 1; 465 | if (head == size) head = 0; 466 | } 467 | } 468 | 469 | _tail.store(0); 470 | head_cache = 0; 471 | _head.store(0); 472 | tail_cache = 0; 473 | 474 | { 475 | auto tail = 0; 476 | 477 | auto g = detail::make_scope_guard([&, this] { 478 | tail_cache = tail; 479 | _tail.store(tail); 480 | }); 481 | 482 | auto src_tail = other._tail.load(); 483 | auto src_head = other._head.load(); 484 | 485 | while (src_head != src_tail) { 486 | new(_buffer.data() + tail * sizeof(T)) 487 | T(*detail::launder(reinterpret_cast( 488 | other._buffer.data() + src_head * sizeof(T) 489 | ))); 490 | 491 | tail += 1; 492 | src_head += 1; 
493 | if (src_head == size) src_head = 0; 494 | } 495 | } 496 | 497 | return *this; 498 | } 499 | 500 | bool is_empty() const { 501 | auto head = _head.load(std::memory_order_acquire); 502 | auto tail = _tail.load(std::memory_order_acquire); 503 | 504 | return head == tail; 505 | } 506 | 507 | bool is_full() const { 508 | auto head = _head.load(std::memory_order_acquire); 509 | auto tail = _tail.load(std::memory_order_acquire) + 1; 510 | if (tail == size) tail = 0; 511 | 512 | return head == tail; 513 | } 514 | 515 | // copies elem into queue, if theres space 516 | // returns true if successful, false otherwise 517 | bool push(const T& elem) { 518 | return this->emplace(elem); 519 | } 520 | 521 | // tries to move elem into queue, if theres space 522 | // returns true if successful, false otherwise 523 | bool push(T&& elem) { 524 | return this->emplace(std::move(elem)); 525 | } 526 | 527 | // tries to copy count elements into the queue 528 | // returns the number of elements that actually got copied 529 | size_type push_n(size_type count, const T& elem) { 530 | return this->emplace_n(count, elem); 531 | } 532 | 533 | 534 | // copies elements into queue until end is reached or queue is full, 535 | // whichever happens first 536 | // returns the number of elements copied into the queue 537 | template 538 | size_type write(Iterator beg, Iterator end) { 539 | static_assert( 540 | std::is_constructible::value, 541 | "T must be constructible from Iterator::reference" 542 | ); 543 | 544 | using traits = std::iterator_traits; 545 | 546 | constexpr bool is_random_access = std::is_same< 547 | typename traits::iterator_category, 548 | std::random_access_iterator_tag 549 | >::value; 550 | 551 | // std::contiguous_iterator_tag is a feature of C++20, so try to be 552 | // compatible with it. Fall back on an approximate implementation for 553 | // C++17 or earlier. 
The value to compare against was chosen such that 554 | // compilers that implement some features of future standards and 555 | // indicate that using the value of __cplusplus dont accidentally fall 556 | // into the requirement to implement std::contiguous_iterator_tag. 557 | #if __cplusplus > 202000L 558 | constexpr bool is_contiguous = std::is_same< 559 | typename traits::iterator_category, 560 | std::contiguous_iterator_tag 561 | >::value; 562 | #else 563 | constexpr bool is_contiguous = std::is_pointer::value; 564 | #endif 565 | 566 | readwrite_tag< 567 | is_random_access || is_contiguous, 568 | std::is_trivially_constructible::value 569 | > tag; 570 | 571 | return this->write_fwd(tag, beg, end); 572 | } 573 | 574 | // copies elements into queue until count elements have been copied or 575 | // queue is full, whichever happens first 576 | // returns the number of elements copied into queue 577 | template 578 | size_type write(size_type count, Iterator elems) { 579 | static_assert( 580 | std::is_constructible::value, 581 | "T must be constructible from Iterator::reference" 582 | ); 583 | 584 | readwrite_tag< 585 | true, 586 | std::is_trivially_constructible::value 587 | > tag; 588 | 589 | return this->write_fwd(tag, count, elems); 590 | } 591 | 592 | private: 593 | template 594 | struct readwrite_tag {}; 595 | 596 | template 597 | size_type write_fwd( 598 | readwrite_tag, 599 | Iterator beg, 600 | Iterator end) 601 | { 602 | return this->write_internal(beg, end); 603 | } 604 | 605 | template 606 | size_type write_fwd( 607 | readwrite_tag, 608 | Iterator beg, 609 | Iterator end) 610 | { 611 | return this->write_internal(beg, end); 612 | } 613 | 614 | template 615 | size_type write_fwd( 616 | readwrite_tag, 617 | Iterator beg, 618 | Iterator end) 619 | { 620 | return this->write_copy(end - beg, beg); 621 | } 622 | 623 | template 624 | size_type write_fwd( 625 | readwrite_tag, 626 | Iterator beg, 627 | Iterator end) 628 | { 629 | return this->write_trivial(end - 
beg, beg); 630 | } 631 | 632 | template 633 | size_type write_fwd( 634 | readwrite_tag, 635 | size_type count, 636 | Iterator elems) 637 | { 638 | return this->write_copy(count, elems); 639 | } 640 | 641 | template 642 | size_type write_fwd( 643 | readwrite_tag, 644 | size_type count, 645 | Iterator elems) 646 | { 647 | return this->write_trivial(count, elems); 648 | } 649 | 650 | template 651 | size_type write_trivial(size_type count, Iterator elems) { 652 | auto tail = _tail.load(std::memory_order_relaxed); 653 | auto head = head_cache; 654 | auto free = size - (tail - head); 655 | if (free > size) free -= size; 656 | 657 | if (count >= free) { 658 | head = head_cache = _head.load(std::memory_order_acquire); 659 | free = size - (tail - head); 660 | if (free > size) free -= size; 661 | 662 | if (count >= free) { 663 | count = free - 1; 664 | } 665 | } 666 | 667 | auto next = tail + count; 668 | if (next >= size) { 669 | next -= size; 670 | auto split_pos = count - next; 671 | std::copy_n( 672 | elems, 673 | split_pos, 674 | reinterpret_cast(_buffer.data() + tail * sizeof(T)) 675 | ); 676 | std::copy_n( 677 | elems + split_pos, 678 | next, 679 | reinterpret_cast(_buffer.data()) 680 | ); 681 | } else { 682 | std::copy_n( 683 | elems, 684 | count, 685 | reinterpret_cast(_buffer.data() + tail * sizeof(T)) 686 | ); 687 | } 688 | 689 | _tail.store(next, std::memory_order_release); 690 | return count; 691 | } 692 | 693 | template 694 | size_type write_copy(size_type count, Iterator elems) { 695 | auto tail = _tail.load(std::memory_order_relaxed); 696 | auto head = head_cache; 697 | auto free = size - (tail - head); 698 | if (free > size) free -= size; 699 | 700 | if (count >= free) { 701 | head = head_cache = _head.load(std::memory_order_acquire); 702 | free = size - (tail - head); 703 | if (free > size) free -= size; 704 | 705 | if (count >= free) { 706 | count = free - 1; 707 | } 708 | } 709 | 710 | auto next = tail + count; 711 | if (next >= size) next -= size; 712 | 
713 | auto g = detail::make_scope_guard([&, this] { 714 | _tail.store(tail, std::memory_order_release); 715 | }); 716 | 717 | while (tail != next) { 718 | new(_buffer.data() + tail * sizeof(T)) T(*elems); 719 | 720 | ++elems; 721 | tail += 1; 722 | if (tail == size) tail = 0; 723 | } 724 | 725 | return count; 726 | } 727 | 728 | template 729 | size_type write_internal(Iterator beg, Iterator end) { 730 | auto tail = _tail.load(std::memory_order_relaxed); 731 | 732 | auto g = detail::make_scope_guard([&, this] { 733 | _tail.store(tail, std::memory_order_release); 734 | }); 735 | 736 | auto count = size_type(0); 737 | for (; beg != end; ++beg) { 738 | auto next = tail + 1; 739 | if (next == size) next = 0; 740 | 741 | auto head = head_cache; 742 | if (next == head) { 743 | head = head_cache = _head.load(std::memory_order_acquire); 744 | if (next == head) { 745 | break; 746 | } 747 | } 748 | 749 | new(_buffer.data() + tail * sizeof(T)) T(*beg); 750 | tail = next; 751 | count += 1; 752 | } 753 | 754 | return count; 755 | } 756 | 757 | public: 758 | // constructs an element of type T in place using Args 759 | // returns true if successful, false otherwise 760 | template 761 | bool emplace(Args&&... args) { 762 | static_assert( 763 | std::is_constructible::value, 764 | "Type T must be constructible from Args..." 765 | ); 766 | 767 | auto tail = _tail.load(std::memory_order_relaxed); 768 | auto next = tail + 1; 769 | if (next == size) next = 0; 770 | 771 | auto head = head_cache; 772 | if (next == head) { 773 | head = head_cache = _head.load(std::memory_order_acquire); 774 | if (next == head) { 775 | return false; 776 | } 777 | } 778 | 779 | new(_buffer.data() + tail * sizeof(T)) T{ std::forward(args)... 
}; 780 | 781 | _tail.store(next, std::memory_order_release); 782 | return true; 783 | } 784 | 785 | // tries to construct count elements of type T in place using Args 786 | // returns the number of elements that got constructed 787 | template 788 | size_type emplace_n(size_type count, Args&&... args) { 789 | static_assert( 790 | std::is_constructible::value, 791 | "Type T must be constructible from Args..." 792 | ); 793 | 794 | auto tail = _tail.load(std::memory_order_relaxed); 795 | auto head = head_cache; 796 | auto free = size - (tail - head); 797 | if (free > size) free -= size; 798 | 799 | if (count >= free) { 800 | head = head_cache = _head.load(std::memory_order_acquire); 801 | free = size - (tail - head); 802 | if (free > size) free -= size; 803 | 804 | if (count >= free) { 805 | count = free - 1; 806 | } 807 | } 808 | 809 | auto next = tail + count; 810 | if (next >= size) next -= size; 811 | 812 | auto g = detail::make_scope_guard([&, this] { 813 | _tail.store(tail, std::memory_order_release); 814 | }); 815 | 816 | while (tail != next) { 817 | new(_buffer.data() + tail * sizeof(T)) T{ args... }; 818 | 819 | tail += 1; 820 | if (tail == size) tail = 0; 821 | } 822 | 823 | return count; 824 | } 825 | 826 | // Callable is an invocable that takes void* and returns bool 827 | // Callable must use placement new to construct an object of type T at the 828 | // pointer passed to it. If it cannot do so, it must return false. If it 829 | // returns false, an object of type T must not have been constructed. 830 | // 831 | // This function returns true if there was space for at least one element, 832 | // and Callable returned true. Otherwise, false will be returned. 
833 | template 834 | bool produce(Callable&& f) { 835 | static_assert( 836 | detail::is_invocable_r::value, 837 | "Callable must return bool, and take void*" 838 | ); 839 | 840 | auto tail = _tail.load(std::memory_order_relaxed); 841 | auto next = tail + 1; 842 | if (next == size) next = 0; 843 | 844 | auto head = head_cache; 845 | if (next == head) { 846 | head = head_cache = _head.load(std::memory_order_acquire); 847 | if (next == head) { 848 | return false; 849 | } 850 | } 851 | 852 | void* storage = _buffer.data() + tail * sizeof(T); 853 | if (detail::invoke(std::forward(f), storage)) { 854 | _tail.store(next, std::memory_order_release); 855 | return true; 856 | } 857 | 858 | return false; 859 | } 860 | 861 | // Callable is an invocable that takes void* and returns bool 862 | // Callable must use placement new to construct an object of type T at the 863 | // pointer passed to it. If it cannot do so, it must return false. If it 864 | // returns false, an object of type T must not have been constructed. 865 | // 866 | // This function tries to construct count elements by calling Callable for 867 | // each address where an object can be constructed. This function returns 868 | // the number of elements that were successfully constructed, that is the 869 | // number of times Callable returned true. 
870 | template 871 | size_type produce_n(size_type count, Callable&& f) { 872 | static_assert( 873 | detail::is_invocable_r::value, 874 | "Callable must return bool, and take void*" 875 | ); 876 | 877 | auto tail = _tail.load(std::memory_order_relaxed); 878 | auto head = head_cache; 879 | auto free = size - (tail - head); 880 | if (free > size) free -= size; 881 | 882 | if (count >= free) { 883 | head = head_cache = _head.load(std::memory_order_acquire); 884 | free = size - (tail - head); 885 | if (free > size) free -= size; 886 | 887 | if (count >= free) { 888 | count = free - 1; 889 | } 890 | } 891 | 892 | auto next = tail + count; 893 | if (next >= size) next -= size; 894 | 895 | auto g = detail::make_scope_guard([&, this] { 896 | _tail.store(tail, std::memory_order_release); 897 | }); 898 | 899 | while (tail != next) { 900 | void* storage = _buffer.data() + tail * sizeof(T); 901 | if (!detail::invoke(f, storage)) { 902 | auto ret = next - tail; 903 | if (ret < 0) ret += size; 904 | return ret; 905 | } 906 | 907 | tail += 1; 908 | if (tail == size) tail = 0; 909 | } 910 | 911 | return count; 912 | } 913 | 914 | // Returns a pointer to the next element that can be dequeued, or nullptr 915 | // if the queue is empty. 916 | const T* front() const { 917 | auto head = _head.load(std::memory_order_relaxed); 918 | auto tail = tail_cache; 919 | 920 | if (head == tail) { 921 | tail = tail_cache = _tail.load(std::memory_order_acquire); 922 | if (head == tail) { 923 | return nullptr; 924 | } 925 | } 926 | 927 | return detail::launder( 928 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 929 | ); 930 | } 931 | 932 | // Returns a pointer to the next element that can be dequeued, or nullptr 933 | // if the queue is empty. 
934 | T* front() { 935 | auto head = _head.load(std::memory_order_relaxed); 936 | auto tail = tail_cache; 937 | 938 | if (head == tail) { 939 | tail = tail_cache = _tail.load(std::memory_order_acquire); 940 | if (head == tail) { 941 | return nullptr; 942 | } 943 | } 944 | 945 | return detail::launder( 946 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 947 | ); 948 | } 949 | 950 | // Discards the next element to be dequeued. The queue must contain at 951 | // least one element before calling this function. 952 | void discard() { 953 | auto head = _head.load(std::memory_order_relaxed); 954 | 955 | auto elem = detail::launder( 956 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 957 | ); 958 | elem->~T(); 959 | 960 | auto next = head + 1; 961 | if (next == size) next = 0; 962 | _head.store(next, std::memory_order_release); 963 | } 964 | 965 | // tries to move the next element to be dequeued into out. 966 | // Returns true if out was assigned to, false otherwise. 967 | bool pop(T& out) { 968 | auto head = _head.load(std::memory_order_relaxed); 969 | auto tail = tail_cache; 970 | 971 | if (head == tail) { 972 | tail = tail_cache = _tail.load(std::memory_order_acquire); 973 | if (head == tail) { 974 | return false; 975 | } 976 | } 977 | 978 | auto elem = detail::launder( 979 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 980 | ); 981 | 982 | out = std::move(*elem); 983 | elem->~T(); 984 | 985 | auto next = head + 1; 986 | if (next == size) next = 0; 987 | _head.store(next, std::memory_order_release); 988 | return true; 989 | } 990 | 991 | // tries to move elements to [beg .. 
end), or until the queue is empty 992 | // returns the number of elements moved 993 | template 994 | size_type read(Iterator beg, Iterator end) { 995 | static_assert( 996 | std::is_assignable::value, 997 | "You must be able to assign T&& to Iterator::reference" 998 | ); 999 | 1000 | using traits = std::iterator_traits; 1001 | 1002 | constexpr bool is_random_access = std::is_same< 1003 | typename traits::iterator_category, 1004 | std::random_access_iterator_tag 1005 | >::value; 1006 | 1007 | // std::contiguous_iterator_tag is a feature of C++20, so try to be 1008 | // compatible with it. Fall back on an approximate implementation for 1009 | // C++17 or earlier. The value to compare against was chosen such that 1010 | // compilers that implement some features of future standards and 1011 | // indicate that using the value of __cplusplus dont accidentally fall 1012 | // into the requirement to implement std::contiguous_iterator_tag. 1013 | #if __cplusplus > 202000L 1014 | constexpr bool is_contiguous = std::is_same< 1015 | typename traits::iterator_category, 1016 | std::contiguous_iterator_tag 1017 | >::value; 1018 | #else 1019 | constexpr bool is_contiguous = std::is_pointer::value; 1020 | #endif 1021 | 1022 | readwrite_tag< 1023 | is_random_access || is_contiguous, 1024 | std::is_trivially_constructible::value 1025 | > tag; 1026 | 1027 | return this->read_fwd(tag, beg, end); 1028 | } 1029 | 1030 | // tries to move elements to [elems .. 
elems + count) or until the queue is 1031 | // empty 1032 | // returns the number of elements moved 1033 | template 1034 | size_type read(size_type count, Iterator elems) { 1035 | static_assert( 1036 | std::is_assignable::value, 1037 | "You must be able to assign T&& to Iterator::reference" 1038 | ); 1039 | 1040 | readwrite_tag< 1041 | true, 1042 | std::is_trivially_constructible::value 1043 | > tag; 1044 | 1045 | return this->read_fwd(tag, count, elems); 1046 | } 1047 | 1048 | private: 1049 | template 1050 | size_type read_fwd( 1051 | readwrite_tag, 1052 | Iterator beg, 1053 | Iterator end) 1054 | { 1055 | return this->read_internal(beg, end); 1056 | } 1057 | 1058 | template 1059 | size_type read_fwd( 1060 | readwrite_tag, 1061 | Iterator beg, 1062 | Iterator end) 1063 | { 1064 | return this->read_internal(beg, end); 1065 | } 1066 | 1067 | template 1068 | size_type read_fwd( 1069 | readwrite_tag, 1070 | Iterator beg, 1071 | Iterator end) 1072 | { 1073 | return this->read_copy(end - beg, beg); 1074 | } 1075 | 1076 | template 1077 | size_type read_fwd( 1078 | readwrite_tag, 1079 | Iterator beg, 1080 | Iterator end) 1081 | { 1082 | return this->read_trivial(end - beg, beg); 1083 | } 1084 | 1085 | template 1086 | size_type read_fwd( 1087 | readwrite_tag, 1088 | size_type count, 1089 | Iterator elems) 1090 | { 1091 | return this->read_copy(count, elems); 1092 | } 1093 | 1094 | template 1095 | size_type read_fwd( 1096 | readwrite_tag, 1097 | size_type count, 1098 | Iterator elems) 1099 | { 1100 | return this->read_trivial(count, elems); 1101 | } 1102 | 1103 | template 1104 | size_type read_trivial(size_type count, Iterator elems) { 1105 | auto head = _head.load(std::memory_order_relaxed); 1106 | auto tail = tail_cache; 1107 | auto filled = (tail - head); 1108 | if (filled > size) filled += size; 1109 | 1110 | if (count >= filled) { 1111 | tail = tail_cache = _tail.load(std::memory_order_acquire); 1112 | filled = (tail - head); 1113 | if (filled > size) filled += size; 
1114 | 1115 | if (count >= filled) { 1116 | count = filled; 1117 | } 1118 | } 1119 | 1120 | auto next = head + count; 1121 | if (next >= size) { 1122 | next -= size; 1123 | auto split_pos = count - next; 1124 | std::copy_n( 1125 | elems, 1126 | split_pos, 1127 | detail::launder( 1128 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1129 | ) 1130 | ); 1131 | std::copy_n( 1132 | elems + split_pos, 1133 | next, 1134 | detail::launder(reinterpret_cast(_buffer.data())) 1135 | ); 1136 | } else { 1137 | std::copy_n( 1138 | elems, 1139 | count, 1140 | detail::launder( 1141 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1142 | ) 1143 | ); 1144 | } 1145 | 1146 | _head.store(next, std::memory_order_release); 1147 | return count; 1148 | } 1149 | 1150 | template 1151 | size_type read_copy(size_type count, Iterator elems) { 1152 | auto head = _head.load(std::memory_order_relaxed); 1153 | auto tail = tail_cache; 1154 | auto filled = (tail - head); 1155 | if (filled > size) filled += size; 1156 | 1157 | if (count >= filled) { 1158 | tail = tail_cache = _tail.load(std::memory_order_acquire); 1159 | filled = (tail - head); 1160 | if (filled > size) filled += size; 1161 | 1162 | if (count >= filled) { 1163 | count = filled; 1164 | } 1165 | } 1166 | 1167 | auto next = head + count; 1168 | if (next >= size) next -= size; 1169 | 1170 | auto g = detail::make_scope_guard([&, this] { 1171 | _head.store(head, std::memory_order_release); 1172 | }); 1173 | 1174 | while (head != next) { 1175 | auto elem = detail::launder( 1176 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1177 | ); 1178 | 1179 | *elems = std::move(elem); 1180 | elem->~T(); 1181 | 1182 | head += 1; 1183 | if (head == size) head = 0; 1184 | } 1185 | 1186 | return count; 1187 | } 1188 | 1189 | template 1190 | size_type read_internal(Iterator beg, Iterator end) { 1191 | auto head = _head.load(std::memory_order_relaxed); 1192 | 1193 | auto g = detail::make_scope_guard([&, this] { 1194 | _head.store(head, 
std::memory_order_release); 1195 | }); 1196 | 1197 | auto count = size_type(0); 1198 | for (; beg != end; ++beg) { 1199 | auto tail = tail_cache; 1200 | if (head == tail) { 1201 | tail = tail_cache = _tail.load(std::memory_order_acquire); 1202 | if (head == tail) { 1203 | break; 1204 | } 1205 | } 1206 | 1207 | auto elem = detail::launder( 1208 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1209 | ); 1210 | 1211 | *beg = std::move(elem); 1212 | elem->~T(); 1213 | 1214 | head += 1; 1215 | if (head == size) head = 0; 1216 | count += 1; 1217 | } 1218 | 1219 | return count; 1220 | } 1221 | 1222 | public: 1223 | // Callable is an invocable that takes T* and returns bool 1224 | // 1225 | // This function calls Callable with the address of the next element to be 1226 | // dequeued, if the queue is not empty. If Callable returns true, the 1227 | // element is removed from the queue and this function returns true. 1228 | // Otherwise this function returns false. 1229 | template 1230 | bool consume(Callable&& f) { 1231 | static_assert( 1232 | detail::is_invocable_r::value, 1233 | "Callable must return bool, and take T*" 1234 | ); 1235 | 1236 | auto head = _head.load(std::memory_order_relaxed); 1237 | auto tail = tail_cache; 1238 | 1239 | if (head == tail) { 1240 | tail = tail_cache = _tail.load(std::memory_order_acquire); 1241 | if (head == tail) { 1242 | return false; 1243 | } 1244 | } 1245 | 1246 | auto elem = detail::launder( 1247 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1248 | ); 1249 | 1250 | if (detail::invoke(std::forward(f), elem)) { 1251 | elem->~T(); 1252 | auto next = head + 1; 1253 | if (next == size) next = 0; 1254 | _head.store(next, std::memory_order_release); 1255 | return true; 1256 | } 1257 | 1258 | return false; 1259 | } 1260 | 1261 | // Callable is an invocable that takes T* and returns bool 1262 | // 1263 | // This function calls Callable for each element currently in the queue, 1264 | // with the address of that element. 
If Callable returns true, the element 1265 | // is removed from the queue. If Callable returns false, the element is not 1266 | // removed, and this function returns. This function always returns the 1267 | // number of times Callable returned true. 1268 | template 1269 | size_type consume_all(Callable&& f) { 1270 | static_assert( 1271 | detail::is_invocable_r::value, 1272 | "Callable must return bool, and take T*" 1273 | ); 1274 | 1275 | auto head = _head.load(std::memory_order_relaxed); 1276 | auto tail = tail_cache = _tail.load(std::memory_order_acquire); 1277 | auto old_head = head; 1278 | 1279 | auto g = detail::make_scope_guard([&, this] { 1280 | _head.store(head, std::memory_order_release); 1281 | }); 1282 | 1283 | while (head != tail) { 1284 | auto elem = detail::launder( 1285 | reinterpret_cast(_buffer.data() + head * sizeof(T)) 1286 | ); 1287 | 1288 | if (!detail::invoke(f, elem)) { 1289 | break; 1290 | } 1291 | 1292 | elem->~T(); 1293 | head += 1; 1294 | if (head == size) head = 0; 1295 | } 1296 | 1297 | ptrdiff_t ret = head - old_head; 1298 | if (ret < 0) ret += size; 1299 | return ret; 1300 | } 1301 | 1302 | private: 1303 | alignas(align) std::array _buffer; 1304 | 1305 | alignas(align) std::atomic _tail{ 0 }; 1306 | mutable size_t head_cache{ 0 }; 1307 | 1308 | alignas(align) std::atomic _head{ 0 }; 1309 | mutable size_t tail_cache{ 0 }; 1310 | }; 1311 | 1312 | } // namespace deaod 1313 | -------------------------------------------------------------------------------- /fastqueue_arm64.asm: -------------------------------------------------------------------------------- 1 | ; Created by Anders Cedronius 2 | 3 | .text 4 | .align 6 5 | .global _push_item 6 | .global _pop_item 7 | .global _verify_mask 8 | .global _verify_cache_size 9 | 10 | .equ BUFFER_MASK, 15 11 | .equ L1_CACHE, 64 12 | .equ SHIFT_NO, ((L1_CACHE) / ((L1_CACHE) % 255 + 1) / 255 % 255 * 8 + 7 - 86 / ((L1_CACHE) % 255 + 12)) 13 | 14 | _pop_item: 15 | mov x3, x0 16 | ldr x1, [x0, #L1_CACHE 
* 4] ;mReadPositionPop 17 | pop_loop: 18 | ldr x2, [x3, #L1_CACHE * 3] ;mWritePositionPop 19 | cmp x1,x2 20 | bne entry_found 21 | ldr x4, [x3, #L1_CACHE * 5] ;mExitThread 22 | cmp x4, x1 23 | bne pop_loop 24 | ldr x5, [x3, #L1_CACHE * 6] ;mExitThreadSemaphore (1 = true) 25 | cmp x5, #0 26 | beq pop_loop 27 | eor x0, x0, x0 28 | ret 29 | entry_found: 30 | add x2, x1, #1 31 | and x1, x1, BUFFER_MASK 32 | lsl x1, x1, SHIFT_NO 33 | add x1, x1, #L1_CACHE * 7 ;mRingBuffer 34 | ldr x0, [x3, x1] 35 | dmb ishld 36 | str x2, [x3, #L1_CACHE * 4] ;mReadPositionPop 37 | str x2, [x3, #L1_CACHE * 2] ;mReadPositionPush 38 | ret 39 | 40 | _push_item: 41 | ldr x2, [x0, #L1_CACHE * 1] ;mWritePositionPush 42 | push_loop: 43 | ldr x3, [x0, #L1_CACHE * 6] ;mExitThreadSemaphore (1 = true) 44 | cmp x3, #0 45 | bne exit_loop 46 | ldr x4, [x0, #L1_CACHE * 2] ;mReadPositionPush 47 | sub x3, x2, x4 48 | cmp x3, BUFFER_MASK 49 | bge push_loop 50 | mov x3, x2 51 | add x2, x2, #1 52 | and x3, x3, BUFFER_MASK 53 | lsl x3, x3, SHIFT_NO 54 | add x3, x3, #L1_CACHE * 7 ;mRingBuffer 55 | str x1,[x0, x3] 56 | dmb ishst 57 | str x2,[x0, #L1_CACHE * 1] ;mWritePositionPush 58 | str x2,[x0, #L1_CACHE * 3] ;mWritePositionPop 59 | exit_loop: 60 | ret 61 | 62 | _verify_mask: 63 | sub x0, x0, BUFFER_MASK 64 | ret 65 | 66 | _verify_cache_size: 67 | sub x0, x0, L1_CACHE 68 | ret -------------------------------------------------------------------------------- /fastqueue_x86_64.asm: -------------------------------------------------------------------------------- 1 | ; Created by Anders Cedronius 2 | 3 | section .text 4 | bits 64 5 | 6 | BUFFER_MASK equ 15 7 | L1_CACHE equ 64 8 | SHIFT_NO equ ((L1_CACHE) / ((L1_CACHE) % 255 + 1) / 255 % 255 * 8 + 7 - 86 / ((L1_CACHE) % 255 + 12)) 9 | 10 | global push_item 11 | global pop_item 12 | global verify_mask 13 | global verify_cache_size 14 | 15 | verify_mask: 16 | mov rax,rdi 17 | sub rax, BUFFER_MASK 18 | ret 19 | 20 | verify_cache_size: 21 | mov rax,rdi 22 | sub rax, 
L1_CACHE 23 | ret 24 | 25 | push_item: 26 | mov r11, [rdi + (L1_CACHE * 1)] ;mWritePositionPush 27 | push_loop: 28 | cmp [rdi + (L1_CACHE * 6)], byte 0 ;mExitThreadSemaphore 29 | jnz exit_loop 30 | mov rcx, r11 31 | sub rcx, [rdi + (L1_CACHE * 2)] ;mReadPositionPush 32 | cmp rcx, BUFFER_MASK 33 | jge push_loop 34 | mov rax, r11 35 | inc r11 36 | and rax, BUFFER_MASK 37 | shl rax, SHIFT_NO 38 | add rax, (L1_CACHE * 7) ;mRingBuffer 39 | mov [rdi + rax], rsi 40 | sfence 41 | mov [rdi + (L1_CACHE * 1)], r11 ;mWritePositionPush 42 | mov [rdi + (L1_CACHE * 3)], r11 ;mWritePositionPop 43 | exit_loop: 44 | ret 45 | 46 | pop_item: 47 | mov rcx, [rdi + (L1_CACHE * 4)] ;mReadPositionPop 48 | cmp rcx, [rdi + (L1_CACHE * 3)] ;mWritePositionPop 49 | jne entry_found 50 | sub rcx, [rdi + (L1_CACHE * 5)] ;mExitThread (0 = true) 51 | jnz pop_item 52 | cmp [rdi + (L1_CACHE * 6)], byte 0 ;mExitThreadSemaphore (1 = true) 53 | jz pop_item 54 | xor rax, rax 55 | ret 56 | entry_found: 57 | mov r11, rcx 58 | inc r11 59 | and rcx, BUFFER_MASK 60 | shl rcx, SHIFT_NO 61 | add rcx, (L1_CACHE * 7) ;mRingBuffer 62 | mov rax, [rdi + rcx] 63 | lfence 64 | mov [rdi + (L1_CACHE * 4)], r11 ;mReadPositionPop 65 | mov [rdi + (L1_CACHE * 2)], r11 ;mReadPositionPush 66 | ret 67 | 68 | -------------------------------------------------------------------------------- /fastqueuesmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersc/fastqueue/9c2ba75146c26fac136ab9e88b0094e94658b9a0/fastqueuesmall.png -------------------------------------------------------------------------------- /ubuntu.toolchain.cmake: -------------------------------------------------------------------------------- 1 | # this one is important 2 | SET(CMAKE_SYSTEM_NAME Linux) 3 | set(CMAKE_SYSTEM_PROCESSOR aarch64) 4 | 5 | # specify the cross compiler 6 | SET(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc) 7 | SET(CMAKE_CXX_COMPILER 
/usr/bin/aarch64-linux-gnu-g++) 8 | 9 | # where is the target environment 10 | SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu) 11 | 12 | # search for programs in the build host directories 13 | SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 14 | # for libraries and headers in the target directories 15 | SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 16 | SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 17 | # end of the file 18 | -------------------------------------------------------------------------------- /windows.toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_SYSTEM_NAME Windows) 2 | set(CMAKE_SYSTEM_PROCESSOR aarch64) 3 | --------------------------------------------------------------------------------