├── README.md ├── spinlock_bench.vcxproj.filters ├── spinlock_bench.sln ├── os.hpp ├── rwlocks.hpp ├── LICENSE.txt ├── spinlock_bench.vcxproj ├── main.cpp └── excllocks.hpp /README.md: -------------------------------------------------------------------------------- 1 | # C++11 Spinlocks and Benchmark 2 | 3 | C++11 implementations of various spinlocks discussed on my blog (visit http://geidav.wordpress.com). -------------------------------------------------------------------------------- /spinlock_bench.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /spinlock_bench.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.24720.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spinlock_bench", "spinlock_bench.vcxproj", "{4D7BD504-B744-4B38-BC97-78FB07F7E660}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x64.ActiveCfg = Debug|x64 17 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x64.Build.0 = Debug|x64 18 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x86.ActiveCfg = Debug|Win32 19 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x86.Build.0 = Debug|Win32 20 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x64.ActiveCfg = Release|x64 21 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x64.Build.0 = Release|x64 22 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x86.ActiveCfg = Release|Win32 23 | 
{4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /os.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OS_HPP 2 | #define OS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #if defined(__SSE2__) 10 | #include // _mm_pause 11 | #endif 12 | 13 | constexpr size_t CACHELINE_SIZE = 64; 14 | 15 | #define WIN 0 16 | #define UNIX 1 17 | #define OS WIN 18 | 19 | #if (OS == WIN) 20 | #define WIN32_LEAN_AND_MEAN 21 | #define NOMINMAX 22 | #include 23 | 24 | #define ALWAYS_INLINE __forceinline 25 | #elif (OS == UNIX) 26 | #include 27 | 28 | #define ALWAYS_INLINE inline __attribute__((__always_inline__)) 29 | #endif 30 | 31 | ALWAYS_INLINE static void CpuRelax() 32 | { 33 | #if (OS == WIN) 34 | _mm_pause(); 35 | #elif defined(__SSE2__) // AMD and Intel 36 | _mm_pause(); 37 | #elif defined(__i386__) || defined(__x86_64__) 38 | asm volatile("pause"); 39 | #elif defined(__aarch64__) 40 | asm volatile("wfe"); 41 | #elif defined(__armel__) || defined(__ARMEL__) 42 | asm volatile ("nop" ::: "memory"); // default operation - does nothing => Might lead to passive spinning. 43 | #elif defined(__arm__) || defined(__aarch64__) // arm big endian / arm64 44 | __asm__ __volatile__ ("yield" ::: "memory"); 45 | #elif defined(__ia64__) // IA64 46 | __asm__ __volatile__ ("hint @pause"); 47 | #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) // PowerPC 48 | __asm__ __volatile__ ("or 27,27,27" ::: "memory"); 49 | #else // everything else. 50 | asm volatile ("nop" ::: "memory"); // default operation - does nothing => Might lead to passive spinning. 
51 | #endif 52 | } 53 | 54 | ALWAYS_INLINE void YieldSleep() 55 | { 56 | using namespace std::chrono; 57 | std::this_thread::sleep_for(500us); 58 | } 59 | 60 | ALWAYS_INLINE void BackoffExp(size_t &curMaxIters) 61 | { 62 | static const size_t MAX_BACKOFF_ITERS = 1024; 63 | thread_local std::uniform_int_distribution dist; 64 | thread_local std::minstd_rand gen(std::random_device{}()); 65 | 66 | const size_t spinIters = dist(gen, decltype(dist)::param_type{0, curMaxIters}); 67 | curMaxIters = std::min(2*curMaxIters, MAX_BACKOFF_ITERS); 68 | 69 | for (size_t i=0; i 5 | #include 6 | #include 7 | 8 | #include "excllocks.hpp" 9 | 10 | class SpinRwLockNaive 11 | { 12 | public: 13 | ALWAYS_INLINE void EnterExcl() 14 | { 15 | WriteLock.Enter(); 16 | 17 | while (NumReaders > 0) 18 | CpuRelax(); 19 | } 20 | 21 | ALWAYS_INLINE void LeaveExcl() 22 | { 23 | WriteLock.Leave(); 24 | } 25 | 26 | ALWAYS_INLINE void EnterShared() 27 | { 28 | while (true) 29 | { 30 | NumReaders++; 31 | 32 | if (WriteLock.Locked) 33 | NumReaders--; 34 | else 35 | break; 36 | } 37 | } 38 | 39 | ALWAYS_INLINE void LeaveShared() 40 | { 41 | NumReaders--; 42 | } 43 | 44 | private: 45 | ExpBoRelaxTTasSpinLock WriteLock; 46 | std::atomic_size_t NumReaders = {0}; 47 | }; 48 | 49 | class SpinRwLockNaivePerThreadReadCounts 50 | { 51 | private: 52 | struct alignas(CACHELINE_SIZE) ReaderCounter 53 | { 54 | std::atomic_size_t Val; 55 | }; 56 | 57 | public: 58 | ALWAYS_INLINE SpinRwLockNaivePerThreadReadCounts() : 59 | ReaderCounters([]() -> size_t { const size_t wanted = std::thread::hardware_concurrency()*2; size_t n = 1; while (n < wanted) n <<= 1; return n; }()) /* hardware_concurrency() may return 0 or a non-power-of-two; round up (minimum 1) so the size-1 index mask used for per-thread lookup stays valid */ 60 | { 61 | // Verify that we have a power-of-2 number of reader counters 62 | assert((ReaderCounters.size()&(ReaderCounters.size()-1)) == 0); 63 | } 64 | 65 | ALWAYS_INLINE void EnterExcl() 66 | { 67 | WriteLock.Enter(); 68 | 69 | for (const auto &rc : ReaderCounters) 70 | while (rc.Val > 0) 71 | CpuRelax(); 72 | } 73 | 74 | ALWAYS_INLINE void LeaveExcl() 75 | { 76 | WriteLock.Leave(); 77 | } 78 | 79 | ALWAYS_INLINE void EnterShared() 80 | { 81 | 
auto &rc = GetThreadReaderCount(); 82 | 83 | while (true) 84 | { 85 | rc.Val++; 86 | 87 | if (WriteLock.Locked) 88 | rc.Val--; 89 | else 90 | break; 91 | } 92 | } 93 | 94 | ALWAYS_INLINE void LeaveShared() 95 | { 96 | auto &rc = GetThreadReaderCount(); 97 | rc.Val--; 98 | } 99 | 100 | private: 101 | size_t GetThreadIdx() const 102 | { 103 | const std::hash hashFn{}; 104 | return hashFn(std::this_thread::get_id())&(ReaderCounters.size()-1); 105 | } 106 | 107 | ReaderCounter & GetThreadReaderCount() 108 | { 109 | return ReaderCounters[GetThreadIdx()]; 110 | } 111 | 112 | private: 113 | ExpBoRelaxTTasSpinLock WriteLock; 114 | std::vector ReaderCounters; 115 | }; 116 | 117 | class SpinRwLockNaivePerThreadReadCountsMemOrder 118 | { 119 | private: 120 | struct alignas(CACHELINE_SIZE) ReaderCounter 121 | { 122 | std::atomic_size_t Val; 123 | }; 124 | 125 | public: 126 | ALWAYS_INLINE SpinRwLockNaivePerThreadReadCountsMemOrder() : 127 | ReaderCounters([]() -> size_t { const size_t wanted = std::thread::hardware_concurrency()*2; size_t n = 1; while (n < wanted) n <<= 1; return n; }()) /* hardware_concurrency() may return 0 or a non-power-of-two; round up (minimum 1) so the size-1 index mask used for per-thread lookup stays valid */ 128 | { 129 | // Verify that we have a power-of-2 number of reader counters 130 | assert((ReaderCounters.size()&(ReaderCounters.size()-1)) == 0); 131 | } 132 | 133 | ALWAYS_INLINE void EnterExcl() 134 | { 135 | WriteLock.Enter(); 136 | size_t waitIters = 1; 137 | std::atomic_thread_fence(std::memory_order_seq_cst); /* order the write to WriteLock.Locked before the counter reads below; pairs with the seq_cst fence in EnterShared() so a writer and a reader cannot both miss each other's update */ 138 | for (const auto &rc : ReaderCounters) 139 | while (rc.Val.load(std::memory_order_relaxed) > 0) 140 | BackoffExp(waitIters); 141 | 142 | std::atomic_thread_fence(std::memory_order_acquire); 143 | } 144 | 145 | ALWAYS_INLINE void LeaveExcl() 146 | { 147 | WriteLock.Leave(); 148 | } 149 | 150 | ALWAYS_INLINE void EnterShared() 151 | { 152 | auto &rc = GetThreadReaderCount(); 153 | 154 | while (true) 155 | { 156 | rc.Val.fetch_add(1, std::memory_order_relaxed); 157 | std::atomic_thread_fence(std::memory_order_seq_cst); /* order the counter increment before the read of WriteLock.Locked; pairs with the seq_cst fence in EnterExcl() */ 158 | if (WriteLock.Locked.load(std::memory_order_relaxed)) 159 | rc.Val.fetch_sub(1, std::memory_order_relaxed); 160 | else 161 | break; 162 | } 163 | 164 | std::atomic_thread_fence(std::memory_order_acquire); 165 | } 166 | 167 | ALWAYS_INLINE void LeaveShared() 
168 | { 169 | auto &rc = GetThreadReaderCount(); 170 | rc.Val.fetch_sub(1, std::memory_order_release); 171 | } 172 | 173 | private: 174 | size_t GetThreadIdx() const 175 | { 176 | const std::hash hashFn{}; 177 | return hashFn(std::this_thread::get_id())&(ReaderCounters.size()-1); 178 | } 179 | 180 | ReaderCounter & GetThreadReaderCount() 181 | { 182 | return ReaderCounters[GetThreadIdx()]; 183 | } 184 | 185 | private: 186 | ExpBoRelaxTTasSpinLock WriteLock; 187 | std::vector ReaderCounters; 188 | }; 189 | 190 | #endif -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 
-------------------------------------------------------------------------------- /spinlock_bench.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | {4D7BD504-B744-4B38-BC97-78FB07F7E660} 31 | Win32Proj 32 | spinlock_bench 33 | 8.1 34 | 35 | 36 | 37 | Application 38 | true 39 | v140 40 | Unicode 41 | 42 | 43 | Application 44 | false 45 | v140 46 | true 47 | Unicode 48 | 49 | 50 | Application 51 | true 52 | v140 53 | Unicode 54 | 55 | 56 | Application 57 | false 58 | v140 59 | true 60 | Unicode 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | true 82 | 83 | 84 | true 85 | 86 | 87 | false 88 | 89 | 90 | false 91 | 92 | 93 | 94 | 95 | 96 | Level3 97 | Disabled 98 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 99 | 100 | 101 | Console 102 | true 103 | 104 | 105 | 106 | 107 | 108 | 109 | Level3 110 | Disabled 111 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 112 | 113 | 114 | Console 115 | true 116 | 117 | 118 | 119 | 120 | Level3 121 | 122 | 123 | MaxSpeed 124 | true 125 | true 126 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 127 | 128 | 129 | Console 130 | true 131 | true 132 | true 133 | 134 | 135 | 136 | 137 | Level3 138 | 139 | 140 | Full 141 | true 142 | true 143 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 144 | AnySuitable 145 | Speed 146 | Sync 147 | false 148 | 149 | 150 | Console 151 | true 152 | true 153 | true 154 | 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 
13 | #include 14 | 15 | #include "excllocks.hpp" 16 | #include "rwlocks.hpp" 17 | 18 | #if 1 19 | template 20 | std::vector CreateBenchmarkRuns(size_t numRuns, size_t numItersPerRun, size_t numThreads) 21 | { 22 | std::vector runs(numRuns); 23 | const size_t numItersPerThread = numItersPerRun/numThreads; 24 | volatile std::atomic_size_t cnt = {0}; 25 | 26 | for (size_t i=0; i> futures(numThreads); 29 | LockType lock; 30 | std::atomic_size_t numThreadsReady = {0}; 31 | const auto startTime = std::chrono::high_resolution_clock::now(); 32 | 33 | for (size_t j=0; j(endTime-startTime); 63 | } 64 | 65 | return runs; 66 | } 67 | #else 68 | template 69 | std::vector CreateBenchmarkRuns(size_t numRuns, size_t numItersPerRun, size_t numThreads) 70 | { 71 | std::vector runs(numRuns); 72 | const size_t numItersPerThread = numItersPerRun/numThreads; 73 | 74 | for (size_t i=0; i> futures(numThreads); 77 | LockType lock; 78 | size_t count = 0; 79 | 80 | const auto startTime = std::chrono::high_resolution_clock::now(); 81 | 82 | for (size_t j=0; j(endTime-startTime); 113 | runs[i] = elapsedMs; 114 | } 115 | 116 | return runs; 117 | } 118 | #endif 119 | 120 | template 121 | void RunBenchmark(const char *descr, size_t numRuns, size_t numItersPerRun, size_t numThreads) 122 | { 123 | const auto &runs = CreateBenchmarkRuns(numRuns, numItersPerRun, numThreads); 124 | double avgElapsedMs(0), varianceMs(0), minMs(std::numeric_limits::max()), maxMs(0); 125 | 126 | for (const auto &r : runs) 127 | { 128 | const double curRunElapsedMs = static_cast(r.count()); 129 | minMs = std::min(minMs, curRunElapsedMs); 130 | maxMs = std::max(maxMs, curRunElapsedMs); 131 | avgElapsedMs += curRunElapsedMs; 132 | } 133 | 134 | avgElapsedMs /= static_cast(runs.size()); 135 | 136 | for (const auto &r : runs) 137 | { 138 | const double diff = static_cast(r.count())-avgElapsedMs; 139 | varianceMs += diff*diff; 140 | } 141 | 142 | varianceMs /= static_cast(runs.size()); 143 | 144 | const double stdDevMs = 
std::sqrt(varianceMs); 145 | const double avgElapsedNs = avgElapsedMs*1000.0*1000.0; 146 | const double timePerIterNs = avgElapsedNs/numItersPerRun; /* avgElapsedMs is already averaged over the runs, so divide by the iterations of a single run only */ 147 | 148 | std::cout << std::left << std::setfill(' ') << std::setw(30) << descr << " " 149 | << std::fixed << std::setprecision(2) << std::right << std::setfill(' ') << std::setw(6) 150 | << avgElapsedMs << " " << std::right << std::setw(6) << stdDevMs << " " 151 | << std::right << std::setw(6) << minMs << " " << std::right << std::setw(6) << maxMs 152 | << " " << std::right << std::setw(6) << timePerIterNs << "\n"; 153 | } 154 | 155 | void RunBenchmarks() 156 | { 157 | std::cout << " Std. Time/\n"; 158 | std::cout << " Avg. dev. Min Max iter.\n"; 159 | std::cout << "Lock type (ms) (ms) (ms) (ms) (ns)\n"; 160 | std::cout << "----------------------------------------------------------------------------\n\n"; 161 | 162 | const auto startTime = std::chrono::high_resolution_clock::now(); 163 | 164 | for (size_t i=1; i<=std::thread::hardware_concurrency(); i++) 165 | { 166 | const size_t numRuns = 5; 167 | const size_t numItersPerRun = 1000000; 168 | 169 | std::cout << i << " Threads (work/thread: " << numItersPerRun/i << ")\n\n"; 170 | 171 | RunBenchmark("Mutex", numRuns, numItersPerRun, i); 172 | #if (OS == UNIX) 173 | RunBenchmark("SpinLockPThread", numRuns, numItersPerRun, i); 174 | #elif (OS == WIN) 175 | RunBenchmark("LockCriticalSection", numRuns, numItersPerRun, i); 176 | #endif 177 | RunBenchmark("ScTasSpinLock", numRuns, numItersPerRun, i); 178 | RunBenchmark("TasSpinLock", numRuns, numItersPerRun, i); 179 | RunBenchmark("TTasSpinLock", numRuns, numItersPerRun, i); 180 | RunBenchmark("RelaxTTasSpinLock", numRuns, numItersPerRun, i); 181 | RunBenchmark("ExpBoRelaxTTasSpinLock", numRuns, numItersPerRun, i); 182 | 183 | RunBenchmark("TicketSpinLock", numRuns, numItersPerRun, i); 184 | RunBenchmark("PropBoTicketSpinLock", numRuns, numItersPerRun, i); 185 | RunBenchmark("AndersonSpinLock", numRuns, 
numItersPerRun, i); 186 | RunBenchmark("GraunkeAndThakkarSpinLock", numRuns, numItersPerRun, i); 187 | #if 0 188 | RunBenchmark("SpinRwLockNaive", numRuns, numItersPerRun, i); 189 | RunBenchmark("SpinRwLockNaivePerThreadReadCounts", numRuns, numItersPerRun, i); 190 | #endif 191 | std::cout << "\n"; 192 | } 193 | 194 | const auto endTime = std::chrono::high_resolution_clock::now(); 195 | std::cout << "Total elapsed: " << std::chrono::duration_cast(endTime-startTime).count() << " ms\n"; 196 | } 197 | 198 | int main(int argc, char *argv[]) 199 | { 200 | RunBenchmarks(); 201 | return 0; 202 | } 203 | -------------------------------------------------------------------------------- /excllocks.hpp: -------------------------------------------------------------------------------- 1 | #ifndef EXCL_LOCKS_HPP 2 | #define EXCL_LOCKS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "os.hpp" 10 | 11 | class Mutex 12 | { 13 | public: 14 | ALWAYS_INLINE void Enter() 15 | { 16 | Mtx.lock(); 17 | } 18 | 19 | ALWAYS_INLINE void Leave() 20 | { 21 | Mtx.unlock(); 22 | } 23 | 24 | private: 25 | std::mutex Mtx; 26 | }; 27 | 28 | #if (OS == UNIX) 29 | 30 | #include 31 | 32 | class SpinLockPThread 33 | { 34 | public: 35 | ALWAYS_INLINE SpinLockPThread() 36 | { 37 | pthread_spin_init(&Lock, 0); 38 | } 39 | ALWAYS_INLINE ~SpinLockPThread() { pthread_spin_destroy(&Lock); } /* release the spin lock set up by pthread_spin_init() in the constructor */ 40 | ALWAYS_INLINE void Enter() 41 | { 42 | pthread_spin_lock(&Lock); 43 | } 44 | 45 | ALWAYS_INLINE void Leave() 46 | { 47 | pthread_spin_unlock(&Lock); 48 | } 49 | 50 | private: 51 | pthread_spinlock_t Lock; 52 | }; 53 | 54 | #elif (OS == WIN) 55 | 56 | class LockCriticalSection 57 | { 58 | public: 59 | ALWAYS_INLINE LockCriticalSection() 60 | { 61 | InitializeCriticalSection(&Cs); 62 | } 63 | ALWAYS_INLINE ~LockCriticalSection() { DeleteCriticalSection(&Cs); } /* release the critical section set up by InitializeCriticalSection() in the constructor */ 64 | ALWAYS_INLINE void Enter() 65 | { 66 | EnterCriticalSection(&Cs); 67 | } 68 | 69 | ALWAYS_INLINE void Leave() 70 | { 71 | LeaveCriticalSection(&Cs); 72 | } 73 | 74 | private: 75 | CRITICAL_SECTION Cs; 76 | }; 77 | 78 | #endif 79 | 80 | class ScTasSpinLock 81 | { 82 
| public: 83 | ALWAYS_INLINE void Enter() 84 | { 85 | while (Locked.exchange(true)); 86 | } 87 | 88 | ALWAYS_INLINE void Leave() 89 | { 90 | Locked.store(false); 91 | } 92 | 93 | private: 94 | std::atomic_bool Locked = {false}; 95 | }; 96 | 97 | class TasSpinLock 98 | { 99 | public: 100 | ALWAYS_INLINE void Enter() 101 | { 102 | while (Locked.exchange(true, std::memory_order_acquire)); 103 | } 104 | 105 | ALWAYS_INLINE void Leave() 106 | { 107 | Locked.store(false, std::memory_order_release); 108 | } 109 | 110 | private: 111 | std::atomic_bool Locked = {false}; 112 | }; 113 | 114 | class TTasSpinLock 115 | { 116 | public: 117 | ALWAYS_INLINE void Enter() 118 | { 119 | do 120 | { 121 | while (Locked.load(std::memory_order_relaxed)); 122 | } 123 | while (Locked.exchange(true, std::memory_order_acquire)); 124 | } 125 | 126 | ALWAYS_INLINE void Leave() 127 | { 128 | Locked.store(false, std::memory_order_release); 129 | } 130 | 131 | private: 132 | std::atomic_bool Locked = {false}; 133 | }; 134 | 135 | class RelaxTTasSpinLock 136 | { 137 | public: 138 | ALWAYS_INLINE void Enter() 139 | { 140 | do 141 | { 142 | while (Locked.load(std::memory_order_relaxed)) 143 | CpuRelax(); 144 | } 145 | while (Locked.exchange(true, std::memory_order_acquire)); 146 | } 147 | 148 | ALWAYS_INLINE void Leave() 149 | { 150 | Locked.store(false, std::memory_order_release); 151 | } 152 | 153 | private: 154 | std::atomic_bool Locked = {false}; 155 | }; 156 | 157 | class ExpBoRelaxTTasSpinLock 158 | { 159 | public: 160 | ALWAYS_INLINE void Enter() 161 | { 162 | size_t curMaxDelay = MIN_BACKOFF_ITERS; 163 | 164 | while (true) 165 | { 166 | WaitUntilLockIsFree(); 167 | 168 | if (Locked.exchange(true, std::memory_order_acquire)) 169 | BackoffExp(curMaxDelay); 170 | else 171 | break; 172 | } 173 | } 174 | 175 | ALWAYS_INLINE void Leave() 176 | { 177 | Locked.store(false, std::memory_order_release); 178 | } 179 | 180 | private: 181 | ALWAYS_INLINE void WaitUntilLockIsFree() const 182 | { 183 | 
size_t numIters = 0; 184 | 185 | while (Locked.load(std::memory_order_relaxed)) 186 | { 187 | if (numIters < MAX_WAIT_ITERS) 188 | { 189 | numIters++; 190 | CpuRelax(); 191 | } 192 | else 193 | YieldSleep(); 194 | } 195 | } 196 | 197 | public: 198 | std::atomic_bool Locked = {false}; 199 | 200 | private: 201 | static const size_t MAX_WAIT_ITERS = 0x10000; 202 | static const size_t MIN_BACKOFF_ITERS = 32; 203 | }; 204 | 205 | class TicketSpinLock 206 | { 207 | public: 208 | ALWAYS_INLINE void Enter() 209 | { 210 | const auto myTicketNo = NextTicketNo.fetch_add(1, std::memory_order_relaxed); 211 | 212 | while (ServingTicketNo.load(std::memory_order_acquire) != myTicketNo) 213 | CpuRelax(); 214 | } 215 | 216 | ALWAYS_INLINE void Leave() 217 | { 218 | // We can get around a more expensive read-modify-write operation 219 | // (std::atomic_size_t::fetch_add()), because noone can modify 220 | // ServingTicketNo while we're in the critical section. 221 | const auto newNo = ServingTicketNo.load(std::memory_order_relaxed)+1; 222 | ServingTicketNo.store(newNo, std::memory_order_release); 223 | } 224 | 225 | private: 226 | alignas(CACHELINE_SIZE) std::atomic_size_t ServingTicketNo = {0}; 227 | alignas(CACHELINE_SIZE) std::atomic_size_t NextTicketNo = {0}; 228 | }; 229 | 230 | static_assert(sizeof(TicketSpinLock) == 2*CACHELINE_SIZE, ""); 231 | 232 | class PropBoTicketSpinLock 233 | { 234 | public: 235 | ALWAYS_INLINE void Enter() 236 | { 237 | constexpr size_t BACKOFF_BASE = 10; 238 | const auto myTicketNo = NextTicketNo.fetch_add(1, std::memory_order_relaxed); 239 | 240 | while (true) 241 | { 242 | const auto servingTicketNo = ServingTicketNo.load(std::memory_order_acquire); 243 | if (servingTicketNo == myTicketNo) 244 | break; 245 | 246 | const size_t waitIters = BACKOFF_BASE*(myTicketNo-servingTicketNo); 247 | 248 | for (size_t i=0; i; 301 | static_assert(sizeof(PaddedFlag) == CACHELINE_SIZE, ""); 302 | 303 | alignas(CACHELINE_SIZE) std::vector LockedFlags; 304 | 
alignas(CACHELINE_SIZE) std::atomic_size_t NextFreeIdx = {0}; 305 | alignas(CACHELINE_SIZE) std::atomic_size_t NextServingIdx = {1}; 306 | }; 307 | 308 | class GraunkeAndThakkarSpinLock 309 | { 310 | public: 311 | GraunkeAndThakkarSpinLock(size_t maxThreads=std::thread::hardware_concurrency()) : 312 | LockedFlags(maxThreads) 313 | { 314 | for (auto &flag : LockedFlags) 315 | flag.first = 1; 316 | 317 | assert(Tail.is_lock_free()); 318 | Tail = reinterpret_cast(&LockedFlags[0].first); 319 | assert((Tail&1) == 0); // Make sure there's space to store the old flag value in the LSB 320 | } 321 | 322 | ALWAYS_INLINE void Enter() 323 | { 324 | // Create new tail by chaining my synchronization variable into the list 325 | const auto &newFlag = LockedFlags[GetThreadIndex()].first; 326 | const auto newTail = reinterpret_cast(&newFlag)|static_cast(newFlag); 327 | const auto ahead = Tail.exchange(newTail); 328 | 329 | // Extract flag and old value of previous thread in line, so that we can wait for its completion 330 | const auto *aheadFlag = reinterpret_cast(ahead&(~static_cast(1))); 331 | const auto aheadValue = static_cast(ahead&1); 332 | 333 | // Wait for previous thread in line to flip my synchronization variable 334 | while (aheadFlag->load() == aheadValue) 335 | CpuRelax(); 336 | } 337 | 338 | ALWAYS_INLINE void Leave() 339 | { 340 | // Flipping synchronization variable enables next thread in line to enter CS 341 | auto &flag = LockedFlags[GetThreadIndex()].first; 342 | flag = !flag; 343 | } 344 | 345 | private: 346 | ALWAYS_INLINE size_t GetThreadIndex() const 347 | { 348 | static std::atomic_size_t threadCounter = {0}; 349 | thread_local size_t threadIdx = threadCounter++; 350 | assert(threadIdx < LockedFlags.size()); 351 | return threadIdx; 352 | } 353 | 354 | private: 355 | using PaddedFlag = std::pair; 356 | static_assert(sizeof(PaddedFlag) == CACHELINE_SIZE, ""); 357 | 358 | // In the LSB the old value of the flag is stored 359 | alignas(CACHELINE_SIZE) 
std::atomic Tail; 360 | alignas(CACHELINE_SIZE) std::vector LockedFlags; 361 | 362 | static_assert(sizeof(decltype(LockedFlags)::value_type) > 1, 363 | "Flag size > 1 required: thanks to alginment, old flag value can be stored in LSB"); 364 | }; 365 | 366 | class McsLock 367 | { 368 | public: 369 | struct QNode 370 | { 371 | std::atomic Next = {nullptr}; 372 | std::atomic_bool Locked = {false}; 373 | }; 374 | 375 | public: 376 | ALWAYS_INLINE void Enter(QNode &node) 377 | { 378 | node.Next = nullptr; 379 | node.Locked = true; 380 | 381 | QNode *oldTail = Tail.exchange(&node); 382 | 383 | if (oldTail != nullptr) 384 | { 385 | oldTail->Next = &node; 386 | 387 | while (node.Locked == true) 388 | CpuRelax(); 389 | } 390 | } 391 | 392 | ALWAYS_INLINE void Leave(QNode &node) 393 | { 394 | if (node.Next.load() == nullptr) 395 | { 396 | QNode *tailWasMe = &node; 397 | if (Tail.compare_exchange_strong(tailWasMe, nullptr)) 398 | return; 399 | 400 | while (node.Next.load() == nullptr) 401 | CpuRelax(); 402 | } 403 | 404 | node.Next.load()->Locked = false; 405 | } 406 | 407 | private: 408 | std::atomic Tail = {nullptr}; 409 | }; 410 | 411 | #endif 412 | --------------------------------------------------------------------------------