├── README.md
├── spinlock_bench.vcxproj.filters
├── spinlock_bench.sln
├── os.hpp
├── rwlocks.hpp
├── LICENSE.txt
├── spinlock_bench.vcxproj
├── main.cpp
└── excllocks.hpp
/README.md:
--------------------------------------------------------------------------------
1 | # C++11 Spinlocks and Benchmark
2 |
3 | C++11 implementations of various spinlocks discussed on my blog (visit http://geidav.wordpress.com).
--------------------------------------------------------------------------------
/spinlock_bench.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/spinlock_bench.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 14
4 | VisualStudioVersion = 14.0.24720.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spinlock_bench", "spinlock_bench.vcxproj", "{4D7BD504-B744-4B38-BC97-78FB07F7E660}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Debug|x86 = Debug|x86
12 | Release|x64 = Release|x64
13 | Release|x86 = Release|x86
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x64.ActiveCfg = Debug|x64
17 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x64.Build.0 = Debug|x64
18 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x86.ActiveCfg = Debug|Win32
19 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Debug|x86.Build.0 = Debug|Win32
20 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x64.ActiveCfg = Release|x64
21 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x64.Build.0 = Release|x64
22 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x86.ActiveCfg = Release|Win32
23 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}.Release|x86.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | EndGlobal
29 |
--------------------------------------------------------------------------------
/os.hpp:
--------------------------------------------------------------------------------
1 | #ifndef OS_HPP
2 | #define OS_HPP
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #if defined(__SSE2__)
10 | #include // _mm_pause
11 | #endif
12 |
13 | constexpr size_t CACHELINE_SIZE = 64;
14 |
15 | #define WIN 0
16 | #define UNIX 1
17 | #define OS WIN
18 |
19 | #if (OS == WIN)
20 | #define WIN32_LEAN_AND_MEAN
21 | #define NOMINMAX
22 | #include
23 |
24 | #define ALWAYS_INLINE __forceinline
25 | #elif (OS == UNIX)
26 | #include
27 |
28 | #define ALWAYS_INLINE inline __attribute__((__always_inline__))
29 | #endif
30 |
31 | ALWAYS_INLINE static void CpuRelax()
32 | {
33 | #if (OS == WIN)
34 | _mm_pause();
35 | #elif defined(__SSE2__) // AMD and Intel
36 | _mm_pause();
37 | #elif defined(__i386__) || defined(__x86_64__)
38 | asm volatile("pause");
39 | #elif defined(__aarch64__)
40 | asm volatile("wfe");
41 | #elif defined(__armel__) || defined(__ARMEL__)
42 | asm volatile ("nop" ::: "memory"); // default operation - does nothing => Might lead to passive spinning.
43 | #elif defined(__arm__) || defined(__aarch64__) // arm big endian / arm64
44 | __asm__ __volatile__ ("yield" ::: "memory");
45 | #elif defined(__ia64__) // IA64
46 | __asm__ __volatile__ ("hint @pause");
47 | #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) // PowerPC
48 | __asm__ __volatile__ ("or 27,27,27" ::: "memory");
49 | #else // everything else.
50 | asm volatile ("nop" ::: "memory"); // default operation - does nothing => Might lead to passive spinning.
51 | #endif
52 | }
53 |
54 | ALWAYS_INLINE void YieldSleep()
55 | {
56 | using namespace std::chrono;
57 | std::this_thread::sleep_for(500us);
58 | }
59 |
60 | ALWAYS_INLINE void BackoffExp(size_t &curMaxIters)
61 | {
62 | static const size_t MAX_BACKOFF_ITERS = 1024;
63 | thread_local std::uniform_int_distribution dist;
64 | thread_local std::minstd_rand gen(std::random_device{}());
65 |
66 | const size_t spinIters = dist(gen, decltype(dist)::param_type{0, curMaxIters});
67 | curMaxIters = std::min(2*curMaxIters, MAX_BACKOFF_ITERS);
68 |
69 | for (size_t i=0; i
5 | #include
6 | #include
7 |
8 | #include "excllocks.hpp"
9 |
// Naive reader-writer spinlock layered on top of an exclusive TTAS lock.
// All atomics here use the default seq_cst ordering; the *MemOrder variant
// below relaxes that. NumReaders is a single shared counter, so every
// reader entry/exit bounces one cache line between all reading cores.
class SpinRwLockNaive
{
public:
    ALWAYS_INLINE void EnterExcl()
    {
        // Taking the writer lock first blocks other writers and makes new
        // readers back out (see EnterShared), ...
        WriteLock.Enter();

        // ... then wait for readers that registered before us to drain.
        while (NumReaders > 0)
            CpuRelax();
    }

    ALWAYS_INLINE void LeaveExcl()
    {
        WriteLock.Leave();
    }

    ALWAYS_INLINE void EnterShared()
    {
        // Optimistic entry: announce ourselves as a reader first, then check
        // for an active writer. If there is one, retract the registration so
        // the writer's drain loop can make progress, and retry.
        // NOTE: the retry loop has no CpuRelax/backoff - naive by design.
        while (true)
        {
            NumReaders++;

            if (WriteLock.Locked)
                NumReaders--;
            else
                break;
        }
    }

    ALWAYS_INLINE void LeaveShared()
    {
        NumReaders--;
    }

private:
    // Writer mutual exclusion; its public Locked flag is peeked by readers.
    ExpBoRelaxTTasSpinLock WriteLock;
    // Number of readers currently inside (or optimistically registering).
    std::atomic_size_t NumReaders = {0};
};
48 |
49 | class SpinRwLockNaivePerThreadReadCounts
50 | {
51 | private:
52 | struct alignas(CACHELINE_SIZE) ReaderCounter
53 | {
54 | std::atomic_size_t Val;
55 | };
56 |
57 | public:
58 | ALWAYS_INLINE SpinRwLockNaivePerThreadReadCounts() :
59 | ReaderCounters(std::thread::hardware_concurrency()*2)
60 | {
61 | // Verify that we have a power-of-2 number of reader counters
62 | assert((ReaderCounters.size()&(ReaderCounters.size()-1)) == 0);
63 | }
64 |
65 | ALWAYS_INLINE void EnterExcl()
66 | {
67 | WriteLock.Enter();
68 |
69 | for (const auto &rc : ReaderCounters)
70 | while (rc.Val > 0)
71 | CpuRelax();
72 | }
73 |
74 | ALWAYS_INLINE void LeaveExcl()
75 | {
76 | WriteLock.Leave();
77 | }
78 |
79 | ALWAYS_INLINE void EnterShared()
80 | {
81 | auto &rc = GetThreadReaderCount();
82 |
83 | while (true)
84 | {
85 | rc.Val++;
86 |
87 | if (WriteLock.Locked)
88 | rc.Val--;
89 | else
90 | break;
91 | }
92 | }
93 |
94 | ALWAYS_INLINE void LeaveShared()
95 | {
96 | auto &rc = GetThreadReaderCount();
97 | rc.Val--;
98 | }
99 |
100 | private:
101 | size_t GetThreadIdx() const
102 | {
103 | const std::hash hashFn{};
104 | return hashFn(std::this_thread::get_id())&(ReaderCounters.size()-1);
105 | }
106 |
107 | ReaderCounter & GetThreadReaderCount()
108 | {
109 | return ReaderCounters[GetThreadIdx()];
110 | }
111 |
112 | private:
113 | ExpBoRelaxTTasSpinLock WriteLock;
114 | std::vector ReaderCounters;
115 | };
116 |
117 | class SpinRwLockNaivePerThreadReadCountsMemOrder
118 | {
119 | private:
120 | struct alignas(CACHELINE_SIZE) ReaderCounter
121 | {
122 | std::atomic_size_t Val;
123 | };
124 |
125 | public:
126 | ALWAYS_INLINE SpinRwLockNaivePerThreadReadCountsMemOrder() :
127 | ReaderCounters(std::thread::hardware_concurrency()*2)
128 | {
129 | // Verify that we have a power-of-2 number of reader counters
130 | assert((ReaderCounters.size()&(ReaderCounters.size()-1)) == 0);
131 | }
132 |
133 | ALWAYS_INLINE void EnterExcl()
134 | {
135 | WriteLock.Enter();
136 | size_t waitIters = 1;
137 |
138 | for (const auto &rc : ReaderCounters)
139 | while (rc.Val.load(std::memory_order_relaxed) > 0)
140 | BackoffExp(waitIters);
141 |
142 | std::atomic_thread_fence(std::memory_order_acquire);
143 | }
144 |
145 | ALWAYS_INLINE void LeaveExcl()
146 | {
147 | WriteLock.Leave();
148 | }
149 |
150 | ALWAYS_INLINE void EnterShared()
151 | {
152 | auto &rc = GetThreadReaderCount();
153 |
154 | while (true)
155 | {
156 | rc.Val.fetch_add(1, std::memory_order_relaxed);
157 |
158 | if (WriteLock.Locked.load(std::memory_order_relaxed))
159 | rc.Val.fetch_sub(1, std::memory_order_relaxed);
160 | else
161 | break;
162 | }
163 |
164 | std::atomic_thread_fence(std::memory_order_acquire);
165 | }
166 |
167 | ALWAYS_INLINE void LeaveShared()
168 | {
169 | auto &rc = GetThreadReaderCount();
170 | rc.Val.fetch_sub(1, std::memory_order_release);
171 | }
172 |
173 | private:
174 | size_t GetThreadIdx() const
175 | {
176 | const std::hash hashFn{};
177 | return hashFn(std::this_thread::get_id())&(ReaderCounters.size()-1);
178 | }
179 |
180 | ReaderCounter & GetThreadReaderCount()
181 | {
182 | return ReaderCounters[GetThreadIdx()];
183 | }
184 |
185 | private:
186 | ExpBoRelaxTTasSpinLock WriteLock;
187 | std::vector ReaderCounters;
188 | };
189 |
190 | #endif
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Creative Commons Legal Code
2 |
3 | CC0 1.0 Universal
4 |
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12 | HEREUNDER.
13 |
14 | Statement of Purpose
15 |
16 | The laws of most jurisdictions throughout the world automatically confer
17 | exclusive Copyright and Related Rights (defined below) upon the creator
18 | and subsequent owner(s) (each and all, an "owner") of an original work of
19 | authorship and/or a database (each, a "Work").
20 |
21 | Certain owners wish to permanently relinquish those rights to a Work for
22 | the purpose of contributing to a commons of creative, cultural and
23 | scientific works ("Commons") that the public can reliably and without fear
24 | of later claims of infringement build upon, modify, incorporate in other
25 | works, reuse and redistribute as freely as possible in any form whatsoever
26 | and for any purposes, including without limitation commercial purposes.
27 | These owners may contribute to the Commons to promote the ideal of a free
28 | culture and the further production of creative, cultural and scientific
29 | works, or to gain reputation or greater distribution for their Work in
30 | part through the use and efforts of others.
31 |
32 | For these and/or other purposes and motivations, and without any
33 | expectation of additional consideration or compensation, the person
34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35 | is an owner of Copyright and Related Rights in the Work, voluntarily
36 | elects to apply CC0 to the Work and publicly distribute the Work under its
37 | terms, with knowledge of his or her Copyright and Related Rights in the
38 | Work and the meaning and intended legal effect of CC0 on those rights.
39 |
40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
41 | protected by copyright and related or neighboring rights ("Copyright and
42 | Related Rights"). Copyright and Related Rights include, but are not
43 | limited to, the following:
44 |
45 | i. the right to reproduce, adapt, distribute, perform, display,
46 | communicate, and translate a Work;
47 | ii. moral rights retained by the original author(s) and/or performer(s);
48 | iii. publicity and privacy rights pertaining to a person's image or
49 | likeness depicted in a Work;
50 | iv. rights protecting against unfair competition in regards to a Work,
51 | subject to the limitations in paragraph 4(a), below;
52 | v. rights protecting the extraction, dissemination, use and reuse of data
53 | in a Work;
54 | vi. database rights (such as those arising under Directive 96/9/EC of the
55 | European Parliament and of the Council of 11 March 1996 on the legal
56 | protection of databases, and under any national implementation
57 | thereof, including any amended or successor version of such
58 | directive); and
59 | vii. other similar, equivalent or corresponding rights throughout the
60 | world based on applicable law or treaty, and any national
61 | implementations thereof.
62 |
63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
65 | irrevocably and unconditionally waives, abandons, and surrenders all of
66 | Affirmer's Copyright and Related Rights and associated claims and causes
67 | of action, whether now known or unknown (including existing as well as
68 | future claims and causes of action), in the Work (i) in all territories
69 | worldwide, (ii) for the maximum duration provided by applicable law or
70 | treaty (including future time extensions), (iii) in any current or future
71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
72 | including without limitation commercial, advertising or promotional
73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74 | member of the public at large and to the detriment of Affirmer's heirs and
75 | successors, fully intending that such Waiver shall not be subject to
76 | revocation, rescission, cancellation, termination, or any other legal or
77 | equitable action to disrupt the quiet enjoyment of the Work by the public
78 | as contemplated by Affirmer's express Statement of Purpose.
79 |
80 | 3. Public License Fallback. Should any part of the Waiver for any reason
81 | be judged legally invalid or ineffective under applicable law, then the
82 | Waiver shall be preserved to the maximum extent permitted taking into
83 | account Affirmer's express Statement of Purpose. In addition, to the
84 | extent the Waiver is so judged Affirmer hereby grants to each affected
85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
88 | maximum duration provided by applicable law or treaty (including future
89 | time extensions), (iii) in any current or future medium and for any number
90 | of copies, and (iv) for any purpose whatsoever, including without
91 | limitation commercial, advertising or promotional purposes (the
92 | "License"). The License shall be deemed effective as of the date CC0 was
93 | applied by Affirmer to the Work. Should any part of the License for any
94 | reason be judged legally invalid or ineffective under applicable law, such
95 | partial invalidity or ineffectiveness shall not invalidate the remainder
96 | of the License, and in such case Affirmer hereby affirms that he or she
97 | will not (i) exercise any of his or her remaining Copyright and Related
98 | Rights in the Work or (ii) assert any associated claims and causes of
99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 |
102 | 4. Limitations and Disclaimers.
103 |
104 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 | surrendered, licensed or otherwise affected by this document.
106 | b. Affirmer offers the Work as-is and makes no representations or
107 | warranties of any kind concerning the Work, express, implied,
108 | statutory or otherwise, including without limitation warranties of
109 | title, merchantability, fitness for a particular purpose, non
110 | infringement, or the absence of latent or other defects, accuracy, or
111 | the present or absence of errors, whether or not discoverable, all to
112 | the greatest extent permissible under applicable law.
113 | c. Affirmer disclaims responsibility for clearing rights of other persons
114 | that may apply to the Work or any use thereof, including without
115 | limitation any person's Copyright and Related Rights in the Work.
116 | Further, Affirmer disclaims responsibility for obtaining any necessary
117 | consents, permissions or other rights required for any use of the
118 | Work.
119 | d. Affirmer understands and acknowledges that Creative Commons is not a
120 | party to this document and has no duty or obligation with respect to
121 | this CC0 or use of the Work.
--------------------------------------------------------------------------------
/spinlock_bench.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 | Debug
14 | x64
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | {4D7BD504-B744-4B38-BC97-78FB07F7E660}
31 | Win32Proj
32 | spinlock_bench
33 | 8.1
34 |
35 |
36 |
37 | Application
38 | true
39 | v140
40 | Unicode
41 |
42 |
43 | Application
44 | false
45 | v140
46 | true
47 | Unicode
48 |
49 |
50 | Application
51 | true
52 | v140
53 | Unicode
54 |
55 |
56 | Application
57 | false
58 | v140
59 | true
60 | Unicode
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | true
82 |
83 |
84 | true
85 |
86 |
87 | false
88 |
89 |
90 | false
91 |
92 |
93 |
94 |
95 |
96 | Level3
97 | Disabled
98 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
99 |
100 |
101 | Console
102 | true
103 |
104 |
105 |
106 |
107 |
108 |
109 | Level3
110 | Disabled
111 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
112 |
113 |
114 | Console
115 | true
116 |
117 |
118 |
119 |
120 | Level3
121 |
122 |
123 | MaxSpeed
124 | true
125 | true
126 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
127 |
128 |
129 | Console
130 | true
131 | true
132 | true
133 |
134 |
135 |
136 |
137 | Level3
138 |
139 |
140 | Full
141 | true
142 | true
143 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
144 | AnySuitable
145 | Speed
146 | Sync
147 | false
148 |
149 |
150 | Console
151 | true
152 | true
153 | true
154 |
155 |
156 |
157 |
158 |
159 |
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "excllocks.hpp"
16 | #include "rwlocks.hpp"
17 |
18 | #if 1
#if 1
// NOTE(review): this region of main.cpp is corrupted - the template
// parameter lists, the vector element types, and large spans of both loop
// bodies (original lines 26-28, 33-62 and 82-112) were swallowed by a
// tool that stripped everything between '<' and '>'. The surviving tokens
// are kept verbatim below; restore this function from version control
// before attempting to build.
//
// Intent, as far as the surviving code shows: run 'numRuns' benchmark
// repetitions; each run creates one LockType instance and 'numThreads'
// futures, measures wall-clock time with high_resolution_clock, and
// returns the per-run durations.
template
std::vector CreateBenchmarkRuns(size_t numRuns, size_t numItersPerRun, size_t numThreads)
{
    std::vector runs(numRuns);
    const size_t numItersPerThread = numItersPerRun/numThreads;
    // presumably a shared counter incremented inside the critical section
    // so the lock/unlock work is not optimized away - TODO confirm.
    volatile std::atomic_size_t cnt = {0};

    // NOTE(review): the next line is three original source lines fused
    // together by the stripper (outer run loop + futures vector decl).
    for (size_t i=0; i> futures(numThreads);
    LockType lock;
    std::atomic_size_t numThreadsReady = {0};
    const auto startTime = std::chrono::high_resolution_clock::now();

    // NOTE(review): the next line fuses original lines 33-62 - the worker
    // launch loop, the worker lambda and the end-of-run timing.
    for (size_t j=0; j(endTime-startTime);
    }

    return runs;
}
#else
// NOTE(review): alternative (disabled) variant of the same benchmark
// driver, equally corrupted; tokens kept verbatim.
template
std::vector CreateBenchmarkRuns(size_t numRuns, size_t numItersPerRun, size_t numThreads)
{
    std::vector runs(numRuns);
    const size_t numItersPerThread = numItersPerRun/numThreads;

    // NOTE(review): fused original lines 75-77 (run loop + futures decl).
    for (size_t i=0; i> futures(numThreads);
    LockType lock;
    size_t count = 0;

    const auto startTime = std::chrono::high_resolution_clock::now();

    // NOTE(review): fused original lines 82-112 (worker loop through the
    // end-of-run duration_cast).
    for (size_t j=0; j(endTime-startTime);
    runs[i] = elapsedMs;
    }

    return runs;
}
#endif
119 |
120 | template
121 | void RunBenchmark(const char *descr, size_t numRuns, size_t numItersPerRun, size_t numThreads)
122 | {
123 | const auto &runs = CreateBenchmarkRuns(numRuns, numItersPerRun, numThreads);
124 | double avgElapsedMs(0), varianceMs(0), minMs(std::numeric_limits::max()), maxMs(0);
125 |
126 | for (const auto &r : runs)
127 | {
128 | const double curRunElapsedMs = static_cast(r.count());
129 | minMs = std::min(minMs, curRunElapsedMs);
130 | maxMs = std::max(maxMs, curRunElapsedMs);
131 | avgElapsedMs += curRunElapsedMs;
132 | }
133 |
134 | avgElapsedMs /= static_cast(runs.size());
135 |
136 | for (const auto &r : runs)
137 | {
138 | const double diff = static_cast(r.count())-avgElapsedMs;
139 | varianceMs += diff*diff;
140 | }
141 |
142 | varianceMs /= static_cast(runs.size());
143 |
144 | const double stdDevMs = std::sqrt(varianceMs);
145 | const double avgElapsedNs = avgElapsedMs*1000.0*1000.0;
146 | const double timePerIterNs = avgElapsedNs/(numRuns*numItersPerRun);
147 |
148 | std::cout << std::left << std::setfill(' ') << std::setw(30) << descr << " "
149 | << std::fixed << std::setprecision(2) << std::right << std::setfill(' ') << std::setw(6)
150 | << avgElapsedMs << " " << std::right << std::setw(6) << stdDevMs << " "
151 | << std::right << std::setw(6) << minMs << " " << std::right << std::setw(6) << maxMs
152 | << " " << std::right << std::setw(6) << timePerIterNs << "\n";
153 | }
154 |
155 | void RunBenchmarks()
156 | {
157 | std::cout << " Std. Time/\n";
158 | std::cout << " Avg. dev. Min Max iter.\n";
159 | std::cout << "Lock type (ms) (ms) (ms) (ms) (ns)\n";
160 | std::cout << "----------------------------------------------------------------------------\n\n";
161 |
162 | const auto startTime = std::chrono::high_resolution_clock::now();
163 |
164 | for (size_t i=1; i<=std::thread::hardware_concurrency(); i++)
165 | {
166 | const size_t numRuns = 5;
167 | const size_t numItersPerRun = 1000000;
168 |
169 | std::cout << i << " Threads (work/thread: " << numItersPerRun/i << ")\n\n";
170 |
171 | RunBenchmark("Mutex", numRuns, numItersPerRun, i);
172 | #if (OS == UNIX)
173 | RunBenchmark("SpinLockPThread", numRuns, numItersPerRun, i);
174 | #elif (OS == WIN)
175 | RunBenchmark("LockCriticalSection", numRuns, numItersPerRun, i);
176 | #endif
177 | RunBenchmark("ScTasSpinLock", numRuns, numItersPerRun, i);
178 | RunBenchmark("TasSpinLock", numRuns, numItersPerRun, i);
179 | RunBenchmark("TTasSpinLock", numRuns, numItersPerRun, i);
180 | RunBenchmark("RelaxTTasSpinLock", numRuns, numItersPerRun, i);
181 | RunBenchmark("ExpBoRelaxTTasSpinLock", numRuns, numItersPerRun, i);
182 |
183 | RunBenchmark("TicketSpinLock", numRuns, numItersPerRun, i);
184 | RunBenchmark("PropBoTicketSpinLock", numRuns, numItersPerRun, i);
185 | RunBenchmark("AndersonSpinLock", numRuns, numItersPerRun, i);
186 | RunBenchmark("GraunkeAndThakkarSpinLock", numRuns, numItersPerRun, i);
187 | #if 0
188 | RunBenchmark("SpinRwLockNaive", numRuns, numItersPerRun, i);
189 | RunBenchmark("SpinRwLockNaivePerThreadReadCounts", numRuns, numItersPerRun, i);
190 | #endif
191 | std::cout << "\n";
192 | }
193 |
194 | const auto endTime = std::chrono::high_resolution_clock::now();
195 | std::cout << "Total elapsed: " << std::chrono::duration_cast(endTime-startTime).count() << " ms\n";
196 | }
197 |
198 | int main(int argc, char *argv[])
199 | {
200 | RunBenchmarks();
201 | return 0;
202 | }
203 |
--------------------------------------------------------------------------------
/excllocks.hpp:
--------------------------------------------------------------------------------
1 | #ifndef EXCL_LOCKS_HPP
2 | #define EXCL_LOCKS_HPP
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #include "os.hpp"
10 |
11 | class Mutex
12 | {
13 | public:
14 | ALWAYS_INLINE void Enter()
15 | {
16 | Mtx.lock();
17 | }
18 |
19 | ALWAYS_INLINE void Leave()
20 | {
21 | Mtx.unlock();
22 | }
23 |
24 | private:
25 | std::mutex Mtx;
26 | };
27 |
28 | #if (OS == UNIX)
29 |
30 | #include
31 |
32 | class SpinLockPThread
33 | {
34 | public:
35 | ALWAYS_INLINE SpinLockPThread()
36 | {
37 | pthread_spin_init(&Lock, 0);
38 | }
39 |
40 | ALWAYS_INLINE void Enter()
41 | {
42 | pthread_spin_lock(&Lock);
43 | }
44 |
45 | ALWAYS_INLINE void Leave()
46 | {
47 | pthread_spin_unlock(&Lock);
48 | }
49 |
50 | private:
51 | pthread_spinlock_t Lock;
52 | };
53 |
54 | #elif (OS == WIN)
55 |
56 | class LockCriticalSection
57 | {
58 | public:
59 | ALWAYS_INLINE LockCriticalSection()
60 | {
61 | InitializeCriticalSection(&Cs);
62 | }
63 |
64 | ALWAYS_INLINE void Enter()
65 | {
66 | EnterCriticalSection(&Cs);
67 | }
68 |
69 | ALWAYS_INLINE void Leave()
70 | {
71 | LeaveCriticalSection(&Cs);
72 | }
73 |
74 | private:
75 | CRITICAL_SECTION Cs;
76 | };
77 |
78 | #endif
79 |
80 | class ScTasSpinLock
81 | {
82 | public:
83 | ALWAYS_INLINE void Enter()
84 | {
85 | while (Locked.exchange(true));
86 | }
87 |
88 | ALWAYS_INLINE void Leave()
89 | {
90 | Locked.store(false);
91 | }
92 |
93 | private:
94 | std::atomic_bool Locked = {false};
95 | };
96 |
97 | class TasSpinLock
98 | {
99 | public:
100 | ALWAYS_INLINE void Enter()
101 | {
102 | while (Locked.exchange(true, std::memory_order_acquire));
103 | }
104 |
105 | ALWAYS_INLINE void Leave()
106 | {
107 | Locked.store(false, std::memory_order_release);
108 | }
109 |
110 | private:
111 | std::atomic_bool Locked = {false};
112 | };
113 |
114 | class TTasSpinLock
115 | {
116 | public:
117 | ALWAYS_INLINE void Enter()
118 | {
119 | do
120 | {
121 | while (Locked.load(std::memory_order_relaxed));
122 | }
123 | while (Locked.exchange(true, std::memory_order_acquire));
124 | }
125 |
126 | ALWAYS_INLINE void Leave()
127 | {
128 | Locked.store(false, std::memory_order_release);
129 | }
130 |
131 | private:
132 | std::atomic_bool Locked = {false};
133 | };
134 |
135 | class RelaxTTasSpinLock
136 | {
137 | public:
138 | ALWAYS_INLINE void Enter()
139 | {
140 | do
141 | {
142 | while (Locked.load(std::memory_order_relaxed))
143 | CpuRelax();
144 | }
145 | while (Locked.exchange(true, std::memory_order_acquire));
146 | }
147 |
148 | ALWAYS_INLINE void Leave()
149 | {
150 | Locked.store(false, std::memory_order_release);
151 | }
152 |
153 | private:
154 | std::atomic_bool Locked = {false};
155 | };
156 |
// TTAS spinlock with CpuRelax while spinning, randomized exponential
// backoff after every failed lock attempt, and a fallback to a short
// sleep once a waiter has spun for a very long time.
class ExpBoRelaxTTasSpinLock
{
public:
    ALWAYS_INLINE void Enter()
    {
        size_t curMaxDelay = MIN_BACKOFF_ITERS; // backoff window, grown by BackoffExp()

        while (true)
        {
            // Spin on plain loads first so waiters don't hammer the cache
            // line with RMW operations ...
            WaitUntilLockIsFree();

            // ... then race for the lock; on failure back off for a random,
            // exponentially growing number of iterations so the competing
            // threads de-synchronize.
            if (Locked.exchange(true, std::memory_order_acquire))
                BackoffExp(curMaxDelay);
            else
                break;
        }
    }

    ALWAYS_INLINE void Leave()
    {
        Locked.store(false, std::memory_order_release);
    }

private:
    // Read-only spin (relaxed loads + CpuRelax) until the lock looks free;
    // after MAX_WAIT_ITERS iterations fall back to sleeping so a
    // descheduled lock holder gets CPU time to finish.
    ALWAYS_INLINE void WaitUntilLockIsFree() const
    {
        size_t numIters = 0;

        while (Locked.load(std::memory_order_relaxed))
        {
            if (numIters < MAX_WAIT_ITERS)
            {
                numIters++;
                CpuRelax();
            }
            else
                YieldSleep();
        }
    }

public:
    // Public on purpose: the reader-writer locks in rwlocks.hpp peek at
    // this flag directly to detect an active writer.
    std::atomic_bool Locked = {false};

private:
    static const size_t MAX_WAIT_ITERS = 0x10000;
    static const size_t MIN_BACKOFF_ITERS = 32;
};
204 |
205 | class TicketSpinLock
206 | {
207 | public:
208 | ALWAYS_INLINE void Enter()
209 | {
210 | const auto myTicketNo = NextTicketNo.fetch_add(1, std::memory_order_relaxed);
211 |
212 | while (ServingTicketNo.load(std::memory_order_acquire) != myTicketNo)
213 | CpuRelax();
214 | }
215 |
216 | ALWAYS_INLINE void Leave()
217 | {
218 | // We can get around a more expensive read-modify-write operation
219 | // (std::atomic_size_t::fetch_add()), because noone can modify
220 | // ServingTicketNo while we're in the critical section.
221 | const auto newNo = ServingTicketNo.load(std::memory_order_relaxed)+1;
222 | ServingTicketNo.store(newNo, std::memory_order_release);
223 | }
224 |
225 | private:
226 | alignas(CACHELINE_SIZE) std::atomic_size_t ServingTicketNo = {0};
227 | alignas(CACHELINE_SIZE) std::atomic_size_t NextTicketNo = {0};
228 | };
229 |
230 | static_assert(sizeof(TicketSpinLock) == 2*CACHELINE_SIZE, "");
231 |
232 | class PropBoTicketSpinLock
233 | {
234 | public:
235 | ALWAYS_INLINE void Enter()
236 | {
237 | constexpr size_t BACKOFF_BASE = 10;
238 | const auto myTicketNo = NextTicketNo.fetch_add(1, std::memory_order_relaxed);
239 |
240 | while (true)
241 | {
242 | const auto servingTicketNo = ServingTicketNo.load(std::memory_order_acquire);
243 | if (servingTicketNo == myTicketNo)
244 | break;
245 |
246 | const size_t waitIters = BACKOFF_BASE*(myTicketNo-servingTicketNo);
247 |
248 | for (size_t i=0; i;
301 | static_assert(sizeof(PaddedFlag) == CACHELINE_SIZE, "");
302 |
303 | alignas(CACHELINE_SIZE) std::vector LockedFlags;
304 | alignas(CACHELINE_SIZE) std::atomic_size_t NextFreeIdx = {0};
305 | alignas(CACHELINE_SIZE) std::atomic_size_t NextServingIdx = {1};
306 | };
307 |
// Graunke & Thakkar array-based queue lock: each thread spins on its own
// cache-line-padded flag, and Tail stores a pointer to the flag of the
// last thread in line with that flag's previous value packed into the
// pointer's least significant bit.
//
// NOTE(review): every template argument list in this class (the
// reinterpret_cast/static_cast targets, std::atomic<...> for Tail and the
// PaddedFlag std::pair<...>) was stripped by a broken export. The casts
// and Tail are presumably uintptr_t-based - restore from version control;
// the surviving tokens are kept verbatim below.
class GraunkeAndThakkarSpinLock
{
public:
    GraunkeAndThakkarSpinLock(size_t maxThreads=std::thread::hardware_concurrency()) :
        LockedFlags(maxThreads)
    {
        // Every flag starts at 1; Leave() flips it, so "flag != old value"
        // signals that the owning thread has left the critical section.
        for (auto &flag : LockedFlags)
            flag.first = 1;

        assert(Tail.is_lock_free());
        Tail = reinterpret_cast(&LockedFlags[0].first);
        assert((Tail&1) == 0); // Make sure there's space to store the old flag value in the LSB
    }

    ALWAYS_INLINE void Enter()
    {
        // Create new tail by chaining my synchronization variable into the list
        const auto &newFlag = LockedFlags[GetThreadIndex()].first;
        const auto newTail = reinterpret_cast(&newFlag)|static_cast(newFlag);
        const auto ahead = Tail.exchange(newTail);

        // Extract flag and old value of previous thread in line, so that we can wait for its completion
        const auto *aheadFlag = reinterpret_cast(ahead&(~static_cast(1)));
        const auto aheadValue = static_cast(ahead&1);

        // Wait for previous thread in line to flip my synchronization variable
        while (aheadFlag->load() == aheadValue)
            CpuRelax();
    }

    ALWAYS_INLINE void Leave()
    {
        // Flipping synchronization variable enables next thread in line to enter CS
        auto &flag = LockedFlags[GetThreadIndex()].first;
        flag = !flag;
    }

private:
    // Hands out a dense, process-wide index per thread (first come, first
    // served). Assumes at most 'maxThreads' distinct threads ever touch
    // this lock instance - asserted below.
    ALWAYS_INLINE size_t GetThreadIndex() const
    {
        static std::atomic_size_t threadCounter = {0};
        thread_local size_t threadIdx = threadCounter++;
        assert(threadIdx < LockedFlags.size());
        return threadIdx;
    }

private:
    using PaddedFlag = std::pair;
    static_assert(sizeof(PaddedFlag) == CACHELINE_SIZE, "");

    // In the LSB the old value of the flag is stored
    alignas(CACHELINE_SIZE) std::atomic Tail;
    alignas(CACHELINE_SIZE) std::vector LockedFlags;

    static_assert(sizeof(decltype(LockedFlags)::value_type) > 1,
                  "Flag size > 1 required: thanks to alginment, old flag value can be stored in LSB");
};
365 |
366 | class McsLock
367 | {
368 | public:
369 | struct QNode
370 | {
371 | std::atomic Next = {nullptr};
372 | std::atomic_bool Locked = {false};
373 | };
374 |
375 | public:
376 | ALWAYS_INLINE void Enter(QNode &node)
377 | {
378 | node.Next = nullptr;
379 | node.Locked = true;
380 |
381 | QNode *oldTail = Tail.exchange(&node);
382 |
383 | if (oldTail != nullptr)
384 | {
385 | oldTail->Next = &node;
386 |
387 | while (node.Locked == true)
388 | CpuRelax();
389 | }
390 | }
391 |
392 | ALWAYS_INLINE void Leave(QNode &node)
393 | {
394 | if (node.Next.load() == nullptr)
395 | {
396 | QNode *tailWasMe = &node;
397 | if (Tail.compare_exchange_strong(tailWasMe, nullptr))
398 | return;
399 |
400 | while (node.Next.load() == nullptr)
401 | CpuRelax();
402 | }
403 |
404 | node.Next.load()->Locked = false;
405 | }
406 |
407 | private:
408 | std::atomic Tail = {nullptr};
409 | };
410 |
411 | #endif
412 |
--------------------------------------------------------------------------------