├── .travis.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── build
│   └── .gitignore
├── example.cpp
├── latch.hpp
├── tests
│   ├── README.md
│   ├── test_latch.cpp
│   └── test_pool.cpp
├── threadpool.cpp
└── threadpool.hpp

/.travis.yml:
--------------------------------------------------------------------------------
1 | language: cpp
2 | compiler:
3 | - gcc
4 | before_install:
5 | - pip install --user cpp-coveralls
6 | script:
7 | - cd build && cmake .. && make && make test
8 | after_success:
9 | - coveralls --exclude tests -r .. -b . --gcov-options '\-lp'
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(threadpool)
2 | cmake_minimum_required(VERSION 3.1)
3 | 
4 | include(CTest)
5 | 
6 | set(CMAKE_CXX_STANDARD 11)
7 | 
8 | set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
9 | set(THREADS_PREFER_PTHREAD_FLAG TRUE)
10 | find_package(Threads REQUIRED)
11 | 
12 | 
13 | add_definitions(-Wall -Wextra)
14 | add_library(threadpool STATIC ${CMAKE_CURRENT_SOURCE_DIR}/threadpool.cpp)
15 | set_target_properties(threadpool PROPERTIES POSITION_INDEPENDENT_CODE ON)
16 | target_link_libraries(threadpool Threads::Threads)
17 | 
18 | if (BUILD_TESTING)
19 | set(CMAKE_CXX_FLAGS "-g -O0 -Wall -fprofile-arcs -ftest-coverage")
20 | add_executable(check ${CMAKE_CURRENT_SOURCE_DIR}/tests/test_pool.cpp)
21 | target_link_libraries(check threadpool)
22 | add_test(NAME threadpool_standalone_tests COMMAND check)
23 | 
24 | add_executable(check_latch ${CMAKE_CURRENT_SOURCE_DIR}/tests/test_latch.cpp)
25 | target_link_libraries(check_latch Threads::Threads)
26 | add_test(NAME latch_standalone_tests COMMAND check_latch)
27 | 
28 | add_executable(example_latchpool ${CMAKE_CURRENT_SOURCE_DIR}/example.cpp)
29 | target_link_libraries(example_latchpool threadpool Threads::Threads)
30 | add_test(NAME latch_pool_example COMMAND example_latchpool)
31 | endif (BUILD_TESTING)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Nathaniel J. McClatchey, PhD
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | 
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/nmcclatchey/ThreadPool.svg?branch=master)](https://travis-ci.org/nmcclatchey/ThreadPool)
2 | [![Coverage Status](https://coveralls.io/repos/github/nmcclatchey/ThreadPool/badge.svg?branch=master)](https://coveralls.io/github/nmcclatchey/ThreadPool?branch=master)
3 | 
4 | # ThreadPool
5 | 
6 | Provides low-overhead concurrent scheduling in C++11 through [thread pools](https://en.wikipedia.org/wiki/Thread_pool "Wikipedia: Thread pool") and [work stealing](https://en.wikipedia.org/wiki/Work_stealing "Wikipedia: Work stealing"). The thread pool approach allows fine-grained parallelism by minimizing the overhead involved in scheduling a task. The work stealing approach allows efficient balancing of scheduled tasks across available threads.
7 | 
8 | ## Why use this library?
9 | 
10 | - It fulfills common scheduling needs:
11 |   + Performs multiple tasks concurrently.
12 |   + Tasks can be scheduled for an arbitrary later time-point. This provides an efficient replacement for timed waits.
13 |   + A task can spawn subtasks, with a hint that the pool ought to complete them as soon as possible.
14 | - It fulfills some uncommon scheduling needs:
15 |   + The pool can be paused, and later resumed.
16 | - It is designed for efficiency and scalability:
17 |   + Load balancing ensures that as long as there is work to do, it is being done by as many threads as possible.
18 |   + Lock-free data structures use [weak atomic orderings](https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering "C++ Reference: Release-Acquire ordering"); only the cores that require new tasks are synchronized.
19 |   + No busy-waiting. Idle threads use [condition variables](https://en.cppreference.com/w/cpp/thread/condition_variable "C++ Reference: condition_variable") to wait without using the CPU.
20 | - It is explicitly documented:
21 |   + Full generated documentation via [Doxygen](http://www.doxygen.nl/).
22 |   + Memory synchronization between task scheduling and execution is explicitly stated in terms of [C++11's memory model](https://en.cppreference.com/w/cpp/atomic/memory_order "C++ Reference: Memory order").
23 |   + Provides usage recommendations for maximizing performance.
24 | 
25 | ## Getting Started
26 | 
27 | This library consists of a single header and source file. One may compile the source file either as part of one's own project, or as a static library.
28 | 
29 | ### Prerequisites
30 | 
31 | You will require a C++11 compiler. If you are using MinGW-w64, you may require [MinGW STD Threads](https://github.com/meganz/mingw-std-threads "MinGW STD Threads") to supply `std::thread` and similar.
32 | 
33 | ### Installing
34 | 
35 | Either compile `threadpool.cpp` as part of your project, or [compile it as a static library](https://en.wikipedia.org/wiki/Static_library "Wikipedia: Static library").
36 | 
37 | ### Using the library
38 | 
39 | The library is designed to enable a simple use pattern:
40 | 1. Create a `ThreadPool` object.
41 | 2. Give tasks to the pool by calling the pool's `schedule()`, `schedule_subtask()`, or `schedule_after()` methods.
42 | 3. Wait for tasks to complete (for example, with the bundled `latch`; see the sketch below).
43 | 
44 | Full documentation for this library may be generated using Doxygen.
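Waiting (step 3) is demonstrated in `example.cpp` using the bundled `latch` from `latch.hpp` (roughly equivalent to C++20's `std::latch`): each task counts the latch down, and the waiting thread blocks until the count reaches zero. A minimal sketch along those lines, in which the 100-task count and the work done inside each task are only placeholders:
```
#include "threadpool.hpp"
#include "latch.hpp"   // bundled latch, roughly equivalent to C++20's std::latch

int main()
{
  ThreadPool pool;
  latch done (100);                // expect 100 count_down() calls

  for (int i = 0; i < 100; ++i)
    pool.schedule([&done]()
    {
      /* ...do some work... */     // placeholder for real work
      done.count_down(1);          // signal that one task has finished
    });

  done.wait();                     // block until all 100 tasks have counted down
  return 0;
}
```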
45 | 
46 | A simple example of how to use the library follows:
47 | ```
48 | #include "threadpool.hpp"
49 | 
50 | // Create a new thread pool, letting the implementation determine the number of worker threads to use.
51 | ThreadPool pool;
52 | 
53 | // Put a task into the pool. Because this isn't called from within a worker thread, it takes the scheduler's slow path.
54 | pool.schedule([](void)
55 | {
56 |   // Put a task into the pool. This is called from within a worker thread, so it takes the scheduler's fast path.
57 |   pool.schedule([](void) {
58 |     do_something();
59 |   });
60 | 
61 |   // Put a task into the pool, treated as if it were part of the currently running task. This is called from within a worker thread, so it takes the scheduler's fast path.
62 |   pool.schedule_subtask([](void) {
63 |     do_something();
64 |   });
65 | 
66 |   // Put a task into the pool, to be executed 2 seconds after it is scheduled.
67 |   using namespace std::chrono;
68 |   pool.schedule_after(seconds(2),
69 |     [](void) {
70 |       do_something();
71 |     });
72 | });
73 | 
74 | // When the thread pool is destroyed, remaining unexecuted tasks are forgotten.
75 | ```
76 | 
77 | ## Authors
78 | 
79 | * **Nathaniel J. McClatchey, PhD** - *Initial work*
80 | 
81 | ## License
82 | 
83 | To encourage people to use this library freely and without concern, this project is licensed under the [MIT License](LICENSE).
84 | 
--------------------------------------------------------------------------------
/build/.gitignore:
--------------------------------------------------------------------------------
1 | /*
2 | !.gitignore
--------------------------------------------------------------------------------
/example.cpp:
--------------------------------------------------------------------------------
1 | // Include this first to check for missed dependencies.
2 | #include "threadpool.hpp"
3 | #include "latch.hpp"
4 | 
5 | #if (!defined(__MINGW32__) || defined(_GLIBCXX_HAS_GTHREADS))
6 | #include <thread>
7 | #else
8 | #include "mingw.thread.h"
9 | #endif
10 | #include <atomic>
11 | #include <chrono>
12 | #include <iostream>
13 | 
14 | int main()
15 | {
16 |   ThreadPool pool;
17 |   latch continuation_guard (1024);
18 |   std::atomic<int> counter {0};
19 | 
20 |   // To wait without blocking threads, define a continuation function, to be
21 |   // executed after all other tasks complete.
22 |   std::function<void(void)> continuation = [&]()
23 |   {
24 |     // Check whether all other subtasks are complete. If not, push another
25 |     // call to this continuation function into the pool.
26 |     if (!continuation_guard.try_wait())
27 |     {
28 |       pool.schedule(continuation);
29 |       return;
30 |     }
31 |     if (counter.load(std::memory_order_relaxed) == 1024)
32 |       std::cout << "SUCCESS\n";
33 |     else
34 |       std::cout << "FAILED\n";
35 |   };
36 | 
37 |   pool.schedule([&](){
38 |     for (int j = 0; j < 1024; ++j)
39 |     {
40 |       pool.schedule_subtask([&](){
41 |         std::this_thread::sleep_for(std::chrono::milliseconds(5));
42 |         counter.fetch_add(1, std::memory_order_relaxed);
43 |         continuation_guard.count_down(1);
44 |       });
45 |     }
46 |     pool.schedule(continuation);
47 |   });
48 |   // Threads outside the pool can take a simpler approach, using the OS's
49 |   // preemptive scheduling or other waiting mechanisms.
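  // Block here: the latch was initialized to 1024, so wait() returns only after every subtask above has called count_down().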
50 | continuation_guard.wait(); 51 | std::cout << "Finishing.\n"; 52 | std::this_thread::sleep_for(std::chrono::milliseconds(200)); 53 | return 0; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /latch.hpp: -------------------------------------------------------------------------------- 1 | /// \file 2 | /// \brief Provides a `latch` class for synchronization, roughly equivalent to 3 | /// C++20's `std::latch`. 4 | 5 | #ifndef LATCH_HPP_ 6 | #define LATCH_HPP_ 7 | 8 | #if (__cplusplus >= 202002L) && __has_include() 9 | #include 10 | using std::latch; 11 | #elif LATCH_USE_WIN32_SYNCHAPI 12 | #include 13 | #include 14 | #include 15 | #include 16 | #define WIN32_LEAN_AND_MEAN 17 | #include 18 | #include 19 | /// \brief Allow threads to wait until a selection of tasks is completed by 20 | /// other threads. 21 | /// 22 | /// `latch`es allow threads to wait for multiple tasks to be completed by 23 | /// other threads. This is vital for applying the *fork-join* paradigm to 24 | /// concurrency models that do not naturally supply a means of joining, such as 25 | /// thread pools, and removes the need to store a tree of forked threads if one 26 | /// employs a large number of worker threads. \n 27 | /// In a typical use-case, a `latch` is locked *n* times by thread *0*, 28 | /// which then spawns worker threads *1, 2, ... n* and waits on the `latch`. 29 | /// Each worker thread completes its task, then unlocks the `latch`. In this 30 | /// example, thread *0* only progresses past the `latch` once all workers 31 | /// complete their tasks. 32 | class latch 33 | { 34 | LONG wait_for_ {0}; 35 | public: 36 | /// \brief Constructs a `latch`. Note: Diverges from `std::latch` in that it 37 | /// is not constexpr. 38 | constexpr explicit latch (std::ptrdiff_t expected = 1) 39 | : wait_for_(expected) 40 | { 41 | assert(expected >= 0); 42 | assert(expected <= (max)()); 43 | } 44 | 45 | ~latch (void) 46 | { 47 | } 48 | 49 | latch (const latch &) = delete; 50 | latch & operator= (const latch &) = delete; 51 | 52 | /// \brief Decreases the number of tasks remaining, and unlocks the `Barrier` 53 | /// if no tasks remain. 54 | void count_down (std::ptrdiff_t n = 1) 55 | { 56 | assert(n >= 0); 57 | assert(n <= (max)()); 58 | LONG previously_waiting = n; 59 | do { 60 | LONG new_waiting = InterlockedCompareExchangeRelease(&wait_for_, previously_waiting - n, previously_waiting); 61 | if (new_waiting == previously_waiting) 62 | break; 63 | previously_waiting = new_waiting; 64 | } while (true); 65 | assert(previously_waiting >= n); 66 | if (previously_waiting <= n) 67 | WakeByAddressAll(&wait_for_); 68 | } 69 | 70 | /// \brief Returns `true` if tasks remain incomplete. 71 | inline bool try_wait (void) const noexcept 72 | { 73 | return InterlockedCompareExchangeAcquire(const_cast(&wait_for_), 0, 0) == 0; 74 | } 75 | 76 | /// \brief Blocks until no tasks remain incomplete. 77 | void wait (void) const 78 | { 79 | do 80 | { 81 | LONG expected = InterlockedCompareExchangeAcquire(const_cast(&wait_for_), 0, 0); 82 | if (expected == 0) 83 | break; 84 | if (!WaitOnAddress(const_cast(&wait_for_), &expected, sizeof(LONG), INFINITE)) 85 | throw std::system_error(GetLastError(), std::system_category()); 86 | } while (true); 87 | } 88 | 89 | /// \brief Counts down, then waits until no tasks remain. 
90 | void arrive_and_wait (std::ptrdiff_t n = 1) 91 | { 92 | assert(n >= 0); 93 | assert(n <= (max)()); 94 | LONG previously_waiting = n; 95 | do { 96 | LONG new_waiting = InterlockedCompareExchangeRelease(&wait_for_, previously_waiting - n, previously_waiting); 97 | if (new_waiting == previously_waiting) 98 | break; 99 | previously_waiting = new_waiting; 100 | } while (true); 101 | assert(previously_waiting >= n); 102 | if (previously_waiting <= n) 103 | WakeByAddressAll(&wait_for_); 104 | else 105 | { 106 | LONG expected = previously_waiting - n; 107 | do { 108 | if (!WaitOnAddress(&wait_for_, &expected, sizeof(LONG), INFINITE)) 109 | throw std::system_error(GetLastError(), std::system_category()); 110 | expected = InterlockedCompareExchangeAcquire(&wait_for_, 0, 0); 111 | } while (expected != 0); 112 | } 113 | } 114 | 115 | static constexpr std::ptrdiff_t (max) (void) noexcept 116 | { 117 | return (std::numeric_limits::max)(); 118 | } 119 | }; 120 | #else 121 | #include 122 | #include 123 | #include 124 | #if defined(__MINGW32__) && !defined(_GLIBCXX_HAS_GTHREADS) 125 | #include 126 | #include 127 | #else 128 | #include 129 | #include 130 | #endif 131 | 132 | /// \brief Allow threads to wait until a selection of tasks is completed by 133 | /// other threads. 134 | /// 135 | /// `latch`es allow threads to wait for multiple tasks to be completed by 136 | /// other threads. This is vital for applying the *fork-join* paradigm to 137 | /// concurrency models that do not naturally supply a means of joining, such as 138 | /// thread pools, and removes the need to store a tree of forked threads if one 139 | /// employs a large number of worker threads. \n 140 | /// In a typical use-case, a `latch` is locked *n* times by thread *0*, 141 | /// which then spawns worker threads *1, 2, ... n* and waits on the `latch`. 142 | /// Each worker thread completes its task, then unlocks the `latch`. In this 143 | /// example, thread *0* only progresses past the `latch` once all workers 144 | /// complete their tasks. 145 | class latch 146 | { 147 | mutable std::condition_variable cv_ {}; 148 | mutable std::mutex mutex_ {}; 149 | std::atomic wait_for_ {0}; 150 | public: 151 | /// \brief Constructs a `latch`. Note: Diverges from `std::latch` in that it 152 | /// is not constexpr. 153 | explicit latch (std::ptrdiff_t expected = 1) 154 | : wait_for_(expected) 155 | { 156 | } 157 | 158 | ~latch (void) 159 | { 160 | } 161 | 162 | latch (const latch &) = delete; 163 | latch & operator= (const latch &) = delete; 164 | 165 | /// \brief Decreases the number of tasks remaining, and unlocks the `Barrier` 166 | /// if no tasks remain. 167 | void count_down (std::ptrdiff_t n = 1) 168 | { 169 | assert(n >= 0); 170 | auto previously_waiting = wait_for_.fetch_sub(n, std::memory_order_release); 171 | assert(previously_waiting >= n); 172 | if (previously_waiting <= n) 173 | { 174 | // Using this mutex synchronizes with the awakened thread, ensuring that the 175 | // barrier is seen to be open. 176 | std::lock_guard guard(mutex_); 177 | cv_.notify_all(); 178 | } 179 | } 180 | 181 | /// \brief Returns `true` if tasks remain incomplete. 182 | inline bool try_wait (void) const noexcept 183 | { 184 | return wait_for_.load(std::memory_order_acquire) == 0; 185 | } 186 | 187 | /// \brief Blocks until no tasks remain incomplete. 
188 | void wait (void) const 189 | { 190 | std::unique_lock lck (mutex_); 191 | cv_.wait(lck, [this]()->bool { return try_wait(); }); 192 | } 193 | 194 | /// \brief Counts down, then waits until no tasks remain. 195 | void arrive_and_wait (std::ptrdiff_t n = 1) 196 | { 197 | assert(n >= 0); 198 | auto previously_waiting = wait_for_.fetch_sub(n, std::memory_order_acq_rel); 199 | assert(previously_waiting >= n); 200 | std::unique_lock lck(mutex_); 201 | if (previously_waiting <= n) 202 | cv_.notify_all(); 203 | else 204 | cv_.wait(lck, [this]()->bool { return try_wait(); }); 205 | } 206 | 207 | static constexpr std::ptrdiff_t max (void) noexcept 208 | { 209 | return std::numeric_limits::max(); 210 | } 211 | }; 212 | #endif 213 | 214 | #endif // LATCH_HPP_ 215 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # ThreadPool tests 2 | 3 | To ensure that the ThreadPool library works properly, I have created a test of its capabilities. The file `tests.cpp` is intended to test 4 | * Compilation 5 | * Expected use cases 6 | * Load-balancing 7 | 8 | The test application will, when run, perform the following: 9 | * Create and destroy a `ThreadPool` without assigning any tasks. 10 | * Assign tasks to a `ThreadPool`. 11 | * Ensure that `ThreadPool`s idle when all tasks are complete, to avoid excessive CPU use. If it fails to idle quickly enough after completion, or does not complete within a reasonable period of time, the test application returns non-zero. 12 | * Restart an idling `ThreadPool` for a second round of tasks. 13 | * Test delayed scheduling of tasks. 14 | * Ensure that an active `ThreadPool` can be safely destroyed (losing its tasks in the process). 15 | * Measure how well tasks are balanced, by counting the minimum, maximum, and average number of tasks performed by each worker thread. Given that the tasks are (mostly) homogeneous, good balance is indicated by similarity of these numbers. 16 | * Pause and resume a `ThreadPool`. 17 | * Destroy a paused `ThreadPool`. -------------------------------------------------------------------------------- /tests/test_latch.cpp: -------------------------------------------------------------------------------- 1 | // Include this first to check for missed dependencies. 2 | #include "../latch.hpp" 3 | 4 | #if (!defined(__MINGW32__) || defined(_GLIBCXX_HAS_GTHREADS)) 5 | #include 6 | #include 7 | #include 8 | #else 9 | #include 10 | #include 11 | #include 12 | #endif 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #define LOG(fmtString,...) 
std::printf(fmtString "\n", ##__VA_ARGS__); fflush(stdout) 21 | 22 | int main() 23 | { 24 | std::atomic result_code {0}; 25 | { 26 | latch test_latch (4); 27 | for (int i = 0; i < 4; ++i) 28 | { 29 | std::thread new_thread([i,&test_latch](){ 30 | std::this_thread::sleep_for(std::chrono::milliseconds(500 * (i + 1))); 31 | test_latch.count_down(1); 32 | }); 33 | new_thread.detach(); 34 | } 35 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 36 | for (int i = 0; i < 4; ++i) 37 | { 38 | if (test_latch.try_wait()) 39 | { 40 | LOG("Exiting far too early (probe point %d)", i); 41 | result_code.fetch_or(1, std::memory_order_relaxed); 42 | } 43 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); 44 | } 45 | if (!test_latch.try_wait()) 46 | { 47 | LOG("%s", "Did not unlock when expected."); 48 | result_code.fetch_or(2, std::memory_order_relaxed); 49 | } 50 | } 51 | { 52 | latch test_latch (4); 53 | std::atomic counter {0}; 54 | for (int i = 0; i < 4; ++i) 55 | { 56 | std::thread new_thread([i,&test_latch, &counter, &result_code](){ 57 | std::this_thread::sleep_for(std::chrono::milliseconds(500 * (i + 1))); 58 | counter.fetch_add(1, std::memory_order_relaxed); 59 | try 60 | { 61 | test_latch.arrive_and_wait(1); 62 | } 63 | catch(const std::system_error & e) 64 | { 65 | std::cerr << "Arrive-and-wait error code " << e.code() << ": " << e.what() << '\n'; 66 | result_code.fetch_or(16, std::memory_order_relaxed); 67 | } 68 | if (counter.load(std::memory_order_relaxed) != 4) 69 | result_code.fetch_or(8, std::memory_order_relaxed); 70 | }); 71 | new_thread.detach(); 72 | } 73 | try 74 | { 75 | test_latch.wait(); 76 | } 77 | catch(const std::system_error & e) 78 | { 79 | std::cerr << "Wait error code " << e.code() << ": " << e.what() << '\n'; 80 | result_code.fetch_or(32, std::memory_order_relaxed); 81 | } 82 | if (counter.load(std::memory_order_relaxed) != 4) 83 | result_code.fetch_or(4, std::memory_order_relaxed); 84 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); 85 | } 86 | return result_code.load(std::memory_order_relaxed); 87 | } 88 | 89 | -------------------------------------------------------------------------------- /tests/test_pool.cpp: -------------------------------------------------------------------------------- 1 | // Include this first to check for missed dependencies. 2 | #include "../threadpool.hpp" 3 | 4 | #if (!defined(__MINGW32__) || defined(_GLIBCXX_HAS_GTHREADS)) 5 | #include 6 | #include 7 | #include 8 | #else 9 | #include 10 | #include 11 | #include 12 | #endif 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define LOG(fmtString,...) 
printf(fmtString "\n", ##__VA_ARGS__); fflush(stdout) 20 | 21 | using namespace std; 22 | 23 | namespace 24 | { 25 | constexpr size_t kTestMaxThreads = 1024; 26 | constexpr size_t kTestRootTasks = 1000; 27 | constexpr size_t kTestBranchFactor = 800; 28 | constexpr uint_fast64_t kTestTotalTasks = kTestRootTasks * kTestBranchFactor; 29 | 30 | thread_local std::atomic * task_slot_local = nullptr; 31 | std::atomic task_slot_next(0); 32 | std::atomic executed_tasks [kTestMaxThreads * 64]; 33 | 34 | 35 | void perform_task (void) 36 | { 37 | if (task_slot_local == nullptr) 38 | { 39 | auto n = task_slot_next.fetch_add(1, std::memory_order_relaxed); 40 | assert(n < kTestMaxThreads); 41 | task_slot_local = executed_tasks + (n * 64 / sizeof(*executed_tasks)); 42 | } 43 | task_slot_local->fetch_add(1, std::memory_order_release); 44 | } 45 | 46 | std::condition_variable cv; 47 | std::mutex mtx; 48 | 49 | bool one_is_active = false; 50 | size_t alive_count = 0; 51 | void stay_active (ThreadPool & pool) 52 | { 53 | { 54 | std::lock_guard lk (mtx); 55 | one_is_active = true; 56 | /*++alive_count; 57 | if ((alive_count & (alive_count + 1)) == 0) 58 | std::printf("Alive, %llu\n", alive_count);*/ 59 | cv.notify_all(); 60 | } 61 | pool.schedule([&pool](void){ stay_active(pool); }); 62 | } 63 | 64 | void gather_statistics (uint_fast64_t & balance_min, 65 | uint_fast64_t & balance_max, 66 | uint_fast64_t & balance_total) 67 | { 68 | balance_min = ~static_cast(0); 69 | balance_max = balance_total = 0; 70 | for (uint_fast32_t n = 0; n < task_slot_next.load(std::memory_order_acquire); ++n) 71 | { 72 | auto it = executed_tasks[n * 8].load(std::memory_order_relaxed); 73 | if (balance_max < it) 74 | balance_max = it; 75 | if (balance_min > it) 76 | balance_min = it; 77 | balance_total += it; 78 | } 79 | } 80 | } 81 | 82 | int main() 83 | { 84 | int test_id = 0; 85 | { 86 | LOG("Test %u:\t%s",++test_id,"Query static information"); 87 | LOG("\tWorker queue capacity is %zu tasks.",ThreadPool::get_worker_capacity()); 88 | } 89 | { 90 | LOG("Test %u:\t%s",++test_id,"Construct and destroy empty threadpool."); 91 | { 92 | ThreadPool pool; 93 | LOG("\t%s","Construct successful."); 94 | } 95 | LOG("\t%s","Destroy successful."); 96 | } 97 | std::atomic logged_errors {0}; 98 | #ifndef NDEBUG 99 | { 100 | LOG("Test %u:\t%s", ++test_id, "Disallow null function pointers."); 101 | LOG("\t%s","Constructing a thread pool."); 102 | ThreadPool pool; 103 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 104 | 105 | std::condition_variable schedule_cv; 106 | std::mutex schedule_mtx; 107 | bool ready = false; 108 | 109 | pool.schedule([&](void) 110 | { 111 | try { 112 | try { 113 | std::function null_func; 114 | pool.schedule_subtask(null_func); 115 | logged_errors |= 8; 116 | } catch (std::bad_function_call &) {} 117 | try { 118 | pool.schedule_subtask(std::function()); 119 | logged_errors |= 8; 120 | } catch (std::bad_function_call &) {} 121 | } catch (...) 
122 | { 123 | logged_errors |= 8; 124 | } 125 | std::lock_guard guard {schedule_mtx}; 126 | ready = true; 127 | schedule_cv.notify_all(); 128 | }); 129 | try { 130 | std::function null_func; 131 | pool.schedule(null_func); 132 | logged_errors |= 8; 133 | } catch (std::bad_function_call &) {} 134 | try { 135 | pool.schedule(std::function()); 136 | logged_errors |= 8; 137 | } catch (std::bad_function_call &) {} 138 | try { 139 | std::function null_func; 140 | pool.schedule_after(std::chrono::seconds(1), null_func); 141 | logged_errors |= 8; 142 | } catch (std::bad_function_call &) {} 143 | try { 144 | pool.schedule_after(std::chrono::seconds(1), std::function()); 145 | logged_errors |= 8; 146 | } catch (std::bad_function_call &) {} 147 | std::unique_lock lck {schedule_mtx}; 148 | schedule_cv.wait(lck,[&ready]()->bool { return ready; }); 149 | LOG("\t%s", "Destroying the thread pool."); 150 | } 151 | #endif 152 | 153 | { 154 | LOG("Test %u:\t%s",++test_id,"Use threadpool for tasks."); 155 | LOG("\t%s","Constructing a thread pool."); 156 | ThreadPool pool; 157 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 158 | for (unsigned nn = 0; nn < 2; ++nn) 159 | { 160 | LOG("\t%s","Resetting task-recording utilities..."); 161 | for (unsigned i = 0; i < 8 * 64; ++i) 162 | executed_tasks[i].store(0, std::memory_order_release); 163 | bool already_idling = false; 164 | 165 | LOG("\tScheduling some %s tasks...", (nn == 0) ? "immediate" : "delayed"); 166 | pool.schedule([&](void) 167 | { 168 | for (unsigned i = 0; i < kTestRootTasks / 2; ++i) 169 | { 170 | pool.schedule_after(std::chrono::seconds(nn), [&](void) 171 | { 172 | for (unsigned j = 0; j < kTestBranchFactor; ++j) 173 | { 174 | pool.schedule_subtask(&perform_task); 175 | } 176 | }); 177 | } 178 | for (unsigned i = kTestRootTasks / 2; i < kTestRootTasks; ++i) 179 | { 180 | std::function lvalue_task ( [&](void) 181 | { 182 | for (unsigned j = 0; j < kTestBranchFactor; ++j) 183 | { 184 | pool.schedule_subtask(&perform_task); 185 | } 186 | } ); 187 | pool.schedule_after(std::chrono::seconds(nn), lvalue_task); 188 | } 189 | }); 190 | LOG("\t\t%s","Done. 
Tasks scheduled successfully."); 191 | LOG("\t%s","Waiting a bit while tasks complete..."); 192 | 193 | unsigned total_ms = 0; 194 | for (unsigned ii = 0; ii < 9; ++ii) 195 | { 196 | using namespace std::chrono; 197 | unsigned sleep_ms = (100u << ii); 198 | std::this_thread::sleep_for(milliseconds(sleep_ms)); 199 | total_ms += sleep_ms; 200 | 201 | LOG("\t\t%s","Checking whether tasks are completed..."); 202 | uint_fast64_t balance_min, balance_max, balance_total; 203 | gather_statistics(balance_min, balance_max, balance_total); 204 | LOG("\t\tCompleted %llu / %llu tasks so far.", static_cast(balance_total), static_cast(kTestTotalTasks)); 205 | if (pool.is_idle() && (balance_total == kTestTotalTasks)) 206 | { 207 | gather_statistics(balance_min, balance_max, balance_total); 208 | LOG("\tPool has idled, as expected, with all %llu tasks complete.", static_cast(kTestTotalTasks)); 209 | LOG("\tProcessor utilization [min / mean / max]:\t%llu / %llu / %llu", static_cast(balance_min), static_cast(balance_total / pool.get_concurrency()), static_cast(balance_max)); 210 | break; 211 | } 212 | else if (balance_total == kTestTotalTasks) 213 | { 214 | if (already_idling) 215 | { 216 | LOG("\t%s","Pool has not yet idled, despite all tasks being complete; this is probably an error."); 217 | logged_errors |= 1; 218 | } else 219 | already_idling = true; 220 | } 221 | } 222 | if (!pool.is_idle()) 223 | { 224 | LOG("\t\tPool failed to complete all tasks after %u seconds. There is probably an error.", total_ms / 1000); 225 | logged_errors |= 2; 226 | } 227 | } 228 | LOG("\t%s", "Destroying the thread pool."); 229 | } 230 | 231 | { 232 | LOG("Test %u:\t%s",++test_id,"Destroy a ThreadPool with running task-chains."); 233 | LOG("\t%s","Constructing a thread pool."); 234 | ThreadPool pool (2); 235 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 236 | LOG("\t%s", "Scheduling several undying tasks..."); 237 | { 238 | std::unique_lock guard (mtx); 239 | one_is_active = false; 240 | for (unsigned n = 0; n < 16; ++n) 241 | pool.schedule([&pool](void) { stay_active(pool); }); 242 | cv.wait(guard, [](void)->bool { return one_is_active; }); 243 | } 244 | LOG("\t\t%s","Done. Tasks are running."); 245 | LOG("\t%s", "Destroying the thread pool."); 246 | } 247 | 248 | { 249 | LOG("Test %u:\t%s",++test_id,"Pause and resume a ThreadPool with running task-chains."); 250 | LOG("\t%s","Constructing a thread pool."); 251 | ThreadPool pool (3); 252 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 253 | LOG("\t%s", "Scheduling several undying tasks..."); 254 | { 255 | std::unique_lock guard (mtx); 256 | one_is_active = false; 257 | alive_count = 0; 258 | for (unsigned n = 0; n < 16; ++n) 259 | pool.schedule([&pool](void) { stay_active(pool); }); 260 | cv.wait(guard, [](void)->bool { return one_is_active; }); 261 | } 262 | LOG("\t\t%s","Done. Tasks scheduled successfully."); 263 | LOG("\t%s","Pausing..."); 264 | pool.halt(); 265 | LOG("\t%s","Waiting for a bit..."); 266 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 267 | if (pool.is_halted()) 268 | { 269 | LOG("\t%s", "Pool did pause."); 270 | } 271 | else 272 | { 273 | LOG("\t%s", "Pool did not pause. 
This is most unusual!"); 274 | logged_errors |= 4; 275 | } 276 | LOG("\t%s","Unpausing..."); 277 | pool.resume(); 278 | LOG("\t%s","Waiting for 0.3 seconds..."); 279 | std::this_thread::sleep_for(std::chrono::milliseconds(300)); 280 | LOG("\t%s", "Destroying the thread pool."); 281 | } 282 | 283 | { 284 | LOG("Test %u:\t%s",++test_id,"Destroy a paused Threadpool."); 285 | LOG("\t%s","Constructing a thread pool."); 286 | ThreadPool pool (5); 287 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 288 | LOG("\t%s", "Scheduling several undying tasks..."); 289 | { 290 | std::unique_lock guard (mtx); 291 | one_is_active = false; 292 | alive_count = 0; 293 | for (unsigned n = 0; n < 16; ++n) 294 | pool.schedule([&pool](void) { stay_active(pool); }); 295 | cv.wait(guard, [](void)->bool { return one_is_active; }); 296 | } 297 | LOG("\t\t%s","Done. Tasks scheduled successfully."); 298 | LOG("\t%s","Pausing..."); 299 | pool.halt(); 300 | while (!pool.is_halted()) 301 | std::this_thread::sleep_for(std::chrono::milliseconds(50)); 302 | LOG("\t%s", "Destroying the thread pool."); 303 | } 304 | 305 | { 306 | LOG("Test %u:\t%s",++test_id,"Attempt to pause from within a worker thread."); 307 | LOG("\t%s","Constructing a thread pool."); 308 | ThreadPool pool; 309 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 310 | LOG("\t%s", "Scheduling a few tasks, including a pausing task."); 311 | { 312 | std::unique_lock guard (mtx); 313 | one_is_active = false; 314 | alive_count = 0; 315 | for (unsigned n = 0; n < 16; ++n) 316 | pool.schedule([&pool](void) { stay_active(pool); }); 317 | pool.schedule([&pool](void) { stay_active(pool); pool.halt(); }); 318 | cv.wait(guard, [](void)->bool { return one_is_active; }); 319 | } 320 | LOG("\t\t%s","Done. Tasks scheduled successfully."); 321 | LOG("\t\t%s","Done. Waiting for a bit..."); 322 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 323 | LOG("\t%s","Unpausing..."); 324 | pool.resume(); 325 | LOG("\t\t%s","Done. Waiting for a bit..."); 326 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 327 | LOG("\t%s", "Destroying the thread pool."); 328 | } 329 | 330 | { 331 | LOG("Test %u:\t%s",++test_id,"Attempt to pause from within a worker thread, and then destroy the pool."); 332 | LOG("\t%s","Constructing a thread pool."); 333 | ThreadPool pool; 334 | LOG("\t\tDone.\tNote: Pool has %u worker threads.", pool.get_concurrency()); 335 | LOG("\t%s", "Scheduling a few tasks, including a pausing task."); 336 | { 337 | std::unique_lock guard (mtx); 338 | one_is_active = false; 339 | alive_count = 0; 340 | for (unsigned n = 0; n < 16; ++n) 341 | pool.schedule([&pool](void) { stay_active(pool); }); 342 | pool.schedule([&pool](void) { stay_active(pool); pool.halt(); pool.halt(); pool.halt(); }); 343 | cv.wait(guard, [](void)->bool { return one_is_active; }); 344 | } 345 | LOG("\t\t%s","Done. Tasks scheduled successfully."); 346 | LOG("\t\t%s","Done. Waiting for a bit second..."); 347 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 348 | LOG("\t%s", "Destroying the thread pool."); 349 | } 350 | LOG("%s", "Exiting..."); 351 | return logged_errors; 352 | } 353 | 354 | -------------------------------------------------------------------------------- /threadpool.cpp: -------------------------------------------------------------------------------- 1 | /// \file threadpool.cpp 2 | /// \brief Implements `threadpool.hpp`. 3 | /// \author Nathaniel J. 
McClatchey, PhD
4 | /// \copyright Copyright (c) 2017-2019 Nathaniel J. McClatchey, PhD. \n
5 | /// Licensed under the MIT license. \n
6 | /// You should have received a copy of the license with this software.
7 | /// \note To compile for MinGW-w64 without linking against the *winpthreads*
8 | /// library, use the [*MinGW Windows STD Threads* library](https://github.com/meganz/mingw-std-threads "MinGW STD Threads").
9 | #include "threadpool.hpp"
10 | 
11 | #if !defined(__cplusplus)
12 | #error The implementation of ThreadPool requires C++11 or higher.
13 | #endif
14 | 
15 | // Debugging:
16 | #include <cassert>              // Fail deadly on internal library error.
17 | #ifndef NDEBUG
18 | #include                       // Warn on task queue overflow.
19 | #endif
20 | // Memory management (for allocate-once approach):
21 | #include <cstdlib>              // For std::malloc and std::free.
22 | #include <memory>               // For std::align and std::unique_ptr.
23 | #if (__cplusplus >= 201703L) && !defined(THREAD_POOL_FALSE_SHARING_ALIGNMENT)
24 | #include <new>                  // Used to detect cache size.
25 | #endif
26 | // Integers:
27 | #include <cstdint>              // Fixed-width integer types.
28 | #include <atomic>               // Relaxed memory orderings, for efficiency.
29 | #include <limits>               // Type sizes and maximum values.
30 | // Central queue management:
31 | #include <algorithm>            // Delayed-task sorting.
32 | #include <vector>               // Delayed-task storage.
33 | #include <deque>                // For central task queue.
34 | // Miscellaneous type information:
35 | #include <type_traits>          // Detect conditions needed for noexcept.
36 | #include <utility>              // For std::declval
37 | 
38 | // Threading facilities:
39 | #if (!defined(__MINGW32__) || defined(_GLIBCXX_HAS_GTHREADS))
40 | #include <thread>               // For threads. Duh.
41 | #include <mutex>                // For locking of central queue.
42 | #include <condition_variable>   // Let threads sleep instead of spin when idle.
43 | #else
44 | // This toolchain-specific workaround allows ThreadPool to be used with
45 | // MinGW-w64 even without linking the winpthreads library. If you lack these
46 | // headers, you can find them at https://github.com/nmcclatchey/mingw-std-threads .
47 | #include "mingw.thread.h"
48 | #include "mingw.mutex.h"
49 | #include "mingw.condition_variable.h"
50 | #endif
51 | 
52 | namespace {
53 | #ifdef THREAD_POOL_FALSE_SHARING_ALIGNMENT
54 | // If a user has supplied a false-sharing alignment, use it.
55 | constexpr std::size_t kFalseSharingAlignment = THREAD_POOL_FALSE_SHARING_ALIGNMENT;
56 | #elif defined(__cpp_lib_hardware_interference_size) && (__cpp_lib_hardware_interference_size >= 201703L)
57 | constexpr std::size_t kFalseSharingAlignment = std::hardware_destructive_interference_size;
58 | #else
59 | // No hints? Use a typical cache line size.
60 | constexpr std::size_t kFalseSharingAlignment = 64;
61 | #endif
62 | // Forward-declarations
63 | struct Worker;
64 | struct ThreadPoolImpl;
65 | 
66 | /// \brief Determines the capacity of each `Worker`'s queue. Larger values take
67 | /// more memory, but less processing power. The reverse holds for smaller
68 | /// values.
69 | /// \note Must be positive.
70 | constexpr std::uint_fast8_t kLog2Modulus = 12u;
71 | 
72 | static_assert(kLog2Modulus > 0, "Worker thread capacity must be positive.");
73 | 
74 | constexpr std::uint_fast32_t kModulus = 1ull << kLog2Modulus;
75 | 
76 | static_assert(kLog2Modulus < std::numeric_limits<std::uint_fast32_t>::digits, "Worker thread capacity must not be excessive.");
77 | 
78 | /// \brief Least-significant bit of an integer. Useful for alignment of arrays,
79 | /// because an alignment greater than the L.S.B. of the size of an element
80 | /// will be ruined on increment.
81 | template 82 | constexpr Integer lsb (Integer x) noexcept 83 | { 84 | return ((x - 1) & x) ^ x; 85 | } 86 | 87 | /// \brief Checks whether and integer is a power-of-2. Useful for alignment 88 | /// debugging. 89 | template 90 | constexpr bool is_pow2 (Integer x) noexcept 91 | { 92 | return ((x - 1) & x) == 0; 93 | } 94 | 95 | /// \brief Checks whether (n1 > n2) || (n1 == 0). Clang optimizes this, while 96 | /// GCC does not (even in 9.0) 97 | template 98 | inline constexpr bool greater_or_zero (Integer n1, Integer n2) noexcept 99 | { 100 | // return (n1 > n2) || (n1 == 0); 101 | static_assert(std::numeric_limits::is_signed == false, 102 | "This optimization depends on using unsigned comparison."); 103 | return (n1 - 1u >= n2); 104 | } 105 | 106 | static_assert(is_pow2(kFalseSharingAlignment), 107 | "Alignments must be integer powers of 2."); 108 | 109 | /// \brief Exactly what it says on the tin. I'd use `std::min`, but that's not 110 | /// `constexpr` until C++14. 111 | template 112 | constexpr typename std::common_type::type min (In1 x, In2 y) noexcept 113 | { 114 | using result_type = decltype(min(x,y)); 115 | return (static_cast(x) < static_cast(y)) ? x : y; 116 | } 117 | 118 | /// \brief Exactly what it says on the tin. I'd use `std::max`, but that's not 119 | /// `constexpr` until C++14. 120 | template 121 | constexpr typename std::common_type::type max (In1 x, In2 y) noexcept 122 | { 123 | using result_type = decltype(max(x,y)); 124 | return (static_cast(x) < static_cast(y)) ? y : x; 125 | } 126 | 127 | /// \brief Determines an alignment that minimizes the number of times that a 128 | /// densely-packed array of `T` would have an instance of `T` straddling a 129 | /// cache-line border. 130 | template 131 | constexpr std::size_t get_align (void) 132 | { 133 | return max(alignof(T), min(lsb(sizeof(T)), kFalseSharingAlignment)); 134 | } 135 | 136 | /// \brief Destructor that allows `std::unique_ptr` to be used with memory 137 | /// acquired using `malloc`. 138 | struct RawDeleter 139 | { 140 | void operator() (void * ptr) const 141 | { 142 | std::free(ptr); 143 | } 144 | }; 145 | 146 | /// \brief Provides O(1) access to the Worker that is handling the current 147 | /// function (if any). Used to provide a fast path for scheduling within the 148 | /// ThreadPool. 
149 | thread_local Worker * current_worker = nullptr; 150 | 151 | struct ThreadPoolImpl 152 | { 153 | using task_type = typename ThreadPool::task_type; 154 | using clock = std::chrono::steady_clock; 155 | using timed_task = std::pair; 156 | using index_type = std::uint_fast16_t; 157 | 158 | ThreadPoolImpl (Worker *, index_type); 159 | ~ThreadPoolImpl (void); 160 | 161 | // Returns number of allocated Workers (may differ from active workers later) 162 | inline index_type get_capacity (void) const noexcept 163 | { 164 | return num_workers_; 165 | } 166 | 167 | inline index_type get_concurrency (void) const noexcept 168 | { 169 | return num_threads_.load(std::memory_order_relaxed); 170 | } 171 | 172 | void halt (void); 173 | void resume (void); 174 | bool is_halted (void) const; 175 | 176 | template 177 | void schedule_overflow (Task &&); 178 | 179 | template 180 | void schedule_after (clock::duration const &, Task &&); 181 | 182 | bool is_idle (void) const; 183 | 184 | inline bool should_stop (void) const noexcept 185 | { 186 | return stop_.load(std::memory_order_relaxed) & 0x01; 187 | } 188 | 189 | 190 | inline void notify_if_idle (void) noexcept 191 | { 192 | if (idle_ > 0) 193 | cv_.notify_one(); 194 | } 195 | inline bool might_have_task (void) const noexcept 196 | { 197 | return !queue_.empty(); 198 | } 199 | // Note: Does no synchronization of its own. 200 | inline bool has_task (void) const noexcept 201 | { 202 | return !queue_.empty(); 203 | } 204 | // Note: Does no synchronization of its own. 205 | inline std::size_t size (void) const noexcept 206 | { 207 | return queue_.size(); 208 | } 209 | // Note: Does no synchronization of its own. 210 | void update_tasks (void) 211 | { 212 | if (time_queue_.empty()) 213 | return; 214 | auto time_now = clock::now(); 215 | 216 | while (time_now >= time_queue_.front().first) 217 | { 218 | // If an exception was thrown, it was thrown in `push`. Because of the strong 219 | // exception-safety guarantee, nothing actually happens. 220 | try { 221 | push(std::move(time_queue_.front().second)); 222 | } catch (std::bad_alloc &) { 223 | return; 224 | } 225 | // The pop_back method for a vector should be non-throwing. 226 | std::pop_heap(time_queue_.begin(), time_queue_.end(), TaskOrder{}); 227 | time_queue_.pop_back(); 228 | 229 | if (time_queue_.empty()) 230 | break; 231 | } 232 | } 233 | // Note: Does no synchronization of its own. 234 | task_type extract_task (void) 235 | { 236 | assert(!queue_.empty() && "Cannot retrieve a task from an empty queue."); 237 | task_type result = std::move(queue_.front()); 238 | queue_.pop_front(); 239 | return result; 240 | } 241 | 242 | /// \par Exception safety 243 | /// Provides the strong (rollback) guarantee, even with move semantics. 244 | template 245 | inline void push (Task && task) 246 | { 247 | queue_.push_back(std::forward(task)); 248 | } 249 | 250 | /// \par Exception safety 251 | /// Provides the strong (rollback) guarantee unless the task can only be moved 252 | /// and has a throwing move constructor. 253 | template 254 | inline void push_at (clock::time_point const & tp, Task && task) 255 | { 256 | time_queue_.push_back(timed_task{tp, std::forward(task)}); 257 | std::push_heap(time_queue_.begin(), time_queue_.end(), TaskOrder{}); 258 | } 259 | 260 | // Note: wait and wait_until don't throw in C++14 and later. 
261 | void wait_for_task (std::unique_lock & lk) 262 | { 263 | assert(lk.mutex() == &mutex_ &&"Incorrect mutex used for synchronization."); 264 | if (time_queue_.empty()) 265 | cv_.wait(lk); 266 | else 267 | cv_.wait_until(lk, time_queue_.front().first); 268 | } 269 | 270 | inline Worker * data (void) noexcept 271 | { 272 | return workers_; 273 | } 274 | private: 275 | struct TaskOrder { 276 | inline bool operator() (timed_task const & lhs, timed_task const & rhs) const noexcept 277 | { 278 | return lhs.first > rhs.first; 279 | } 280 | }; 281 | 282 | std::condition_variable cv_ {}; 283 | mutable std::mutex mutex_ {}; 284 | 285 | std::deque queue_ {}; 286 | std::vector time_queue_ {}; 287 | 288 | Worker * const workers_; 289 | 290 | index_type num_workers_ {0}, 291 | living_ {0}, idle_ {0}, paused_ {0}; 292 | std::atomic num_threads_ {0}; 293 | 294 | std::atomic stop_ {0x00}; 295 | 296 | ThreadPoolImpl (ThreadPoolImpl const &) = delete; 297 | ThreadPoolImpl & operator= (ThreadPoolImpl const &) = delete; 298 | 299 | void stop_threads (std::unique_lock&); 300 | 301 | friend struct Worker; 302 | }; 303 | 304 | // Notes: 305 | // - "front_" is always claimed for the worker. 306 | // - "back_" stores past-the-end markers both for writing and validity. If 307 | // they are unequal, the back is locked. 308 | // - For various reasons, it is possible for the front marker to be between 309 | // the write and valid pte markers. In such a case, the already-claimed task 310 | // may be read, but no further tasks will be read, even if claimed. 311 | struct alignas(kFalseSharingAlignment) Worker 312 | { 313 | using task_type = typename ThreadPool::task_type; 314 | using index_type = std::uint_fast32_t; 315 | 316 | Worker (ThreadPoolImpl &) noexcept; 317 | ~Worker (void); 318 | 319 | void operator() (void); 320 | 321 | bool is_alive (void) const noexcept 322 | { 323 | return thread_.joinable(); 324 | } 325 | 326 | void restart_thread (void) 327 | { 328 | assert(!pool_.should_stop() && "Start or stop new threads. 
Not both."); 329 | if (!thread_.joinable()) // noexcept 330 | { 331 | thread_ = std::thread(std::reference_wrapper(*this)); 332 | pool_.num_threads_.fetch_add(1, std::memory_order_relaxed); 333 | } 334 | } 335 | void stop_thread (void) 336 | { 337 | assert(pool_.should_stop() && "Spurious thread-stopping detected."); 338 | if (thread_.joinable()) // noexcept 339 | { 340 | thread_.join(); 341 | pool_.num_threads_.fetch_sub(1, std::memory_order_relaxed); 342 | } 343 | } 344 | 345 | inline bool belongs_to (ThreadPoolImpl const * ptr) const noexcept 346 | { 347 | return &pool_ == ptr; 348 | } 349 | 350 | inline bool get_paused (void) const noexcept 351 | { 352 | return paused_; 353 | } 354 | 355 | inline void set_paused (bool val) noexcept 356 | { 357 | paused_ = val; 358 | } 359 | 360 | template 361 | bool push (Task && tasks); 362 | 363 | template 364 | bool push_front (Task && tasks); 365 | 366 | index_type count_tasks (void) const noexcept; 367 | 368 | void canibalize (ThreadPoolImpl &); 369 | 370 | private: 371 | Worker (Worker const &) = delete; 372 | Worker & operator= (Worker const &) = delete; 373 | 374 | constexpr static std::size_t kValidShift = std::numeric_limits::digits / 2; 375 | constexpr static index_type kWriteMask = ~(~static_cast(0) << kValidShift); 376 | static_assert(kLog2Modulus <= kValidShift, \ 377 | "ThreadPool's local task queue size exceeds limit of selected index type."); 378 | 379 | inline static constexpr 380 | index_type get_distance (index_type left, index_type right) noexcept 381 | { 382 | return (right - left + kModulus) % kModulus; 383 | } 384 | 385 | inline static constexpr index_type get_valid (index_type b) noexcept 386 | { 387 | return b >> kValidShift; 388 | } 389 | 390 | inline static constexpr index_type get_write (index_type b) noexcept 391 | { 392 | static_assert((kWriteMask >> kValidShift) == 0, "WRITE and VALID regions must not intersect."); 393 | return b & kWriteMask; 394 | } 395 | 396 | inline static constexpr 397 | index_type make_back (index_type write, index_type valid) noexcept 398 | { 399 | return write | (valid << kValidShift); 400 | } 401 | 402 | inline static constexpr index_type make_back (index_type write) noexcept 403 | { 404 | return write | (write << kValidShift); 405 | } 406 | 407 | 408 | unsigned steal (void); 409 | unsigned steal_from (Worker & source) noexcept(std::is_nothrow_destructible::value && std::is_nothrow_move_constructible::value); 410 | bool pop (task_type & task) noexcept(std::is_nothrow_destructible::value && std::is_nothrow_move_assignable::value); 411 | unsigned push_front(ThreadPoolImpl &, unsigned number); 412 | bool execute (void); 413 | void refresh_tasks (ThreadPoolImpl &, unsigned number); 414 | 415 | /// \brief Activates a task slot within the queue, and fills it appropriately. 416 | template 417 | void place_task (index_type location, Task && task) 418 | noexcept(std::is_nothrow_constructible::value) 419 | { 420 | static_assert(std::is_trivially_destructible::value, 421 | "Implicit destruction is used here, and thus is required here."); 422 | new(std::addressof(tasks_[location].task_)) task_type(std::forward(task)); 423 | } 424 | /// \brief Deactivates a task slot, and returns what was inside before the 425 | /// deactivation. 426 | task_type remove_task (index_type location) 427 | noexcept(std::is_nothrow_destructible::value) 428 | { 429 | task_type result = std::move(tasks_[location].task_); 430 | tasks_[location].task_.~task_type(); 431 | // Set the new active member of the union. Should be a no-op. 
432 | static_assert(std::is_trivial::value, 433 | "The default value for implicit optional values must be trivial."); 434 | tasks_[location].empty_ = OptionalTask::Empty(); 435 | return result; 436 | } 437 | 438 | template 439 | void remove_all_and (Func const &); 440 | 441 | // These store information about the current state of the deque. 442 | // - front_ is modified only by the Worker's own thread. Reads and writes 443 | // must be atomic, however, to avoid torn writes. 444 | // - back_ is potentially modified by all threads. The top and bottom halves 445 | // store a past-the-end (PTE) marker for the occupied slots, and a PTE marker 446 | // for the slots this Worker is permitted to read, respectively. 447 | std::atomic front_ {0}, back_ {0}; 448 | // When this Worker runs out of tasks, it will search for more. A central 449 | // ThreadPool object will serve to coordinate work-stealing (that is, store the 450 | // addresses of other Workers), provide new tasks, and capture overflow should 451 | // a Worker generate more tasks than can fit in its deque. 452 | ThreadPoolImpl & pool_; 453 | // To avoid starvation for tasks in the overflow queue, I pull in its tasks 454 | // once every time a worker finishes a batch of tasks. The variable countdown_ 455 | // records the remaining size of the batch. A successfully scheduled subtask 456 | // will increment this to ensure the originally scheduled tasks are completed 457 | // as part of the batch. 458 | static_assert(kLog2Modulus < std::numeric_limits::digits - 2, 459 | "The behavior of the worker queue's starvation-avoidance algorithm has not yet \ 460 | been examined in the case that the countdown variable is small relative to the \ 461 | task-queue."); 462 | std::uint_fast32_t countdown_; 463 | // While a task is being executed, the front_ marker is not incremented. This 464 | // avoids early claiming of a new task (which would prevent that task from 465 | // being stolen), but makes the push-to-front process a bit more complicated. 466 | // In particular, the push-to-front should overwrite the front when first 467 | // called during an execution, but not afterward. 468 | bool front_invalid_; 469 | bool paused_; 470 | // Need to keep the thread's handle for later joining. I could work around 471 | // this, but the workaround would be less efficient. 472 | std::thread thread_ {}; 473 | // Task queue. When information about the cache is available, allocate so 474 | // that tasks aren't split across cache lines. Note: If splitting is 475 | // inevitable, make a best-effort attempt to reduce it. 476 | union OptionalTask { 477 | struct Empty {} empty_; 478 | task_type task_; 479 | 480 | OptionalTask (void) noexcept : empty_() {} 481 | ~OptionalTask (void) noexcept {} 482 | }; 483 | alignas(get_align()) OptionalTask tasks_ [kModulus]; 484 | }; 485 | 486 | Worker::Worker (ThreadPoolImpl & pool) noexcept 487 | : pool_(pool), 488 | countdown_(2), front_invalid_(false), paused_(false) 489 | { 490 | } 491 | 492 | // Only called after all workers have stopped. 493 | Worker::~Worker (void) 494 | { 495 | // If this assert fails, either synchronization wasn't performed, or a task 496 | // is actively running. Either way, the code would need a fix. 497 | assert(!front_invalid_ && "Attempting to destroy a running worker!"); 498 | 499 | // Remove tasks without using them in any way. 500 | remove_all_and([](task_type&&) noexcept {}); 501 | } 502 | 503 | // Removes each task from a Worker and applies func to it. 
Note: Must 504 | // not be called before the Worker's thread is fully stopped. 505 | /// \note Has exactly one possibly-throwing statement. 506 | template 507 | void Worker::remove_all_and (Func const & func) 508 | { 509 | index_type back = back_.load(std::memory_order_relaxed); 510 | 511 | // For safety, block stealing during this. Note: Won't block the worker that 512 | // is being destroyed. 513 | do { 514 | back = make_back(get_valid(back)); 515 | } while (!back_.compare_exchange_weak(back, make_back(1, 0), 516 | std::memory_order_acquire, std::memory_order_relaxed)); 517 | 518 | // If the worker is running a task, something is VERY wrong. 519 | assert(!front_invalid_ && "The worker is still running a task!"); 520 | 521 | back = get_valid(back); 522 | 523 | index_type front = front_.load(std::memory_order_acquire); 524 | // Ensure a consistent state, in the event of an exception. 525 | struct RAIIHelper 526 | { 527 | decltype(back_) & back_ref; 528 | index_type value; 529 | ~RAIIHelper (void) 530 | { 531 | back_ref.store(value, std::memory_order_release); 532 | } 533 | } raii_helper { back_, back }; 534 | while (front != raii_helper.value) 535 | { 536 | raii_helper.value = (raii_helper.value - 1 + kModulus) % kModulus; 537 | // Possibly-throwing: 538 | func(remove_task(raii_helper.value)); 539 | } 540 | } 541 | 542 | // Work-stealing will occur as follows: 543 | // 1. Determine the exact number of tasks that can be added to this queue. 544 | // - Note: Though stealing only occurs when the queue is empty, it can be 545 | // empty because of another process performing work-stealing. 546 | // - Note: This value need not be refreshed, as it can only decrease. This 547 | // is because only the Worker's thread will be allowed to add items to its 548 | // queue. 549 | // 2. Estimate the number of items in the the source queue. 550 | // - If the queue is already being edited, giving up is an option. The 551 | // worker will come back later or try a different source queue, effectively 552 | // creating a spin-lock. 553 | // 3. Set source's write to write - (items - stolen). Do not change valid. 554 | // - When write != valid, the write-head is locked. Moreover, reading 555 | // should not occur if read is in the interval [write, valid]. 556 | // 4. Check whether source's read is in the interval [write, valid]. If it is, 557 | // then the current interval is contended. Go to step 2. 558 | // 5. Now that the write interval is locked, copy to the front (reading side) 559 | // of this thread's queue. This is safe because only this thread affects this 560 | // part of the queue. 561 | // 6. Set source's VALID equal to its WRITE to unlock that part of the queue. 562 | 563 | // Steals approximately [available] / [divisor] tasks from source, if 564 | // possible. Returns number of successfully stolen tasks. 565 | /// \note noexcept if place_task and remove_task are both noexcept. 566 | unsigned Worker::steal_from (Worker & source) 567 | noexcept(std::is_nothrow_destructible::value && std::is_nothrow_move_constructible::value) 568 | { 569 | static constexpr unsigned kDivisor = 4; 570 | index_type this_front, this_back, writeable, stolen, 571 | source_front, source_back, source_valid, source_write; 572 | // Worker::steal_from may only be called from the Worker's owned thread. 
573 | assert(std::this_thread::get_id() == thread_.get_id() && "Worker::steal_from may only be called from the Worker's own thread."); 574 | assert(this != &source && "Worker may not steal from itself."); 575 | assert(!front_invalid_ && "Worker cannot steal while it is performing a task."); 576 | 577 | this_front = front_.load(std::memory_order_relaxed); 578 | this_back = back_.load(std::memory_order_acquire); 579 | 580 | writeable = get_distance(get_valid(this_back), this_front - 1); 581 | if (writeable == 0) 582 | return 0; 583 | 584 | // Maximum number of times to attempt to lock the victim before giving up. 585 | std::uint_fast8_t spins = 64; 586 | // Lock the source queue, reserving several tasks to steal. 587 | source_back = source.back_.load(std::memory_order_relaxed); 588 | do { 589 | source_valid = get_valid(source_back); 590 | // Already locked. Better to give up immediately, and try a different victim. 591 | if (source_valid != get_write(source_back)) 592 | return 0; 593 | source_front = source.front_.load(std::memory_order_relaxed); 594 | // Stolen is actually from WRITE, but WRITE and VALID are identical. 595 | index_type valid = get_distance(source_front, source_valid); 596 | // Must not attempt to claim the current front pointer, so require at least 2 597 | // items in source queue. 598 | if (valid < 2) 599 | return 0; 600 | stolen = min((valid + kDivisor - 2) / kDivisor, writeable); 601 | source_write = (source_valid - stolen + kModulus) % kModulus; 602 | 603 | if (source.back_.compare_exchange_weak(source_back, 604 | make_back(source_write, source_valid), 605 | std::memory_order_acq_rel, 606 | std::memory_order_relaxed)) 607 | break; 608 | // Spun too long. Better to try a different victim than lock forever. 609 | if (--spins == 0) 610 | return 0; 611 | } while (true); 612 | // Now that the lock has been acquired, read may advance at most one more 613 | // time. That is, simply ensuring that READ < WRITE will suffice to ensure 614 | // correct behavior. Unfortunately, the READ may already be in the claim. Only 615 | // READ <= VALID is certain until we enforce it. 616 | // Note that by including the one-increment error margin, the following 617 | // adjustment needs to be run at most once. 618 | { 619 | source_front = source.front_.load(std::memory_order_acquire); 620 | index_type valid = get_distance(source_front, source_valid); 621 | if (valid < 2) // Unlock. There aren't any unclaimed tasks to steal. 622 | { 623 | source.back_.store(make_back(source_valid)); 624 | return 0; 625 | } 626 | 627 | index_type readable = get_distance(source_front, source_write); 628 | // Even if READ <= VALID, (that is, normal behavior), if READ == WRITE then 629 | // we must increment WRITE as READ may be incremented during the write phase. 630 | if (greater_or_zero(readable, valid)) 631 | { 632 | stolen = (valid + kDivisor - 2) / kDivisor; 633 | // Thief's number of held tasks can only be reduced since last check, so 634 | // there is no reason to double-check whether thief can hold the tasks. 635 | source_write = (source_valid - stolen + kModulus) % kModulus; 636 | // This store is optional. It allows the victim queue to keep executing 637 | // while memory is copied. 
638 | source.back_.store(make_back(source_write, source_valid), 639 | std::memory_order_relaxed); 640 | } 641 | } 642 | 643 | #ifndef NDEBUG 644 | assert(source_write != source_valid); 645 | auto test_front = source.front_.load(std::memory_order_relaxed); 646 | assert(get_distance(test_front, source_write) <= get_distance(test_front, source_valid)); 647 | #endif 648 | do { 649 | source_valid = (source_valid - 1 + kModulus) % kModulus; 650 | this_front = (this_front - 1 + kModulus) % kModulus; 651 | place_task(this_front, source.remove_task(source_valid)); 652 | } while (source_valid != source_write); 653 | 654 | front_.store(this_front, std::memory_order_release); 655 | source.back_.store(make_back(source_valid), std::memory_order_release); 656 | return stolen; 657 | } 658 | 659 | // Removes a task from the front of the queue, if possible. Returns true or 660 | // false for success or failure, respectively. 661 | bool Worker::pop (task_type & task) 662 | noexcept(std::is_nothrow_destructible::value && 663 | std::is_nothrow_move_assignable::value) 664 | { 665 | //assert(std::this_thread::get_id() == thread_.get_id() && "Worker::pop may only be called from the Worker's own thread."); 666 | 667 | auto front = front_.load(std::memory_order_relaxed); 668 | auto back = back_.load(std::memory_order_acquire); 669 | 670 | auto readable = get_distance(front, get_write(back)); 671 | // Two circumstances can prevent reading: Either there is nothing to read, or 672 | // the current location is claimed. Even once the claim is resolved, there may 673 | // or may not be something to read. 674 | if (greater_or_zero(readable, get_distance(front, get_valid(back)))) 675 | return false; 676 | 677 | auto new_front = (front + 1) % kModulus; 678 | if (!front_invalid_) 679 | { 680 | task = remove_task(front); 681 | front_.store(new_front, std::memory_order_relaxed); 682 | // I need to release back_ so that the write to front_ is visible to thieves. 683 | back_.fetch_or(0, std::memory_order_release); 684 | return true; 685 | } 686 | else if (readable > 1) 687 | { 688 | front_.store(new_front, std::memory_order_relaxed); 689 | back = back_.fetch_or(0, std::memory_order_acq_rel); 690 | if (greater_or_zero(get_distance(new_front, get_write(back)), get_distance(new_front, get_valid(back)))) 691 | { 692 | // By the time we advanced the pointer, the task we intended to read was 693 | // already removed. Don't read it. 694 | front_invalid_ = false; 695 | return false; 696 | } 697 | else 698 | { 699 | task = remove_task(new_front); 700 | return true; 701 | } 702 | } 703 | else 704 | return false; 705 | } 706 | 707 | // Removes, then performs the task at the front of the queue. Claims the next 708 | // task after performing the current one. 709 | bool Worker::execute (void) 710 | { 711 | assert(!front_invalid_ && "Can't execute a task while already executing a different task."); 712 | assert(std::this_thread::get_id() == thread_.get_id() && "Worker::execute may only be called from the Worker's own thread."); 713 | 714 | auto front = front_.load(std::memory_order_relaxed); 715 | auto back = back_.load(std::memory_order_acquire); 716 | 717 | auto readable = get_distance(front, get_write(back)); 718 | // Two circumstances can prevent reading: Either there is nothing to read, or 719 | // the current location is claimed. Even once the claim is resolved, there may 720 | // or may not be something to read. 
721 | if (greater_or_zero(readable, get_distance(front, get_valid(back)))) 722 | return false; 723 | 724 | // Will ensure that the queue is restored to validity, even in the event of 725 | // an exception. 726 | struct Reservation 727 | { 728 | Reservation (Worker & worker) noexcept 729 | : worker_(worker) 730 | { 731 | worker_.front_invalid_ = true; 732 | } 733 | ~Reservation (void) 734 | { 735 | if (worker_.front_invalid_) 736 | { 737 | worker_.front_invalid_ = false; 738 | auto new_front = worker_.front_.load(std::memory_order_relaxed); 739 | worker_.front_.store((new_front+1)%kModulus, std::memory_order_relaxed); 740 | // I need to release back_ so that the write to front_ is visible to thieves. 741 | worker_.back_.fetch_or(0, std::memory_order_release); 742 | } 743 | } 744 | Reservation (Reservation const &) = delete; 745 | Reservation & operator= (Reservation const &) = delete; 746 | private: 747 | Worker & worker_; 748 | } reservation {*this}; 749 | // Potentially-throwing. 750 | task_type task = remove_task(front); 751 | // Potentially-throwing. 752 | task(); 753 | /// \todo Find a good way to unify this with the other validation. 754 | // If the slot was not already overwritten (eg. by the task pushing to the 755 | // task-queue), need to adjust the queue size. 756 | return true; 757 | } 758 | 759 | // Pulls some tasks into the local queue from the central queue, and returns 760 | // others. 761 | void Worker::refresh_tasks (ThreadPoolImpl & tasks, unsigned number) 762 | { 763 | unsigned num_pushed = push_front(tasks, number); 764 | if (num_pushed == 0) 765 | { 766 | auto cnt = tasks.size(); 767 | if (number > cnt) 768 | number = static_cast(cnt); 769 | task_type task; 770 | 771 | for (; number && pop(task); ++num_pushed, --number) 772 | tasks.push(std::move(task)); 773 | push_front(tasks, num_pushed); 774 | } 775 | } 776 | 777 | // Feeds all existing tasks to the ThreadPool. Used as a last resort. 778 | void Worker::canibalize (ThreadPoolImpl & tasks) 779 | { 780 | do { 781 | task_type task; 782 | if (pop(task)) 783 | tasks.push(std::move(task)); 784 | else 785 | { 786 | auto front = front_.load(std::memory_order_relaxed); 787 | auto back = back_.load(std::memory_order_relaxed); 788 | // If the queue is fully-depleted, our job is done. Otherwise, we need to 789 | // keep trying. 790 | if ((get_write(back) == get_valid(back)) && (get_valid(back) == front)) 791 | break; 792 | else 793 | std::this_thread::yield(); 794 | } 795 | } while (true); 796 | } 797 | 798 | // Pushes a task onto the back of the queue, if possible. If the back of the 799 | // queue is in contention, (eg. because of work stealing), pushes onto the 800 | // front of the queue instead. 801 | // Note: Only evaluates the task reference if there is room to insert the 802 | // task. 803 | /// \par Exception safety 804 | /// *Strong*: If an exception is thrown, the function has no effect. 805 | /// Applies only if `place_task()` also provides the strong guarantee. 
806 | template 807 | bool Worker::push (Task && task) 808 | { 809 | assert(std::this_thread::get_id() == thread_.get_id() && "Worker::push may only be called from the Worker's owned thread."); 810 | 811 | auto front = front_.load(std::memory_order_relaxed); 812 | auto back = back_.load(std::memory_order_acquire); 813 | 814 | auto valid = get_valid(back); 815 | if (((front - valid + kModulus) % kModulus) == 1) 816 | return false; 817 | 818 | index_type write = get_write(back); 819 | index_type new_back = (write + 1) % kModulus; 820 | index_type expected = make_back(write); 821 | if (back_.compare_exchange_strong(expected, make_back(write, new_back), 822 | std::memory_order_acquire, 823 | std::memory_order_relaxed)) 824 | { 825 | struct RAIIHelper 826 | { 827 | decltype(back_) & back_ref; 828 | index_type value; 829 | ~RAIIHelper (void) 830 | { 831 | back_ref.store(value, std::memory_order_release); 832 | } 833 | } raii_helper { back_, back }; 834 | place_task(write, std::forward(task)); // May throw. 835 | raii_helper.value = make_back(new_back); 836 | } 837 | else 838 | { 839 | write = front; 840 | front = (front - 1 + kModulus) % kModulus; 841 | if (!front_invalid_) 842 | write = front; 843 | place_task(write, std::forward(task)); 844 | front_.store(front, std::memory_order_release); 845 | } 846 | 847 | pool_.notify_if_idle(); 848 | return true; 849 | } 850 | 851 | // Places a new task at the front of the queue. Note that this skirts anti- 852 | // starvation precautions. 853 | // Note: Only evaluates the task reference if there is room to insert the 854 | // task. 855 | /// \par Exception safety 856 | /// *Strong*: If an exception is thrown, the function has no effect. 857 | /// Applies only if `place_task()` also provides the strong guarantee. 858 | template 859 | bool Worker::push_front (Task && task) 860 | { 861 | assert(std::this_thread::get_id() == thread_.get_id() && "Worker::push_front may only be called from the Worker's owned thread."); 862 | 863 | index_type front = front_.load(std::memory_order_relaxed); 864 | index_type back = back_.load(std::memory_order_acquire); 865 | 866 | if ((front - get_valid(back) + kModulus) % kModulus == 1) 867 | return false; 868 | index_type write = front; 869 | front = (front - 1 + kModulus) % kModulus; 870 | 871 | // Potentially-throwing 872 | place_task(front_invalid_ ? write : front, std::forward(task)); 873 | 874 | front_.store(front, std::memory_order_release); 875 | 876 | // Delay lower-level (central) queue from being accessed, to fully support 877 | // depth-first traversal of task tree. 878 | ++countdown_; 879 | pool_.notify_if_idle(); 880 | return true; 881 | } 882 | 883 | // Places multiple new tasks at the front of the queue. Note that this skirts 884 | // anti-starvation precautions. 885 | unsigned Worker::push_front (ThreadPoolImpl & tasks, unsigned number) 886 | { 887 | assert(std::this_thread::get_id() == thread_.get_id() && "Worker::push_front may only be called from the Worker's owned thread."); 888 | if (!tasks.has_task()) 889 | return 0; 890 | 891 | index_type front = front_.load(std::memory_order_relaxed); 892 | index_type back = back_.load(std::memory_order_acquire); 893 | 894 | auto written = (front - get_valid(back) - 1 + kModulus) % kModulus; 895 | if (number < written) 896 | written = number; 897 | if (written == 0) 898 | return 0; 899 | 900 | // In C++, bool converts implicitly to 0 (false) or 1 (true). 
901 | front += front_invalid_; 902 | auto n = written; 903 | do { 904 | front = (front - 1 + kModulus) % kModulus; 905 | place_task(front, tasks.extract_task()); 906 | if (!tasks.has_task()) 907 | { 908 | written -= n - 1; 909 | break; 910 | } 911 | } while (--n); 912 | front = (front - front_invalid_ + kModulus) % kModulus; 913 | front_.store(front, std::memory_order_release); 914 | return written; 915 | } 916 | 917 | // Returns an estimate of the number of tasks currently in the queue. 918 | typename Worker::index_type Worker::count_tasks (void) const noexcept 919 | { 920 | index_type front = front_.load(std::memory_order_relaxed); 921 | index_type back = back_.load(std::memory_order_relaxed); 922 | return get_distance(front, get_valid(back)); 923 | } 924 | 925 | // Attempts to steal work from other worker threads in the same pool. 926 | unsigned Worker::steal (void) 927 | { 928 | unsigned num_workers = pool_.get_capacity(); 929 | auto randomizer = front_.load(std::memory_order_relaxed); 930 | unsigned source = static_cast(randomizer); 931 | unsigned stolen_count = 0; 932 | for (auto n = num_workers; n--;) { 933 | source = (source + 1) % num_workers; 934 | Worker * victim = pool_.data() + source; 935 | if (victim == this) 936 | continue; 937 | stolen_count += steal_from(*victim); 938 | if (stolen_count > 0) 939 | break; 940 | } 941 | return stolen_count; 942 | } 943 | 944 | // Performs a loop of the form execute-steal-check_central_queue-repeat. 945 | // Sleeps if no work is available in this and other queues. 946 | void Worker::operator() (void) 947 | { 948 | static constexpr std::uint_fast32_t kPullFromQueue = 1 + (kModulus - 1) / 32; 949 | index_type last_size = 0; 950 | // This thread-local variable allows O(1) scheduling (allows pushing directly 951 | // to the local task queue). 952 | current_worker = this; 953 | using mutex_type = decltype(pool_.mutex_); 954 | mutex_type & mutex = pool_.mutex_; 955 | 956 | { 957 | std::unique_lock guard(mutex); 958 | ++pool_.living_; 959 | guard.unlock(); 960 | pool_.cv_.notify_all(); 961 | } 962 | // The thread is started after all workers are initialized; no need to wait. 963 | 964 | while (true) 965 | { 966 | if (--countdown_ == 0) 967 | { 968 | auto task_count = count_tasks(); 969 | index_type size = (task_count <= kModulus / 16) ? task_count * 16 : kModulus - 1; 970 | countdown_ = size + 2; 971 | 972 | // Periodically check whether the program is trying to destroy the pool. 973 | if (pool_.should_stop()) 974 | goto kill; 975 | 976 | if (mutex.try_lock()) 977 | { 978 | std::lock_guard guard (mutex, std::adopt_lock); 979 | pool_.update_tasks(); 980 | if (!pool_.has_task()) 981 | { 982 | // If the queue size has stabilized, it's likely that all tasks are waiting 983 | // on something (and thus continually re-adding themselves). Shake things up a 984 | // bit by re-shuffling tasks. 985 | if (size == last_size) 986 | size += steal(); 987 | last_size = size; 988 | continue; 989 | } 990 | refresh_tasks(pool_, (kPullFromQueue + 3) / 4); 991 | countdown_ += kPullFromQueue / 2; 992 | } 993 | else 994 | { 995 | // If the queue size has stabilized, it's probably full of infinite loops. 996 | if (size == last_size) 997 | countdown_ = 4; 998 | } 999 | last_size = size; 1000 | } 1001 | // Second, check for (and perform) any tasks in this thread's queue. 1002 | if (execute()) 1003 | continue; 1004 | // Make sure we don't exhaust the full queue when an exit is desired. 
1005 | if (pool_.should_stop()) 1006 | goto kill; 1007 | // Third, check whether there are common tasks available. This will also 1008 | // serve to jump-start the worker. 1009 | // Testing whether the task queue is empty may give an incorrect result, 1010 | // due to lack of synchronization, but is still a fast and easy test. 1011 | if (pool_.might_have_task() && mutex.try_lock()) 1012 | { 1013 | std::lock_guard guard (mutex, std::adopt_lock); 1014 | pool_.update_tasks(); 1015 | unsigned count = push_front(pool_, kPullFromQueue); 1016 | if (count > 0) 1017 | { 1018 | // If our new tasks are already from the queue, no need to refresh. 1019 | countdown_ += kPullFromQueue;//count; 1020 | continue; 1021 | } 1022 | } 1023 | // Fourth, try work stealing. 1024 | if (steal() > 0) 1025 | continue; 1026 | 1027 | // Fifth, wait a bit for something to change... 1028 | auto num_workers = pool_.get_capacity(); 1029 | bool should_idle = (count_tasks() == 0); 1030 | for (auto n = num_workers; n-- && should_idle;) 1031 | should_idle = (pool_.workers_[n].count_tasks() < 2); 1032 | if (should_idle && mutex.try_lock()) 1033 | { 1034 | std::unique_lock guard (mutex, std::adopt_lock); 1035 | if (pool_.should_stop()) 1036 | goto kill; 1037 | pool_.update_tasks(); 1038 | if (!pool_.has_task()) 1039 | { 1040 | ++pool_.idle_; 1041 | pool_.wait_for_task(guard); 1042 | --pool_.idle_; 1043 | if (pool_.should_stop()) 1044 | goto kill; 1045 | pool_.update_tasks(); 1046 | } 1047 | push_front(pool_, kPullFromQueue); 1048 | // If our new tasks are already from the queue, no need to refresh. 1049 | countdown_ += kPullFromQueue; 1050 | } 1051 | } 1052 | kill: 1053 | current_worker = nullptr; 1054 | { 1055 | std::unique_lock guard (mutex); 1056 | --pool_.living_; 1057 | guard.unlock(); 1058 | pool_.cv_.notify_all(); 1059 | } 1060 | } 1061 | 1062 | 1063 | 1064 | //////////////////////////////////////////////////////////////////////////////// 1065 | // ThreadPoolImpl // 1066 | //////////////////////////////////////////////////////////////////////////////// 1067 | 1068 | ThreadPoolImpl::ThreadPoolImpl (Worker * workers, index_type num_workers) 1069 | : workers_(workers), num_workers_(num_workers) 1070 | { 1071 | assert(num_workers > 0); 1072 | std::unique_lock guard (mutex_); 1073 | 1074 | // Construct the workers, after some safety-checks. 1075 | static_assert(std::is_nothrow_constructible::value,\ 1076 | "This loop is only exception-safe if Worker construction is non-throwing"); 1077 | for (index_type i = 0; i < get_capacity(); ++i) 1078 | new(workers_ + i) Worker(*this); 1079 | // Start the threads only after all initialization is complete. The Worker's 1080 | // loop will need no further synchronization for safe use. 1081 | // Note that a worker without an initialized thread will simply do nothing, 1082 | // because the threads are responsible for populating themselves with tasks. 1083 | std::exception_ptr eptr; 1084 | for (index_type i = 0; i < get_capacity(); ++i) 1085 | { 1086 | try { 1087 | workers_[i].restart_thread(); 1088 | } catch (std::system_error &) { 1089 | eptr = std::current_exception(); 1090 | } 1091 | } 1092 | // If no threads were able to start, give a meaningful error regarding why. 1093 | // However, if at least one thread was able to start, the ThreadPool will 1094 | // function properly. 1095 | if (get_concurrency() == 0) 1096 | std::rethrow_exception(eptr); 1097 | // Wait for the pool to be fully populated to ensure no weird behaviors. 
1098 | cv_.wait(guard, [this](void)->bool { 1099 | return (living_ == get_concurrency()) || should_stop(); 1100 | }); 1101 | } 1102 | 1103 | ThreadPoolImpl::~ThreadPoolImpl (void) 1104 | { 1105 | #ifndef NDEBUG 1106 | Worker * p_worker = current_worker; 1107 | if ((p_worker != nullptr) && p_worker->belongs_to(this)) 1108 | { 1109 | std::printf("ERROR!\tA worker thread may not destroy the ThreadPool to \ 1110 | which it belongs.\n"); 1111 | std::abort(); 1112 | } 1113 | #endif 1114 | std::unique_lock guard (mutex_); 1115 | stop_.store(0x05, std::memory_order_relaxed); 1116 | if (paused_ > 0) 1117 | { 1118 | // If the pool is in a "paused" state, it might be the case that one thread 1119 | // is still alive (and waiting for an "unpause" signal). Wake it up... 1120 | cv_.notify_all(); 1121 | cv_.wait(guard, [this](void)->bool { 1122 | return (living_ == 0); 1123 | }); 1124 | for (auto i = get_capacity(); i--;) 1125 | workers_[i].stop_thread(); 1126 | } else 1127 | stop_threads(guard); 1128 | 1129 | for (auto i = get_capacity(); i--;) 1130 | workers_[i].~Worker(); 1131 | } 1132 | 1133 | // Note: Because of the mutex, can be called from any thread at any time. 1134 | void ThreadPoolImpl::stop_threads (std::unique_lock & guard) 1135 | { 1136 | if (idle_ > 0) 1137 | cv_.notify_all(); 1138 | cv_.wait(guard, [this](void)->bool { 1139 | return (living_ == paused_) || !should_stop(); 1140 | }); 1141 | if (should_stop()) { 1142 | // At this point, all threads are either dead (need to be joined) or paused 1143 | // (must not be joined). Take action appropriately. 1144 | // Note that if multiple threads are paused simultaneously, they all reach 1145 | // this point (one at a time, though) 1146 | for (auto i = get_capacity(); i--;) 1147 | { 1148 | if (!workers_[i].get_paused()) 1149 | workers_[i].stop_thread(); 1150 | } 1151 | } 1152 | } 1153 | 1154 | void ThreadPoolImpl::halt (void) 1155 | { 1156 | std::unique_lock guard (mutex_); 1157 | // Note: Bit 0x04 is used to indicate that the destructor is ongoing. Do not 1158 | // interfere with it. 1159 | if (stop_.load(std::memory_order_relaxed) & 0x04) 1160 | return; 1161 | stop_.store(0x03, std::memory_order_relaxed); 1162 | Worker * p_worker = current_worker; 1163 | if ((p_worker != nullptr) && p_worker->belongs_to(this)) 1164 | { 1165 | p_worker->set_paused(true); 1166 | ++paused_; 1167 | } 1168 | stop_threads(guard); 1169 | // If the caller is part of the pool, block execution until unpaused. 1170 | if ((p_worker != nullptr) && p_worker->belongs_to(this)) 1171 | { 1172 | cv_.wait(guard, [this] (void) -> bool { 1173 | return (stop_.load(std::memory_order_relaxed) & 0x02) == 0; 1174 | }); 1175 | p_worker->set_paused(false); 1176 | --paused_; 1177 | } 1178 | } 1179 | 1180 | // Note: Because of the mutex, can call from any thread at any time. 1181 | void ThreadPoolImpl::resume (void) 1182 | { 1183 | std::unique_lock guard (mutex_); 1184 | 1185 | assert(living_ >= paused_); 1186 | // Note: Bit 0x04 will be used to indicate attempted destruction. Do not 1187 | // interfere. 1188 | if (stop_.load(std::memory_order_relaxed) & 0x04) 1189 | return; 1190 | 1191 | stop_.store(0x00, std::memory_order_relaxed); 1192 | cv_.notify_all(); // noexcept 1193 | 1194 | std::exception_ptr eptr; 1195 | for (unsigned i = 0; i < get_capacity(); ++i) 1196 | { 1197 | try { 1198 | workers_[i].restart_thread(); 1199 | } catch (std::system_error &) { 1200 | // Whenever a thread fails to start, remove all the tasks it would otherwise 1201 | // need to consume. 
This will prevent those tasks from becoming unreachable. 1202 | if (!workers_[i].is_alive()) 1203 | workers_[i].canibalize(*this); 1204 | eptr = std::current_exception(); 1205 | } 1206 | } 1207 | if (get_concurrency() == 0) 1208 | std::rethrow_exception(eptr); 1209 | 1210 | cv_.wait(guard, [this](void)->bool { 1211 | return (living_ >= get_concurrency()) || should_stop(); 1212 | }); 1213 | } 1214 | 1215 | bool ThreadPoolImpl::is_halted (void) const 1216 | { 1217 | std::lock_guard guard (mutex_); 1218 | // Include paused tasks to give more consistent behavior. 1219 | return (stop_.load(std::memory_order_relaxed) & 0x02) && (paused_ == living_); 1220 | } 1221 | 1222 | bool ThreadPoolImpl::is_idle (void) const 1223 | { 1224 | std::lock_guard guard (mutex_); 1225 | // Include paused tasks to give more consistent behavior. 1226 | return (idle_ + paused_) == living_; 1227 | } 1228 | 1229 | /// \par Exception safety 1230 | /// Provides the strong (rollback) guarantee. 1231 | template 1232 | void ThreadPoolImpl::schedule_overflow (Task && task) 1233 | { 1234 | bool idle; 1235 | { 1236 | std::lock_guard guard (mutex_); 1237 | push(std::forward(task)); // < Strong exception-safety guarantee. 1238 | idle = idle_ > 0; 1239 | } 1240 | if (idle) 1241 | cv_.notify_one(); 1242 | } 1243 | 1244 | /// \par Exception safety 1245 | /// Provides the strong (rollback) guarantee. 1246 | template 1247 | void ThreadPoolImpl::schedule_after (clock::duration const & dur, Task && task) 1248 | { 1249 | bool idle; 1250 | { 1251 | std::lock_guard guard (mutex_); 1252 | push_at(clock::now() + dur, std::forward(task)); 1253 | // Wake the waiters, just in case the scheduled time is earlier than that for 1254 | // which they were waiting. 1255 | idle = idle_ > 0; 1256 | } 1257 | if (idle) 1258 | cv_.notify_one(); 1259 | } 1260 | 1261 | 1262 | 1263 | #ifndef NDEBUG 1264 | void debug_warn_overflow (void) noexcept 1265 | { 1266 | static std::atomic_flag overflow_warning_given = ATOMIC_FLAG_INIT; 1267 | if (!overflow_warning_given.test_and_set()) 1268 | std::printf("Task queue overflow (more than %zu tasks in a single worker's \ 1269 | queue). May impact performance.", ThreadPool::get_worker_capacity()); 1270 | } 1271 | #endif 1272 | 1273 | template 1274 | void impl_schedule (Task && task, ThreadPoolImpl * impl) 1275 | { 1276 | #ifndef NDEBUG 1277 | // If a NULL task is passed, place the error message as close as possible to 1278 | // the error itself. 1279 | if (task == nullptr) 1280 | throw std::bad_function_call(); 1281 | #endif 1282 | Worker * worker = current_worker; 1283 | // If a thread is attempting to schedule in its own pool... 1284 | if ((worker != nullptr) && worker->belongs_to(impl)) 1285 | { 1286 | if (worker->push(std::forward(task))) 1287 | return; 1288 | #ifndef NDEBUG 1289 | else 1290 | debug_warn_overflow(); 1291 | #endif 1292 | } 1293 | impl->schedule_overflow(std::forward(task)); 1294 | } 1295 | 1296 | // Schedule at the front of the queue, if in fast path. 1297 | template 1298 | void impl_schedule_subtask (Task && task, ThreadPoolImpl * impl) 1299 | { 1300 | #ifndef NDEBUG 1301 | // If a NULL task is passed, place the error message as close as possible to 1302 | // the error itself. 1303 | if (task == nullptr) 1304 | throw std::bad_function_call(); 1305 | #endif 1306 | Worker * worker = current_worker; 1307 | // If a thread is attempting to schedule in its own pool, take the fast path. 
1308 | if ((worker != nullptr) && worker->belongs_to(impl)) 1309 | { 1310 | if (worker->push_front(std::forward(task))) 1311 | return; 1312 | #ifndef NDEBUG 1313 | else 1314 | debug_warn_overflow(); 1315 | #endif 1316 | } 1317 | impl->schedule_overflow(std::forward(task)); 1318 | } 1319 | 1320 | template 1321 | void impl_schedule_after (std::chrono::steady_clock::duration const & dur, 1322 | Task && task, ThreadPoolImpl * impl) 1323 | { 1324 | if (dur <= std::chrono::steady_clock::duration(0)) 1325 | impl_schedule(std::forward(task), impl); 1326 | else 1327 | { 1328 | #ifndef NDEBUG 1329 | // If a NULL task is passed, place the error message as close as possible to 1330 | // the error itself. 1331 | if (task == nullptr) 1332 | throw std::bad_function_call(); 1333 | #endif 1334 | impl->schedule_after(dur, std::forward(task)); 1335 | } 1336 | } 1337 | } // Namespace [anonymous] 1338 | 1339 | 1340 | 1341 | 1342 | 1343 | //////////////////////////////////////////////////////////////////////////////// 1344 | // ThreadPool // 1345 | //////////////////////////////////////////////////////////////////////////////// 1346 | 1347 | ThreadPool::ThreadPool (unsigned threads) 1348 | : impl_(nullptr) 1349 | { 1350 | if (threads == 0) 1351 | { 1352 | // Hardware concurrency of 0 indicates that it is unknown. Make sure we have 1353 | // a few threads running. 1354 | threads = max(2u, std::thread::hardware_concurrency()); 1355 | } 1356 | using thread_counter_type = decltype(std::declval().get_concurrency()); 1357 | threads = min(threads, min(std::numeric_limits::max(), 1358 | std::numeric_limits::max())); 1359 | // Alignment change during Worker allocation is an integer multiple of 1360 | // alignof(Worker). If (alignof(Worker) >= alignof(ThreadPoolImpl)), then 1361 | // the second align will not do anything, and the problem is solved. Otherwise, 1362 | // Alignment is off by at most alignof(ThreadPoolImpl) - alignof(Worker). 1363 | // Total alignment is off by at most the greater of the alignments. 1364 | std::size_t space = sizeof(ThreadPoolImpl) + threads * sizeof(Worker) + \ 1365 | max(alignof(ThreadPoolImpl), alignof(Worker)) + sizeof(void**); 1366 | 1367 | std::unique_ptr memory { std::malloc(space) }; 1368 | if (memory == nullptr) 1369 | throw std::bad_alloc(); 1370 | void * ptr = memory.get(); 1371 | 1372 | using std::align; 1373 | // Allocate space for a block of worker threads 1374 | if (!align(alignof(Worker), threads * sizeof(Worker), ptr, space)) 1375 | throw std::bad_alloc(); 1376 | Worker * workers = static_cast(ptr); 1377 | ptr = workers + threads; 1378 | 1379 | // Allocate space for the controller. 1380 | if (!align(alignof(ThreadPoolImpl), sizeof(ThreadPoolImpl), ptr, space)) 1381 | throw std::bad_alloc(); 1382 | ThreadPoolImpl * impl = static_cast(ptr); 1383 | ptr = impl + 1; 1384 | 1385 | new(impl) ThreadPoolImpl(workers, static_cast(threads)); 1386 | 1387 | impl_ = impl; 1388 | *reinterpret_cast(ptr) = memory.release(); 1389 | } 1390 | 1391 | ThreadPool::~ThreadPool (void) 1392 | { 1393 | ThreadPoolImpl * impl = static_cast(impl_); 1394 | std::unique_ptr memory {*reinterpret_cast(impl + 1)}; 1395 | impl->~ThreadPoolImpl(); 1396 | } 1397 | 1398 | unsigned ThreadPool::get_concurrency(void) const noexcept 1399 | { 1400 | return static_cast(impl_)->get_concurrency(); 1401 | } 1402 | 1403 | bool ThreadPool::is_idle (void) const 1404 | { 1405 | return static_cast(impl_)->is_idle(); 1406 | } 1407 | 1408 | // Schedules a task normally, at the back of the queue. 
1409 | void ThreadPool::schedule (task_type const & task) 1410 | { 1411 | impl_schedule(task, static_cast(impl_)); 1412 | } 1413 | void ThreadPool::schedule (task_type && task) 1414 | { 1415 | impl_schedule(std::move(task), static_cast(impl_)); 1416 | } 1417 | 1418 | // Schedules a task normally, at the back of the queue. 1419 | void ThreadPool::sched_impl(duration const & dur, task_type const & task) 1420 | { 1421 | impl_schedule_after(dur, task, static_cast(impl_)); 1422 | } 1423 | void ThreadPool::sched_impl(duration const & dur, task_type && task) 1424 | { 1425 | impl_schedule_after(dur, std::move(task),static_cast(impl_)); 1426 | } 1427 | 1428 | // Schedule at the front of the queue, if in fast path. 1429 | void ThreadPool::schedule_subtask (task_type const & task) 1430 | { 1431 | impl_schedule_subtask(task, static_cast(impl_)); 1432 | } 1433 | void ThreadPool::schedule_subtask (task_type && task) 1434 | { 1435 | impl_schedule_subtask(std::move(task), static_cast(impl_)); 1436 | } 1437 | 1438 | std::size_t ThreadPool::get_worker_capacity (void) noexcept 1439 | { 1440 | return kModulus - 1; 1441 | } 1442 | 1443 | void ThreadPool::halt (void) 1444 | { 1445 | static_cast(impl_)->halt(); 1446 | } 1447 | void ThreadPool::resume (void) 1448 | { 1449 | static_cast(impl_)->resume(); 1450 | } 1451 | bool ThreadPool::is_halted (void) const 1452 | { 1453 | return static_cast(impl_)->is_halted(); 1454 | } 1455 | -------------------------------------------------------------------------------- /threadpool.hpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | /// \file threadpool.hpp 3 | /// \brief Lightweight, fine-grained multitasking through thread pools. 4 | /// 5 | /// This header is part of a multi-tasking library that provides low-overhead 6 | /// concurrent scheduling. This is provided through a thread pool, and uses the 7 | /// [work-stealing method](https://en.wikipedia.org/wiki/Work_stealing "Wikipedia: Work stealing") 8 | /// for load balancing. \n 9 | /// In addition, the library provides a fast scheduling path for tasks spawned 10 | /// by another task within the same pool. \n 11 | /// These avert the majority of scheduling overhead for each new task, which 12 | /// makes fine-grained parallelism feasible. 13 | /// \code 14 | /// #include "threadpool.hpp" 15 | /// 16 | /// // Create a new thread pool, letting the implementation determine the 17 | /// // number of worker threads to use. 18 | /// ThreadPool pool; 19 | /// 20 | /// // Function pointers of type void (*) () can be passed as tasks directly. 21 | /// void task (void) 22 | /// { 23 | /// // ... 24 | /// } 25 | /// // Put a task into the pool. Because this isn't called from within a 26 | /// // worker thread, the worker threads synchronize to avoid calling it twice. 27 | /// pool.schedule([](void) 28 | /// { 29 | /// // Put a task into the pool. This is called from within a worker thread, 30 | /// // so no synchronization is required. 31 | /// pool.schedule(&task); 32 | /// 33 | /// // Put a task into the pool, treated as if it were part of the currently 34 | /// // running task. This is called from within a worker thread, so no 35 | /// // synchronization is required. 36 | /// pool.schedule_subtask([](void) { }); 37 | /// 38 | /// using namespace std::chrono; 39 | /// // Put a task into the pool, to be executed 2 seconds after it is scheduled. 
40 | /// pool.schedule_after(seconds(2),
41 | /// [](void) {
42 | /// do_something();
43 | /// });
44 | ///
45 | /// // Put a task into the pool, to be executed at the specified time.
46 | /// pool.schedule_after(steady_clock::now() + seconds(2),
47 | /// [](void) {
48 | /// do_something();
49 | /// });
50 | /// });
51 | ///
52 | /// // When the thread pool is destroyed, remaining tasks are forgotten.
53 | /// \endcode
54 | /// \note Tasks assigned to the pool from within one of its worker threads will
55 | /// take the fast scheduling path unless the worker already has
56 | /// `get_worker_capacity()` tasks scheduled. Tasks assigned from outside the
57 | /// pool will take the slow path.
58 | /// \warning If `get_concurrency()` active tasks (or more) simultaneously
59 | /// block, then all inactive tasks in the pool may be blocked. To prevent
60 | /// deadlock, it is recommended that tasks be constructed such that at least
61 | /// one active task makes progress.
62 | /// \note Users may define the macro `THREAD_POOL_FALSE_SHARING_ALIGNMENT` to
63 | /// specify L1 cache line size when compiling `threadpool.cpp`. If it is not
64 | /// specified, the library will attempt to use C++17's
65 | /// `hardware_destructive_interference_size`. If that feature is not supported
66 | /// by the compiler, an implementation-defined default value will be selected.
67 | /// \note Users may specify the capacity of each worker's fixed queue by
68 | /// changing the definition of `kLog2Modulus` in `threadpool.cpp`.
69 | /// \todo Allow tasks to return values, possibly using `std::packaged_task`.
70 | /// \todo Investigate delegates as a replacement for `std::function`:
71 | /// ["The Impossibly Fast C++ Delegates (Fixed)"](https://www.codeproject.com/Articles/1170503/The-Impossibly-Fast-Cplusplus-Delegates-Fixed "The Impossibly Fast C++ Delegates")
72 | /// \author Nathaniel J. McClatchey, PhD
73 | /// \version 2.0
74 | /// \copyright Copyright (c) 2017-2019 Nathaniel J. McClatchey, PhD. \n
75 | /// [Licensed under the MIT license.](https://github.com/nmcclatchey/ThreadPool/blob/master/LICENSE "MIT License") \n
76 | /// You should have received a copy of the license with this software.
77 | ////////////////////////////////////////////////////////////////////////////////
78 |
79 | #ifndef THREAD_POOL_HPP_
80 | #define THREAD_POOL_HPP_
81 |
82 | #if !defined(__cplusplus) || (__cplusplus < 201103L)
83 | #error "The ThreadPool library requires C++11 or higher."
84 | #endif
85 |
86 | // For a unified interface to Callable objects, I considered 3 options:
87 | // * Delegates (fast, but would need extra library and wouldn't allow return)
88 | // * std::function (universally available, but doesn't allow return)
89 | // * std::packaged_task (allows return, but may not be available. Eg. MinGW-w64
90 | // with Win32 threads).
91 | #include <functional>
92 | // For std::size_t
93 | #include <cstddef>
94 | // For timed waiting.
95 | #include <chrono>
96 |
97 | /// \brief A high-performance asynchronous task scheduler.
98 | /// \warning If `get_concurrency()` active tasks (or more) simultaneously
99 | /// block, then all inactive tasks in the pool may be blocked. To prevent
100 | /// deadlock, it is recommended that tasks be constructed such that at least
101 | /// one active task makes progress.
102 | /// \note Has a fast path and a slow path. If called by a worker thread,
103 | /// `schedule(const task_type &)` and `schedule_subtask(const task_type &)`
104 | /// take the fast path, placing the task into the worker thread's own queue
105 | /// and bypassing any synchronization. If any scheduling function is called by
106 | /// a thread not in the pool or if the worker's queue is at capacity, the slow
107 | /// path is taken, requiring synchronization of the `ThreadPool`'s central
108 | /// queue.
109 | /// \note If the worker's local queue is full, the slow path is taken. If one
110 | /// compiles `threadpool.cpp` without the macro `NDEBUG` defined, a warning
111 | /// will be printed when an over-full queue is first detected.
112 | // Implementer's note: The [pointer to implementation idiom](http://en.cppreference.com/w/cpp/language/pimpl "C++ Reference: pImpl idiom")
113 | // provides no significant disadvantage. It will impose a pointer lookup
114 | // penalty, but only on the slow path. Moreover, dynamic allocation is required
115 | // regardless, and all initial allocation is combined into a single allocation.
116 | struct ThreadPool
117 | {
118 | /// \brief A [Callable](https://en.cppreference.com/w/cpp/named_req/Callable "C++ Reference: Named requirements: Callable")
119 | /// type, taking no arguments and returning void. Used to store tasks for
120 | /// later execution.
121 | /// \note Will be called at most once, then destroyed.
122 | using task_type = std::function<void ()>;
123 |
124 | /// \brief Initializes a thread pool and starts a collection of worker threads.
125 | /// \param[in] worker_capacity The maximum number of worker threads that the
126 | /// pool will support.
127 | /// \exception Throws `std::system_error` if the pool was unable to start at
128 | /// least one thread.
129 | ///
130 | /// Creates a thread pool with up to *worker_capacity* worker threads, and
131 | /// attempts to start them. If *worker_capacity == 0*, the number of worker
132 | /// threads is positive, but otherwise implementation-defined.
133 | /// \note Use `get_concurrency()` to detect the number of worker threads that
134 | /// were able to start.
135 | ThreadPool (unsigned worker_capacity = 0);
136 |
137 | /// \brief Destroys the `ThreadPool`, terminating all of its worker threads.
138 | ///
139 | /// Notifies all worker threads that work is to be discontinued, and blocks
140 | /// until they terminate. Though any task that has already been started will be
141 | /// completed, any tasks that are not active when `~ThreadPool()` is called
142 | /// may be forgotten.
143 | /// \warning Using a worker thread to destroy its own `ThreadPool` results in
144 | /// undefined behavior.
145 | ~ThreadPool (void);
146 |
147 | // Thread pools cannot be copied or moved.
148 | ThreadPool (ThreadPool const &) = delete;
149 | ThreadPool & operator= (ThreadPool const &) = delete;
150 |
151 | /// \brief Schedules a task to be performed asynchronously.
152 | /// \param[in] task The task to be performed.
153 | ///
154 | /// Schedules a task to be performed asynchronously. The task will be called
155 | /// at most once.
156 | /// \par Memory order
157 | /// Execution of a task *synchronizes-with* (as in
158 | /// [`std::memory_order`](https://en.cppreference.com/w/cpp/atomic/memory_order "C++ Reference: Memory order")
159 | /// ) the call to `schedule()` that added it to the pool, using a
160 | /// [*Release-Acquire*](https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering "C++ Reference: Release-Acquire ordering")
161 | /// ordering.
162 | void schedule (task_type const & task);
163 | /// \overload
164 | void schedule (task_type && task);
165 |
166 | /// \brief Schedules a task to be run asynchronously after a specified wait
167 | /// duration.
168 | /// \param[in] rel_time The duration after which the task is to be run.
169 | /// \param[in] task The task to be performed.
170 | ///
171 | /// Schedules a task to be performed asynchronously, but only after waiting
172 | /// for a duration of *rel_time*. The task will be called at most once.
173 | /// \par Memory order
174 | /// Execution of a task *synchronizes-with* (as in
175 | /// [`std::memory_order`](https://en.cppreference.com/w/cpp/atomic/memory_order "C++ Reference: Memory order")
176 | /// ) the call to `schedule_after()` that added it to the pool, using a
177 | /// [*Release-Acquire*](https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering "C++ Reference: Release-Acquire ordering")
178 | /// ordering.
179 | template<typename Rep, typename Period, typename Task>
180 | void schedule_after ( std::chrono::duration<Rep, Period> const & rel_time,
181 | Task && task)
182 | {
183 | using namespace std;
184 | sched_impl(chrono::duration_cast<duration>(rel_time), forward<Task>(task));
185 | }
186 |
187 | /// \brief Schedules a task to be run asynchronously at (or after) a specified
188 | /// point in time.
189 | /// \param[in] time The time point after which the task is to be run.
190 | /// \param[in] task The task to be performed.
191 | ///
192 | /// Schedules a task to be performed asynchronously at a specified time point.
193 | /// The task will be called at most once.
194 | /// \par Memory order
195 | /// Execution of a task *synchronizes-with* (as in
196 | /// [`std::memory_order`](https://en.cppreference.com/w/cpp/atomic/memory_order "C++ Reference: Memory order")
197 | /// ) the call to `schedule_after()` that added it to the pool, using a
198 | /// [*Release-Acquire*](https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering "C++ Reference: Release-Acquire ordering")
199 | /// ordering.
200 | template<typename Clock, typename Duration, typename Task>
201 | void schedule_after ( std::chrono::time_point<Clock, Duration> const & time,
202 | Task && task)
203 | {
204 | using namespace std;
205 | using namespace std::chrono;
206 | sched_impl(duration_cast<duration>(time - Clock::now()), forward<Task>(task));
207 | }
208 |
209 | /// \brief Schedules a task to be run asynchronously, but with a hint that the
210 | /// task ought to be considered part of the currently-scheduled task.
211 | /// \param[in] task The task to be performed.
212 | /// \see `schedule(const task_type &)`
213 | ///
214 | /// Schedules a task to be performed asynchronously, but treats it as if it
215 | /// were part of the currently scheduled task. This gives the task a better
216 | /// chance of being performed soon after scheduling, but relaxes
217 | /// non-starvation guarantees. In particular, if the collective subtasks fail
218 | /// to terminate, then the original task is considered not to have terminated,
219 | /// and later tasks may fail to run. \n
220 | /// The `schedule_subtask()` method may be used to encourage (not force)
221 | /// depth-first execution -- rather than breadth-first execution -- if tasks
222 | /// exhibit significant branching. This can reduce the odds of a local queue
223 | /// overflow (the slow path) and reduce the memory needed for scheduled tasks.
224 | /// \n
225 | /// The task will be called at most once.
226 | /// \par Memory order 227 | /// Execution of a task *synchronizes-with* (as in 228 | /// [`std::memory_order`](https://en.cppreference.com/w/cpp/atomic/memory_order "C++ Reference: Memory order") 229 | /// ) the call to `schedule_subtask()` that added it to the pool, using a 230 | /// [*Release-Acquire*](https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering "C++ Reference: Release-Acquire ordering") 231 | /// ordering. 232 | /// \warning Because a subtask is considered as part of the task that spawned 233 | /// it, no guarantees of non-starvation are made should the collective 234 | /// subtasks not terminate. 235 | void schedule_subtask (task_type const & task); 236 | /// \overload 237 | void schedule_subtask (task_type && task); 238 | 239 | /// \brief Returns the number of threads in the pool. 240 | /// \return Number of threads in the pool. 241 | /// 242 | /// Returns the number of threads in the `ThreadPool`. That is, this function 243 | /// returns the number of tasks that can be truly executed concurrently or with 244 | /// preemption. 245 | /// \note If more than `get_concurrency()` tasks block simultaneously, the 246 | /// entire `ThreadPool` is blocked, and no further progress will be made. 247 | unsigned get_concurrency (void) const noexcept; 248 | 249 | /// \brief Maximum number of tasks that can be efficiently scheduled by a 250 | /// worker thread. 251 | /// \return Returns the number of tasks that a worker thread can retain in local 252 | /// storage. 253 | /// 254 | /// To reduce contention, each worker thread keeps its own queue of tasks. The 255 | /// queues are pre-allocated, and of constant size. The `get_worker_capacity()` 256 | /// function returns the number of tasks that each worker can keep in its own 257 | /// queue -- that is, the number of tasks that a worker can have scheduled 258 | /// before contention occurs. \n 259 | /// If the returned value is large, many tasks may be simultaneously scheduled 260 | /// without taking the slow path, but more memory is required. If it is small, 261 | /// task scheduling is more likely to take the slow path, but less memory is 262 | /// required. \n 263 | /// To select the size of the worker queues, edit the variable `kLog2Modulus` 264 | /// in `threadpool.cpp`. 265 | static std::size_t get_worker_capacity (void) noexcept; 266 | 267 | /// \brief Determines whether the pool is currently idle. 268 | /// \return `true` if the pool is idle, or `false` if not. 269 | /// 270 | /// Returns whether the pool is idle. That is, returns `true` if all threads 271 | /// in the pool are simultaneously idling, or `false` if at least one thread is 272 | /// active. If the pool is halted, the returned value is undefined. Calling this 273 | /// from within one of the `ThreadPool`'s tasks necessarily returns `false`. 274 | bool is_idle (void) const; 275 | 276 | /// \{ 277 | /// \brief Suspends execution of tasks in the `ThreadPool`. 278 | /// 279 | /// Halts all worker threads, blocking the caller until worker threads have 280 | /// fully halted. If `halt()` is called from within one of the pool's worker 281 | /// threads, the calling thread is halted either until `resume()` is called or 282 | /// until the `ThreadPool` is destroyed, whichever comes first. 283 | /// \see `resume()` 284 | void halt (void); 285 | 286 | /// \brief Resumes execution of tasks in the `ThreadPool` after a call to 287 | /// `halt()`, or starts threads that had previously failed to initialize. 
288 | /// 289 | /// Attempts to start, restart, or resume all worker threads. 290 | /// - If all allocated worker threads are already running, this function has no 291 | /// effect. 292 | /// - If execution is currently halted, or the number of active workers is less 293 | /// than that returned by `get_concurrency()`, attempts to re-start all inactive 294 | /// worker threads. 295 | /// . 296 | /// May start fewer worker threads than the total capacity of the pool. \n 297 | /// May block the caller until all started worker threads have resumed their 298 | /// tasks. 299 | /// \exception Throws `std::system_error` if the pool was unable to ensure at 300 | /// least one living thread. 301 | /// \see `halt()` 302 | void resume (void); 303 | 304 | /// \brief Returns whether the pool is currently halted. 305 | /// \return Returns `true` if all worker threads are halted, or `false` if not. 306 | /// 307 | /// Returns whether the pool is currently halted. Note that this function only 308 | /// begins to return `true` once all tasks have fully halted. Calling it from 309 | /// within one of the `ThreadPool`'s tasks necessarily returns `false`. 310 | bool is_halted (void) const; 311 | /// \} 312 | private: 313 | void * impl_; 314 | using duration = std::chrono::steady_clock::duration; 315 | void sched_impl (duration const &, task_type const &); 316 | void sched_impl (duration const &, task_type && task); 317 | }; 318 | 319 | #endif // THREAD_POOL_HPP_ 320 | --------------------------------------------------------------------------------
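For quick reference, below is a minimal usage sketch of the interface declared in threadpool.hpp, compiled together with threadpool.cpp. It exercises the documented calls only: schedule(), schedule_subtask(), both schedule_after() overloads, halt(), resume(), is_halted(), is_idle(), get_concurrency(), and get_worker_capacity(). The tasks_done counter, the leaf_task() helper, and the polling loop used to wait for completion are illustrative choices made for this sketch, not part of the library; the public interface above offers no blocking wait-for-all-tasks call, so a caller must arrange its own completion signal before letting the pool be destroyed (the destructor forgets tasks that have not started).

// usage_sketch.cpp -- illustrative only; link against threadpool.cpp.
#include "threadpool.hpp"

#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

namespace {
// Illustrative completion counter; not part of the library.
std::atomic<int> tasks_done(0);

void leaf_task (void)
{
  tasks_done.fetch_add(1, std::memory_order_relaxed);
}
} // namespace

int main (void)
{
  // Let the implementation choose the number of worker threads.
  ThreadPool pool;
  std::printf("Workers started: %u (per-worker queue capacity: %zu)\n",
              pool.get_concurrency(), ThreadPool::get_worker_capacity());

  // Slow path: scheduled from outside the pool.
  pool.schedule([&pool](void)
  {
    // Fast path: scheduled from inside a worker thread.
    pool.schedule(&leaf_task);
    // Hinted to be part of the current task; encourages depth-first execution.
    pool.schedule_subtask(&leaf_task);
    tasks_done.fetch_add(1, std::memory_order_relaxed);
  });

  // Run a task roughly 100 ms from now (duration overload)...
  pool.schedule_after(std::chrono::milliseconds(100), &leaf_task);
  // ...and another at an absolute time point (time_point overload).
  pool.schedule_after(std::chrono::steady_clock::now()
                        + std::chrono::milliseconds(100), &leaf_task);

  // halt() blocks until the workers have paused; resume() restarts them.
  pool.halt();
  std::printf("Halted: %s\n", pool.is_halted() ? "yes" : "no");
  pool.resume();

  // Five increments are expected: the outer task, its two subtasks, and the
  // two timed tasks. Poll until they have all run, so that the destructor
  // does not discard pending work.
  while (tasks_done.load(std::memory_order_relaxed) < 5)
    std::this_thread::sleep_for(std::chrono::milliseconds(10));

  // Once the queues drain, the pool will typically report itself idle.
  std::printf("Completed %d tasks; pool idle: %s\n",
              tasks_done.load(std::memory_order_relaxed),
              pool.is_idle() ? "yes" : "no");
  return 0;
}

A function pointer such as &leaf_task converts implicitly to task_type (std::function<void ()>), as does the capturing lambda; the capture of pool by reference is safe here only because the polling loop keeps main() alive until the scheduled work has finished.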