├── .clang-format ├── .github └── workflows │ └── build.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── LICENSE-APACHE ├── README.md ├── benchmarks ├── CMakeLists.txt ├── benchmark.cmake ├── scheduling.cpp ├── taskflow.cmake └── taskflow.cpp ├── cmake └── sanitizers.cmake ├── docs ├── CMakeLists.txt └── main_page.md ├── images ├── fibonacci_cpu.png ├── fibonacci_wall.png ├── linear_chain_cpu.png ├── linear_chain_wall.png ├── matrix_multiplication_cpu.png └── matrix_multiplication_wall.png ├── include └── scheduling │ └── scheduling.hpp ├── scripts └── build.sh └── tests ├── CMakeLists.txt ├── googletest.cmake └── thread_pool_test.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - '*' 10 | 11 | env: 12 | BUILD_NUMBER: ${{github.run_number}} 13 | 14 | jobs: 15 | build: 16 | name: ${{matrix.name}} 17 | runs-on: ${{matrix.os}} 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | include: 22 | - name: MacOS 23 | os: macos-latest 24 | - name: Ubuntu Clang 25 | os: ubuntu-latest 26 | compiler: llvm 27 | - name: Ubuntu GCC 28 | os: ubuntu-latest 29 | compiler: gcc-13 30 | - name: Ubuntu GCC TSAN 31 | os: ubuntu-latest 32 | compiler: gcc-13 33 | options: "-DSCHEDULING_ENABLE_THREAD_SANITIZER=ON" 34 | - name: Ubuntu GCC ASAN LSAN UBSAN 35 | os: ubuntu-latest 36 | compiler: gcc-13 37 | options: "-DSCHEDULING_ENABLE_ADDRESS_SANITIZER=ON -DSCHEDULING_ENABLE_LEAK_SANITIZER=ON -DSCHEDULING_ENABLE_UNDEFINED_BEHAVIOR_SANITIZER=ON" 38 | - name: Windows MSVC 39 | os: windows-latest 40 | compiler: msvc 41 | 42 | steps: 43 | - name: Checkout 44 | uses: actions/checkout@v2 45 | 46 | - name: Setup Cpp 47 | uses: aminya/setup-cpp@v1 48 | with: 49 | compiler: ${{matrix.compiler}} 50 | 51 | - name: Build 52 | run: ./scripts/build.sh "${{matrix.options}}" 53 | shell: bash 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | \build* 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.21) 2 | 3 | project(scheduling VERSION 0.1.0.0 LANGUAGES CXX) 4 | 5 | include(CMakeDependentOption) 6 | include(cmake/sanitizers.cmake) 7 | 8 | if (NOT DEFINED CMAKE_CXX_STANDARD) 9 | set(CMAKE_CXX_STANDARD 20) 10 | endif() 11 | 12 | set(CMAKE_CXX_EXTENSIONS OFF) 13 | 14 | # For GCC 12 and above, disable the warning about std::hardware_destructive_interference_size not being ABI-stable. 
15 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 16 | if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12) 17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") 18 | endif() 19 | endif() 20 | 21 | option(SCHEDULING_BUILD_BENCHMARKS "Build benchmarks" ${PROJECT_IS_TOP_LEVEL}) 22 | option(SCHEDULING_BUILD_DOCS "Build documentation" ${PROJECT_IS_TOP_LEVEL}) 23 | option(SCHEDULING_BUILD_TESTS "Build tests" ${PROJECT_IS_TOP_LEVEL}) 24 | option(SCHEDULING_ENABLE_ADDRESS_SANITIZER "Defines if address sanitizer should be enabled" OFF) 25 | option(SCHEDULING_ENABLE_LEAK_SANITIZER "Defines if leak sanitizer should be enabled" OFF) 26 | option(SCHEDULING_ENABLE_MEMORY_SANITIZER "Defines if memory sanitizer should be enabled" OFF) 27 | option(SCHEDULING_ENABLE_THREAD_SANITIZER "Defines if thread sanitizer should be enabled" OFF) 28 | option(SCHEDULING_ENABLE_UNDEFINED_BEHAVIOR_SANITIZER "Defines if undefined behavior sanitizer should be enabled" OFF) 29 | 30 | cmake_dependent_option(SCHEDULING_BUILD_TASKFLOW_BENCHMARKS 31 | "Build taskflow benchmark" ${PROJECT_IS_TOP_LEVEL} "SCHEDULING_BUILD_BENCHMARKS" OFF 32 | ) 33 | 34 | add_library(${PROJECT_NAME} INTERFACE) 35 | 36 | target_include_directories(${PROJECT_NAME} 37 | INTERFACE 38 | $ 39 | $ 40 | ) 41 | 42 | if (BUILD_SHARED_LIBS) 43 | message(WARNING "Scheduling does not support dynamic linking.") 44 | endif() 45 | 46 | if (SCHEDULING_BUILD_BENCHMARKS) 47 | add_subdirectory(benchmarks) 48 | endif() 49 | 50 | if (SCHEDULING_BUILD_DOCS) 51 | add_subdirectory(docs) 52 | endif() 53 | 54 | if (SCHEDULING_BUILD_TESTS) 55 | enable_testing() 56 | add_subdirectory(tests) 57 | endif() 58 | 59 | scheduling_enable_sanitizers(${PROJECT_NAME} 60 | ${SCHEDULING_ENABLE_ADDRESS_SANITIZER} 61 | ${SCHEDULING_ENABLE_LEAK_SANITIZER} 62 | ${SCHEDULING_ENABLE_MEMORY_SANITIZER} 63 | ${SCHEDULING_ENABLE_THREAD_SANITIZER} 64 | ${SCHEDULING_ENABLE_UNDEFINED_BEHAVIOR_SANITIZER} 65 | ) 66 | 67 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include DESTINATION .) 68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 Dmytro Puyda 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2023 The Android Open Source Project 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/README.md -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(benchmark.cmake) 2 | 3 | project(scheduling_benchmarks LANGUAGES CXX) 4 | 5 | set(SCHEDULING_BENCHMARKS_SOURCES scheduling.cpp) 6 | 7 | set(SCHEDULING_BENCHMARKS_LINK_LIBRARIES 8 | benchmark::benchmark 9 | benchmark::benchmark_main 10 | scheduling 11 | ) 12 | 13 | if(SCHEDULING_BUILD_TASKFLOW_BENCHMARKS) 14 | include(taskflow.cmake) 15 | set(SCHEDULING_BENCHMARKS_SOURCES 16 | ${SCHEDULING_BENCHMARKS_SOURCES} 17 | taskflow.cpp 18 | ) 19 | set(SCHEDULING_BENCHMARKS_LINK_LIBRARIES 20 | ${SCHEDULING_BENCHMARKS_LINK_LIBRARIES} 21 | Taskflow 22 | ) 23 | endif() 24 | 25 | add_executable(${PROJECT_NAME} 26 | ${SCHEDULING_BENCHMARKS_SOURCES} 27 | ) 28 | 29 | target_link_libraries(${PROJECT_NAME} 30 | PRIVATE 31 | ${SCHEDULING_BENCHMARKS_LINK_LIBRARIES} 32 | ) 33 | -------------------------------------------------------------------------------- /benchmarks/benchmark.cmake: -------------------------------------------------------------------------------- 1 | include(FetchContent) 2 | 3 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) 4 | set(FETCHCONTENT_QUIET OFF) 5 | 6 | FetchContent_Declare(benchmark 7 | GIT_REPOSITORY https://github.com/google/benchmark.git 8 | GIT_TAG main 9 | ) 10 | 11 | FetchContent_MakeAvailable(benchmark) 12 | -------------------------------------------------------------------------------- /benchmarks/scheduling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace scheduling::benchmarks::fibonacci { 6 | int Fibonacci(ThreadPool& thread_pool, const int n) { 7 | if (n < 2) { 8 | return 1; 9 | } 10 | int a, b; 11 | std::atomic counter{0}; 12 | thread_pool.Submit([&, n] { 13 | a = Fibonacci(thread_pool, n - 1); 14 | counter.fetch_add(1); 15 | }); 16 | thread_pool.Submit([&, n] { 17 | b = Fibonacci(thread_pool, n - 2); 18 | counter.fetch_add(1); 19 | }); 20 | 
thread_pool.Wait([&] { return counter.load() == 2; }); 21 | return a + b; 22 | } 23 | 24 | void Benchmark(benchmark::State& state) { 25 | const auto n = static_cast(state.range(0)); 26 | for (auto _ : state) { 27 | ThreadPool thread_pool; 28 | benchmark::DoNotOptimize(Fibonacci(thread_pool, n)); 29 | benchmark::ClobberMemory(); 30 | } 31 | } 32 | } // namespace scheduling::benchmarks::fibonacci 33 | 34 | namespace scheduling::benchmarks::linear_chain { 35 | void LinearChain(const int length) { 36 | int counter = 0; 37 | std::vector v(length); 38 | v[0] = Task([&] { ++counter; }); 39 | for (auto i = v.begin(), j = std::next(v.begin()); j != v.end(); ++i, ++j) { 40 | *j = Task([&] { ++counter; }); 41 | j->Succeed(&*i); 42 | } 43 | ThreadPool thread_pool; 44 | thread_pool.Submit(&v[0]); 45 | } 46 | 47 | void Benchmark(benchmark::State& state) { 48 | const auto length = static_cast(state.range(0)); 49 | for (auto _ : state) { 50 | LinearChain(length); 51 | benchmark::ClobberMemory(); 52 | } 53 | } 54 | } // namespace scheduling::benchmarks::linear_chain 55 | 56 | namespace scheduling::benchmarks::matrix_multiplication { 57 | void MatrixMultiplication(const int n, std::vector>& a, 58 | std::vector>& b, 59 | std::vector>& c) { 60 | std::vector tasks; 61 | tasks.reserve(4 * n + 1); 62 | 63 | tasks.emplace_back(); 64 | 65 | for (int i = 0; i < n; ++i) { 66 | tasks 67 | .emplace_back([&, i, n] { 68 | for (int j = 0; j < n; ++j) { 69 | a[i][j] = i + j; 70 | } 71 | }) 72 | .Precede(&tasks[0]); 73 | } 74 | 75 | for (int i = 0; i < n; ++i) { 76 | tasks 77 | .emplace_back([&, i, n] { 78 | for (int j = 0; j < n; ++j) { 79 | b[i][j] = i * j; 80 | } 81 | }) 82 | .Precede(&tasks[0]); 83 | } 84 | 85 | for (int i = 0; i < n; ++i) { 86 | tasks 87 | .emplace_back([&, i, n] { 88 | for (int j = 0; j < n; ++j) { 89 | c[i][j] = 0; 90 | } 91 | }) 92 | .Precede(&tasks[0]); 93 | } 94 | 95 | for (int i = 0; i < n; ++i) { 96 | tasks 97 | .emplace_back([&, i, n] { 98 | for (int j = 0; j < n; ++j) { 99 | for (int k = 0; k < n; ++k) { 100 | c[i][j] += a[i][k] * b[k][j]; 101 | } 102 | } 103 | }) 104 | .Succeed(&tasks[0]); 105 | } 106 | 107 | ThreadPool thread_pool; 108 | thread_pool.Submit(tasks); 109 | } 110 | 111 | void Benchmark(benchmark::State& state) { 112 | const auto n = static_cast(state.range(0)); 113 | std::vector a(n, std::vector(n)); 114 | std::vector b(n, std::vector(n)); 115 | std::vector c(n, std::vector(n)); 116 | for (auto _ : state) { 117 | MatrixMultiplication(n, a, b, c); 118 | benchmark::ClobberMemory(); 119 | } 120 | } 121 | } // namespace scheduling::benchmarks::matrix_multiplication 122 | 123 | BENCHMARK(scheduling::benchmarks::fibonacci::Benchmark) 124 | ->Name("scheduling/fibonacci") 125 | ->DenseRange(25, 35) 126 | ->Unit(benchmark::kMillisecond); 127 | 128 | BENCHMARK(scheduling::benchmarks::linear_chain::Benchmark) 129 | ->Name("scheduling/linear_chain") 130 | ->RangeMultiplier(2) 131 | ->Range(1 << 20, 1 << 25) 132 | ->Unit(benchmark::kMillisecond); 133 | 134 | BENCHMARK(scheduling::benchmarks::matrix_multiplication::Benchmark) 135 | ->Name("scheduling/matrix_multiplication") 136 | ->RangeMultiplier(2) 137 | ->Range(128, 2048) 138 | ->Unit(benchmark::kMillisecond); 139 | -------------------------------------------------------------------------------- /benchmarks/taskflow.cmake: -------------------------------------------------------------------------------- 1 | set(TASKFLOW_EXTERNAL_DIR "${CMAKE_BINARY_DIR}/external/taskflow") 2 | set(TASKFLOW_BUILD_DIR "${TASKFLOW_EXTERNAL_DIR}/build") 3 | 
set(TASKFLOW_DOWNLOAD_DIR "${TASKFLOW_EXTERNAL_DIR}/download") 4 | set(TASKFLOW_INSTALL_DIR "${TASKFLOW_EXTERNAL_DIR}/install") 5 | set(TASKFLOW_SRC_DIR "${TASKFLOW_EXTERNAL_DIR}/src") 6 | set(TF_BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE) 7 | set(TF_BUILD_CUDA OFF CACHE BOOL "" FORCE) 8 | set(TF_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) 9 | set(TF_BUILD_SYCL OFF CACHE BOOL "" FORCE) 10 | set(TF_BUILD_TESTS OFF CACHE BOOL "" FORCE) 11 | 12 | file(WRITE "${TASKFLOW_DOWNLOAD_DIR}/CMakeLists.txt" 13 | "cmake_minimum_required(VERSION ${CMAKE_MINIMUM_REQUIRED_VERSION}) 14 | include(ExternalProject) 15 | project(taskflow-download CXX) 16 | ExternalProject_Add(taskflow-download 17 | GIT_REPOSITORY https://github.com/taskflow/taskflow.git 18 | GIT_TAG v3.7.0 19 | GIT_SHALLOW 1 20 | SOURCE_DIR \"${TASKFLOW_SRC_DIR}\" 21 | BINARY_DIR \"${TASKFLOW_BUILD_DIR}\" 22 | CONFIGURE_COMMAND \"\" 23 | BUILD_COMMAND \"\" 24 | INSTALL_COMMAND \"\" 25 | TEST_COMMAND \"\" 26 | )" 27 | ) 28 | 29 | execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 30 | RESULT_VARIABLE TASKFLOW_CMAKE_RESULT 31 | WORKING_DIRECTORY ${TASKFLOW_DOWNLOAD_DIR} 32 | ) 33 | 34 | if(TASKFLOW_CMAKE_RESULT) 35 | message(FATAL_ERROR "CMake for taskflow-download failed: ${TASKFLOW_CMAKE_RESULT}") 36 | endif() 37 | 38 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 39 | RESULT_VARIABLE TASKFLOW_BUILD_RESULT 40 | WORKING_DIRECTORY ${TASKFLOW_DOWNLOAD_DIR} 41 | ) 42 | 43 | if(TASKFLOW_BUILD_RESULT) 44 | message(FATAL_ERROR "Build for taskflow-download failed: ${TASKFLOW_BUILD_RESULT}") 45 | endif() 46 | 47 | add_subdirectory(${TASKFLOW_SRC_DIR} ${TASKFLOW_BUILD_DIR}) 48 | -------------------------------------------------------------------------------- /benchmarks/taskflow.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | namespace taskflow::benchmarks::fibonacci { 7 | int Fibonacci(const int n, tf::Subflow& sbf) { 8 | if (n < 2) { 9 | return 1; 10 | } 11 | int res1, res2; 12 | sbf.emplace( 13 | [&res1, n](tf::Subflow& sbf_n_1) { res1 = Fibonacci(n - 1, sbf_n_1); }); 14 | sbf.emplace( 15 | [&res2, n](tf::Subflow& sbf_n_2) { res2 = Fibonacci(n - 2, sbf_n_2); }); 16 | sbf.join(); 17 | return res1 + res2; 18 | } 19 | 20 | void Benchmark(benchmark::State& state) { 21 | for (auto _ : state) { 22 | const auto n = static_cast(state.range(0)); 23 | int res; 24 | tf::Executor executor; 25 | tf::Taskflow taskflow; 26 | taskflow.emplace([&res, n](tf::Subflow& sbf) { res = Fibonacci(n, sbf); }); 27 | executor.run(taskflow).wait(); 28 | benchmark::ClobberMemory(); 29 | } 30 | } 31 | } // namespace taskflow::benchmarks::fibonacci 32 | 33 | namespace taskflow::benchmarks::linear_chain { 34 | void LinearChain(const int length) { 35 | tf::Executor executor; 36 | tf::Taskflow taskflow; 37 | std::vector tasks(length); 38 | auto counter = 0; 39 | 40 | for (auto i = 0; i < length; ++i) { 41 | tasks[i] = taskflow.emplace([&] { counter++; }); 42 | } 43 | 44 | taskflow.linearize(tasks); 45 | executor.run(taskflow).get(); 46 | } 47 | 48 | void Benchmark(benchmark::State& state) { 49 | for (auto _ : state) { 50 | const auto length = static_cast(state.range(0)); 51 | LinearChain(length); 52 | benchmark::ClobberMemory(); 53 | } 54 | } 55 | } // namespace taskflow::benchmarks::linear_chain 56 | 57 | namespace taskflow::benchmarks::matrix_multiplication { 58 | void MatrixMultiplication(const int n, std::vector>& a, 59 | std::vector>& b, 60 | std::vector>& c) { 61 | 
tf::Executor executor; 62 | tf::Taskflow taskflow; 63 | 64 | auto init_a = taskflow.for_each_index(0, n, 1, [&](const int i) { 65 | for (int j = 0; j < n; ++j) { 66 | a[i][j] = i + j; 67 | } 68 | }); 69 | 70 | auto init_b = taskflow.for_each_index(0, n, 1, [&](const int i) { 71 | for (int j = 0; j < n; ++j) { 72 | b[i][j] = i * j; 73 | } 74 | }); 75 | 76 | auto init_c = taskflow.for_each_index(0, n, 1, [&](const int i) { 77 | for (int j = 0; j < n; ++j) { 78 | c[i][j] = 0; 79 | } 80 | }); 81 | 82 | auto comp_c = taskflow.for_each_index(0, n, 1, [&](const int i) { 83 | for (int j = 0; j < n; ++j) { 84 | for (int k = 0; k < n; ++k) { 85 | c[i][j] += a[i][k] * b[k][j]; 86 | } 87 | } 88 | }); 89 | 90 | comp_c.succeed(init_a, init_b, init_c); 91 | executor.run(taskflow).get(); 92 | } 93 | 94 | void Benchmark(benchmark::State& state) { 95 | for (auto _ : state) { 96 | const auto n = static_cast(state.range(0)); 97 | std::vector a(n, std::vector(n)); 98 | std::vector b(n, std::vector(n)); 99 | std::vector c(n, std::vector(n)); 100 | MatrixMultiplication(n, a, b, c); 101 | benchmark::ClobberMemory(); 102 | } 103 | } 104 | } // namespace taskflow::benchmarks::matrix_multiplication 105 | 106 | BENCHMARK(taskflow::benchmarks::fibonacci::Benchmark) 107 | ->Name("taskflow/fibonacci") 108 | ->DenseRange(25, 35) 109 | ->Unit(benchmark::kMillisecond); 110 | 111 | BENCHMARK(taskflow::benchmarks::linear_chain::Benchmark) 112 | ->Name("taskflow/linear_chain") 113 | ->RangeMultiplier(2) 114 | ->Range(1 << 20, 1 << 25) 115 | ->Unit(benchmark::kMillisecond); 116 | 117 | BENCHMARK(taskflow::benchmarks::matrix_multiplication::Benchmark) 118 | ->Name("taskflow/matrix_multiplication") 119 | ->RangeMultiplier(2) 120 | ->Range(128, 2048) 121 | ->Unit(benchmark::kMillisecond); 122 | -------------------------------------------------------------------------------- /cmake/sanitizers.cmake: -------------------------------------------------------------------------------- 1 | # Source: 2 | # https://github.com/cpp-best-practices/cmake_template/blob/main/cmake/Sanitizers.cmake 3 | 4 | function( 5 | scheduling_enable_sanitizers 6 | project_name 7 | ENABLE_ADDRESS_SANITIZER 8 | ENABLE_LEAK_SANITIZER 9 | ENABLE_MEMORY_SANITIZER 10 | ENABLE_THREAD_SANITIZER 11 | ENABLE_UNDEFINED_BEHAVIOR_SANITIZER) 12 | 13 | message(STATUS ENABLE_ADDRESS_SANITIZER=${ENABLE_ADDRESS_SANITIZER}) 14 | message(STATUS ENABLE_LEAK_SANITIZER=${ENABLE_LEAK_SANITIZER}) 15 | message(STATUS ENABLE_MEMORY_SANITIZER=${ENABLE_MEMORY_SANITIZER}) 16 | message(STATUS ENABLE_THREAD_SANITIZER=${ENABLE_THREAD_SANITIZER}) 17 | message(STATUS ENABLE_UNDEFINED_BEHAVIOR_SANITIZER=${ENABLE_UNDEFINED_BEHAVIOR_SANITIZER}) 18 | 19 | set(SANITIZERS "") 20 | 21 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") 22 | if(${ENABLE_ADDRESS_SANITIZER}) 23 | list(APPEND SANITIZERS "address") 24 | endif() 25 | 26 | if(${ENABLE_LEAK_SANITIZER}) 27 | list(APPEND SANITIZERS "leak") 28 | endif() 29 | 30 | if(${ENABLE_UNDEFINED_BEHAVIOR_SANITIZER}) 31 | list(APPEND SANITIZERS "undefined") 32 | endif() 33 | 34 | if(${ENABLE_THREAD_SANITIZER}) 35 | if("address" IN_LIST SANITIZERS OR "leak" IN_LIST SANITIZERS) 36 | message(FATAL_ERROR "Thread sanitizer does not work with Address and Leak sanitizer enabled") 37 | else() 38 | list(APPEND SANITIZERS "thread") 39 | endif() 40 | endif() 41 | 42 | if(${ENABLE_MEMORY_SANITIZER} AND CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") 43 | message( 44 | WARNING 45 | "Memory sanitizer requires all the code (including libc++) 
to be MSan-instrumented; otherwise it reports false positives" 46 | ) 47 | if("address" IN_LIST SANITIZERS 48 | OR "thread" IN_LIST SANITIZERS 49 | OR "leak" IN_LIST SANITIZERS) 50 | message(FATAL_ERROR "Memory sanitizer does not work with Address, Thread or Leak sanitizer enabled") 51 | else() 52 | list(APPEND SANITIZERS "memory") 53 | endif() 54 | endif() 55 | elseif(MSVC) 56 | if(${ENABLE_ADDRESS_SANITIZER}) 57 | list(APPEND SANITIZERS "address") 58 | endif() 59 | if(${ENABLE_LEAK_SANITIZER} 60 | OR ${ENABLE_MEMORY_SANITIZER} 61 | OR ${ENABLE_THREAD_SANITIZER} 62 | OR ${ENABLE_UNDEFINED_BEHAVIOR_SANITIZER}) 63 | message(FATAL_ERROR "MSVC only supports address sanitizer") 64 | endif() 65 | endif() 66 | 67 | list( 68 | JOIN 69 | SANITIZERS 70 | "," 71 | LIST_OF_SANITIZERS) 72 | 73 | if(LIST_OF_SANITIZERS) 74 | if(NOT 75 | "${LIST_OF_SANITIZERS}" 76 | STREQUAL 77 | "") 78 | message(STATUS SANITIZERS=${LIST_OF_SANITIZERS}) 79 | if(NOT MSVC) 80 | target_compile_options(${project_name} INTERFACE -fsanitize=${LIST_OF_SANITIZERS}) 81 | target_link_options(${project_name} INTERFACE -fsanitize=${LIST_OF_SANITIZERS}) 82 | else() 83 | string(FIND "$ENV{PATH}" "$ENV{VSINSTALLDIR}" index_of_vs_install_dir) 84 | if("${index_of_vs_install_dir}" STREQUAL "-1") 85 | message( 86 | SEND_ERROR 87 | "Using MSVC sanitizers requires setting the MSVC environment before building the project. Please manually open the MSVC command prompt and rebuild the project." 88 | ) 89 | endif() 90 | target_compile_options(${project_name} INTERFACE /fsanitize=${LIST_OF_SANITIZERS} /Zi /INCREMENTAL:NO) 91 | target_compile_definitions(${project_name} INTERFACE _DISABLE_VECTOR_ANNOTATION _DISABLE_STRING_ANNOTATION) 92 | target_link_options(${project_name} INTERFACE /INCREMENTAL:NO) 93 | endif() 94 | endif() 95 | endif() 96 | endfunction() 97 |
-------------------------------------------------------------------------------- /docs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Doxygen) 2 | 3 | if(DOXYGEN_FOUND) 4 | set(DOXYGEN_EXCLUDE_SYMBOLS scheduling::internal) 5 | set(DOXYGEN_HIDE_FRIEND_COMPOUNDS YES) 6 | set(DOXYGEN_SHOW_USED_FILES NO) 7 | set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${CMAKE_CURRENT_SOURCE_DIR}/main_page.md") 8 | doxygen_add_docs(docs 9 | ALL 10 | ${CMAKE_SOURCE_DIR}/include/scheduling 11 | ${DOXYGEN_USE_MDFILE_AS_MAINPAGE} 12 | COMMENT "Generate HTML documentation") 13 | else() 14 | message(WARNING "Doxygen was not found; documentation will not be generated.") 15 | endif() 16 |
-------------------------------------------------------------------------------- /docs/main_page.md: -------------------------------------------------------------------------------- 1 | Scheduling is a simple and minimalistic header-only library that allows you to: 2 | 3 | * Submit tasks for execution to a dynamic thread pool. 4 | * Parallelize execution of dependent tasks and task graphs. 5 | 6 | Scheduling requires C++20.
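
As a quick orientation, here is a minimal usage sketch based on the public API declared in `include/scheduling/scheduling.hpp` (the variable names and values are illustrative only, not part of the library):

```cpp
#include <vector>

#include <scheduling/scheduling.hpp>

int main() {
  // By default the pool starts std::thread::hardware_concurrency() worker threads.
  scheduling::ThreadPool thread_pool;

  // Submit a standalone function; it runs on one of the worker threads.
  thread_pool.Submit([] { /* do some work */ });

  // Build a small task graph: the last task may only run after `first` and `second`.
  int a = 0, b = 0, total = 0;
  std::vector<scheduling::Task> tasks;
  tasks.reserve(3);  // Reserve up front: tasks hold raw pointers to each other.
  auto& first = tasks.emplace_back([&] { a = 1; });
  auto& second = tasks.emplace_back([&] { b = 2; });
  auto& sum = tasks.emplace_back([&] { total = a + b; });
  sum.Succeed(&first, &second);

  // Submit the graph and block until all task queues are empty.
  thread_pool.Submit(tasks);
  thread_pool.Wait();

  return total == 3 ? 0 : 1;
}
```

Dependencies can also be declared in the opposite direction with `Precede`, and `Wait` has an overload that takes a predicate and keeps executing queued tasks on the calling thread until the predicate returns `true`.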
7 | -------------------------------------------------------------------------------- /images/fibonacci_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/fibonacci_cpu.png -------------------------------------------------------------------------------- /images/fibonacci_wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/fibonacci_wall.png -------------------------------------------------------------------------------- /images/linear_chain_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/linear_chain_cpu.png -------------------------------------------------------------------------------- /images/linear_chain_wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/linear_chain_wall.png -------------------------------------------------------------------------------- /images/matrix_multiplication_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/matrix_multiplication_cpu.png -------------------------------------------------------------------------------- /images/matrix_multiplication_wall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dpuyda/scheduling/ef687f5748a2fab935a508b00630b046b0c121c4/images/matrix_multiplication_wall.png -------------------------------------------------------------------------------- /include/scheduling/scheduling.hpp: -------------------------------------------------------------------------------- 1 | // This project is licensed under the MIT License. 2 | // 3 | // The `WorkStealingDeque` class is copied from Google Filament licensed under 4 | // the Apache License 2.0. See the LICENSE-APACHE file in the root directory of 5 | // this project for more information. 6 | // Original code: 7 | // https://github.com/google/filament/blob/main/libs/utils/include/utils/WorkStealingDequeue.h 8 | // Modifications: 9 | // - Make the work-stealing deque variable-sized. 10 | // 11 | // The `Array` class is copied from Taskflow licensed under the MIT License. 
12 | // Original code: 13 | // https://github.com/taskflow/taskflow/blob/master/taskflow/core/tsq.hpp 14 | #pragma once 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace scheduling { 24 | namespace internal { 25 | constexpr auto kCancelled = 1; 26 | constexpr auto kInvoked = 1 << 1; 27 | 28 | template 29 | requires std::is_pointer_v 30 | class Array { 31 | public: 32 | explicit Array(const int capacity) 33 | : capacity_{capacity}, 34 | mask_{capacity - 1}, 35 | buffer_{new std::atomic[capacity]} {} 36 | 37 | Array(const Array&) = delete; 38 | Array(Array&&) = delete; 39 | Array& operator=(const Array&) = delete; 40 | Array& operator=(Array&&) = delete; 41 | ~Array() noexcept { delete[] buffer_; } 42 | 43 | void Put(const size_t index, T item) noexcept { 44 | buffer_[index & mask_].store(item, std::memory_order_relaxed); 45 | } 46 | 47 | [[nodiscard]] T Get(const size_t index) noexcept { 48 | return buffer_[index & mask_].load(std::memory_order_relaxed); 49 | } 50 | 51 | [[nodiscard]] Array* Resize(const size_t bottom, const size_t top) { 52 | auto* array = new Array{2 * capacity_}; 53 | for (auto i = top; i != bottom; ++i) { 54 | array->Put(i, Get(i)); 55 | } 56 | return array; 57 | } 58 | 59 | [[nodiscard]] int Capacity() const { return capacity_; } 60 | 61 | private: 62 | const int capacity_, mask_; 63 | std::atomic* buffer_; 64 | }; 65 | 66 | template 67 | requires std::is_pointer_v 68 | class WorkStealingDeque { 69 | public: 70 | explicit WorkStealingDeque(const int capacity = 1024) 71 | : top_{0}, bottom_{0}, array_{new Array{capacity}} { 72 | assert(capacity && (capacity & capacity - 1) == 0); 73 | garbage_.reserve(64); 74 | } 75 | 76 | WorkStealingDeque(const WorkStealingDeque&) = delete; 77 | WorkStealingDeque(WorkStealingDeque&&) = delete; 78 | WorkStealingDeque& operator=(const WorkStealingDeque&) = delete; 79 | WorkStealingDeque& operator=(WorkStealingDeque&&) = delete; 80 | 81 | ~WorkStealingDeque() noexcept { 82 | for (auto* array : garbage_) { 83 | delete array; 84 | } 85 | delete array_.load(); 86 | } 87 | 88 | void Push(T item) { 89 | // std::memory_order_relaxed is sufficient because this load doesn't acquire 90 | // anything from another thread. bottom_ is only written in pop() which 91 | // cannot be concurrent with push(). 92 | const auto bottom = bottom_.load(std::memory_order_relaxed); 93 | const auto top = top_.load(std::memory_order_acquire); 94 | auto* array = array_.load(std::memory_order_relaxed); 95 | if (array->Capacity() - 1 < bottom - top) { 96 | array = Resize(array, bottom, top); 97 | } 98 | array->Put(bottom, item); 99 | // std::memory_order_release is used because we release the item we just 100 | // pushed to other threads which are calling steal(). 101 | bottom_.store(bottom + 1, std::memory_order_release); 102 | } 103 | 104 | [[nodiscard]] T Pop() { 105 | // std::memory_order_seq_cst is needed to guarantee ordering in steal() Note 106 | // however that this is not a typical acquire/release operation: 107 | // - Not acquire because bottom_ is only written in push() which is not 108 | // concurrent. 109 | // - Not release because we're not publishing anything to steal() here. 110 | // QUESTION: does this prevent top_ load below to be reordered before the 111 | // "store" part of fetch_sub()? Hopefully it does. If not we'd need a full 112 | // memory barrier. 
113 | const auto bottom = bottom_.fetch_sub(1, std::memory_order_seq_cst) - 1; 114 | // bottom could be -1 if we tried to pop() from an empty queue. This will be 115 | // corrected below. 116 | assert(bottom >= -1); 117 | auto* array = array_.load(std::memory_order_relaxed); 118 | // std::memory_order_seq_cst is needed to guarantee ordering in steal(). 119 | // Note however that this is not a typical acquire operation (i.e. other 120 | // thread's writes of top_ don't publish data). 121 | auto top = top_.load(std::memory_order_seq_cst); 122 | if (top < bottom) { 123 | // The queue isn't empty, and it's not the last item, just return it, this 124 | // is the common case. 125 | return array->Get(bottom); 126 | } 127 | T item{nullptr}; 128 | if (top == bottom) { 129 | // We just took the last item. 130 | item = array->Get(bottom); 131 | // Because we know we took the last item, we could be racing with steal() 132 | // -- the last item being both at the top and bottom of the queue. We 133 | // resolve this potential race by also stealing that item from ourselves. 134 | if (top_.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, 135 | std::memory_order_relaxed)) { 136 | // Success: we stole our last item from ourselves, meaning that a 137 | // concurrent steal() would have failed. top_ now equals top + 1, we 138 | // adjust top to make the queue empty. 139 | ++top; 140 | } else { 141 | // Failure: top_ was not equal to top, which means the item was stolen 142 | // under our feet. top now equals to top_. Simply discard the item we 143 | // just popped. The queue is now empty. 144 | item = nullptr; 145 | } 146 | } else { 147 | // We could be here if the item was stolen just before we read top_, we'll 148 | // adjust bottom_ below. 149 | assert(top - bottom == 1); 150 | } 151 | // std::memory_order_relaxed used because we're not publishing any data. No 152 | // concurrent writes to bottom_ possible, it's always safe to write bottom_. 153 | bottom_.store(top, std::memory_order_relaxed); 154 | return item; 155 | } 156 | 157 | [[nodiscard]] T Steal() { 158 | // Note: A key component of this algorithm is that top_ is read before 159 | // bottom_ here (and observed as such in other threads). 160 | 161 | // std::memory_order_seq_cst is needed to guarantee ordering in pop(). Note 162 | // however that this is not a typical acquire operation (i.e. other thread's 163 | // writes of top_ don't publish data). 164 | auto top = top_.load(std::memory_order_seq_cst); 165 | 166 | // std::memory_order_acquire is needed because we're acquiring items 167 | // published in push(). std::memory_order_seq_cst is needed to guarantee 168 | // ordering in pop(). 169 | if (const auto bottom = bottom_.load(std::memory_order_seq_cst); 170 | top >= bottom) { 171 | // The queue is empty. 172 | return nullptr; 173 | } 174 | 175 | // The queue isn't empty. 176 | auto* array = array_.load(std::memory_order_acquire); 177 | const auto item = array->Get(top); 178 | if (!top_.compare_exchange_strong(top, top + 1, std::memory_order_seq_cst, 179 | std::memory_order_relaxed)) { 180 | // Failure: the item we just tried to steal was pop()'ed under our feet, 181 | // simply discard it; nothing to do -- it's okay to try again. 182 | return nullptr; 183 | } 184 | // Success: we stole an item, just return it. 
185 | return item; 186 | } 187 | 188 | private: 189 | [[nodiscard]] Array* Resize(Array* array, const size_t bottom, 190 | const size_t top) { 191 | auto* tmp = array->Resize(bottom, top); 192 | garbage_.push_back(array); 193 | std::swap(array, tmp); 194 | array_.store(array, std::memory_order_release); 195 | return array; 196 | } 197 | 198 | #ifdef __cpp_lib_hardware_interference_size 199 | alignas(std::hardware_destructive_interference_size) std::atomic top_, 200 | bottom_; 201 | #else 202 | std::atomic top_, bottom_; 203 | #endif 204 | std::atomic*> array_; 205 | std::vector*> garbage_; 206 | }; 207 | } // namespace internal 208 | 209 | /** 210 | * \brief Represents a task in a task graph. 211 | * 212 | * A task graph is a collection of tasks and dependencies between them. 213 | * Dependencies between tasks define the order in which the tasks should be 214 | * executed. 215 | */ 216 | class Task { 217 | public: 218 | /** 219 | * \brief Creates an empty task. 220 | * 221 | * Empty tasks can be used to define dependencies between task groups. 222 | */ 223 | Task() = default; 224 | 225 | /** 226 | * \brief Creates a task. 227 | * 228 | * The signature of the function to execute should be equivalent to the 229 | * following: 230 | * \code{.cpp} 231 | * void func(); 232 | * \endcode 233 | * 234 | * \param func The function to execute. 235 | */ 236 | template >>> 238 | explicit Task(TaskType&& func) : func_{std::forward(func)} {} 239 | 240 | Task(const Task& other) 241 | : total_predecessors_{other.total_predecessors_}, 242 | func_{other.func_}, 243 | next_{other.next_} { 244 | remaining_predecessors_.store(other.remaining_predecessors_); 245 | cancellation_flags_.store(other.cancellation_flags_); 246 | } 247 | 248 | Task(Task&& other) noexcept 249 | : total_predecessors_{other.total_predecessors_}, 250 | func_{std::move(other.func_)}, 251 | next_{std::move(other.next_)} { 252 | remaining_predecessors_.store(other.remaining_predecessors_); 253 | cancellation_flags_.store(other.cancellation_flags_); 254 | } 255 | 256 | Task& operator=(const Task& other) { 257 | total_predecessors_ = other.total_predecessors_; 258 | remaining_predecessors_.store(other.remaining_predecessors_); 259 | cancellation_flags_.store(other.cancellation_flags_); 260 | func_ = other.func_; 261 | next_ = other.next_; 262 | return *this; 263 | } 264 | 265 | Task& operator=(Task&& other) noexcept { 266 | total_predecessors_ = other.total_predecessors_; 267 | remaining_predecessors_.store(other.remaining_predecessors_); 268 | cancellation_flags_.store(other.cancellation_flags_); 269 | func_ = std::move(other.func_); 270 | next_ = std::move(other.next_); 271 | return *this; 272 | } 273 | 274 | ~Task() = default; 275 | 276 | /** 277 | * \brief Defines a task that should be executed before the current task. 278 | * 279 | * \param task A task that should be executed before the current task. 280 | */ 281 | void Succeed(Task* task) { 282 | task->next_.push_back(this); 283 | ++total_predecessors_; 284 | remaining_predecessors_.fetch_add(1); 285 | } 286 | 287 | /** 288 | * \brief Defines tasks that should be executed before the current task. 289 | * 290 | * \param task, tasks Tasks that should be executed before the current task. 291 | */ 292 | template 293 | void Succeed(Task* task, const TasksType&... 
tasks) { 294 | task->next_.push_back(this); 295 | ++total_predecessors_; 296 | remaining_predecessors_.fetch_add(1); 297 | Succeed(tasks...); 298 | } 299 | 300 | /** 301 | * \brief Defines a task that should be executed after the current task. 302 | * 303 | * \param task A task that should be executed after the current task. 304 | */ 305 | void Precede(Task* task) { 306 | next_.push_back(task); 307 | ++task->total_predecessors_; 308 | task->remaining_predecessors_.fetch_add(1); 309 | } 310 | 311 | /** 312 | * \brief Defines tasks that should be executed after the current task. 313 | * 314 | * \param task, tasks Tasks that should be executed after the current task. 315 | */ 316 | template 317 | void Precede(Task* task, const TasksType&... tasks) { 318 | next_.push_back(task); 319 | ++task->total_predecessors_; 320 | task->remaining_predecessors_.fetch_add(1); 321 | Precede(tasks...); 322 | } 323 | 324 | /** 325 | * \brief Cancels the task. 326 | * 327 | * Cancelling a task never fails. If `false` is returned, it means that the 328 | * task has been invoked earlier, or will be invoked at least once after the 329 | * cancellation. When a task is cancelled and will not be invoked anymore, its 330 | * successors also will not be invoked. Call `Reset` to undo cancellation. 331 | * 332 | * \see Reset 333 | * 334 | * \return `false` if the task has been invoked earlier or will be invoked at 335 | * least once after the cancellation, `true` otherwise. 336 | */ 337 | bool Cancel() { 338 | return (cancellation_flags_.fetch_or(internal::kCancelled) & 339 | internal::kInvoked) == 0; 340 | } 341 | 342 | /** 343 | * \brief Clears cancellation flags. 344 | * 345 | * Call `Reset` to undo task cancellation. 346 | * 347 | * \see Cancel 348 | */ 349 | void Reset() { cancellation_flags_.store(0); } 350 | 351 | private: 352 | friend class ThreadPool; 353 | bool delete_{false}, is_root_{false}; 354 | int total_predecessors_{0}; 355 | std::atomic remaining_predecessors_{0}, cancellation_flags_{0}; 356 | std::function func_; 357 | std::vector next_; 358 | }; 359 | 360 | /** 361 | * \brief A static thread pool that manages a specified number of background 362 | * threads and allows to execute tasks on these threads. 363 | * 364 | * The threads, managed by the thread pool, execute tasks in a work-stealing 365 | * manner. 366 | */ 367 | class ThreadPool { 368 | public: 369 | /** 370 | * \brief Creates a `ThreadPool` instance. 371 | * 372 | * When created, a `ThreadPool` instance creates a specified number of 373 | * threads that will be running in the background until the `ThreadPool` 374 | * instance is destroyed. 375 | * 376 | * \param threads_count The number of threads to create. 
377 | */ 378 | explicit ThreadPool( 379 | const unsigned threads_count = std::thread::hardware_concurrency()) 380 | : queues_count_{threads_count + 1}, queues_{threads_count + 1} { 381 | threads_.reserve(threads_count); 382 | for (unsigned i = 0; i != threads_count; ++i) { 383 | threads_.emplace_back([this, i] { Run(i + 1); }); 384 | } 385 | } 386 | 387 | ThreadPool(const ThreadPool&) = delete; 388 | ThreadPool(ThreadPool&&) = delete; 389 | ThreadPool& operator=(const ThreadPool&) = delete; 390 | ThreadPool& operator=(ThreadPool&&) = delete; 391 | 392 | ~ThreadPool() noexcept { 393 | Wait(); 394 | stop_.test_and_set(); 395 | tasks_count_ += queues_count_; 396 | tasks_count_.notify_all(); 397 | for (auto& thread : threads_) { 398 | thread.join(); 399 | } 400 | } 401 | 402 | /** 403 | * \brief Submits a function that should be executed on a thread managed by 404 | * the thread pool. 405 | * 406 | * When submitted, the function is pushed into one of the thread pool task 407 | * queues. Eventually, the function will be popped from the queue and executed 408 | * on one of the threads managed by the thread pool. The order of function 409 | * execution is undetermined. 410 | * 411 | * The signature of the function should be equivalent to the following: 412 | * \code{.cpp} 413 | * void func(); 414 | * \endcode 415 | * 416 | * \param func The function to execute. 417 | */ 418 | template >>> 420 | void Submit(FuncType&& func) { 421 | auto* task = new Task(std::forward(func)); 422 | task->delete_ = true; 423 | Submit(task); 424 | } 425 | 426 | /** 427 | * \brief Submits a task that should be executed on a thread managed by the 428 | * thread pool. 429 | * 430 | * When submitted, the task is pushed into one of the thread pool task queues. 431 | * Eventually, the task will be popped from the queue and executed on one of 432 | * the threads managed by the thread pool. The order of task execution is 433 | * undetermined. 434 | * 435 | * \param task The task to execute. 436 | */ 437 | void Submit(Task* task) { 438 | ++tasks_count_; 439 | queues_[index_].Push(task); 440 | tasks_count_.notify_one(); 441 | } 442 | 443 | /** 444 | * \brief Submits a task graph that should be executed on threads managed by 445 | * the thread pool. 446 | * 447 | * A graph is a collection of tasks and dependencies between them. When 448 | * submitted, the tasks that do not have predecessors are pushed into the 449 | * thread pool task queues. 450 | * 451 | * \param tasks The tasks to execute. 452 | */ 453 | template 454 | void Submit(TasksType& tasks) { 455 | for (auto& task : tasks) { 456 | task.is_root_ = task.total_predecessors_ == 0; 457 | } 458 | for (auto& task : tasks) { 459 | if (task.is_root_) { 460 | Submit(&task); 461 | } 462 | } 463 | } 464 | 465 | /** 466 | * \brief Blocks the current thread and executes tasks from the task queues 467 | * until a specified predicate is satisfied. 468 | * 469 | * The signature of the predicate function should be equivalent to the 470 | * following: 471 | * \code{.cpp} 472 | * bool predicate(); 473 | * \endcode 474 | * 475 | * \param predicate The predicate which returns `false` if the waiting should 476 | * be continued. 477 | */ 478 | template 479 | void Wait(const PredicateType& predicate) { 480 | while (!predicate()) { 481 | if (auto* task = GetTask()) { 482 | Execute(task); 483 | } 484 | } 485 | } 486 | 487 | /** 488 | * \brief Blocks the current thread until all task queues are empty. 
489 | * 490 | * Other threads may push tasks into the task queues while the current thread 491 | * is blocked. 492 | */ 493 | void Wait() const { 494 | while (const auto count = tasks_count_.load()) { 495 | tasks_count_.wait(count); 496 | } 497 | } 498 | 499 | private: 500 | void Run(const unsigned i) { 501 | index_ = i; 502 | for (auto attempts = 0;;) { 503 | if (constexpr auto max_attempts = 100; ++attempts > max_attempts) { 504 | tasks_count_.wait(0); 505 | } 506 | if (auto* task = GetTask()) { 507 | Execute(task); 508 | attempts = 0; 509 | } else if (stop_.test()) { 510 | return; 511 | } 512 | } 513 | } 514 | 515 | void Execute(Task* task) { 516 | for (Task* next = nullptr; task; next = nullptr) { 517 | task->remaining_predecessors_.store(task->total_predecessors_); 518 | if (task->cancellation_flags_.fetch_or(internal::kInvoked) & 519 | internal::kCancelled) { 520 | break; 521 | } 522 | if (task->func_) { 523 | task->func_(); 524 | } 525 | auto it = task->next_.begin(); 526 | for (; it != task->next_.end(); ++it) { 527 | if ((*it)->remaining_predecessors_.fetch_sub(1) == 1) { 528 | next = *it++; 529 | break; 530 | } 531 | } 532 | for (; it != task->next_.end(); ++it) { 533 | if ((*it)->remaining_predecessors_.fetch_sub(1) == 1) { 534 | Submit(*it); 535 | } 536 | } 537 | if (task->delete_) { 538 | delete task; 539 | } 540 | task = next; 541 | } 542 | if (tasks_count_.fetch_sub(1) == 1) { 543 | tasks_count_.notify_all(); 544 | } 545 | } 546 | 547 | Task* GetTask() { 548 | const auto i = index_; 549 | auto* task = queues_[i].Pop(); 550 | if (task) { 551 | return task; 552 | } 553 | for (unsigned j = 1; j != queues_count_; ++j) { 554 | task = queues_[(i + j) % queues_count_].Steal(); 555 | if (task) { 556 | return task; 557 | } 558 | } 559 | return nullptr; 560 | } 561 | 562 | static thread_local unsigned index_; 563 | const unsigned queues_count_; 564 | std::atomic_flag stop_; 565 | std::atomic tasks_count_; 566 | std::vector threads_; 567 | std::vector> queues_; 568 | }; 569 | 570 | inline thread_local unsigned ThreadPool::index_{0}; 571 | } // namespace scheduling 572 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BUILD_DIR="${BUILD_DIR:-build}" 4 | 5 | # Create build directory 6 | if [ -d "$BUILD_DIR" ]; 7 | then 8 | echo Removing old build directory... 9 | rm -Rf $BUILD_DIR; 10 | fi 11 | 12 | echo Creating build directory... 13 | mkdir $BUILD_DIR && cd $BUILD_DIR 14 | 15 | # Run CMake 16 | echo Running CMake... 17 | cmake $1 .. 18 | 19 | if [ ! $? -eq 0 ] 20 | then 21 | echo CMake failed 22 | exit 1 23 | fi 24 | 25 | # Build 26 | echo Building... 27 | cmake --build . 28 | 29 | if [ ! $? -eq 0 ] 30 | then 31 | echo Build failed 32 | exit 1 33 | fi 34 | 35 | # Run tests 36 | echo Running tests... 37 | ctest --output-on-failure --verbose 38 | 39 | if [ ! $? -eq 0 ] 40 | then 41 | echo Tests failed 42 | exit 1 43 | fi 44 | 45 | # Leave build directory 46 | echo Leaving build directory... 47 | cd .. 
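# Note: the CI workflow (.github/workflows/build.yml) forwards extra CMake options
# to this script as a single quoted argument, for example:
#   ./scripts/build.sh "-DSCHEDULING_ENABLE_THREAD_SANITIZER=ON"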
48 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(googletest.cmake) 2 | 3 | project(scheduling_tests LANGUAGES CXX) 4 | add_executable(${PROJECT_NAME} thread_pool_test.cpp) 5 | target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/include) 6 | 7 | target_link_libraries(${PROJECT_NAME} 8 | PRIVATE 9 | gtest_main 10 | scheduling 11 | ) 12 | 13 | add_test(${PROJECT_NAME} ${PROJECT_NAME}) 14 | -------------------------------------------------------------------------------- /tests/googletest.cmake: -------------------------------------------------------------------------------- 1 | include(FetchContent) 2 | 3 | set(BUILD_GMOCK ON CACHE BOOL "" FORCE) 4 | set(BUILD_GTEST ON CACHE BOOL "" FORCE) 5 | set(FETCHCONTENT_QUIET OFF) 6 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 7 | 8 | FetchContent_Declare(googletest 9 | GIT_REPOSITORY https://github.com/google/googletest.git 10 | GIT_TAG main 11 | ) 12 | 13 | FetchContent_MakeAvailable(googletest) 14 | -------------------------------------------------------------------------------- /tests/thread_pool_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace { 6 | using namespace scheduling; 7 | using namespace testing; 8 | 9 | int Fibonacci(ThreadPool& thread_pool, const int n) { 10 | if (n < 2) { 11 | return 1; 12 | } 13 | int a, b; 14 | std::atomic counter{0}; 15 | thread_pool.Submit([&, n] { 16 | a = Fibonacci(thread_pool, n - 1); 17 | counter.fetch_add(1); 18 | }); 19 | thread_pool.Submit([&, n] { 20 | b = Fibonacci(thread_pool, n - 2); 21 | counter.fetch_add(1); 22 | }); 23 | thread_pool.Wait([&] { return counter.load() == 2; }); 24 | return a + b; 25 | } 26 | 27 | int LinearChain(const int length) { 28 | int counter = 0; 29 | std::vector v(length); 30 | v[0] = Task([&] { ++counter; }); 31 | for (auto i = v.begin(), j = std::next(v.begin()); j != v.end(); ++i, ++j) { 32 | *j = Task([&] { ++counter; }); 33 | j->Succeed(&*i); 34 | } 35 | ThreadPool thread_pool; 36 | thread_pool.Submit(&v[0]); 37 | thread_pool.Wait(); 38 | return counter; 39 | } 40 | 41 | void MatrixMultiplication(const int n, std::vector>& a, 42 | std::vector>& b, 43 | std::vector>& c) { 44 | std::vector tasks; 45 | tasks.reserve(4 * n + 1); 46 | 47 | tasks.emplace_back(); 48 | 49 | for (int i = 0; i < n; ++i) { 50 | tasks 51 | .emplace_back([&, i, n] { 52 | for (int j = 0; j < n; ++j) { 53 | a[i][j] = i + j; 54 | } 55 | }) 56 | .Precede(&tasks[0]); 57 | } 58 | 59 | for (int i = 0; i < n; ++i) { 60 | tasks 61 | .emplace_back([&, i, n] { 62 | for (int j = 0; j < n; ++j) { 63 | b[i][j] = i * j; 64 | } 65 | }) 66 | .Precede(&tasks[0]); 67 | } 68 | 69 | for (int i = 0; i < n; ++i) { 70 | tasks 71 | .emplace_back([&, i, n] { 72 | for (int j = 0; j < n; ++j) { 73 | c[i][j] = 0; 74 | } 75 | }) 76 | .Precede(&tasks[0]); 77 | } 78 | 79 | for (int i = 0; i < n; ++i) { 80 | tasks 81 | .emplace_back([&, i, n] { 82 | for (int j = 0; j < n; ++j) { 83 | for (int k = 0; k < n; ++k) { 84 | c[i][j] += a[i][k] * b[k][j]; 85 | } 86 | } 87 | }) 88 | .Succeed(&tasks[0]); 89 | } 90 | 91 | ThreadPool thread_pool; 92 | thread_pool.Submit(tasks); 93 | } 94 | 95 | TEST(ThreadPoolTest, ArithmeticExpression) { 96 | int a, b, c, d, sum_ab, sum_cd, product; 97 | 98 | std::vector tasks; 99 | tasks.reserve(7); 100 | 101 | auto& get_a = tasks.emplace_back([&] { 
102 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
103 |     a = 1;
104 |   });
105 | 
106 |   auto& get_b = tasks.emplace_back([&] {
107 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
108 |     b = 2;
109 |   });
110 | 
111 |   auto& get_c = tasks.emplace_back([&] {
112 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
113 |     c = 3;
114 |   });
115 | 
116 |   auto& get_d = tasks.emplace_back([&] {
117 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
118 |     d = 4;
119 |   });
120 | 
121 |   auto& get_sum_ab = tasks.emplace_back([&] {
122 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
123 |     sum_ab = a + b;
124 |   });
125 | 
126 |   auto& get_sum_cd = tasks.emplace_back([&] {
127 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
128 |     sum_cd = c + d;
129 |   });
130 | 
131 |   auto& get_product = tasks.emplace_back([&] {
132 |     std::this_thread::sleep_for(std::chrono::milliseconds(100));
133 |     product = sum_ab * sum_cd;
134 |   });
135 | 
136 |   get_sum_ab.Succeed(&get_a, &get_b);
137 |   get_sum_cd.Succeed(&get_c, &get_d);
138 |   get_product.Succeed(&get_sum_ab, &get_sum_cd);
139 | 
140 |   ThreadPool thread_pool;
141 |   thread_pool.Submit(tasks);
142 |   thread_pool.Wait();
143 | 
144 |   EXPECT_EQ(a, 1);
145 |   EXPECT_EQ(b, 2);
146 |   EXPECT_EQ(c, 3);
147 |   EXPECT_EQ(d, 4);
148 |   EXPECT_EQ(sum_ab, a + b);
149 |   EXPECT_EQ(sum_cd, c + d);
150 |   EXPECT_EQ(product, (a + b) * (c + d));
151 | }
152 | 
153 | TEST(ThreadPoolTest, Fibonacci) {
154 |   int a = 1, b = 1;
155 |   ThreadPool thread_pool;
156 |   for (int n = 2; n < 30; ++n) {
157 |     const auto actual = Fibonacci(thread_pool, n);
158 |     const auto expected = a + b;
159 |     EXPECT_EQ(actual, expected) << "n = " << n;
160 |     a = b;
161 |     b = expected;
162 |   }
163 | }
164 | 
165 | TEST(ThreadPoolTest, LinearChain) {
166 |   for (int n = 1; n < 1000; ++n) {
167 |     const auto actual = LinearChain(n);
168 |     EXPECT_EQ(actual, n) << "n = " << n;
169 |   }
170 | }
171 | 
172 | TEST(ThreadPoolTest, MatrixMultiplication) {
173 |   for (int n = 1; n < 100; ++n) {
174 |     std::vector<std::vector<int>> a(n, std::vector<int>(n)), b(n, std::vector<int>(n)),
175 |         actual(n, std::vector<int>(n)), expected(n, std::vector<int>(n));
176 |     MatrixMultiplication(n, a, b, actual);
177 |     for (int i = 0; i < n; ++i) {
178 |       for (int j = 0; j < n; ++j) {
179 |         for (int k = 0; k < n; ++k) {
180 |           expected[i][j] += (i + k) * (k * j);
181 |         }
182 |       }
183 |     }
184 |     EXPECT_EQ(actual, expected) << "n = " << n;
185 |   }
186 | }
187 | 
188 | TEST(ThreadPoolTest, ResubmitGraph) {
189 |   auto counter = 0;
190 |   constexpr auto repeat_count = 1'000'000;
191 |   ThreadPool thread_pool;
192 |   std::vector<Task> tasks(32);
193 |   tasks[0] = Task([&] { ++counter; });
194 |   for (auto i = tasks.begin(), j = std::next(i); j != tasks.end(); ++i, ++j) {
195 |     *j = Task([&] { ++counter; });
196 |     j->Succeed(&*i);
197 |   }
198 |   for (int i = 0; i < repeat_count; ++i) {
199 |     thread_pool.Submit(&tasks[0]);
200 |     thread_pool.Wait();
201 |   }
202 |   EXPECT_EQ(counter, tasks.size() * repeat_count);
203 | }
204 | 
205 | TEST(ThreadPoolTest, Cancel) {
206 |   {
207 |     SCOPED_TRACE("Cancel a not started task");
208 | 
209 |     auto completed = false;
210 |     Task task([&] { completed = true; });
211 |     EXPECT_TRUE(task.Cancel());
212 | 
213 |     ThreadPool thread_pool;
214 |     thread_pool.Submit(&task);
215 |     thread_pool.Wait();
216 |     EXPECT_FALSE(completed);
217 |   }
218 | 
219 |   {
220 |     SCOPED_TRACE("Cancel successors");
221 | 
222 |     std::atomic_flag flag_1, flag_2;
223 |     auto count = 0;
224 | 
225 |     std::vector<Task> tasks;
226 |     tasks.emplace_back([&] {
227 |       flag_1.test_and_set();
228 |       flag_1.notify_one();
229 |       flag_2.wait(false);
230 |     });
231 |     tasks.emplace_back([&] { ++count; });
232 |     tasks.emplace_back([&] { ++count; });
233 |     tasks[0].Precede(&tasks[1]);
234 |     tasks[1].Precede(&tasks[2]);
235 | 
236 |     ThreadPool thread_pool;
237 |     thread_pool.Submit(tasks);
238 |     flag_1.wait(false);
239 |     EXPECT_TRUE(tasks[1].Cancel());
240 |     flag_2.test_and_set();
241 |     flag_2.notify_one();
242 |     thread_pool.Wait();
243 | 
244 |     EXPECT_EQ(count, 0);
245 |   }
246 | 
247 |   {
248 |     SCOPED_TRACE("Cancel a running task");
249 | 
250 |     std::atomic_flag flag_1, flag_2;
251 |     Task task([&] {
252 |       flag_1.test_and_set();
253 |       flag_1.notify_one();
254 |       flag_2.wait(false);
255 |     });
256 | 
257 |     ThreadPool thread_pool;
258 |     thread_pool.Submit(&task);
259 |     flag_1.wait(false);
260 |     EXPECT_FALSE(task.Cancel());
261 |     flag_2.test_and_set();
262 |     flag_2.notify_one();
263 |   }
264 | 
265 |   {
266 |     SCOPED_TRACE("Cancel a completed task");
267 | 
268 |     bool completed = false;
269 |     Task task([&] { completed = true; });
270 | 
271 |     ThreadPool thread_pool;
272 |     thread_pool.Submit(&task);
273 |     thread_pool.Wait();
274 | 
275 |     EXPECT_TRUE(completed);
276 |     EXPECT_FALSE(task.Cancel());
277 |   }
278 | }
279 | }  // namespace
280 | 
--------------------------------------------------------------------------------