├── .clang-format ├── .gitignore ├── ARCHITECTURE.md ├── CMakeLists.txt ├── CMakeSettings.json ├── LICENSE ├── README.md ├── include └── coop │ ├── detail │ ├── api.hpp │ ├── blockingconcurrentqueue.h │ ├── concurrentqueue.h │ ├── lightweightsemaphore.h │ ├── promise.hpp │ ├── tracer.hpp │ └── work_queue.hpp │ ├── event.hpp │ ├── scheduler.hpp │ ├── source_location.hpp │ └── task.hpp ├── src ├── CMakeLists.txt ├── event.cpp ├── scheduler.cpp └── work_queue.cpp └── test ├── CMakeLists.txt └── test.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | AccessModifierOffset: -4 2 | AlignAfterOpenBracket: true 3 | AlignConsecutiveAssignments: true 4 | AlignConsecutiveDeclarations: false 5 | AlignEscapedNewlinesLeft: true 6 | AlignTrailingComments: true 7 | AllowAllParametersOfDeclarationOnNextLine: false 8 | AllowShortBlocksOnASingleLine: true 9 | AllowShortCaseLabelsOnASingleLine: false 10 | AllowShortFunctionsOnASingleLine: false 11 | AllowShortIfStatementsOnASingleLine: false 12 | AllowShortLoopsOnASingleLine: false 13 | AlwaysBreakAfterReturnType: None 14 | AlwaysBreakBeforeMultilineStrings: true 15 | AlwaysBreakTemplateDeclarations: true 16 | BinPackArguments: false 17 | BinPackParameters: false 18 | BreakBeforeBraces: Custom 19 | BraceWrapping: 20 | AfterClass: true 21 | AfterControlStatement: true 22 | AfterEnum: true 23 | AfterFunction: true 24 | AfterNamespace: true 25 | AfterObjCDeclaration: true 26 | AfterStruct: true 27 | AfterUnion: true 28 | AfterExternBlock: true 29 | BeforeCatch: true 30 | BeforeElse: true 31 | IndentBraces: false 32 | SplitEmptyFunction: true 33 | SplitEmptyRecord: true 34 | SplitEmptyNamespace: true 35 | BreakBeforeBinaryOperators: All 36 | BreakBeforeTernaryOperators: true 37 | BreakConstructorInitializers: BeforeComma 38 | BreakStringLiterals: true 39 | ColumnLimit: 80 40 | CommentPragmas: '' 41 | CompactNamespaces: false 42 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 43 
| ConstructorInitializerIndentWidth: 4 44 | ContinuationIndentWidth: 4 45 | Cpp11BracedListStyle: true 46 | DerivePointerBinding: false 47 | FixNamespaceComments: true 48 | IndentCaseLabels: false 49 | IndentPPDirectives: AfterHash 50 | IndentWidth: 4 51 | IndentWrappedFunctionNames: false 52 | KeepEmptyLinesAtTheStartOfBlocks: false 53 | Language: Cpp 54 | MaxEmptyLinesToKeep: 1 55 | NamespaceIndentation: Inner 56 | PenaltyBreakBeforeFirstCallParameter: 0 57 | PenaltyBreakComment: 0 58 | PenaltyBreakFirstLessLess: 0 59 | PenaltyBreakString: 1 60 | PenaltyExcessCharacter: 10 61 | PenaltyReturnTypeOnItsOwnLine: 20 62 | PointerAlignment: Left 63 | SortIncludes: true 64 | SortUsingDeclarations: true 65 | SpaceAfterTemplateKeyword: true 66 | SpaceBeforeAssignmentOperators: true 67 | SpaceBeforeParens: ControlStatements 68 | SpaceInEmptyParentheses: false 69 | SpacesBeforeTrailingComments: 1 70 | SpacesInAngles: false 71 | SpacesInCStyleCastParentheses: false 72 | SpacesInContainerLiterals: false 73 | SpacesInParentheses: false 74 | SpacesInSquareBrackets: false 75 | Standard: C++11 76 | TabWidth: 4 77 | UseTab: Never 78 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | build 3 | .vs 4 | .vscode 5 | -------------------------------------------------------------------------------- /ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | The main ideas behind Coop are actually quite simple, and the scheduler may be too unsophisticated for the most demanding users. 4 | This doc exists to describe in rough terms how the scheduler works so you can decide for yourself if it's worth swapping out. 5 | If you've been working with a mature thread pool and scheduling library for a while, it is almost certainly worth it to stick 6 | with it. 
That said, for other users, my recommendation is to profile the functionality here to see if the builtin scheduler is 7 | sufficient for your needs. 8 | 9 | The primary thread pool is defined in `src/scheduler.cpp` and a worker thread is defined in `src/work_queue.cpp`. The thread pool 10 | is initialized with threads equal to the hardware concurrency available. Each thread sets its affinity to a distinct core. 11 | 12 | When a coroutine suspends, it enqueues its associated coroutine to an idle thread, if any. If a CPU affinity mask is provided, 13 | only threads pinned to the requested cores are considered. After a thread is selected, the coroutine handle is enqueued on a 14 | lock free queue and a semaphore is released so the worker thread can wake up. When the worker thread wakes up, it always checks 15 | the higher priority queue first to see if work is available, otherwise it will dequeue from the lower priority queue. 16 | 17 | When a coroutine completes on a worker thread, the resume point (if any) before the coroutine was scheduled is invoked immediately. 18 | That is, it doesn't get requeued on the thread pool for later execution. 19 | 20 | The concurrent queue used to push work to worker threads is provided by [`moodycamel::ConcurrentQueue`](https://github.com/cameron314/concurrentqueue). 21 | Under the hood, the queue provides multiple-consumer multiple-producer usage, although in this case, only a single producer per queue 22 | exists. The thread pool worker threads currently do *not* support work stealing, which is a slightly more complicated endeavor 23 | for job schedulers that support task affinity. 24 | 25 | The granularity of your jobs shouldn't be too fine - maybe having jobs that are at least 100 us or more is a good idea, or you'll 26 | end up paying disproportionately for scheduling costs. 27 | 28 | The Win32 event awaiter works by having a single IO thread which blocks in a single `WaitForMultipleObjects` call.
One of the 29 | events it waits on is used to signal the availability of more events to wait on. All the other events waited on are user awaited. 30 | If a user-awaited event is signaled, the coroutine associated with that event is then queued to a worker thread, passing along 31 | the requested CPU affinity and priority. -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # USAGE 2 | # Link against the interface target "coop" or add include/ to your header path 3 | 4 | cmake_minimum_required(VERSION 3.17) 5 | 6 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 7 | set(STANDALONE ON) 8 | else() 9 | set(STANDALONE OFF) 10 | endif() 11 | 12 | # Configure which targets to build. Defaults set based on whether this project is included transitively or not 13 | option(COOP_BUILD_PROCESSOR "Build the provided coop processor" ON) 14 | option(COOP_BUILD_TESTS "Build coop tests" ${STANDALONE}) 15 | option(COOP_ENABLE_TRACER "Verbose logging of all coroutine and scheduler events" ${STANDALONE}) 16 | option(COOP_ENABLE_ASAN "Enable ASAN" OFF) 17 | 18 | project(coop LANGUAGES CXX) 19 | 20 | # Output artifacts to the binary root 21 | if(STANDALONE) 22 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 23 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 24 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) 25 | endif() 26 | 27 | if(COOP_ENABLE_ASAN AND NOT WIN32) 28 | # For ASAN usage with MSVC, it's recommended to drive CMake from Visual Studio and use the 29 | # addressSanitizerEnabled: true 30 | # flag in CMakeSettings.json 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer -fsanitize=address") 32 | set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -fno-omit-frame-pointer -fsanitize=address") 33 | endif() 34 | 35 | find_package(Threads REQUIRED) 36 | 37 | add_library(coop_core INTERFACE) 38 |
add_library(coop::coop_core ALIAS coop_core) 39 | target_include_directories(coop_core INTERFACE include) 40 | target_compile_features(coop_core INTERFACE cxx_std_20) 41 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 42 | if(NOT WIN32) 43 | target_compile_options(coop_core INTERFACE -stdlib=libc++) 44 | target_link_options(coop_core INTERFACE -stdlib=libc++ -latomic) 45 | else() 46 | target_compile_definitions(coop_core INTERFACE _SILENCE_CLANG_COROUTINE_MESSAGE) 47 | endif() 48 | endif() 49 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 50 | # Currently, GCC requires this flag for coroutine language support 51 | target_compile_options(coop_core INTERFACE -fcoroutines) 52 | endif() 53 | target_link_libraries(coop_core INTERFACE Threads::Threads) 54 | if(BUILD_SHARED_LIBS) 55 | target_compile_definitions(coop_core INTERFACE COOP_BUILD_SHARED) 56 | endif() 57 | 58 | if(COOP_ENABLE_TRACER) 59 | target_compile_definitions(coop_core INTERFACE COOP_TRACE) 60 | endif() 61 | 62 | if(COOP_BUILD_PROCESSOR OR COOP_BUILD_TESTS) 63 | add_subdirectory(src) 64 | endif() 65 | 66 | if(STANDALONE OR COOP_BUILD_TESTS) 67 | enable_testing() 68 | add_subdirectory(test) 69 | endif() -------------------------------------------------------------------------------- /CMakeSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "x64-Debug (default)", 5 | "generator": "Ninja", 6 | "configurationType": "Debug", 7 | "inheritEnvironments": [ "msvc_x64_x64" ], 8 | "buildRoot": "${projectDir}\\out\\build\\${name}", 9 | "installRoot": "${projectDir}\\out\\install\\${name}", 10 | "addressSanitizerEnabled": true, 11 | "cmakeCommandArgs": "", 12 | "buildCommandArgs": "", 13 | "ctestCommandArgs": "", 14 | "variables": [] 15 | }, 16 | { 17 | "name": "x64-Debug-Shared", 18 | "generator": "Ninja", 19 | "configurationType": "Debug", 20 | "inheritEnvironments": [ "msvc_x64_x64" ], 21 | "buildRoot": 
"${projectDir}\\out\\build\\${name}", 22 | "installRoot": "${projectDir}\\out\\install\\${name}", 23 | "cmakeCommandArgs": "-DBUILD_SHARED_LIBS=ON", 24 | "buildCommandArgs": "", 25 | "ctestCommandArgs": "", 26 | "variables": [] 27 | }, 28 | { 29 | "name": "x64-Release", 30 | "generator": "Ninja", 31 | "configurationType": "RelWithDebInfo", 32 | "buildRoot": "${projectDir}\\out\\build\\${name}", 33 | "installRoot": "${projectDir}\\out\\install\\${name}", 34 | "cmakeCommandArgs": "", 35 | "buildCommandArgs": "", 36 | "ctestCommandArgs": "", 37 | "inheritEnvironments": [ "msvc_x64_x64" ], 38 | "variables": [] 39 | }, 40 | { 41 | "name": "x64-Release-ASAN", 42 | "generator": "Ninja", 43 | "configurationType": "RelWithDebInfo", 44 | "buildRoot": "${projectDir}\\out\\build\\${name}", 45 | "installRoot": "${projectDir}\\out\\install\\${name}", 46 | "cmakeCommandArgs": "", 47 | "buildCommandArgs": "", 48 | "addressSanitizerEnabled": true, 49 | "ctestCommandArgs": "", 50 | "inheritEnvironments": [ "msvc_x64_x64" ], 51 | "variables": [] 52 | } 53 | ] 54 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Jeremy Ong 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🐔 Coop 2 | 3 | Coop is a C++20 coroutines-based library to support [*cooperative multitasking*](https://en.wikipedia.org/wiki/Cooperative_multitasking) 4 | in the context of a multithreaded application. The syntax will be familiar to users of `async` and `await` functionality in other 5 | programming languages. Users *do not* need to understand the C++20 coroutines API to use this library. 6 | 7 | ## Features 8 | 9 | - Ships with a default affinity-aware two-priority threadsafe task scheduler. 10 | - The task scheduler is swappable with your own 11 | - Supports scheduling of user-defined code and OS completion events (e.g. events that signal after I/O completes) 12 | - Easy to use, efficient API, with a small and digestible code footprint (hundreds of lines of code, not thousands) 13 | 14 | Tasks in Coop are *eager* as opposed to lazy, meaning that upon suspension, the coroutine is immediately dispatched for execution on 15 | a worker with the appropriate affinity. 
While there are many benefits to structuring things lazily (see this excellent [talk](https://www.youtube.com/watch?v=1Wy5sq3s2rg)), 16 | Coop opts to do things the way it does because: 17 | 18 | - Coop was designed to interoperate with existing job/task graph systems 19 | - Coop was originally written within the context of a game engine, where exceptions were not used 20 | - For game engines, having a CPU-topology-aware dispatch mechanism is extremely important (consider the architecture of, say, the PS5) 21 | 22 | While game consoles don't (yet) support C++20 fully, the hope is that options like Coop will be there when the compiler support gets there as well. 23 | 24 | ## Limitations 25 | 26 | If your use case is too far afield from Coop's original use case (as above), you may need to do more modification to get Coop to behave the way you want. 27 | The limitations to consider are listed below: 28 | 29 | - Requires a recent C++20 compiler and code that uses Coop headers must also use C++20 30 | - The "event_t" wrapper around Win32 events doesn't have equivalent functionality on other platforms yet (it's provided as a reference for how you might handle your own overlapped IO) 31 | - The Clang implementation of the coroutines API at the moment doesn't work with GCC's libstdc++, so use libc++ instead 32 | - Clang on Windows does not yet support the MSVC coroutines runtime due to ABI differences 33 | - Coop ignores the problem of unhandled exceptions within scheduled tasks 34 | 35 | If the above limitations make Coop unsuitable for you, consider the following libraries: 36 | 37 | - [CppCoro](https://github.com/lewissbaker/cppcoro) - A coroutine library for C++ 38 | - [Conduit](https://github.com/loopperfect/conduit) - Lazy High Performance Streams using Coroutine TS 39 | - [folly::coro](https://github.com/facebook/folly/tree/master/folly/experimental/coro) - a developer-friendly asynchronous C++ framework based on Coroutines TS 40 | 41 | ## Building and Running the Tests 42
| 43 | When configured as a standalone project, the built-in scheduler and tests are enabled by default. To configure and build the project 44 | from the command line: 45 | 46 | ```bash 47 | mkdir build 48 | cd build 49 | cmake .. # Supply your own generator if you don't want the default generator 50 | cmake --build . 51 | ./test/coop_test 52 | ``` 53 | 54 | ## Integration Guide 55 | 56 | If you don't intend on using the built in scheduler, simply copy the contents of the `include` folder somewhere in your include path. 57 | 58 | Otherwise, the recommended integration is done via cmake. For the header only portion, link against the `coop::coop_core` target. 59 | 60 | If you'd like both headers and the scheduler implementation, link against `coop::coop`. 61 | 62 | Drop this quick cmake snippet somewhere in your `CMakeLists.txt` file to make both of these targets available. 63 | 64 | ```cmake 65 | include(FetchContent) 66 | 67 | FetchContent_Declare( 68 | coop 69 | GIT_REPOSITORY https://github.com/jeremyong/coop.git 70 | GIT_TAG master 71 | GIT_SHALLOW ON 72 | ) 73 | FetchContent_MakeAvailable(coop) 74 | ``` 75 | 76 | ## Usage 77 | 78 | To write a coroutine, you'll use the `task_t` template type. 79 | 80 | 81 | ```c++ 82 | coop::task_t<> simple_coroutine() 83 | { 84 | co_await coop::suspend(); 85 | 86 | // Fake some work with a timer 87 | std::this_thread::sleep_for(std::chrono::milliseconds{50}); 88 | } 89 | ``` 90 | 91 | The first line with the `coop::suspend` function will suspend the execution of `simple_coroutine` and the next line will continue on a different thread. 
92 | 93 | To use this coroutine from another coroutine, we can do something like the following: 94 | 95 | ```c++ 96 | coop::task_t<> another_coroutine() 97 | { 98 | // This will cause `simple_coroutine` to be scheduled on a thread different to this one 99 | auto task = simple_coroutine(); 100 | 101 | // Do other useful work 102 | 103 | // Await the task when we need it to finish 104 | co_await task; 105 | } 106 | ``` 107 | 108 | Tasks can hold values to be awaited on. 109 | 110 | ```c++ 111 | coop::task_t<int> coroutine_with_data() 112 | { 113 | co_await coop::suspend(); 114 | 115 | // Do some work 116 | int result = some_expensive_simulation(); 117 | 118 | co_return result; 119 | } 120 | ``` 121 | 122 | When the task above is awaited via the `co_await` operator, what results is the int returned via `co_return`. 123 | Of course, passing other types is possible by changing the first template parameter of `task_t`. 124 | 125 | Tasks let you do multiple async operations simultaneously, for example: 126 | 127 | ```c++ 128 | coop::task_t<> my_task(int ms) 129 | { 130 | co_await coop::suspend(); 131 | 132 | // Fake some work with a timer 133 | std::this_thread::sleep_for(std::chrono::milliseconds{ms}); 134 | } 135 | 136 | coop::task_t<> big_coroutine() 137 | { 138 | auto t1 = my_task(50); 139 | auto t2 = my_task(40); 140 | auto t3 = my_task(80); 141 | 142 | // 3 invocations of `my_task` are now potentially running concurrently on different threads 143 | 144 | do_something_useful(); 145 | 146 | // Suspend until t2 is done 147 | co_await t2; 148 | 149 | // Right now, t1 and t3 are *potentially* still running 150 | 151 | do_something_else(); 152 | 153 | // When awaiting a task, this coroutine will not suspend if the task 154 | // is already ready. Otherwise, this coroutine suspends to be continued 155 | // by the thread that completes the awaited task.
156 | co_await t1; 157 | co_await t3; 158 | 159 | // Now, all three tasks are complete 160 | } 161 | ``` 162 | 163 | One thing to keep in mind is that after awaiting a task, the thread you resume on is *not* necessarily the same thread 164 | you were on originally. 165 | 166 | What if you want to await a task from `main` or some other execution context that isn't a coroutine? For this, you can 167 | make a joinable task and `join` it. 168 | 169 | ```c++ 170 | coop::task_t joinable_coroutine() 171 | { 172 | co_await coop::suspend(); 173 | 174 | // Fake some work with a timer 175 | std::this_thread::sleep_for(std::chrono::milliseconds{50}); 176 | } 177 | 178 | int main(int argc, char** argv) 179 | { 180 | auto task = joinable_coroutine(); 181 | // The timer is now running on a different thread than the main thread 182 | 183 | // Pause execution until joinable_coroutine is finished on whichever thread it was scheduled on 184 | task.join(); 185 | 186 | return 0; 187 | } 188 | ``` 189 | 190 | Note that currently, there is some overhead associated with spawning a joinable task because it creates new event objects instead of reusing event handles from a pool. 191 | 192 | The `coop::suspend` function takes additional parameters that can set the CPU affinity mask, priority (only 0 and 1 are supported at the moment, 193 | with 1 being the higher priority), and file/line information for debugging purposes. 194 | 195 | In addition to awaiting tasks, you can also await the `event_t` object. While this currently only supports Windows, this lets a coroutine 196 | suspend execution until an event handle is signaled - a powerful pattern for doing async I/O. 
197 | 198 | ```c++ 199 | coop::task_t<> wait_for_event() 200 | { 201 | // Suppose file_reading_code produces a Win32 HANDLE which will get signaled whenever the file 202 | // read is ready 203 | coop::event_t event{file_reading_code()}; 204 | 205 | // Do something else while the file is reading 206 | 207 | // Suspend until the event gets signaled 208 | co_await event; 209 | } 210 | ``` 211 | 212 | In the future, support may be added for epoll and kqueue abstractions. 213 | 214 | ## Convenience macro `COOP_SUSPEND#` 215 | 216 | The full function signature of the `suspend` function is the following: 217 | 218 | ```c++ 219 | template <Scheduler S = scheduler_t> 220 | inline auto suspend(S& scheduler = S::instance(), 221 | uint64_t cpu_mask = 0, 222 | uint32_t priority = 0, 223 | source_location_t const& source_location = {}) noexcept 224 | ``` 225 | 226 | and you must await the returned result. Instead, you can use the family of macros and simply write 227 | 228 | ``` 229 | COOP_SUSPEND(); 230 | ``` 231 | 232 | if you are comfortable with the default behavior. This macro will supply `__FILE__` and `__LINE__` information 233 | to the `source_location` parameter to get additional tracking. Other macros with numerical suffixes to `COOP_SUSPEND` are 234 | also provided to allow you to override a subset of parameters as needed. 235 | 236 | ## (Optional) Use your own scheduler 237 | 238 | Coop is designed to be a pretty thin abstraction layer to make writing async code more convenient. If you already have a robust 239 | scheduler and thread pool, you don't have to use the one provided here. The `coop::suspend` function is templated and accepts 240 | an optional first parameter to a class that implements the `Scheduler` concept.
To qualify as a `Scheduler`, a class only needs 241 | to implement the following function signature: 242 | 243 | ```c++ 244 | void schedule(std::coroutine_handle<> coroutine, 245 | uint64_t cpu_affinity = 0, 246 | uint32_t priority = 0, 247 | source_location_t source_location = {}); 248 | ``` 249 | 250 | Then, at the opportune time on a thread of your choosing, simply call `coroutine.resume()`. Remember that when implementing your 251 | own scheduler, you are responsible for thread safety and ensuring that the "usual" bugs (like missed notifications) are ironed out. 252 | You can ignore the cpu affinity and priority flags if you don't need this functionality (i.e. if you aren't targeting a NUMA). 253 | 254 | ## Hack away 255 | 256 | The source code of Coop is pretty small all things considered, with the core of its functionality contained in only a few hundred 257 | lines of commented code. Feel free to take it and adapt it for your use case. This was the route taken as opposed to making every 258 | design aspect customizable (which would have made the interface far more complicated). 259 | 260 | ## Additional Resources 261 | 262 | To learn more about coroutines in C++20, please do visit this [awesome compendium](https://gist.github.com/MattPD/9b55db49537a90545a90447392ad3aeb) 263 | of resources compiled by @MattPD. 
264 | -------------------------------------------------------------------------------- /include/coop/detail/api.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Defines COOP_API, the export/import annotation for symbols in a shared 4 | // linkage environment (it expands to nothing unless COOP_BUILD_SHARED is set) 5 | 6 | #ifdef COOP_BUILD_SHARED 7 | # ifdef _MSC_VER 8 | # ifdef COOP_IMPL 9 | # define COOP_API __declspec(dllexport) 10 | # else 11 | # define COOP_API __declspec(dllimport) 12 | # endif 13 | # else 14 | # define COOP_API __attribute__((visibility("default"))) 15 | # endif 16 | #else 17 | # define COOP_API 18 | #endif -------------------------------------------------------------------------------- /include/coop/detail/blockingconcurrentqueue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyong/coop/f2dc18cf56c1c17e9eca9e3bad41cefbc9631fba/include/coop/detail/blockingconcurrentqueue.h -------------------------------------------------------------------------------- /include/coop/detail/lightweightsemaphore.h: -------------------------------------------------------------------------------- 1 | // Provides an efficient implementation of a semaphore (LightweightSemaphore). 2 | // This is an extension of Jeff Preshing's semaphore implementation (licensed 3 | // under the terms of its separate zlib license) that has been adapted and 4 | // extended by Cameron Desrochers. 5 | 6 | #pragma once 7 | 8 | #include <atomic> 9 | #include <cstddef> // For std::size_t 10 | #include <type_traits> // For std::make_signed<T> 11 | 12 | #if defined(_WIN32) 13 | // Avoid including windows.h in a header; we only need a handful of 14 | // items, so we'll redeclare them here (this is relatively safe since 15 | // the API generally has to remain stable between Windows versions). 16 | // I know this is an ugly hack but it still beats polluting the global 17 | // namespace with thousands of generic names or adding a .cpp for nothing.
18 | extern "C" 19 | { 20 | struct _SECURITY_ATTRIBUTES; 21 | __declspec(dllimport) void* __stdcall CreateSemaphoreW( 22 | _SECURITY_ATTRIBUTES* lpSemaphoreAttributes, 23 | long lInitialCount, 24 | long lMaximumCount, 25 | const wchar_t* lpName); 26 | __declspec(dllimport) int __stdcall CloseHandle(void* hObject); 27 | __declspec(dllimport) unsigned long __stdcall WaitForSingleObject( 28 | void* hHandle, 29 | unsigned long dwMilliseconds); 30 | __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, 31 | long lReleaseCount, 32 | long* lpPreviousCount); 33 | } 34 | #elif defined(__MACH__) 35 | # include 36 | #elif defined(__unix__) 37 | # include 38 | #endif 39 | 40 | namespace moodycamel 41 | { 42 | namespace details 43 | { 44 | // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's 45 | // portable + lightweight semaphore implementations, originally from 46 | // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h 47 | // LICENSE: 48 | // Copyright (c) 2015 Jeff Preshing 49 | // 50 | // This software is provided 'as-is', without any express or implied 51 | // warranty. In no event will the authors be held liable for any damages 52 | // arising from the use of this software. 53 | // 54 | // Permission is granted to anyone to use this software for any purpose, 55 | // including commercial applications, and to alter it and redistribute it 56 | // freely, subject to the following restrictions: 57 | // 58 | // 1. The origin of this software must not be misrepresented; you must not 59 | // claim that you wrote the original software. If you use this software 60 | // in a product, an acknowledgement in the product documentation would be 61 | // appreciated but is not required. 62 | // 2. Altered source versions must be plainly marked as such, and must not be 63 | // misrepresented as being the original software. 64 | // 3. This notice may not be removed or altered from any source distribution. 
65 | #if defined(_WIN32) /* Win32 backend: holds the handle returned by CreateSemaphoreW and closes it in the destructor. Copy construction and copy assignment are declared private with MOODYCAMEL_DELETE_FUNCTION. Each wait variant reports success only when WaitForSingleObject returns 0. */ 66 | class Semaphore 67 | { 68 | private: 69 | void* m_hSema; 70 | 71 | Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 72 | Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 73 | 74 | public: 75 | Semaphore(int initialCount = 0) 76 | { 77 | assert(initialCount >= 0); 78 | const long maxLong = 0x7fffffff; 79 | m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); 80 | assert(m_hSema); 81 | } 82 | 83 | ~Semaphore() 84 | { 85 | CloseHandle(m_hSema); 86 | } 87 | 88 | bool wait() 89 | { 90 | const unsigned long infinite = 0xffffffff; 91 | return WaitForSingleObject(m_hSema, infinite) == 0; 92 | } 93 | 94 | bool try_wait() 95 | { 96 | return WaitForSingleObject(m_hSema, 0) == 0; 97 | } 98 | 99 | bool timed_wait(std::uint64_t usecs) 100 | { /* usecs / 1000 is an integer division, so the timeout passed on is in whole milliseconds */ 101 | return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) 102 | == 0; 103 | } 104 | 105 | void signal(int count = 1) 106 | { /* keeps calling ReleaseSemaphore until it returns non-zero */ 107 | while (!ReleaseSemaphore(m_hSema, count, nullptr)) 108 | ; 109 | } 110 | }; 111 | #elif defined(__MACH__) 112 | //--------------------------------------------------------- 113 | // Semaphore (Apple iOS and OSX) 114 | // Can't use POSIX semaphores due to 115 | // http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html 116 | //--------------------------------------------------------- /* Mach backend: creates the semaphore with SYNC_POLICY_FIFO in the constructor and destroys it in the destructor. try_wait() is timed_wait(0); signal(count) issues count separate semaphore_signal calls. */ 117 | class Semaphore 118 | { 119 | private: 120 | semaphore_t m_sema; 121 | 122 | Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 123 | Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 124 | 125 | public: 126 | Semaphore(int initialCount = 0) 127 | { 128 | assert(initialCount >= 0); 129 | kern_return_t rc = semaphore_create( 130 | mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); 131 | assert(rc == KERN_SUCCESS); 132 | (void)rc; 133 | } 134 | 135 | ~Semaphore() 136 | { 137 | semaphore_destroy(mach_task_self(), m_sema); 138 | } 139 | 140 | bool wait() 141 | { 142 | return
semaphore_wait(m_sema) == KERN_SUCCESS; 143 | } 144 | 145 | bool try_wait() 146 | { 147 | return timed_wait(0); 148 | } 149 | 150 | bool timed_wait(std::uint64_t timeout_usecs) 151 | { /* splits the microsecond timeout into whole seconds plus a nanosecond remainder. NOTE(review): the static_cast calls below appear without a target type, presumably lost when this listing was extracted - confirm against the original header. */ 152 | mach_timespec_t ts; 153 | ts.tv_sec = static_cast(timeout_usecs / 1000000); 154 | ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); 155 | 156 | // added in OSX 10.10: 157 | // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html 158 | kern_return_t rc = semaphore_timedwait(m_sema, ts); 159 | return rc == KERN_SUCCESS; 160 | } 161 | 162 | void signal() 163 | { 164 | while (semaphore_signal(m_sema) != KERN_SUCCESS) 165 | ; 166 | } 167 | 168 | void signal(int count) 169 | { 170 | while (count-- > 0) 171 | { 172 | while (semaphore_signal(m_sema) != KERN_SUCCESS) 173 | ; 174 | } 175 | } 176 | }; 177 | #elif defined(__unix__) 178 | //--------------------------------------------------------- 179 | // Semaphore (POSIX, Linux) 180 | //--------------------------------------------------------- /* POSIX backend: calls sem_init with a second argument of 0 and sem_destroy in the destructor. wait(), try_wait() and timed_wait() retry while the call fails with errno == EINTR and return true only when the final call returns 0. */ 181 | class Semaphore 182 | { 183 | private: 184 | sem_t m_sema; 185 | 186 | Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 187 | Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; 188 | 189 | public: 190 | Semaphore(int initialCount = 0) 191 | { 192 | assert(initialCount >= 0); 193 | int rc 194 | = sem_init(&m_sema, 0, static_cast(initialCount)); 195 | assert(rc == 0); 196 | (void)rc; 197 | } 198 | 199 | ~Semaphore() 200 | { 201 | sem_destroy(&m_sema); 202 | } 203 | 204 | bool wait() 205 | { 206 | // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error 207 | int rc; 208 | do 209 | { 210 | rc = sem_wait(&m_sema); 211 | } while (rc == -1 && errno == EINTR); 212 | return rc == 0; 213 | } 214 | 215 | bool try_wait() 216 | { 217 | int rc; 218 | do 219 | { 220 | rc = sem_trywait(&m_sema); 221 | } while (rc == -1 && errno == EINTR); 222 | return rc
== 0; 223 | } 224 | 225 | bool timed_wait(std::uint64_t usecs) 226 | { /* builds the deadline by adding the timeout to the current CLOCK_REALTIME reading */ 227 | struct timespec ts; 228 | const int usecs_in_1_sec = 1000000; 229 | const int nsecs_in_1_sec = 1000000000; 230 | clock_gettime(CLOCK_REALTIME, &ts); 231 | ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); 232 | ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; 233 | // sem_timedwait bombs if you have more than 1e9 in tv_nsec 234 | // so we have to clean things up before passing it in 235 | if (ts.tv_nsec >= nsecs_in_1_sec) 236 | { 237 | ts.tv_nsec -= nsecs_in_1_sec; 238 | ++ts.tv_sec; 239 | } 240 | 241 | int rc; 242 | do 243 | { 244 | rc = sem_timedwait(&m_sema, &ts); 245 | } while (rc == -1 && errno == EINTR); 246 | return rc == 0; 247 | } 248 | /* both signal overloads keep calling sem_post until it stops returning -1 */ 249 | void signal() 250 | { 251 | while (sem_post(&m_sema) == -1) 252 | ; 253 | } 254 | 255 | void signal(int count) 256 | { 257 | while (count-- > 0) 258 | { 259 | while (sem_post(&m_sema) == -1) 260 | ; 261 | } 262 | } 263 | }; 264 | #else 265 | # error Unsupported platform!
(No semaphore wrapper available) 266 | #endif 267 | 268 | } // end namespace details 269 | 270 | //--------------------------------------------------------- 271 | // LightweightSemaphore 272 | //--------------------------------------------------------- 273 | class LightweightSemaphore 274 | { 275 | public: 276 | typedef std::make_signed::type ssize_t; 277 | 278 | private: 279 | std::atomic m_count; 280 | details::Semaphore m_sema; 281 | int m_maxSpins; 282 | 283 | bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) 284 | { 285 | ssize_t oldCount; 286 | int spin = m_maxSpins; 287 | while (--spin >= 0) 288 | { 289 | oldCount = m_count.load(std::memory_order_relaxed); 290 | if ((oldCount > 0) 291 | && m_count.compare_exchange_strong(oldCount, 292 | oldCount - 1, 293 | std::memory_order_acquire, 294 | std::memory_order_relaxed)) 295 | return true; 296 | std::atomic_signal_fence(std::memory_order_acquire); // Prevent the 297 | // compiler 298 | // from 299 | // collapsing 300 | // the loop. 301 | } 302 | oldCount = m_count.fetch_sub(1, std::memory_order_acquire); 303 | if (oldCount > 0) 304 | return true; 305 | if (timeout_usecs < 0) 306 | { 307 | if (m_sema.wait()) 308 | return true; 309 | } 310 | if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) 311 | return true; 312 | // At this point, we've timed out waiting for the semaphore, but the 313 | // count is still decremented indicating we may still be waiting on 314 | // it. So we have to re-adjust the count, but only if the semaphore 315 | // wasn't signaled enough times for us too since then. If it was, we 316 | // need to release the semaphore too. 
317 | while (true) 318 | { 319 | oldCount = m_count.load(std::memory_order_acquire); 320 | if (oldCount >= 0 && m_sema.try_wait()) 321 | return true; 322 | if (oldCount < 0 323 | && m_count.compare_exchange_strong(oldCount, 324 | oldCount + 1, 325 | std::memory_order_relaxed, 326 | std::memory_order_relaxed)) 327 | return false; 328 | } 329 | } 330 | 331 | ssize_t 332 | waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) 333 | { 334 | assert(max > 0); 335 | ssize_t oldCount; 336 | int spin = m_maxSpins; 337 | while (--spin >= 0) 338 | { 339 | oldCount = m_count.load(std::memory_order_relaxed); 340 | if (oldCount > 0) 341 | { 342 | ssize_t newCount = oldCount > max ? oldCount - max : 0; 343 | if (m_count.compare_exchange_strong(oldCount, 344 | newCount, 345 | std::memory_order_acquire, 346 | std::memory_order_relaxed)) 347 | return oldCount - newCount; 348 | } 349 | std::atomic_signal_fence(std::memory_order_acquire); 350 | } 351 | oldCount = m_count.fetch_sub(1, std::memory_order_acquire); 352 | if (oldCount <= 0) 353 | { 354 | if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) 355 | || (timeout_usecs > 0 356 | && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) 357 | { 358 | while (true) 359 | { 360 | oldCount = m_count.load(std::memory_order_acquire); 361 | if (oldCount >= 0 && m_sema.try_wait()) 362 | break; 363 | if (oldCount < 0 364 | && m_count.compare_exchange_strong( 365 | oldCount, 366 | oldCount + 1, 367 | std::memory_order_relaxed, 368 | std::memory_order_relaxed)) 369 | return 0; 370 | } 371 | } 372 | } 373 | if (max > 1) 374 | return 1 + tryWaitMany(max - 1); 375 | return 1; 376 | } 377 | 378 | public: 379 | LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) 380 | : m_count(initialCount) 381 | , m_maxSpins(maxSpins) 382 | { 383 | assert(initialCount >= 0); 384 | assert(maxSpins >= 0); 385 | } 386 | 387 | bool tryWait() 388 | { 389 | ssize_t oldCount = m_count.load(std::memory_order_relaxed); 390 
| while (oldCount > 0) 391 | { 392 | if (m_count.compare_exchange_weak(oldCount, 393 | oldCount - 1, 394 | std::memory_order_acquire, 395 | std::memory_order_relaxed)) 396 | return true; 397 | } 398 | return false; 399 | } 400 | 401 | bool wait() 402 | { 403 | return tryWait() || waitWithPartialSpinning(); 404 | } 405 | 406 | bool wait(std::int64_t timeout_usecs) 407 | { 408 | return tryWait() || waitWithPartialSpinning(timeout_usecs); 409 | } 410 | 411 | // Acquires between 0 and (greedily) max, inclusive 412 | ssize_t tryWaitMany(ssize_t max) 413 | { 414 | assert(max >= 0); 415 | ssize_t oldCount = m_count.load(std::memory_order_relaxed); 416 | while (oldCount > 0) 417 | { 418 | ssize_t newCount = oldCount > max ? oldCount - max : 0; 419 | if (m_count.compare_exchange_weak(oldCount, 420 | newCount, 421 | std::memory_order_acquire, 422 | std::memory_order_relaxed)) 423 | return oldCount - newCount; 424 | } 425 | return 0; 426 | } 427 | 428 | // Acquires at least one, and (greedily) at most max 429 | ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) 430 | { 431 | assert(max >= 0); 432 | ssize_t result = tryWaitMany(max); 433 | if (result == 0 && max > 0) 434 | result = waitManyWithPartialSpinning(max, timeout_usecs); 435 | return result; 436 | } 437 | 438 | ssize_t waitMany(ssize_t max) 439 | { 440 | ssize_t result = waitMany(max, -1); 441 | assert(result > 0); 442 | return result; 443 | } 444 | 445 | void signal(ssize_t count = 1) 446 | { 447 | assert(count >= 0); 448 | ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); 449 | ssize_t toRelease = -oldCount < count ? -oldCount : count; 450 | if (toRelease > 0) 451 | { 452 | m_sema.signal((int)toRelease); 453 | } 454 | } 455 | 456 | std::size_t availableApprox() const 457 | { 458 | ssize_t count = m_count.load(std::memory_order_relaxed); 459 | return count > 0 ? 
static_cast(count) : 0; 460 | } 461 | }; 462 | 463 | } // end namespace moodycamel -------------------------------------------------------------------------------- /include/coop/detail/promise.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tracer.hpp" 4 | #include 5 | #include 6 | #include 7 | #if defined(__clang__) 8 | # include 9 | namespace std 10 | { 11 | using experimental::coroutine_handle; 12 | using experimental::noop_coroutine; 13 | using experimental::suspend_never; 14 | } // namespace std 15 | #else 16 | # include 17 | #endif 18 | 19 | namespace coop 20 | { 21 | namespace detail 22 | { 23 | template 24 | struct final_awaiter_t 25 | { 26 | bool await_ready() const noexcept 27 | { 28 | return false; 29 | } 30 | 31 | void await_resume() const noexcept 32 | { 33 | } 34 | 35 | std::coroutine_handle<> 36 | await_suspend(std::coroutine_handle

coroutine) const noexcept 37 | { 38 | // Check if this coroutine is being finalized from the 39 | // middle of a "continuation" coroutine and hop back there to 40 | // continue execution while *this* coroutine is suspended. 41 | 42 | COOP_LOG("Final await for coroutine %p on thread %zu\n", 43 | coroutine.address(), 44 | detail::thread_id()); 45 | // acq_rel: the acquire half makes the awaiter's continuation write 46 | // visible here; the release half publishes this coroutine's result 47 | // to an awaiter that arrives later and resumes immediately 48 | if (coroutine.promise().flag.exchange(true, std::memory_order_acq_rel)) 49 | { 50 | // We're not the first to reach here, meaning the 51 | // continuation is installed properly (if any) 52 | auto continuation = coroutine.promise().continuation; 53 | if (continuation) 54 | { 55 | COOP_LOG("Resuming continuation %p on %p on thread %zu\n", 56 | continuation.address(), 57 | coroutine.address(), 58 | detail::thread_id()); 59 | return continuation; 60 | } 61 | else 62 | { 63 | COOP_LOG( 64 | "Coroutine %p on thread %zu missing continuation\n", 65 | coroutine.address(), 66 | detail::thread_id()); 67 | } 68 | } 69 | return std::noop_coroutine(); 70 | } 71 | }; 72 | 73 | template 74 | struct final_awaiter_t 75 | { 76 | bool await_ready() const noexcept 77 | { 78 | return false; 79 | } 80 | 81 | void await_resume() const noexcept 82 | { 83 | } 84 | 85 | void await_suspend(std::coroutine_handle

coroutine) const noexcept 86 | { 87 | coroutine.promise().join_sem.release(); 88 | coroutine.destroy(); 89 | } 90 | }; 91 | 92 | // Helper function for awaiting on a task. The next resume point is 93 | // installed as a continuation of the task being awaited. 94 | template 95 | std::coroutine_handle<> 96 | await_suspend(std::coroutine_handle

base, std::coroutine_handle<> next) 97 | { 98 | if constexpr (P::joinable_v) 99 | { 100 | // Joinable tasks are never awaited and so cannot have a 101 | // continuation by definition 102 | return std::noop_coroutine(); 103 | } 104 | else 105 | { 106 | COOP_LOG("Installing continuation %p for %p on thread %zu\n", 107 | next.address(), 108 | base.address(), 109 | detail::thread_id()); 110 | base.promise().continuation = next; 111 | // acq_rel: the release half publishes the continuation write to the 112 | // final awaiter; the acquire half makes a finished task's result visible to us 113 | if (base.promise().flag.exchange(true, std::memory_order_acq_rel)) 114 | { 115 | // We're not the first to reach here, meaning the continuation 116 | // won't get read 117 | return next; 118 | } 119 | return std::noop_coroutine(); 120 | } 121 | } 122 | 123 | // All promises need the `continuation` member, which is set when a 124 | // coroutine is suspended within another coroutine. The `continuation` 125 | // handle is used to hop back from that suspension point when the inner 126 | // coroutine finishes. 127 | template 128 | struct promise_base_t 129 | { 130 | constexpr static bool joinable_v = Joinable; 131 | 132 | // When a coroutine suspends, the continuation stores the handle to the 133 | // resume point, which immediately follows the suspend point. 134 | std::coroutine_handle<> continuation = nullptr; 135 | 136 | std::atomic flag = false; 137 | 138 | // Do not suspend immediately on entry of a coroutine 139 | std::suspend_never initial_suspend() const noexcept 140 | { 141 | return {}; 142 | } 143 | 144 | void unhandled_exception() const noexcept 145 | { 146 | // Coop doesn't currently handle exceptions.
147 | } 148 | }; 149 | 150 | // Joinable tasks need an additional semaphore the joiner can wait on 151 | template <> 152 | struct promise_base_t : public promise_base_t 153 | { 154 | std::binary_semaphore join_sem{0}; 155 | }; 156 | 157 | template 158 | struct promise_t : public promise_base_t 159 | { 160 | T data; 161 | 162 | Task get_return_object() noexcept 163 | { 164 | // On coroutine entry, we store as the continuation a handle 165 | // corresponding to the next sequence point from the caller. 166 | return {std::coroutine_handle::from_promise(*this)}; 167 | } 168 | 169 | void 170 | return_value(T const& value) noexcept(std::is_nothrow_copy_assignable_v) 171 | { 172 | data = value; 173 | } 174 | 175 | void 176 | return_value(T&& value) noexcept(std::is_nothrow_move_assignable_v) 177 | { 178 | data = std::move(value); 179 | } 180 | 181 | final_awaiter_t final_suspend() noexcept 182 | { 183 | return {}; 184 | } 185 | }; 186 | 187 | template 188 | struct promise_t : public promise_base_t 189 | { 190 | Task get_return_object() noexcept 191 | { 192 | // On coroutine entry, we store as the continuation a handle 193 | // corresponding to the next sequence point from the caller. 
194 | return {std::coroutine_handle::from_promise(*this)}; 195 | } 196 | 197 | void return_void() noexcept 198 | { 199 | } 200 | 201 | final_awaiter_t final_suspend() noexcept 202 | { 203 | return {}; 204 | } 205 | }; 206 | } // namespace detail 207 | } // namespace coop 208 | -------------------------------------------------------------------------------- /include/coop/detail/tracer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace coop 7 | { 8 | namespace detail 9 | { 10 | inline size_t thread_id() noexcept 11 | { 12 | return std::hash{}(std::this_thread::get_id()); 13 | } 14 | } // namespace detail 15 | } // namespace coop 16 | 17 | #if defined(COOP_TRACE) && !defined(NDEBUG) 18 | # include 19 | 20 | # define COOP_LOG(...) std::printf(__VA_ARGS__) 21 | 22 | #else 23 | # define COOP_LOG(...) 24 | #endif -------------------------------------------------------------------------------- /include/coop/detail/work_queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "api.hpp" 4 | #include "concurrentqueue.h" 5 | #include 6 | #include 7 | #include 8 | #if defined(__clang__) 9 | # include 10 | namespace std 11 | { 12 | using experimental::coroutine_handle; 13 | } 14 | #else 15 | # include 16 | #endif 17 | #include 18 | 19 | // Currently, COOP supports exactly two priority levels, 0 (default) and 1 20 | // (high) 21 | #define COOP_PRIORITY_COUNT 2 22 | 23 | namespace coop 24 | { 25 | class scheduler_t; 26 | 27 | namespace detail 28 | { 29 | class COOP_API work_queue_t 30 | { 31 | public: 32 | work_queue_t(scheduler_t& scheduler, uint32_t id); 33 | ~work_queue_t() noexcept; 34 | work_queue_t(work_queue_t const&) = delete; 35 | work_queue_t(work_queue_t&&) = delete; 36 | work_queue_t& operator=(work_queue_t const&) = delete; 37 | work_queue_t& operator=(work_queue_t&&) = delete; 38 | 39 | // Returns the 
approximate size across all queues of any priority 40 | size_t size_approx() const noexcept 41 | { 42 | size_t out = 0; 43 | for (size_t i = 0; i != COOP_PRIORITY_COUNT; ++i) 44 | { 45 | out += queues_[i].size_approx(); 46 | } 47 | return out; 48 | } 49 | 50 | void enqueue(std::coroutine_handle<> coroutine, 51 | uint32_t priority = 0, 52 | source_location_t source_location = {}); 53 | 54 | private: 55 | scheduler_t& scheduler_; 56 | uint32_t id_; 57 | std::thread thread_; 58 | std::atomic active_; 59 | std::counting_semaphore<> sem_; 60 | 61 | moodycamel::ConcurrentQueue> 62 | queues_[COOP_PRIORITY_COUNT]; 63 | 64 | char label_[64]; 65 | }; 66 | } // namespace detail 67 | } // namespace coop -------------------------------------------------------------------------------- /include/coop/event.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/api.hpp" 4 | #if defined(__clang__) 5 | # include 6 | namespace std 7 | { 8 | using experimental::coroutine_handle; 9 | } 10 | #else 11 | # include 12 | #endif 13 | #include 14 | 15 | namespace coop 16 | { 17 | class scheduler_t; 18 | 19 | // Non-owning reference to an event 20 | class COOP_API event_ref_t 21 | { 22 | public: 23 | enum class status_e 24 | { 25 | normal, 26 | abandoned, 27 | timeout, 28 | failed 29 | }; 30 | 31 | struct wait_result_t 32 | { 33 | status_e status; 34 | uint32_t index = 0; 35 | }; 36 | 37 | // Return the index of the first event signaled in a given array of events 38 | static wait_result_t wait_many(event_ref_t* events, uint32_t count); 39 | 40 | event_ref_t() = default; 41 | #if defined(_WIN32) || defined(__linux__) 42 | event_ref_t(void* handle) noexcept 43 | : handle_{handle} 44 | { 45 | } 46 | #elif (__APPLE__) 47 | // TODO: MacOS/iOS implementation 48 | #endif 49 | event_ref_t(event_ref_t&&) = default; 50 | event_ref_t(event_ref_t const&) = default; 51 | event_ref_t& operator=(event_ref_t&&) = default; 52 | 
event_ref_t& operator=(event_ref_t const&) = default; 53 | 54 | void init(bool manual_reset = false, char const* label = nullptr); 55 | 56 | // Check if this event is signaled (returns immediately) 57 | bool is_signaled() const; 58 | operator bool() const noexcept 59 | { 60 | return is_signaled(); 61 | } 62 | 63 | // Wait (potentially indefinitely) for this event to be signaled 64 | bool wait() const; 65 | 66 | // Mark this event as signaled 67 | void signal(); 68 | 69 | // Mark this event as unsignaled (needed for events that are manually reset, 70 | // as opposed to reset after wait) 71 | void reset(); 72 | 73 | protected: 74 | friend class event_t; 75 | 76 | #if defined(_WIN32) || defined(__linux__) 77 | void* handle_ = nullptr; 78 | #elif (__APPLE__) 79 | // TODO: MacOS/iOS implementation 80 | #endif 81 | }; 82 | 83 | class COOP_API event_t final : public event_ref_t 84 | { 85 | public: 86 | event_t() = default; 87 | #if defined(_WIN32) || defined(__linux__) 88 | event_t(void* handle) noexcept 89 | : event_ref_t{handle} 90 | { 91 | } 92 | #elif (__APPLE__) 93 | // TODO: MacOS/iOS implementation 94 | #endif 95 | ~event_t() noexcept; 96 | event_t(event_t const& other) = delete; 97 | event_t& operator=(event_t const& other) = delete; 98 | event_t(event_t&& other) noexcept; 99 | event_t& operator=(event_t&& other) noexcept; 100 | 101 | event_ref_t ref() const noexcept; 102 | 103 | // The CPU affinity and priority set here are used to consider the 104 | // *continuation* after this event is signaled. The affinity is a 64-bit mask, matching the scheduler's cpu_affinity parameter 105 | void set_cpu_affinity(uint64_t affinity) noexcept 106 | { 107 | cpu_affinity_ = affinity; 108 | } 109 | 110 | void set_priority(uint32_t priority) noexcept 111 | { 112 | priority_ = priority; 113 | } 114 | 115 | // Awaiter traits 116 | bool await_ready() const noexcept 117 | { 118 | return is_signaled(); 119 | } 120 | 121 | void await_resume() const noexcept 122 | { 123 | } 124 | 125 | // Enqueue coroutine for resumption when this event transitions to the 126 | //
signaled state 127 | void await_suspend(std::coroutine_handle<> coroutine) noexcept; 128 | 129 | private: 130 | uint64_t cpu_affinity_ = 0; 131 | uint32_t priority_ = 0; 132 | }; 133 | } // namespace coop -------------------------------------------------------------------------------- /include/coop/scheduler.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/api.hpp" 4 | #include "detail/concurrentqueue.h" 5 | #include "detail/work_queue.hpp" 6 | #include "event.hpp" 7 | #include "source_location.hpp" 8 | #include 9 | #if defined(__clang__) 10 | # include 11 | namespace std 12 | { 13 | using experimental::coroutine_handle; 14 | } 15 | #else 16 | # include 17 | #endif 18 | #include 19 | #include 20 | 21 | namespace coop 22 | { 23 | class event_ref_t; 24 | 25 | template 26 | concept Scheduler = requires(S scheduler, 27 | std::coroutine_handle<> coroutine, 28 | uint64_t cpu_affinity, 29 | uint32_t priority, 30 | source_location_t source_location) 31 | { 32 | scheduler.schedule(coroutine, cpu_affinity, priority, source_location); 33 | }; 34 | 35 | // Implement the Scheduler concept above to use your own coroutine scheduler 36 | class COOP_API scheduler_t final 37 | { 38 | public: 39 | // Returns the default global threadsafe scheduler 40 | static scheduler_t& instance() noexcept; 41 | 42 | scheduler_t(); 43 | ~scheduler_t() noexcept; 44 | scheduler_t(scheduler_t const&) = delete; 45 | scheduler_t(scheduler_t&&) = delete; 46 | scheduler_t& operator=(scheduler_t const&) = delete; 47 | scheduler_t&& operator=(scheduler_t&&) = delete; 48 | 49 | // Schedules a coroutine to be resumed at a later time as soon as a thread 50 | // is available. If you wish to provide your own custom scheduler, you can 51 | // schedule the coroutine in a single-threaded context, or with different 52 | // runtime behavior. 
53 | // 54 | // In addition, you are free to handle or ignore the cpu affinity and 55 | // priority parameters differently. The default scheduler here supports TWO 56 | // priorities: 0 and 1. Coroutines with priority 1 will (in a best-effort 57 | // sense), be scheduled ahead of coroutines with priority 0. 58 | void schedule(std::coroutine_handle<> coroutine, 59 | uint64_t cpu_affinity = 0, 60 | uint32_t priority = 0, 61 | source_location_t source_location = {}); 62 | 63 | void schedule(std::coroutine_handle<> coroutine, 64 | event_ref_t event, 65 | uint64_t cpu_affinity, 66 | uint32_t priority); 67 | 68 | private: 69 | friend class detail::work_queue_t; 70 | 71 | struct event_continuation_t 72 | { 73 | std::coroutine_handle<> coroutine; 74 | event_ref_t event; 75 | uint64_t cpu_affinity; 76 | uint32_t priority; 77 | }; 78 | 79 | std::thread event_thread_; 80 | size_t event_count_ = 0; 81 | size_t event_capacity_ = 0; 82 | event_t event_thread_signal_; 83 | event_ref_t* events_ = nullptr; 84 | event_continuation_t* event_continuations_ = nullptr; 85 | size_t temp_storage_size_ = 0; 86 | event_continuation_t* temp_storage_ = nullptr; 87 | moodycamel::ConcurrentQueue pending_events_; 88 | 89 | std::atomic active_; 90 | 91 | // Allocated as an array. 
One queue is assigned to each CPU 92 | detail::work_queue_t* queues_ = nullptr; 93 | 94 | // Used to perform a low-discrepancy selection of work queue to enqueue a 95 | // coroutine to 96 | std::atomic update_; 97 | 98 | // Specifically, this is the number of concurrent threads possible, which 99 | // may be double the physical CPU count if hyperthreading or similar 100 | // technology is enabled 101 | uint32_t cpu_count_; 102 | uint32_t cpu_mask_; 103 | }; 104 | } // namespace coop -------------------------------------------------------------------------------- /include/coop/source_location.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/api.hpp" 4 | #include 5 | 6 | namespace coop 7 | { 8 | // Temporary source location representation until is more 9 | // widely available 10 | struct COOP_API source_location_t 11 | { 12 | char const* file; 13 | size_t line; 14 | }; 15 | } // namespace coop -------------------------------------------------------------------------------- /include/coop/task.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "detail/api.hpp" 4 | #include "detail/promise.hpp" 5 | #include "detail/tracer.hpp" 6 | #include "scheduler.hpp" 7 | #include "source_location.hpp" 8 | #include 9 | #include 10 | #include 11 | #if defined(__clang__) 12 | # include 13 | namespace std 14 | { 15 | using experimental::coroutine_handle; 16 | using experimental::noop_coroutine; 17 | using experimental::suspend_never; 18 | } // namespace std 19 | #else 20 | # include 21 | #endif 22 | 23 | namespace coop 24 | { 25 | template 26 | class task_t 27 | { 28 | public: 29 | using promise_type = detail::promise_t; 30 | 31 | task_t() noexcept = default; 32 | task_t(std::coroutine_handle coroutine) noexcept 33 | : coroutine_{coroutine} 34 | { 35 | } 36 | task_t(task_t const&) = delete; 37 | task_t& operator=(task_t const&) = delete; 38 
| task_t(task_t&& other) noexcept 39 | : coroutine_{other.coroutine_} 40 | { 41 | other.coroutine_ = nullptr; 42 | } 43 | task_t& operator=(task_t&& other) noexcept 44 | { 45 | if (this != &other) 46 | { 47 | // For joinable tasks, the coroutine is destroyed in the final 48 | // awaiter to support fire-and-forget semantics 49 | if constexpr (!Joinable) 50 | { 51 | if (coroutine_) 52 | { 53 | coroutine_.destroy(); 54 | } 55 | } 56 | coroutine_ = other.coroutine_; 57 | other.coroutine_ = nullptr; 58 | } 59 | return *this; 60 | } 61 | ~task_t() noexcept 62 | { 63 | if constexpr (!Joinable) 64 | { 65 | if (coroutine_) 66 | { 67 | coroutine_.destroy(); 68 | } 69 | } 70 | } 71 | 72 | // The dereferencing operators below return the data contained in the 73 | // associated promise 74 | [[nodiscard]] auto operator*() noexcept 75 | { 76 | static_assert( 77 | !std::is_same_v, "This task doesn't contain any data"); 78 | return std::ref(promise().data); 79 | } 80 | 81 | [[nodiscard]] auto operator*() const noexcept 82 | { 83 | static_assert( 84 | !std::is_same_v, "This task doesn't contain any data"); 85 | return std::cref(promise().data); 86 | } 87 | 88 | // A task_t is truthy if it is not associated with an outstanding 89 | // coroutine or the coroutine it is associated with is complete 90 | [[nodiscard]] operator bool() const noexcept 91 | { 92 | return await_ready(); 93 | } 94 | 95 | [[nodiscard]] bool await_ready() const noexcept 96 | { 97 | return !coroutine_ || coroutine_.done(); 98 | } 99 | 100 | void join() 101 | { 102 | static_assert(Joinable, 103 | "Cannot join a task without the Joinable type " 104 | "parameter " 105 | "set"); 106 | coroutine_.promise().join_sem.acquire(); 107 | } 108 | 109 | // When suspending from a coroutine *within* this task's coroutine, save 110 | // the resume point (to be resumed when the inner coroutine finalizes) 111 | std::coroutine_handle<> await_suspend(std::coroutine_handle<> coroutine) noexcept 112 | { 113 | return 
detail::await_suspend(coroutine_, coroutine); 114 | } 115 | 116 | // The return value of await_resume is the final result of `co_await 117 | // this_task` once the coroutine associated with this task completes 118 | auto await_resume() const noexcept 119 | { 120 | if constexpr (std::is_same_v) 121 | { 122 | return; 123 | } 124 | else 125 | { 126 | return std::move(promise().data); 127 | } 128 | } 129 | 130 | protected: 131 | [[nodiscard]] promise_type& promise() const noexcept 132 | { 133 | return coroutine_.promise(); 134 | } 135 | 136 | std::coroutine_handle coroutine_ = nullptr; 137 | }; 138 | 139 | // Suspend the current coroutine to be scheduled for execution on a different 140 | // thread by the supplied scheduler. Remember to `co_await` this function's 141 | // returned value. 142 | // 143 | // The least significant bit of the CPU mask corresponds to CPU 0. A non-zero 144 | // mask will prevent this coroutine from being scheduled on CPUs corresponding 145 | // to bits that are set. (NOTE(review): scheduler_t::schedule expands a zero mask to every CPU bit, which suggests set bits select rather than exclude CPUs -- confirm) 146 | // 147 | // Threadsafe only if scheduler_t::schedule is threadsafe (the default one 148 | // provided is threadsafe).
149 | template 150 | inline auto suspend(S& scheduler = S::instance(), 151 | uint64_t cpu_mask = 0, 152 | uint32_t priority = 0, 153 | source_location_t const& source_location = {}) noexcept 154 | { 155 | struct awaiter_t 156 | { 157 | scheduler_t& scheduler; 158 | uint64_t cpu_mask; 159 | uint32_t priority; 160 | source_location_t source_location; 161 | 162 | bool await_ready() const noexcept 163 | { 164 | return false; 165 | } 166 | 167 | void await_resume() const noexcept 168 | { 169 | } 170 | 171 | void await_suspend(std::coroutine_handle<> coroutine) const noexcept 172 | { 173 | scheduler.schedule(coroutine, cpu_mask, priority, source_location); 174 | } 175 | }; 176 | 177 | return awaiter_t{scheduler, cpu_mask, priority, source_location}; 178 | } 179 | 180 | #define COOP_SUSPEND() \ 181 | co_await ::coop::suspend( \ 182 | ::coop::scheduler_t::instance(), 0, 0, {__FILE__, __LINE__}) 183 | 184 | #define COOP_SUSPEND1(scheduler) \ 185 | co_await ::coop::suspend(scheduler, 0, 0, {__FILE__, __LINE__}) 186 | 187 | #define COOP_SUSPEND2(scheduler, cpu_mask) \ 188 | co_await ::coop::suspend(scheduler, cpu_mask, 0, {__FILE__, __LINE__}) 189 | 190 | #define COOP_SUSPEND3(scheduler, cpu_mask, priority) \ 191 | co_await ::coop::suspend( \ 192 | scheduler, cpu_mask, priority, {__FILE__, __LINE__}) 193 | 194 | #define COOP_SUSPEND4(cpu_mask) \ 195 | co_await ::coop::suspend( \ 196 | ::coop::scheduler_t::instance(), cpu_mask, 0, {__FILE__, __LINE__}) 197 | 198 | #define COOP_SUSPEND5(cpu_mask, priority) \ 199 | co_await ::coop::suspend(::coop::scheduler_t::instance(), \ 200 | cpu_mask, \ 201 | priority, \ 202 | {__FILE__, __LINE__}) 203 | } // namespace coop 204 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COOP_SOURCES 2 | ../include/coop/event.hpp 3 | ../include/coop/scheduler.hpp 4 | ../include/coop/source_location.hpp 5 | 
../include/coop/task.hpp 6 | ../include/coop/detail/api.hpp 7 | ../include/coop/detail/blockingconcurrentqueue.h 8 | ../include/coop/detail/concurrentqueue.h 9 | ../include/coop/detail/lightweightsemaphore.h 10 | ../include/coop/detail/promise.hpp 11 | ../include/coop/detail/tracer.hpp 12 | ../include/coop/detail/work_queue.hpp 13 | event.cpp 14 | scheduler.cpp 15 | work_queue.cpp 16 | ) 17 | source_group( 18 | TREE 19 | ${CMAKE_CURRENT_SOURCE_DIR}/.. 20 | FILES 21 | ${COOP_SOURCES} 22 | ) 23 | 24 | add_library( 25 | coop 26 | ${COOP_SOURCES} 27 | ) 28 | add_library(coop::coop ALIAS coop) 29 | 30 | target_link_libraries( 31 | coop 32 | PUBLIC 33 | coop_core 34 | ) 35 | 36 | if(BUILD_SHARED_LIBS) 37 | target_compile_definitions(coop PRIVATE COOP_IMPL) 38 | endif() -------------------------------------------------------------------------------- /src/event.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #if defined(_WIN32) 7 | # define WIN32_LEAN_AND_MEAN 8 | # include 9 | #elif defined(__linux__) 10 | # include 11 | #elif (__APPLE__) 12 | #endif 13 | 14 | using namespace coop; 15 | 16 | event_ref_t::wait_result_t 17 | event_ref_t::wait_many(event_ref_t* events, uint32_t count) 18 | { 19 | #if defined(_WIN32) 20 | static_assert(sizeof(event_ref_t) == sizeof(HANDLE)); 21 | uint32_t result = WaitForMultipleObjects( 22 | count, reinterpret_cast(events), false, INFINITE); 23 | 24 | assert(result != WAIT_FAILED && "Failed to await events"); 25 | 26 | if (result == WAIT_FAILED) 27 | { 28 | return {status_e::failed}; 29 | } 30 | else if (result == WAIT_TIMEOUT) 31 | { 32 | return {status_e::timeout}; 33 | } 34 | 35 | if (result < WAIT_ABANDONED_0) 36 | { 37 | return {status_e::normal, result - WAIT_OBJECT_0}; 38 | } 39 | else 40 | { 41 | return {status_e::abandoned, result - WAIT_OBJECT_0}; 42 | } 43 | 44 | #elif defined(__linux__) 45 | // TODO: Android/Linux implementation 46 | #elif 
(__APPLE__) 47 | // TODO: MacOS/iOS implementation 48 | #endif 49 | } 50 | 51 | void event_ref_t::init(bool manual_reset, char const* label) 52 | { 53 | #if defined(_WIN32) 54 | handle_ = CreateEventA(nullptr, manual_reset, false, label); 55 | #elif defined(__linux__) 56 | // TODO: Android/Linux implementation 57 | #elif (__APPLE__) 58 | // TODO: MacOS/iOS implementation 59 | #endif 60 | } 61 | 62 | bool event_ref_t::is_signaled() const 63 | { 64 | #if defined(_WIN32) 65 | uint32_t status = WaitForSingleObject(handle_, 0); 66 | return status == WAIT_OBJECT_0; 67 | #elif defined(__linux__) 68 | // TODO: Android/Linux implementation 69 | #elif (__APPLE__) 70 | // TODO: MacOS/iOS implementation 71 | #endif 72 | } 73 | 74 | bool event_ref_t::wait() const 75 | { 76 | #if defined(_WIN32) 77 | uint32_t status = WaitForSingleObject(handle_, INFINITE); 78 | return status == WAIT_OBJECT_0; 79 | #elif defined(__linux__) 80 | // TODO: Android/Linux implementation 81 | #elif (__APPLE__) 82 | // TODO: MacOS/iOS implementation 83 | #endif 84 | } 85 | 86 | void event_ref_t::signal() 87 | { 88 | #if defined(_WIN32) 89 | SetEvent(handle_); 90 | #elif defined(__linux__) 91 | // TODO: Android/Linux implementation 92 | #elif (__APPLE__) 93 | // TODO: MacOS/iOS implementation 94 | #endif 95 | } 96 | 97 | void event_ref_t::reset() 98 | { 99 | #if defined(_WIN32) 100 | ResetEvent(handle_); 101 | #elif defined(__linux__) 102 | // TODO: Android/Linux implementation 103 | #elif (__APPLE__) 104 | // TODO: MacOS/iOS implementation 105 | #endif 106 | } 107 | 108 | void event_t::await_suspend(std::coroutine_handle<> coroutine) noexcept 109 | { 110 | // Enqueue coroutine for resumption when this event transitions to the 111 | // signaled state 112 | scheduler_t::instance().schedule(coroutine, ref(), cpu_affinity_, priority_); 113 | } 114 | 115 | event_t::~event_t() noexcept 116 | { 117 | #if defined(_WIN32) 118 | if (handle_) 119 | { 120 | CloseHandle(handle_); 121 | } 122 | #elif 
defined(__linux__) 123 | // TODO: Android/Linux implementation 124 | #elif (__APPLE__) 125 | // TODO: MacOS/iOS implementation 126 | #endif 127 | } 128 | 129 | event_t::event_t(event_t&& other) noexcept 130 | { 131 | *this = std::move(other); 132 | } 133 | 134 | event_t& event_t::operator=(event_t&& other) noexcept 135 | { 136 | if (this != &other) 137 | { 138 | #if defined(_WIN32) 139 | std::swap(handle_, other.handle_); 140 | #elif defined(__linux__) 141 | // TODO: Android/Linux implementation 142 | #elif (__APPLE__) 143 | // TODO: MacOS/iOS implementation 144 | #endif 145 | } 146 | return *this; 147 | } 148 | 149 | event_ref_t event_t::ref() const noexcept 150 | { 151 | #if defined(_WIN32) 152 | event_ref_t out; 153 | out.handle_ = handle_; 154 | return out; 155 | #elif defined(__linux__) 156 | // TODO: Android/Linux implementation 157 | #elif (__APPLE__) 158 | // TODO: MacOS/iOS implementation 159 | #endif 160 | } 161 | -------------------------------------------------------------------------------- /src/scheduler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace coop; 12 | 13 | scheduler_t& scheduler_t::instance() noexcept 14 | { 15 | static scheduler_t scheduler; 16 | return scheduler; 17 | } 18 | 19 | scheduler_t::scheduler_t() 20 | { 21 | // Determine CPU count 22 | cpu_count_ = std::thread::hardware_concurrency(); 23 | assert(cpu_count_ > 0 && cpu_count_ <= 64 24 | && "Coop does not yet support CPUs with more than 64 cores"); 25 | cpu_mask_ = (1 << (cpu_count_ + 1)) - 1; 26 | 27 | COOP_LOG("Spawning coop scheduler with %i threads\n", cpu_count_); 28 | 29 | void* raw = operator new[](sizeof(detail::work_queue_t) * cpu_count_); 30 | queues_ = static_cast(raw); 31 | 32 | for (decltype(cpu_count_) i = 0; i != cpu_count_; ++i) 33 | { 34 | new (queues_ + i) detail::work_queue_t(*this, i); 35 | } 
36 | 37 | // Initialize room for 32 events 38 | event_capacity_ = 32; 39 | event_count_ = 1; 40 | events_ = new event_ref_t[event_capacity_]; 41 | event_continuations_ = new event_continuation_t[event_capacity_ - 1]; 42 | event_thread_signal_.init(false, "coop_main_event"); 43 | events_[0] = event_thread_signal_; 44 | 45 | // A high quality PRNG number isn't needed here, as this update counter is 46 | // used to drive a low discrepancy sequence 47 | update_ = std::rand(); 48 | 49 | #ifdef _WIN32 50 | event_thread_ = std::thread([this] { 51 | active_ = true; 52 | while (active_) 53 | { 54 | auto [status, index] = event_t::wait_many(events_, event_count_); 55 | 56 | if (status == event_ref_t::status_e::failed 57 | || status == event_ref_t::status_e::timeout) 58 | { 59 | continue; 60 | } 61 | 62 | if (index == 0) 63 | { 64 | // The event at index 0 is special in that it is used to 65 | // indicate the availability of additional events or to stop 66 | // this thread 67 | if (!active_) 68 | { 69 | return; 70 | } 71 | 72 | // Dequeue continuation requests from the concurrent queue in 73 | // bulk 74 | size_t size = pending_events_.size_approx(); 75 | 76 | // Resize arrays holding event refs and coroutines if necessary 77 | if (size + event_count_ > event_capacity_) 78 | { 79 | event_capacity_ = size + event_count_ * 2; 80 | event_ref_t* events = new event_ref_t[event_capacity_]; 81 | std::memcpy(events_, events, sizeof(event_t) * event_count_); 82 | delete[] events_; 83 | events_ = events; 84 | 85 | event_continuation_t* event_continuations 86 | = new event_continuation_t[event_capacity_ - 1]; 87 | for (size_t i = 0; i != event_count_ - 1; ++i) 88 | { 89 | // Use moves here instead of a memcpy in case 90 | // std::coroutine_handle<> has a non-trivial move 91 | event_continuations[i] 92 | = std::move(event_continuations_[i]); 93 | } 94 | delete[] event_continuations_; 95 | event_continuations_ = event_continuations; 96 | } 97 | 98 | // Note that the number of items we 
actually dequeue may be more 99 | // than originally advertised 100 | size = pending_events_.try_dequeue_bulk( 101 | event_continuations_ + event_count_ - 1, 102 | event_capacity_ - event_count_); 103 | 104 | for (size_t i = 0; i != size; ++i) 105 | { 106 | events_[i + event_count_] 107 | = event_continuations_[i + event_count_ - 1].event; 108 | } 109 | 110 | COOP_LOG( 111 | "Added %zu events to the event processing thread\n", size); 112 | event_count_ += size; 113 | } 114 | else 115 | { 116 | COOP_LOG("Event %i signaled on the event processing thread\n", 117 | index); 118 | 119 | // An event has been signaled. Enqueue its associated 120 | // continuation. 121 | event_continuation_t& continuation 122 | = event_continuations_[index - 1]; 123 | schedule(continuation.coroutine, 124 | continuation.cpu_affinity, 125 | continuation.priority); 126 | 127 | // NOTE: if this event was the only event in the queue (aside 128 | // from the thread signaler), these swaps are in-place swaps and 129 | // thus no-ops 130 | std::swap(events_[index], events_[event_count_ - 1]); 131 | std::swap(event_continuations_[index - 1], 132 | event_continuations_[event_count_ - 1]); 133 | --event_count_; 134 | } 135 | } 136 | }); 137 | #endif 138 | } 139 | 140 | scheduler_t::~scheduler_t() noexcept 141 | { 142 | active_ = false; 143 | #ifdef _WIN32 144 | events_[0].signal(); 145 | event_thread_.join(); 146 | #endif 147 | delete[] events_; 148 | delete[] event_continuations_; 149 | 150 | for (decltype(cpu_count_) i = 0; i != cpu_count_; ++i) 151 | { 152 | queues_[i].~work_queue_t(); 153 | } 154 | operator delete[](static_cast(queues_)); 155 | } 156 | 157 | void scheduler_t::schedule(std::coroutine_handle<> coroutine, 158 | uint64_t cpu_affinity, 159 | uint32_t priority, 160 | source_location_t source_location) 161 | { 162 | if (cpu_affinity == 0) 163 | { 164 | cpu_affinity = ~cpu_affinity & cpu_mask_; 165 | } 166 | 167 | for (uint32_t i = 0; i != cpu_count_; ++i) 168 | { 169 | if (cpu_affinity & 
(1ull << i)) 170 | { 171 | if (queues_[i].size_approx() == 0) 172 | { 173 | COOP_LOG("Empty work queue %i identified\n", i); 174 | queues_[i].enqueue(coroutine, priority, source_location); 175 | return; 176 | } 177 | } 178 | } 179 | 180 | // All queues appear to be busy, pick a random one with reasonably low 181 | // discrepancy (Kronecker recurrence sequence) 182 | uint32_t index = static_cast(update_++ * std::numbers::phi_v) 183 | % std::popcount(cpu_affinity); 184 | 185 | // Iteratively unset bits to determine the nth set bit 186 | for (uint32_t i = 0; i != index; ++i) 187 | { 188 | cpu_affinity &= ~(1 << (std::countr_zero(cpu_affinity) + 1)); 189 | } 190 | uint32_t queue = std::countr_zero(cpu_affinity); 191 | COOP_LOG("Work queue %i identified\n", queue); 192 | 193 | queues_[queue].enqueue(coroutine, priority, source_location); 194 | } 195 | 196 | void scheduler_t::schedule(std::coroutine_handle<> coroutine, 197 | event_ref_t event, 198 | uint64_t cpu_affinity, 199 | uint32_t priority) 200 | { 201 | pending_events_.enqueue({coroutine, event, cpu_affinity, priority}); 202 | events_[0].signal(); 203 | } 204 | -------------------------------------------------------------------------------- /src/work_queue.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef _WIN32 9 | # define WIN32_LEAN_AND_MEAN 10 | # include 11 | #elif defined(__linux__) 12 | # include 13 | #elif (__APPLE__) 14 | #endif 15 | 16 | using namespace coop; 17 | using namespace coop::detail; 18 | 19 | work_queue_t::work_queue_t(scheduler_t& scheduler, uint32_t id) 20 | : scheduler_{scheduler} 21 | , id_{id} 22 | , sem_{0} 23 | { 24 | snprintf(label_, sizeof(label_), "work_queue:%i", id); 25 | active_ = true; 26 | thread_ = std::thread([this] { 27 | #if defined(_WIN32) 28 | SetThreadAffinityMask( 29 | GetCurrentThread(), static_cast(1ull << id_)); 30 | #elif defined(__linux__) 31 | // 
TODO: Android implementation 32 | pthread_t thread = pthread_self(); 33 | cpu_set_t cpuset; 34 | CPU_ZERO(&cpuset); 35 | CPU_SET(id_, &cpuset); 36 | int result = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); 37 | 38 | if (result != 0) 39 | { 40 | errno = result; 41 | perror("Failed to set thread affinity"); 42 | return; 43 | } 44 | #elif (__APPLE__) 45 | // TODO: MacOS/iOS implementation 46 | #endif 47 | 48 | while (true) 49 | { 50 | sem_.acquire(); 51 | if (!active_) 52 | { 53 | return; 54 | } 55 | 56 | bool did_dequeue = false; 57 | 58 | // Dequeue in a loop because the concurrent queue isn't sequentially 59 | // consistent 60 | while (!did_dequeue) 61 | { 62 | for (int i = COOP_PRIORITY_COUNT - 1; i >= 0; --i) 63 | { 64 | std::coroutine_handle<> coroutine; 65 | if (queues_[i].try_dequeue(coroutine)) 66 | { 67 | COOP_LOG("Dequeueing coroutine %p on thread %zu (%i)\n", 68 | coroutine.address(), 69 | detail::thread_id(), 70 | id_); 71 | did_dequeue = true; 72 | coroutine.resume(); 73 | break; 74 | } 75 | } 76 | } 77 | 78 | // TODO: Implement some sort of work stealing here 79 | } 80 | }); 81 | } 82 | 83 | work_queue_t::~work_queue_t() noexcept 84 | { 85 | active_ = false; 86 | sem_.release(); 87 | thread_.join(); 88 | } 89 | 90 | void work_queue_t::enqueue(std::coroutine_handle<> coroutine, 91 | uint32_t priority, 92 | source_location_t source_location) 93 | { 94 | priority = std::clamp(priority, 0, COOP_PRIORITY_COUNT - 1); 95 | COOP_LOG("Enqueueing coroutine %p on thread %zu (%s:%zu)\n", 96 | coroutine.address(), 97 | detail::thread_id(), 98 | source_location.file, 99 | source_location.line); 100 | queues_[priority].enqueue(coroutine); 101 | sem_.release(); 102 | } 103 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(FetchContent) 2 | 3 | set(DOCTEST_WITH_TESTS OFF) 4 | set(DOCTEST_NO_INSTALL ON) 5 | 
FetchContent_Declare( 6 | doctest 7 | GIT_REPOSITORY https://github.com/onqtam/doctest.git 8 | GIT_TAG 2.4.5 9 | GIT_SHALLOW ON 10 | ) 11 | if(NOT doctest_POPULATED) 12 | FetchContent_Populate(doctest) 13 | add_subdirectory(${doctest_SOURCE_DIR} ${doctest_BINARY_DIR}) 14 | endif() 15 | 16 | list(APPEND CMAKE_MODULE_PATH ${doctest_SOURCE_DIR}/scripts/cmake) 17 | include(doctest) 18 | 19 | add_executable(coop_test test.cpp) 20 | target_link_libraries( 21 | coop_test 22 | PUBLIC 23 | coop 24 | coop_scheduler 25 | doctest::doctest 26 | ) -------------------------------------------------------------------------------- /test/test.cpp: -------------------------------------------------------------------------------- 1 | #define DOCTEST_CONFIG_IMPLEMENT 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | coop::task_t suspend_time() 9 | { 10 | // std::printf("%zu start thread\n", coop::detail::thread_id()); 11 | auto t1 = std::chrono::system_clock::now(); 12 | COOP_SUSPEND(); 13 | auto t2 = std::chrono::system_clock::now(); 14 | // std::printf("%zu end thread\n", coop::detail::thread_id()); 15 | size_t us 16 | = std::chrono::duration_cast(t2 - t1).count(); 17 | std::printf("Duration for suspend test: %zu us\n", us); 18 | } 19 | 20 | TEST_CASE("suspend overhead") 21 | { 22 | std::printf("Calling suspend_test2 coroutine\n"); 23 | // auto task = suspend_time(); 24 | // task.join(); 25 | suspend_time().join(); 26 | std::printf("suspend_test2 joined\n"); 27 | } 28 | 29 | coop::task_t test_suspend(std::thread::id& id) 30 | { 31 | COOP_SUSPEND(); 32 | id = std::this_thread::get_id(); 33 | co_return; 34 | } 35 | 36 | TEST_CASE("test suspend") 37 | { 38 | std::thread::id id = std::this_thread::get_id(); 39 | std::thread::id next; 40 | auto task = test_suspend(next); 41 | std::printf("Joining task\n"); 42 | task.join(); 43 | std::printf("Task joined\n"); 44 | 45 | CHECK(id != next); 46 | } 47 | 48 | coop::task_t chain1(int core) 49 | { 50 | std::printf("chain1 
suspending\n"); 51 | COOP_SUSPEND4(1 << core); 52 | std::printf("chain1 resumed\n"); 53 | co_return 1; 54 | } 55 | 56 | coop::task_t chain2() 57 | { 58 | std::printf("chain2\n"); 59 | COOP_SUSPEND4(1 << 3); 60 | auto t1 = chain1(5); 61 | auto t2 = chain1(6); 62 | co_return co_await t1 + co_await t2; 63 | } 64 | 65 | coop::task_t chain3(int& result) 66 | { 67 | std::printf("chain3 suspending\n"); 68 | COOP_SUSPEND4(1 << 4); 69 | std::printf("chain3 resumed\n"); 70 | result = co_await chain2(); 71 | } 72 | 73 | TEST_CASE("chained continuation") 74 | { 75 | int x = 0; 76 | auto task = chain3(x); 77 | std::printf("Joining chained continuation task\n"); 78 | task.join(); 79 | std::printf("Task chained continuation joined\n"); 80 | CHECK(x == 2); 81 | } 82 | 83 | coop::task_t<> in_flight1() 84 | { 85 | COOP_SUSPEND(); 86 | std::this_thread::sleep_for(std::chrono::milliseconds{50}); 87 | } 88 | 89 | coop::task_t in_flight2(size_t& ms_elapsed) 90 | { 91 | // The timing of this test will be off if you don't have at least 8 92 | // concurrent threads that can run on your machine 93 | constexpr size_t count = 8; 94 | coop::task_t<> tasks[count]; 95 | 96 | for (size_t i = 0; i != count; ++i) 97 | { 98 | tasks[i] = in_flight1(); 99 | } 100 | 101 | auto t1 = std::chrono::system_clock::now(); 102 | for (size_t i = 0; i != count; ++i) 103 | { 104 | co_await tasks[i]; 105 | } 106 | auto t2 = std::chrono::system_clock::now(); 107 | 108 | ms_elapsed 109 | = std::chrono::duration_cast(t2 - t1).count(); 110 | } 111 | 112 | TEST_CASE("multiple in flight") 113 | { 114 | size_t ms; 115 | auto task = in_flight2(ms); 116 | task.join(); 117 | std::printf("Duration for in flight test: %zu ms\n", ms); 118 | CHECK(ms < 150); 119 | } 120 | 121 | #ifdef _WIN32 122 | coop::task_t wait_for_event(coop::event_t& event) 123 | { 124 | co_await event; 125 | } 126 | 127 | coop::task_t signal_event(coop::event_t& event) 128 | { 129 | COOP_SUSPEND(); 130 | 
std::this_thread::sleep_for(std::chrono::milliseconds{50}); 131 | event.signal(); 132 | } 133 | 134 | TEST_CASE("event completion") 135 | { 136 | coop::event_t event; 137 | event.init(); 138 | auto t1 = std::chrono::system_clock::now(); 139 | auto task = wait_for_event(event); 140 | 141 | // Fire and forget coroutine 142 | // signal_event(event); 143 | event.signal(); 144 | task.join(); 145 | auto t2 = std::chrono::system_clock::now(); 146 | size_t ms 147 | = std::chrono::duration_cast(t2 - t1).count(); 148 | std::printf("Duration for event_completion test: %zu ms\n", ms); 149 | } 150 | #endif 151 | 152 | int main(int argc, char* argv[]) 153 | { 154 | // Spawn thread pool 155 | coop::scheduler_t::instance(); 156 | return doctest::Context{argc, argv}.run(); 157 | } 158 | --------------------------------------------------------------------------------