├── .clang-format
├── .gitignore
├── ARCHITECTURE.md
├── CMakeLists.txt
├── CMakeSettings.json
├── LICENSE
├── README.md
├── include
    └── coop
    │   ├── detail
    │       ├── api.hpp
    │       ├── blockingconcurrentqueue.h
    │       ├── concurrentqueue.h
    │       ├── lightweightsemaphore.h
    │       ├── promise.hpp
    │       ├── tracer.hpp
    │       └── work_queue.hpp
    │   ├── event.hpp
    │   ├── scheduler.hpp
    │   ├── source_location.hpp
    │   └── task.hpp
├── src
    ├── CMakeLists.txt
    ├── event.cpp
    ├── scheduler.cpp
    └── work_queue.cpp
└── test
    ├── CMakeLists.txt
    └── test.cpp


/.clang-format:
--------------------------------------------------------------------------------
 1 | AccessModifierOffset: -4
 2 | AlignAfterOpenBracket: true
 3 | AlignConsecutiveAssignments: true
 4 | AlignConsecutiveDeclarations: false
 5 | AlignEscapedNewlinesLeft: true
 6 | AlignTrailingComments: true
 7 | AllowAllParametersOfDeclarationOnNextLine: false
 8 | AllowShortBlocksOnASingleLine: true
 9 | AllowShortCaseLabelsOnASingleLine: false
10 | AllowShortFunctionsOnASingleLine: false
11 | AllowShortIfStatementsOnASingleLine: false
12 | AllowShortLoopsOnASingleLine: false
13 | AlwaysBreakAfterReturnType: None
14 | AlwaysBreakBeforeMultilineStrings: true
15 | AlwaysBreakTemplateDeclarations: true
16 | BinPackArguments: false
17 | BinPackParameters: false
18 | BreakBeforeBraces: Custom
19 | BraceWrapping:
20 |         AfterClass: true
21 |         AfterControlStatement: true
22 |         AfterEnum: true
23 |         AfterFunction: true
24 |         AfterNamespace: true
25 |         AfterObjCDeclaration: true
26 |         AfterStruct: true
27 |         AfterUnion: true
28 |         AfterExternBlock: true
29 |         BeforeCatch: true
30 |         BeforeElse: true
31 |         IndentBraces: false
32 |         SplitEmptyFunction: true
33 |         SplitEmptyRecord: true
34 |         SplitEmptyNamespace: true
35 | BreakBeforeBinaryOperators: All
36 | BreakBeforeTernaryOperators: true
37 | BreakConstructorInitializers: BeforeComma
38 | BreakStringLiterals: true
39 | ColumnLimit: 80
40 | CommentPragmas: ''
41 | CompactNamespaces: false
42 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
43 | ConstructorInitializerIndentWidth: 4
44 | ContinuationIndentWidth: 4
45 | Cpp11BracedListStyle: true
46 | DerivePointerBinding: false
47 | FixNamespaceComments: true
48 | IndentCaseLabels: false
49 | IndentPPDirectives: AfterHash
50 | IndentWidth: 4
51 | IndentWrappedFunctionNames: false
52 | KeepEmptyLinesAtTheStartOfBlocks: false
53 | Language: Cpp
54 | MaxEmptyLinesToKeep: 1
55 | NamespaceIndentation: Inner
56 | PenaltyBreakBeforeFirstCallParameter: 0
57 | PenaltyBreakComment: 0
58 | PenaltyBreakFirstLessLess: 0
59 | PenaltyBreakString: 1
60 | PenaltyExcessCharacter: 10
61 | PenaltyReturnTypeOnItsOwnLine: 20
62 | PointerAlignment: Left
63 | SortIncludes: true
64 | SortUsingDeclarations: true
65 | SpaceAfterTemplateKeyword: true
66 | SpaceBeforeAssignmentOperators: true
67 | SpaceBeforeParens: ControlStatements
68 | SpaceInEmptyParentheses: false
69 | SpacesBeforeTrailingComments: 1
70 | SpacesInAngles: false
71 | SpacesInCStyleCastParentheses: false
72 | SpacesInContainerLiterals: false
73 | SpacesInParentheses: false
74 | SpacesInSquareBrackets: false
75 | Standard: C++11
76 | TabWidth: 4
77 | UseTab: Never
78 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | out
2 | build
3 | .vs
4 | .vscode
5 | 


--------------------------------------------------------------------------------
/ARCHITECTURE.md:
--------------------------------------------------------------------------------
 1 | # Architecture
 2 | 
 3 | The main ideas behind Coop are actually quite simple, and the scheduler may be too unsophisticated for the most demanding users.
 4 | This doc exists to describe in rough terms how the scheduler works so you can decide for yourself if it's worth swapping out.
 5 | If you've been working with a mature thread pool and scheduling library for a while, it is almost certainly worth it to stick
 6 | with it. That said, for other users, my recommendation is to profile the functionality here to see if the builtin scheduler is
 7 | sufficient for your needs.
 8 | 
 9 | The primary thread pool is defined in `src/scheduler.cpp` and a worker thread is defined in `src/work_queue.cpp`. The thread pool
10 | is initialized with threads equal to the hardware concurrency available. Each thread sets its affinity to a distinct core.
11 | 
12 | When a coroutine suspends, the scheduler enqueues its coroutine handle to an idle thread, if any. If a CPU affinity mask is provided,
13 | only threads pinned to the requested cores are considered. After a thread is selected, the coroutine handle is enqueued on a
14 | lock free queue and a semaphore is released so the worker thread can wake up. When the worker thread wakes up, it always checks
15 | the higher priority queue first to see if work is available, otherwise it will dequeue from the lower priority queue.
16 | 
17 | When a coroutine completes on a worker thread, the resume point (if any) before the coroutine was scheduled is invoked immediately.
18 | That is, it doesn't get requeued on the thread pool for later execution.
19 | 
20 | The concurrent queue used to push work to worker threads is provided by [`moodycamel::ConcurrentQueue`](https://github.com/cameron314/concurrentqueue).
21 | Under the hood, the queue provides multiple-consumer multiple-producer usage, although in this case, only a single producer per queue
22 | exists. The thread pool worker threads currently do *not* support work stealing, which is a slightly more complicated endeavor
23 | for job schedulers that support task affinity.
24 | 
25 | The granularity of your jobs shouldn't be too fine - maybe having jobs that are at least 100 us or more is a good idea, or you'll
26 | end up paying disproportionately for scheduling costs.
27 | 
28 | The Win32 event awaiter works by having a single IO thread which blocks in a single `WaitForMultipleObjects` call. One of the
29 | events it waits on is used to signal the availability of more events to wait on. All the other events waited on are user awaited.
30 | If a user-awaited event is signaled, the coroutine associated with that event is then queued to a worker thread, passing along
31 | the requested CPU affinity and priority.


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # USAGE
 2 | # Link against the header-only interface target "coop::coop_core" or add include/ to your header path
 3 | 
 4 | cmake_minimum_required(VERSION 3.17)
 5 | 
 6 | # STANDALONE is ON when this is the top-level project rather than a subproject of another build
 7 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
 8 |     set(STANDALONE ON)
 9 | else()
10 |     set(STANDALONE OFF)
11 | endif()
12 | 
13 | # Configure which targets to build. Defaults set based on whether this project is included transitively or not
14 | option(COOP_BUILD_PROCESSOR "Build the provided coop processor" ON)
15 | option(COOP_BUILD_TESTS "Build coop tests" ${STANDALONE})
16 | option(COOP_ENABLE_TRACER "Verbose logging of all coroutine and scheduler events" ${STANDALONE})
17 | option(COOP_ENABLE_ASAN "Enable ASAN" OFF)
18 | 
19 | project(coop LANGUAGES CXX)
20 | 
21 | # Output artifacts to the binary root
22 | if(STANDALONE)
23 |     set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
24 |     set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
25 |     set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
26 | endif()
27 | 
28 | if(COOP_ENABLE_ASAN AND NOT WIN32)
29 |     # For ASAN usage with MSVC, it's recommended to drive CMake from Visual Studio and use the
30 |     # addressSanitizerEnabled: true
31 |     # flag in CMakeSettings.json
32 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
33 |     # CMake has no CMAKE_LINKER_FLAGS variable; linker flags are configured per target type
34 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
35 |     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
36 | endif()
37 | 
38 | find_package(Threads REQUIRED)
39 | 
40 | # Header-only interface target: carries the include path, the C++20 requirement and per-compiler coroutine flags
41 | add_library(coop_core INTERFACE)
42 | add_library(coop::coop_core ALIAS coop_core)
43 | target_include_directories(coop_core INTERFACE include)
44 | target_compile_features(coop_core INTERFACE cxx_std_20)
45 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
46 |     if(NOT WIN32)
47 |         target_compile_options(coop_core INTERFACE -stdlib=libc++)
48 |         target_link_options(coop_core INTERFACE -stdlib=libc++ -latomic)
49 |     else()
50 |         target_compile_definitions(coop_core INTERFACE _SILENCE_CLANG_COROUTINE_MESSAGE)
51 |     endif()
52 | endif()
53 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
54 |     # Currently, GCC requires this flag for coroutine language support
55 |     target_compile_options(coop_core INTERFACE -fcoroutines)
56 | endif()
57 | target_link_libraries(coop_core INTERFACE Threads::Threads)
58 | if(BUILD_SHARED_LIBS)
59 |     # Consumed by include/coop/detail/api.hpp to select the COOP_API import/export decoration
60 |     target_compile_definitions(coop_core INTERFACE COOP_BUILD_SHARED)
61 | endif()
62 | 
63 | if(COOP_ENABLE_TRACER)
64 |     target_compile_definitions(coop_core INTERFACE COOP_TRACE)
65 | endif()
66 | 
67 | if(COOP_BUILD_PROCESSOR OR COOP_BUILD_TESTS)
68 |     add_subdirectory(src)
69 | endif()
70 | 
71 | # COOP_BUILD_TESTS already defaults to ON for standalone builds; checking only the option lets
72 | # -DCOOP_BUILD_TESTS=OFF take effect and keeps test/ from being added without src/
73 | if(COOP_BUILD_TESTS)
74 |     enable_testing()
75 |     add_subdirectory(test)
76 | endif()


--------------------------------------------------------------------------------
/CMakeSettings.json:
--------------------------------------------------------------------------------
 1 | ﻿{
 2 |   "configurations": [
 3 |     {
 4 |       "name": "x64-Debug (default)",
 5 |       "generator": "Ninja",
 6 |       "configurationType": "Debug",
 7 |       "inheritEnvironments": [ "msvc_x64_x64" ],
 8 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
 9 |       "installRoot": "${projectDir}\\out\\install\\${name}",
10 |       "addressSanitizerEnabled": true,
11 |       "cmakeCommandArgs": "",
12 |       "buildCommandArgs": "",
13 |       "ctestCommandArgs": "",
14 |       "variables": []
15 |     },
16 |     {
17 |       "name": "x64-Debug-Shared",
18 |       "generator": "Ninja",
19 |       "configurationType": "Debug",
20 |       "inheritEnvironments": [ "msvc_x64_x64" ],
21 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
22 |       "installRoot": "${projectDir}\\out\\install\\${name}",
23 |       "cmakeCommandArgs": "-DBUILD_SHARED_LIBS=ON",
24 |       "buildCommandArgs": "",
25 |       "ctestCommandArgs": "",
26 |       "variables": []
27 |     },
28 |     {
29 |       "name": "x64-Release",
30 |       "generator": "Ninja",
31 |       "configurationType": "RelWithDebInfo",
32 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
33 |       "installRoot": "${projectDir}\\out\\install\\${name}",
34 |       "cmakeCommandArgs": "",
35 |       "buildCommandArgs": "",
36 |       "ctestCommandArgs": "",
37 |       "inheritEnvironments": [ "msvc_x64_x64" ],
38 |       "variables": []
39 |     },
40 |     {
41 |       "name": "x64-Release-ASAN",
42 |       "generator": "Ninja",
43 |       "configurationType": "RelWithDebInfo",
44 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
45 |       "installRoot": "${projectDir}\\out\\install\\${name}",
46 |       "cmakeCommandArgs": "",
47 |       "buildCommandArgs": "",
48 |       "addressSanitizerEnabled": true,
49 |       "ctestCommandArgs": "",
50 |       "inheritEnvironments": [ "msvc_x64_x64" ],
51 |       "variables": []
52 |     }
53 |   ]
54 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2021 Jeremy Ong
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ﻿# 🐔 Coop
  2 | 
  3 | Coop is a C++20 coroutines-based library to support [*cooperative multitasking*](https://en.wikipedia.org/wiki/Cooperative_multitasking)
  4 | in the context of a multithreaded application. The syntax will be familiar to users of `async` and `await` functionality in other
  5 | programming languages. Users *do not* need to understand the C++20 coroutines API to use this library.
  6 | 
  7 | ## Features
  8 | 
  9 | - Ships with a default affinity-aware two-priority threadsafe task scheduler.
 10 | - The task scheduler is swappable with your own
 11 | - Supports scheduling of user-defined code and OS completion events (e.g. events that signal after I/O completes)
 12 | - Easy to use, efficient API, with a small and digestible code footprint (hundreds of lines of code, not thousands)
 13 | 
 14 | Tasks in Coop are *eager* as opposed to lazy, meaning that upon suspension, the coroutine is immediately dispatched for execution on
 15 | a worker with the appropriate affinity. While there are many benefits to structuring things lazily (see this excellent [talk](https://www.youtube.com/watch?v=1Wy5sq3s2rg)),
 16 | Coop opts to do things the way it does because:
 17 | 
 18 | - Coop was designed to interoperate with existing job/task graph systems
 19 | - Coop was originally written within the context of a game engine, where exceptions were not used
 20 | - For game engines, having a CPU-topology-aware dispatch mechanism is extremely important (consider the architecture of, say, the PS5)
 21 | 
 22 | While game consoles don't (yet) support C++20 fully, the hope is that options like Coop will be there when the compiler support gets there as well.
 23 | 
 24 | ## Limitations
 25 | 
 26 | If your use case is too far afield from Coop's original use case (as above), you may need to do more modification to get Coop to behave the way you want.
 27 | The limitations to consider below are:
 28 | 
 29 | - Requires a recent C++20 compiler and code that uses Coop headers must also use C++20
 30 | - The "event_t" wrapper around Win32 events doesn't have equivalent functionality on other platforms yet (it's provided as a reference for how you might handle your own overlapped IO)
 31 | - The Clang implementation of the coroutines API at the moment doesn't work with the GCC standard library (libstdc++), so use libc++ instead
 32 | - Clang on Windows does not yet support the MSVC coroutines runtime due to ABI differences
 33 | - Coop ignores the problem of unhandled exceptions within scheduled tasks
 34 | 
 35 | If the above limitations make Coop unsuitable for you, consider the following libraries:
 36 | 
 37 | - [CppCoro](https://github.com/lewissbaker/cppcoro) - A coroutine library for C++
 38 | - [Conduit](https://github.com/loopperfect/conduit) - Lazy High Performance Streams using Coroutine TS
 39 | - [folly::coro](https://github.com/facebook/folly/tree/master/folly/experimental/coro) - a developer-friendly asynchronous C++ framework based on Coroutines TS
 40 | 
 41 | ## Building and Running the Tests
 42 | 
 43 | When configured as a standalone project, the built-in scheduler and tests are enabled by default. To configure and build the project
 44 | from the command line:
 45 | 
 46 | ```bash
 47 | mkdir build
 48 | cd build
 49 | cmake .. # Supply your own generator if you don't want the default generator
 50 | cmake --build .
 51 | ./test/coop_test
 52 | ```
 53 | 
 54 | ## Integration Guide
 55 | 
 56 | If you don't intend on using the built in scheduler, simply copy the contents of the `include` folder somewhere in your include path.
 57 | 
 58 | Otherwise, the recommended integration is done via cmake. For the header only portion, link against the `coop::coop_core` target.
 59 | 
 60 | If you'd like both headers and the scheduler implementation, link against `coop::coop`.
 61 | 
 62 | Drop this quick cmake snippet somewhere in your `CMakeLists.txt` file to make both of these targets available.
 63 | 
 64 | ```cmake
 65 | include(FetchContent)
 66 | 
 67 | FetchContent_Declare(
 68 |     coop
 69 |     GIT_REPOSITORY https://github.com/jeremyong/coop.git
 70 |     GIT_TAG master
 71 |     GIT_SHALLOW ON
 72 | )
 73 | FetchContent_MakeAvailable(coop)
 74 | ```
 75 | 
 76 | ## Usage
 77 | 
 78 | To write a coroutine, you'll use the `task_t` template type.
 79 | 
 80 | 
 81 | ```c++
 82 | coop::task_t<> simple_coroutine()
 83 | {
 84 |     co_await coop::suspend();
 85 | 
 86 |     // Fake some work with a timer
 87 |     std::this_thread::sleep_for(std::chrono::milliseconds{50});
 88 | }
 89 | ```
 90 | 
 91 | The first line with the `coop::suspend` function will suspend the execution of `simple_coroutine` and the next line will continue on a different thread.
 92 | 
 93 | To use this coroutine from another coroutine, we can do something like the following:
 94 | 
 95 | ```c++
 96 | coop::task_t<> another_coroutine()
 97 | {
 98 |     // This will cause `simple_coroutine` to be scheduled on a thread different to this one
 99 |     auto task = simple_coroutine();
100 | 
101 |     // Do other useful work
102 | 
103 |     // Await the task when we need it to finish
104 |     co_await task;
105 | }
106 | ```
107 | 
108 | Tasks can hold values to be awaited on.
109 | 
110 | ```c++
111 | coop::task_t<int> coroutine_with_data()
112 | {
113 |     co_await coop::suspend();
114 | 
115 |     // Do some work
116 |     int result = some_expensive_simulation();
117 | 
118 |     co_return result;
119 | }
120 | ```
121 | 
122 | When the task above is awaited via the `co_await` operator, what results is the int returned via `co_return`.
123 | Of course, passing other types is possible by changing the first template parameter of `task_t`.
124 | 
125 | Tasks let you do multiple async operations simultaneously, for example:
126 | 
127 | ```c++
128 | coop::task_t<> my_task(int ms)
129 | {
130 |     co_await coop::suspend();
131 | 
132 |     // Fake some work with a timer
133 |     std::this_thread::sleep_for(std::chrono::milliseconds{ms});
134 | }
135 | 
136 | coop::task_t<> big_coroutine()
137 | {
138 |     auto t1 = my_task(50);
139 |     auto t2 = my_task(40);
140 |     auto t3 = my_task(80);
141 | 
142 |     // 3 invocations of `my_task` are now potentially running concurrently on different threads
143 | 
144 |     do_something_useful();
145 | 
146 |     // Suspend until t2 is done
147 |     co_await t2;
148 | 
149 |     // Right now, t1 and t3 are *potentially* still running
150 | 
151 |     do_something_else();
152 | 
153 |     // When awaiting a task, this coroutine will not suspend if the task
154 |     // is already ready. Otherwise, this coroutine suspends to be continued
155 |     // by the thread that completes the awaited task.
156 |     co_await t1;
157 |     co_await t3;
158 | 
159 |     // Now, all three tasks are complete
160 | }
161 | ```
162 | 
163 | One thing to keep in mind is that after awaiting a task, the thread you resume on is *not* necessarily the same thread
164 | you were on originally.
165 | 
166 | What if you want to await a task from `main` or some other execution context that isn't a coroutine? For this, you can
167 | make a joinable task and `join` it.
168 | 
169 | ```c++
170 | coop::task_t<void, true> joinable_coroutine()
171 | {
172 |     co_await coop::suspend();
173 | 
174 |     // Fake some work with a timer
175 |     std::this_thread::sleep_for(std::chrono::milliseconds{50});
176 | }
177 | 
178 | int main(int argc, char** argv)
179 | {
180 |     auto task = joinable_coroutine();
181 |     // The timer is now running on a different thread than the main thread
182 | 
183 |     // Pause execution until joinable_coroutine is finished on whichever thread it was scheduled on
184 |     task.join();
185 | 
186 |     return 0;
187 | }
188 | ```
189 | 
190 | Note that currently, there is some overhead associated with spawning a joinable task because it creates new event objects instead of reusing event handles from a pool.
191 | 
192 | The `coop::suspend` function takes additional parameters that can set the CPU affinity mask, priority (only 0 and 1 are supported at the moment,
193 | with 1 being the higher priority), and file/line information for debugging purposes.
194 | 
195 | In addition to awaiting tasks, you can also await the `event_t` object. While this currently only supports Windows, this lets a coroutine
196 | suspend execution until an event handle is signaled - a powerful pattern for doing async I/O.
197 | 
198 | ```c++
199 | coop::task_t<> wait_for_event()
200 | {
201 |     // Suppose file_reading_code produces a Win32 HANDLE which will get signaled whenever the file
202 |     // read is ready
203 |     coop::event_t event{file_reading_code()};
204 | 
205 |     // Do something else while the file is reading
206 | 
207 |     // Suspend until the event gets signaled
208 |     co_await event;
209 | }
210 | ```
211 | 
212 | In the future, support may be added for epoll and kqueue abstractions.
213 | 
214 | ## Convenience macro `COOP_SUSPEND#`
215 | 
216 | The full function signature of the `suspend` function is the following:
217 | 
218 | ```c++
219 | template <Scheduler S = scheduler_t>
220 | inline auto suspend(S& scheduler                             = S::instance(),
221 |                     uint64_t cpu_mask                        = 0,
222 |                     uint32_t priority                        = 0,
223 |                     source_location_t const& source_location = {}) noexcept
224 | ```
225 | 
226 | and you must await the returned result. Instead, you can use the family of macros and simply write
227 | 
228 | ```
229 | COOP_SUSPEND();
230 | ```
231 | 
232 | if you are comfortable with the default behavior. This macro will supply `__FILE__` and `__LINE__` information
233 | to the `source_location` parameter to get additional tracking. Other macros with numerical suffixes to `COOP_SUSPEND` are
234 | also provided to allow you to override a subset of parameters as needed.
235 | 
236 | ## (Optional) Use your own scheduler
237 | 
238 | Coop is designed to be a pretty thin abstraction layer to make writing async code more convenient. If you already have a robust
239 | scheduler and thread pool, you don't have to use the one provided here. The `coop::suspend` function is templated and accepts
240 | an optional first parameter to a class that implements the `Scheduler` concept. To qualify as a `Scheduler`, a class only needs
241 | to implement the following function signature:
242 | 
243 | ```c++
244 |     void schedule(std::coroutine_handle<> coroutine,
245 |                   uint64_t cpu_affinity             = 0,
246 |                   uint32_t priority                 = 0,
247 |                   source_location_t source_location = {});
248 | ```
249 | 
250 | Then, at the opportune time on a thread of your choosing, simply call `coroutine.resume()`. Remember that when implementing your
251 | own scheduler, you are responsible for thread safety and ensuring that the "usual" bugs (like missed notifications) are ironed out.
252 | You can ignore the cpu affinity and priority flags if you don't need this functionality (i.e. if you aren't targeting a NUMA system).
253 | 
254 | ## Hack away
255 | 
256 | The source code of Coop is pretty small all things considered, with the core of its functionality contained in only a few hundred
257 | lines of commented code. Feel free to take it and adapt it for your use case. This was the route taken as opposed to making every
258 | design aspect customizable (which would have made the interface far more complicated).
259 | 
260 | ## Additional Resources
261 | 
262 | To learn more about coroutines in C++20, please do visit this [awesome compendium](https://gist.github.com/MattPD/9b55db49537a90545a90447392ad3aeb)
263 | of resources compiled by @MattPD.
264 | 


--------------------------------------------------------------------------------
/include/coop/detail/api.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Defines COOP_API, the decoration placed on symbols that must be exported or
 4 | // imported in a shared linkage environment:
 5 | //   - COOP_BUILD_SHARED not defined: COOP_API expands to nothing
 6 | //   - COOP_BUILD_SHARED with MSVC: dllexport if COOP_IMPL is defined, dllimport
 7 | //     otherwise
 8 | //   - COOP_BUILD_SHARED with other compilers: default symbol visibility
 9 | 
10 | #ifdef COOP_BUILD_SHARED
11 | #    ifdef _MSC_VER
12 | #        ifdef COOP_IMPL
13 | #            define COOP_API __declspec(dllexport)
14 | #        else
15 | #            define COOP_API __declspec(dllimport)
16 | #        endif
17 | #    else
18 | // The visibility attribute takes a string literal; an unquoted `default` is
19 | // rejected by GCC and Clang
20 | #        define COOP_API __attribute__((visibility("default")))
21 | #    endif
22 | #else
23 | #    define COOP_API
24 | #endif


--------------------------------------------------------------------------------
/include/coop/detail/blockingconcurrentqueue.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeremyong/coop/f2dc18cf56c1c17e9eca9e3bad41cefbc9631fba/include/coop/detail/blockingconcurrentqueue.h


--------------------------------------------------------------------------------
/include/coop/detail/lightweightsemaphore.h:
--------------------------------------------------------------------------------
  1 | // Provides an efficient implementation of a semaphore (LightweightSemaphore).
  2 | // This is an extension of Jeff Preshing's semaphore implementation (licensed
  3 | // under the terms of its separate zlib license) that has been adapted and
  4 | // extended by Cameron Desrochers.
  5 | 
  6 | #pragma once
  7 | 
  8 | #include <atomic>
  9 | #include <cstddef>     // For std::size_t
 10 | #include <type_traits> // For std::make_signed<T>
 11 | 
 12 | #if defined(_WIN32)
 13 | // Avoid including windows.h in a header; we only need a handful of
 14 | // items, so we'll redeclare them here (this is relatively safe since
 15 | // the API generally has to remain stable between Windows versions).
 16 | // I know this is an ugly hack but it still beats polluting the global
 17 | // namespace with thousands of generic names or adding a .cpp for nothing.
 18 | extern "C"
 19 | {
 20 |     struct _SECURITY_ATTRIBUTES;
 21 |     __declspec(dllimport) void* __stdcall CreateSemaphoreW(
 22 |         _SECURITY_ATTRIBUTES* lpSemaphoreAttributes,
 23 |         long lInitialCount,
 24 |         long lMaximumCount,
 25 |         const wchar_t* lpName);
 26 |     __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
 27 |     __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(
 28 |         void* hHandle,
 29 |         unsigned long dwMilliseconds);
 30 |     __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore,
 31 |                                                          long lReleaseCount,
 32 |                                                          long* lpPreviousCount);
 33 | }
 34 | #elif defined(__MACH__)
 35 | #    include <mach/mach.h>
 36 | #elif defined(__unix__)
 37 | #    include <semaphore.h>
 38 | #endif
 39 | 
 40 | namespace moodycamel
 41 | {
 42 | namespace details
 43 | {
 44 | // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
 45 | // portable + lightweight semaphore implementations, originally from
 46 | // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
 47 | // LICENSE:
 48 | // Copyright (c) 2015 Jeff Preshing
 49 | //
 50 | // This software is provided 'as-is', without any express or implied
 51 | // warranty. In no event will the authors be held liable for any damages
 52 | // arising from the use of this software.
 53 | //
 54 | // Permission is granted to anyone to use this software for any purpose,
 55 | // including commercial applications, and to alter it and redistribute it
 56 | // freely, subject to the following restrictions:
 57 | //
 58 | // 1. The origin of this software must not be misrepresented; you must not
 59 | //	claim that you wrote the original software. If you use this software
 60 | //	in a product, an acknowledgement in the product documentation would be
 61 | //	appreciated but is not required.
 62 | // 2. Altered source versions must be plainly marked as such, and must not be
 63 | //	misrepresented as being the original software.
 64 | // 3. This notice may not be removed or altered from any source distribution.
 65 | #if defined(_WIN32)
 66 |     class Semaphore // Win32 backend: wraps a handle created by CreateSemaphoreW
 67 |     {
 68 |     private:
 69 |         void* m_hSema; // semaphore handle; closed in the destructor
 70 |         // Copy construction/assignment are declared with MOODYCAMEL_DELETE_FUNCTION. NOTE(review): that macro and assert() are not defined in the visible part of this header - presumably provided by an earlier include; confirm.
 71 |         Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
 72 |         Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
 73 | 
 74 |     public:
 75 |         Semaphore(int initialCount = 0) // initialCount: starting count, asserted non-negative
 76 |         {
 77 |             assert(initialCount >= 0);
 78 |             const long maxLong = 0x7fffffff; // passed as the semaphore's maximum count
 79 |             m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
 80 |             assert(m_hSema); // creation failure is only detected when asserts are enabled
 81 |         }
 82 |         // Closes the handle obtained in the constructor.
 83 |         ~Semaphore()
 84 |         {
 85 |             CloseHandle(m_hSema);
 86 |         }
 87 |         // Waits with a timeout of 0xffffffff (INFINITE); returns true when WaitForSingleObject returns 0 (WAIT_OBJECT_0).
 88 |         bool wait()
 89 |         {
 90 |             const unsigned long infinite = 0xffffffff;
 91 |             return WaitForSingleObject(m_hSema, infinite) == 0;
 92 |         }
 93 |         // Waits with a zero timeout; returns true when WaitForSingleObject returns 0.
 94 |         bool try_wait()
 95 |         {
 96 |             return WaitForSingleObject(m_hSema, 0) == 0;
 97 |         }
 98 |         // usecs: timeout in microseconds, converted to milliseconds by integer division (any sub-millisecond remainder is dropped) and narrowed to unsigned long.
 99 |         bool timed_wait(std::uint64_t usecs)
100 |         {
101 |             return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000))
102 |                    == 0;
103 |         }
104 |         // Releases `count` units in one ReleaseSemaphore call, retrying for as long as the call returns 0.
105 |         void signal(int count = 1)
106 |         {
107 |             while (!ReleaseSemaphore(m_hSema, count, nullptr))
108 |                 ;
109 |         }
110 |     };
111 | #elif defined(__MACH__)
112 |     //---------------------------------------------------------
113 |     // Semaphore (Apple iOS and OSX)
114 |     // Can't use POSIX semaphores due to
115 |     // http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
116 |     //---------------------------------------------------------
117 |     class Semaphore // Mach backend: wraps a semaphore_t created with semaphore_create
118 |     {
119 |     private:
120 |         semaphore_t m_sema; // destroyed in the destructor
121 |         // Copy construction/assignment are declared with MOODYCAMEL_DELETE_FUNCTION. NOTE(review): that macro and assert() are not defined in the visible part of this header - presumably provided by an earlier include; confirm.
122 |         Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
123 |         Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
124 | 
125 |     public:
126 |         Semaphore(int initialCount = 0) // initialCount: starting count, asserted non-negative
127 |         {
128 |             assert(initialCount >= 0);
129 |             kern_return_t rc = semaphore_create(
130 |                 mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
131 |             assert(rc == KERN_SUCCESS); // creation failure is only detected when asserts are enabled
132 |             (void)rc; // rc is otherwise only read by the assert above
133 |         }
134 |         // Destroys the semaphore created in the constructor.
135 |         ~Semaphore()
136 |         {
137 |             semaphore_destroy(mach_task_self(), m_sema);
138 |         }
139 |         // Returns true when semaphore_wait reports KERN_SUCCESS.
140 |         bool wait()
141 |         {
142 |             return semaphore_wait(m_sema) == KERN_SUCCESS;
143 |         }
144 |         // Implemented as a timed wait with a zero timeout.
145 |         bool try_wait()
146 |         {
147 |             return timed_wait(0);
148 |         }
149 |         // timeout_usecs: timeout in microseconds; returns true when semaphore_timedwait reports KERN_SUCCESS.
150 |         bool timed_wait(std::uint64_t timeout_usecs)
151 |         {
152 |             mach_timespec_t ts;
153 |             ts.tv_sec  = static_cast<unsigned int>(timeout_usecs / 1000000); // whole seconds
154 |             ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000); // remaining microseconds expressed as nanoseconds
155 | 
156 |             // added in OSX 10.10:
157 |             // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
158 |             kern_return_t rc = semaphore_timedwait(m_sema, ts);
159 |             return rc == KERN_SUCCESS;
160 |         }
161 |         // Signals once, retrying until semaphore_signal reports KERN_SUCCESS.
162 |         void signal()
163 |         {
164 |             while (semaphore_signal(m_sema) != KERN_SUCCESS)
165 |                 ;
166 |         }
167 |         // Signals `count` times, one semaphore_signal call per unit, each retried until it succeeds; a count <= 0 does nothing.
168 |         void signal(int count)
169 |         {
170 |             while (count-- > 0)
171 |             {
172 |                 while (semaphore_signal(m_sema) != KERN_SUCCESS)
173 |                     ;
174 |             }
175 |         }
176 |     };
177 | #elif defined(__unix__)
178 |     //---------------------------------------------------------
179 |     // Semaphore (POSIX, Linux)
180 |     //---------------------------------------------------------
181 |     class Semaphore // Wrapper around a POSIX sem_t (m_sema) set up with sem_init.
182 |     {
183 |     private:
184 |         sem_t m_sema;
185 | 
186 |         Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
187 |         Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
188 | 
189 |     public:
190 |         Semaphore(int initialCount = 0) // Initializes m_sema (pshared argument 0) with initialCount permits.
191 |         {
192 |             assert(initialCount >= 0);
193 |             int rc
194 |                 = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
195 |             assert(rc == 0);
196 |             (void)rc; // rc is otherwise unused when assert() compiles to nothing.
197 |         }
198 |         // Destroys m_sema.
199 |         ~Semaphore()
200 |         {
201 |             sem_destroy(&m_sema);
202 |         }
203 |         // Blocks until a permit is taken; true only when sem_wait returns 0.
204 |         bool wait()
205 |         {
206 |             // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
207 |             int rc;
208 |             do
209 |             {
210 |                 rc = sem_wait(&m_sema);
211 |             } while (rc == -1 && errno == EINTR); // Retry when the call failed with EINTR.
212 |             return rc == 0;
213 |         }
214 |         // Non-blocking attempt via sem_trywait; true only when it returns 0.
215 |         bool try_wait()
216 |         {
217 |             int rc;
218 |             do
219 |             {
220 |                 rc = sem_trywait(&m_sema);
221 |             } while (rc == -1 && errno == EINTR); // Same EINTR retry as wait().
222 |             return rc == 0;
223 |         }
224 |         // Waits up to usecs microseconds via sem_timedwait; true only when it returns 0.
225 |         bool timed_wait(std::uint64_t usecs)
226 |         {
227 |             struct timespec ts;
228 |             const int usecs_in_1_sec = 1000000;
229 |             const int nsecs_in_1_sec = 1000000000;
230 |             clock_gettime(CLOCK_REALTIME, &ts); // Start from the current CLOCK_REALTIME time (return value not checked)...
231 |             ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); // ...and add the timeout to form the deadline.
232 |             ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000;
233 |             // sem_timedwait bombs if you have more than 1e9 in tv_nsec
234 |             // so we have to clean things up before passing it in
235 |             if (ts.tv_nsec >= nsecs_in_1_sec)
236 |             {
237 |                 ts.tv_nsec -= nsecs_in_1_sec; // Carry the overflow into tv_sec.
238 |                 ++ts.tv_sec;
239 |             }
240 | 
241 |             int rc;
242 |             do
243 |             {
244 |                 rc = sem_timedwait(&m_sema, &ts);
245 |             } while (rc == -1 && errno == EINTR); // Same deadline is reused on each EINTR retry.
246 |             return rc == 0;
247 |         }
248 |         // Releases one permit, retrying for as long as sem_post returns -1.
249 |         void signal()
250 |         {
251 |             while (sem_post(&m_sema) == -1)
252 |                 ;
253 |         }
254 |         // Releases count permits one at a time, each with the same retry loop.
255 |         void signal(int count)
256 |         {
257 |             while (count-- > 0)
258 |             {
259 |                 while (sem_post(&m_sema) == -1)
260 |                     ;
261 |             }
262 |         }
263 |     };
264 | #else
265 | #    error Unsupported platform! (No semaphore wrapper available)
266 | #endif
267 | 
268 | } // end namespace details
269 | 
270 | //---------------------------------------------------------
271 | // LightweightSemaphore
272 | //---------------------------------------------------------
273 | class LightweightSemaphore // Atomic permit counter (m_count) in front of a details::Semaphore (m_sema).
274 | {
275 | public:
276 |     typedef std::make_signed<std::size_t>::type ssize_t;
277 | 
278 | private:
279 |     std::atomic<ssize_t> m_count; // Permits available; waiters that stop spinning decrement it, so it can go negative.
280 |     details::Semaphore m_sema; // Only waited on / signaled on the slow paths below.
281 |     int m_maxSpins; // Upper bound on spin iterations before falling back to m_sema.
282 |     // Slow path of wait(): spin on m_count, then block on m_sema. Returns true iff a permit was taken.
283 |     bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
284 |     {
285 |         ssize_t oldCount;
286 |         int spin = m_maxSpins;
287 |         while (--spin >= 0) // Try up to m_maxSpins times to claim a permit without blocking.
288 |         {
289 |             oldCount = m_count.load(std::memory_order_relaxed);
290 |             if ((oldCount > 0)
291 |                 && m_count.compare_exchange_strong(oldCount,
292 |                                                    oldCount - 1,
293 |                                                    std::memory_order_acquire,
294 |                                                    std::memory_order_relaxed))
295 |                 return true;
296 |             std::atomic_signal_fence(std::memory_order_acquire); // Prevent the
297 |                                                                  // compiler
298 |                                                                  // from
299 |                                                                  // collapsing
300 |                                                                  // the loop.
301 |         }
302 |         oldCount = m_count.fetch_sub(1, std::memory_order_acquire); // Take a permit or register as a waiter (count may go negative).
303 |         if (oldCount > 0)
304 |             return true; // A permit was available after all.
305 |         if (timeout_usecs < 0) // Negative timeout: wait with no time limit.
306 |         {
307 |             if (m_sema.wait())
308 |                 return true;
309 |         }
310 |         if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) // A timeout of 0 never blocks.
311 |             return true;
312 |         // At this point, we've timed out waiting for the semaphore, but the
313 |         // count is still decremented indicating we may still be waiting on
314 |         // it. So we have to re-adjust the count, but only if the semaphore
315 |         // wasn't signaled enough times for us too since then. If it was, we
316 |         // need to release the semaphore too.
317 |         while (true)
318 |         {
319 |             oldCount = m_count.load(std::memory_order_acquire);
320 |             if (oldCount >= 0 && m_sema.try_wait())
321 |                 return true; // A signal arrived in the meantime; consume it and report success.
322 |             if (oldCount < 0
323 |                 && m_count.compare_exchange_strong(oldCount,
324 |                                                    oldCount + 1,
325 |                                                    std::memory_order_relaxed,
326 |                                                    std::memory_order_relaxed))
327 |                 return false; // Decrement undone; report that no permit was taken.
328 |         }
329 |     }
330 |     // Slow path of waitMany(): same scheme, taking up to max permits. Returns the number taken (0 if none).
331 |     ssize_t
332 |     waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)
333 |     {
334 |         assert(max > 0);
335 |         ssize_t oldCount;
336 |         int spin = m_maxSpins;
337 |         while (--spin >= 0)
338 |         {
339 |             oldCount = m_count.load(std::memory_order_relaxed);
340 |             if (oldCount > 0)
341 |             {
342 |                 ssize_t newCount = oldCount > max ? oldCount - max : 0; // Take min(oldCount, max) permits.
343 |                 if (m_count.compare_exchange_strong(oldCount,
344 |                                                     newCount,
345 |                                                     std::memory_order_acquire,
346 |                                                     std::memory_order_relaxed))
347 |                     return oldCount - newCount;
348 |             }
349 |             std::atomic_signal_fence(std::memory_order_acquire);
350 |         }
351 |         oldCount = m_count.fetch_sub(1, std::memory_order_acquire); // Take one permit or register as a waiter.
352 |         if (oldCount <= 0)
353 |         {
354 |             if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait())
355 |                 || (timeout_usecs > 0
356 |                     && !m_sema.timed_wait((std::uint64_t)timeout_usecs)))
357 |             {
358 |                 while (true) // No permit obtained from m_sema: undo the decrement unless a signal raced in.
359 |                 {
360 |                     oldCount = m_count.load(std::memory_order_acquire);
361 |                     if (oldCount >= 0 && m_sema.try_wait())
362 |                         break;
363 |                     if (oldCount < 0
364 |                         && m_count.compare_exchange_strong(
365 |                             oldCount,
366 |                             oldCount + 1,
367 |                             std::memory_order_relaxed,
368 |                             std::memory_order_relaxed))
369 |                         return 0;
370 |                 }
371 |             }
372 |         }
373 |         if (max > 1) // One permit is held here; greedily take up to max - 1 more without blocking.
374 |             return 1 + tryWaitMany(max - 1);
375 |         return 1;
376 |     }
377 | 
378 | public:
379 |     LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) // m_sema is left default-constructed.
380 |         : m_count(initialCount)
381 |         , m_maxSpins(maxSpins)
382 |     {
383 |         assert(initialCount >= 0);
384 |         assert(maxSpins >= 0);
385 |     }
386 |     // Takes one permit without spinning or blocking; false if m_count is not positive.
387 |     bool tryWait()
388 |     {
389 |         ssize_t oldCount = m_count.load(std::memory_order_relaxed);
390 |         while (oldCount > 0)
391 |         {
392 |             if (m_count.compare_exchange_weak(oldCount,
393 |                                               oldCount - 1,
394 |                                               std::memory_order_acquire,
395 |                                               std::memory_order_relaxed))
396 |                 return true;
397 |         }
398 |         return false;
399 |     }
400 |     // Takes one permit, spinning and then blocking without a time limit if necessary.
401 |     bool wait()
402 |     {
403 |         return tryWait() || waitWithPartialSpinning();
404 |     }
405 |     // As wait(), but gives up after timeout_usecs microseconds (negative: no limit, 0: never block).
406 |     bool wait(std::int64_t timeout_usecs)
407 |     {
408 |         return tryWait() || waitWithPartialSpinning(timeout_usecs);
409 |     }
410 | 
411 |     // Acquires between 0 and (greedily) max, inclusive
412 |     ssize_t tryWaitMany(ssize_t max)
413 |     {
414 |         assert(max >= 0);
415 |         ssize_t oldCount = m_count.load(std::memory_order_relaxed);
416 |         while (oldCount > 0)
417 |         {
418 |             ssize_t newCount = oldCount > max ? oldCount - max : 0;
419 |             if (m_count.compare_exchange_weak(oldCount,
420 |                                               newCount,
421 |                                               std::memory_order_acquire,
422 |                                               std::memory_order_relaxed))
423 |                 return oldCount - newCount; // Number of permits actually taken.
424 |         }
425 |         return 0;
426 |     }
427 | 
428 |     // Acquires at least one, and (greedily) at most max
429 |     ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
430 |     {
431 |         assert(max >= 0);
432 |         ssize_t result = tryWaitMany(max);
433 |         if (result == 0 && max > 0) // With max == 0 nothing is waited for and 0 is returned.
434 |             result = waitManyWithPartialSpinning(max, timeout_usecs);
435 |         return result;
436 |     }
437 |     // Same with no time limit; asserts that at least one permit was taken, so max must be > 0.
438 |     ssize_t waitMany(ssize_t max)
439 |     {
440 |         ssize_t result = waitMany(max, -1);
441 |         assert(result > 0);
442 |         return result;
443 |     }
444 |     // Adds count permits; m_sema is signaled only for waiters already counted in a negative m_count.
445 |     void signal(ssize_t count = 1)
446 |     {
447 |         assert(count >= 0);
448 |         ssize_t oldCount  = m_count.fetch_add(count, std::memory_order_release);
449 |         ssize_t toRelease = -oldCount < count ? -oldCount : count; // min(-oldCount, count)
450 |         if (toRelease > 0)
451 |         {
452 |             m_sema.signal((int)toRelease);
453 |         }
454 |     }
455 |     // Relaxed snapshot of the permits currently available (0 when m_count is not positive).
456 |     std::size_t availableApprox() const
457 |     {
458 |         ssize_t count = m_count.load(std::memory_order_relaxed);
459 |         return count > 0 ? static_cast<std::size_t>(count) : 0;
460 |     }
461 | };
462 | 
463 | } // end namespace moodycamel


--------------------------------------------------------------------------------
/include/coop/detail/promise.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "tracer.hpp"
  4 | #include <atomic>
  5 | #include <semaphore>
  6 | #include <thread>
  7 | #if defined(__clang__)
  8 | #    include <experimental/coroutine>
  9 | namespace std
 10 | {
 11 | using experimental::coroutine_handle;
 12 | using experimental::noop_coroutine;
 13 | using experimental::suspend_never;
 14 | } // namespace std
 15 | #else
 16 | #    include <coroutine>
 17 | #endif
 18 | 
 19 | namespace coop
 20 | {
 21 | namespace detail
 22 | {
 23 |     template <typename P, bool Joinable>
 24 |     struct final_awaiter_t // Awaiter returned by promise_t::final_suspend(); a specialization handles Joinable == true.
 25 |     {
 26 |         bool await_ready() const noexcept
 27 |         {
 28 |             return false; // Always suspend at the final suspend point.
 29 |         }
 30 | 
 31 |         void await_resume() const noexcept
 32 |         {
 33 |         }
 34 |         // Returns the coroutine to resume next: the installed continuation, or a no-op handle.
 35 |         std::coroutine_handle<>
 36 |         await_suspend(std::coroutine_handle<P> coroutine) const noexcept
 37 |         {
 38 |             // Check if this coroutine is being finalized from the
 39 |             // middle of a "continuation" coroutine and hop back there to
 40 |             // continue execution while *this* coroutine is suspended.
 41 | 
 42 |             COOP_LOG("Final await for coroutine %p on thread %zu\n",
 43 |                      coroutine.address(),
 44 |                      detail::thread_id());
 45 |             // After acquiring the flag, the other thread's write to the
 46 |             // coroutine's continuation must be visible (one-way
 47 |             // communication)
 48 |             if (coroutine.promise().flag.exchange(true, std::memory_order_acquire))
 49 |             {
 50 |                 // We're not the first to reach here, meaning the
 51 |                 // continuation is installed properly (if any)
 52 |                 auto continuation = coroutine.promise().continuation;
 53 |                 if (continuation)
 54 |                 {
 55 |                     COOP_LOG("Resuming continuation %p on %p on thread %zu\n",
 56 |                              continuation.address(),
 57 |                              coroutine.address(),
 58 |                              detail::thread_id());
 59 |                     return continuation;
 60 |                 }
 61 |                 else
 62 |                 {
 63 |                     COOP_LOG(
 64 |                         "Coroutine %p on thread %zu missing continuation\n",
 65 |                         coroutine.address(),
 66 |                         detail::thread_id());
 67 |                 }
 68 |             }
 69 |             return std::noop_coroutine(); // Flag was not yet set, or no continuation was stored: nothing to resume here.
 70 |         }
 71 |     };
 72 | 
 73 |     template <typename P>
 74 |     struct final_awaiter_t<P, true> // Final awaiter for joinable promises: releases join_sem, then destroys the coroutine.
 75 |     {
 76 |         bool await_ready() const noexcept
 77 |         {
 78 |             return false; // Always suspend at the final suspend point.
 79 |         }
 80 | 
 81 |         void await_resume() const noexcept
 82 |         {
 83 |         }
 84 |         // NOTE(review): join_sem is a member of the promise that destroy() tears down; confirm the joiner is done with it by then.
 85 |         void await_suspend(std::coroutine_handle<P> coroutine) const noexcept
 86 |         {
 87 |             coroutine.promise().join_sem.release(); // Wake whoever is blocked acquiring join_sem.
 88 |             coroutine.destroy(); // Destroy this coroutine (and with it the promise).
 89 |         }
 90 |     };
 91 | 
 92 |     // Helper for awaiting a task: installs `next` (the awaiting coroutine) as the
 93 |     // continuation of `base` (the awaited one) and returns the handle to resume now.
 94 |     template <typename P>
 95 |     std::coroutine_handle<>
 96 |     await_suspend(std::coroutine_handle<P> base, std::coroutine_handle<> next)
 97 |     {
 98 |         if constexpr (P::joinable_v) // Branch chosen at compile time from the promise type.
 99 |         {
100 |             // Joinable tasks are never awaited and so cannot have a
101 |             // continuation by definition
102 |             return std::noop_coroutine();
103 |         }
104 |         else
105 |         {
106 |             COOP_LOG("Installing continuation %p for %p on thread %zu\n",
107 |                      next.address(),
108 |                      base.address(),
109 |                      detail::thread_id());
110 |             base.promise().continuation = next;
111 |             // The write to the continuation must be visible to a person that
112 |             // acquires the flag
113 |             if (base.promise().flag.exchange(true, std::memory_order_release))
114 |             {
115 |                 // We're not the first to reach here, meaning the continuation
116 |                 // won't get read
117 |                 return next; // The flag was already set by base's final awaiter, so resume the awaiter directly.
118 |             }
119 |             return std::noop_coroutine(); // base's final awaiter will pick up the continuation later.
120 |         }
121 |     }
122 | 
123 |     // All promises need the `continuation` member, which is set when a
124 |     // coroutine is suspended within another coroutine. The `continuation`
125 |     // handle is used to hop back from that suspension point when the inner
126 |     // coroutine finishes.
127 |     template <bool Joinable>
128 |     struct promise_base_t
129 |     {
130 |         constexpr static bool joinable_v = Joinable; // Read by detail::await_suspend as P::joinable_v.
131 | 
132 |         // When a coroutine suspends, the continuation stores the handle to the
133 |         // resume point, which immediately follows the suspend point.
134 |         std::coroutine_handle<> continuation = nullptr;
135 |         // Set with exchange() by both detail::await_suspend and the non-joinable final awaiter; the second to arrive sees true.
136 |         std::atomic<bool> flag = false;
137 | 
138 |         // Do not suspend immediately on entry of a coroutine
139 |         std::suspend_never initial_suspend() const noexcept
140 |         {
141 |             return {};
142 |         }
143 | 
144 |         void unhandled_exception() const noexcept
145 |         {
146 |             // Coop doesn't currently handle exceptions: one that reaches here is dropped.
147 |         }
148 |     };
149 | 
150 |     // Joinable tasks need an additional semaphore the joiner can wait on.
151 |     template <>
152 |     struct promise_base_t<true> : public promise_base_t<false>
153 |     {
154 |         constexpr static bool joinable_v = true; // Hides the inherited promise_base_t<false>::joinable_v, which is false.
155 |         std::binary_semaphore join_sem{0}; // Starts with no permit; released by the joinable final awaiter.
156 |     };
157 |     template <typename Task, typename T, bool Joinable>
158 |     struct promise_t : public promise_base_t<Joinable> // Promise for coroutines that return a T; the result is kept in `data`.
159 |     {
160 |         T data; // Assigned by return_value(); T must therefore be default-constructible and assignable.
161 | 
162 |         Task get_return_object() noexcept
163 |         {
164 |             // Build the Task handed back to the caller from the handle of the
165 |             // coroutine that owns this promise.
166 |             return {std::coroutine_handle<promise_t>::from_promise(*this)};
167 |         }
168 |         // Stores the co_return value by copy.
169 |         void
170 |         return_value(T const& value) noexcept(std::is_nothrow_copy_assignable_v<T>)
171 |         {
172 |             data = value;
173 |         }
174 |         // Stores the co_return value by move.
175 |         void
176 |         return_value(T&& value) noexcept(std::is_nothrow_move_assignable_v<T>)
177 |         {
178 |             data = std::move(value);
179 |         }
180 |         // The Joinable flag selects which final_awaiter_t runs at the final suspend point.
181 |         final_awaiter_t<promise_t, Joinable> final_suspend() noexcept
182 |         {
183 |             return {};
184 |         }
185 |     };
186 | 
187 |     template <typename Task, bool Joinable>
188 |     struct promise_t<Task, void, Joinable> : public promise_base_t<Joinable> // Promise for coroutines with no result value.
189 |     {
190 |         Task get_return_object() noexcept
191 |         {
192 |             // Build the Task handed back to the caller from the handle of the
193 |             // coroutine that owns this promise.
194 |             return {std::coroutine_handle<promise_t>::from_promise(*this)};
195 |         }
196 |         // Nothing to store for a void coroutine.
197 |         void return_void() noexcept
198 |         {
199 |         }
200 |         // The Joinable flag selects which final_awaiter_t runs at the final suspend point.
201 |         final_awaiter_t<promise_t, Joinable> final_suspend() noexcept
202 |         {
203 |             return {};
204 |         }
205 |     };
206 | } // namespace detail
207 | } // namespace coop
208 | 


--------------------------------------------------------------------------------
/include/coop/detail/tracer.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | #include <thread>
 5 | 
 6 | namespace coop
 7 | {
 8 | namespace detail
 9 | {
10 |     inline size_t thread_id() noexcept // Hash of the calling thread's std::thread::id; passed to COOP_LOG for its %zu fields.
11 |     {
12 |         return std::hash<std::thread::id>{}(std::this_thread::get_id());
13 |     }
14 | } // namespace detail
15 | } // namespace coop
16 | 
17 | #if defined(COOP_TRACE) && !defined(NDEBUG) // Tracing needs COOP_TRACE defined and NDEBUG not defined.
18 | #    include <cstdio>
19 | // COOP_LOG forwards its printf-style arguments to std::printf.
20 | #    define COOP_LOG(...) std::printf(__VA_ARGS__)
21 | 
22 | #else // Otherwise COOP_LOG expands to nothing, so its arguments are never evaluated.
23 | #    define COOP_LOG(...)
24 | #endif


--------------------------------------------------------------------------------
/include/coop/detail/work_queue.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "api.hpp"
 4 | #include "concurrentqueue.h"
 5 | #include <atomic>
 6 | #include <coop/source_location.hpp>
 7 | #include <semaphore>
 8 | #if defined(__clang__)
 9 | #    include <experimental/coroutine>
10 | namespace std
11 | {
12 | using experimental::coroutine_handle;
13 | }
14 | #else
15 | #    include <coroutine>
16 | #endif
17 | #include <thread>
18 | 
19 | // Currently, COOP supports exactly two priority levels, 0 (default) and 1
20 | // (high)
21 | #define COOP_PRIORITY_COUNT 2
22 | 
23 | namespace coop
24 | {
25 | class scheduler_t;
26 | 
27 | namespace detail
28 | {
29 |     class COOP_API work_queue_t // Holds one coroutine queue per priority level; neither copyable nor movable.
30 |     {
31 |     public:
32 |         work_queue_t(scheduler_t& scheduler, uint32_t id);
33 |         ~work_queue_t() noexcept;
34 |         work_queue_t(work_queue_t const&) = delete;
35 |         work_queue_t(work_queue_t&&)      = delete;
36 |         work_queue_t& operator=(work_queue_t const&) = delete;
37 |         work_queue_t& operator=(work_queue_t&&) = delete;
38 | 
39 |         // Returns the approximate size across all queues of any priority
40 |         size_t size_approx() const noexcept
41 |         {
42 |             size_t out = 0;
43 |             for (size_t i = 0; i != COOP_PRIORITY_COUNT; ++i)
44 |             {
45 |                 out += queues_[i].size_approx(); // Sum of the per-queue estimates.
46 |             }
47 |             return out;
48 |         }
49 |         // Declared here only; presumably `priority` indexes queues_ (0 or 1) - confirm in the implementation.
50 |         void enqueue(std::coroutine_handle<> coroutine,
51 |                      uint32_t priority                 = 0,
52 |                      source_location_t source_location = {});
53 | 
54 |     private:
55 |         scheduler_t& scheduler_;
56 |         uint32_t id_;
57 |         std::thread thread_;
58 |         std::atomic<bool> active_;
59 |         std::counting_semaphore<> sem_;
60 |         // One queue per priority level (COOP_PRIORITY_COUNT of them).
61 |         moodycamel::ConcurrentQueue<std::coroutine_handle<>>
62 |             queues_[COOP_PRIORITY_COUNT];
63 | 
64 |         char label_[64];
65 |     };
66 | } // namespace detail
67 | } // namespace coop


--------------------------------------------------------------------------------
/include/coop/event.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "detail/api.hpp"
  4 | #if defined(__clang__)
  5 | #    include <experimental/coroutine>
  6 | namespace std
  7 | {
  8 | using experimental::coroutine_handle;
  9 | }
 10 | #else
 11 | #    include <coroutine>
 12 | #endif
 13 | #include <cstdint>
 14 | 
 15 | namespace coop
 16 | {
 17 | class scheduler_t;
 18 | 
 19 | // Non-owning reference to an event
 20 | class COOP_API event_ref_t
 21 | {
 22 | public:
 23 |     enum class status_e // Outcome category reported in wait_result_t::status.
 24 |     {
 25 |         normal,
 26 |         abandoned,
 27 |         timeout,
 28 |         failed
 29 |     };
 30 |     // Result of wait_many: a status plus an event index (0 unless set otherwise).
 31 |     struct wait_result_t
 32 |     {
 33 |         status_e status;
 34 |         uint32_t index = 0;
 35 |     };
 36 | 
 37 |     // Return the index of the first event signaled in a given array of events
 38 |     static wait_result_t wait_many(event_ref_t* events, uint32_t count);
 39 | 
 40 |     event_ref_t() = default;
 41 | #if defined(_WIN32) || defined(__linux__)
 42 |     event_ref_t(void* handle) noexcept // Wraps an existing handle; not explicit, so void* converts implicitly.
 43 |         : handle_{handle}
 44 |     {
 45 |     }
 46 | #elif (__APPLE__)
 47 |     // TODO: MacOS/iOS implementation
 48 | #endif
 49 |     event_ref_t(event_ref_t&&)      = default;
 50 |     event_ref_t(event_ref_t const&) = default;
 51 |     event_ref_t& operator=(event_ref_t&&) = default;
 52 |     event_ref_t& operator=(event_ref_t const&) = default;
 53 |     // Declared here only; manual_reset presumably selects manual reset vs. reset-after-wait (see reset()) - confirm.
 54 |     void init(bool manual_reset = false, char const* label = nullptr);
 55 | 
 56 |     // Check if this event is signaled (returns immediately)
 57 |     bool is_signaled() const;
 58 |     operator bool() const noexcept // Implicit conversion; same result as is_signaled().
 59 |     {
 60 |         return is_signaled();
 61 |     }
 62 | 
 63 |     // Wait (potentially indefinitely) for this event to be signaled
 64 |     bool wait() const;
 65 | 
 66 |     // Mark this event as signaled
 67 |     void signal();
 68 | 
 69 |     // Mark this event as unsignaled (needed for events that are manually reset,
 70 |     // as opposed to reset after wait)
 71 |     void reset();
 72 | 
 73 | protected:
 74 |     friend class event_t;
 75 | 
 76 | #if defined(_WIN32) || defined(__linux__)
 77 |     void* handle_ = nullptr; // Opaque platform handle; nullptr when default-constructed.
 78 | #elif (__APPLE__)
 79 |     // TODO: MacOS/iOS implementation
 80 | #endif
 81 | };
 82 | 
 83 | class COOP_API event_t final : public event_ref_t // Move-only event that is also an awaitable (see the awaiter traits below).
 84 | {
 85 | public:
 86 |     event_t() = default;
 87 | #if defined(_WIN32) || defined(__linux__)
 88 |     event_t(void* handle) noexcept // Wraps an existing handle by forwarding it to event_ref_t.
 89 |         : event_ref_t{handle}
 90 |     {
 91 |     }
 92 | #elif (__APPLE__)
 93 |     // TODO: MacOS/iOS implementation
 94 | #endif
 95 |     ~event_t() noexcept;
 96 |     event_t(event_t const& other) = delete;
 97 |     event_t& operator=(event_t const& other) = delete;
 98 |     event_t(event_t&& other) noexcept;
 99 |     event_t& operator=(event_t&& other) noexcept;
100 |     // Declared here only; presumably a non-owning event_ref_t to this event - confirm in the implementation.
101 |     event_ref_t ref() const noexcept;
102 | 
103 |     // The CPU affinity and priority set here are used to consider the
104 |     // *continuation* after this event is signaled
105 |     void set_cpu_affinity(uint64_t affinity) noexcept // 64-bit, matching cpu_affinity_ and scheduler_t::schedule.
106 |     {
107 |         cpu_affinity_ = affinity;
108 |     }
109 | 
110 |     void set_priority(uint32_t priority) noexcept
111 |     {
112 |         priority_ = priority;
113 |     }
114 | 
115 |     // Awaiter traits
116 |     bool await_ready() const noexcept
117 |     {
118 |         return is_signaled(); // Skip suspension when the event is already signaled.
119 |     }
120 | 
121 |     void await_resume() const noexcept
122 |     {
123 |     }
124 | 
125 |     // Enqueue coroutine for resumption when this event transitions to the
126 |     // signaled state
127 |     void await_suspend(std::coroutine_handle<> coroutine) noexcept;
128 | 
129 | private:
130 |     uint64_t cpu_affinity_ = 0;
131 |     uint32_t priority_     = 0;
132 | };
133 | } // namespace coop


--------------------------------------------------------------------------------
/include/coop/scheduler.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "detail/api.hpp"
  4 | #include "detail/concurrentqueue.h"
  5 | #include "detail/work_queue.hpp"
  6 | #include "event.hpp"
  7 | #include "source_location.hpp"
  8 | #include <atomic>
  9 | #if defined(__clang__)
 10 | #    include <experimental/coroutine>
 11 | namespace std
 12 | {
 13 | using experimental::coroutine_handle;
 14 | }
 15 | #else
 16 | #    include <coroutine>
 17 | #endif
 18 | #include <cstdint>
 19 | #include <thread>
 20 | 
 21 | namespace coop
 22 | {
 23 | class event_ref_t;
 24 | 
 25 | template <typename S> // Satisfied when the schedule(...) call below is a valid expression for S.
 26 | concept Scheduler = requires(S scheduler,
 27 |                              std::coroutine_handle<> coroutine,
 28 |                              uint64_t cpu_affinity,
 29 |                              uint32_t priority,
 30 |                              source_location_t source_location)
 31 | {
 32 |     scheduler.schedule(coroutine, cpu_affinity, priority, source_location); // The return type is not constrained.
 33 | };
 34 | 
 35 | // Implement the Scheduler concept above to use your own coroutine scheduler
 36 | class COOP_API scheduler_t final
 37 | {
 38 | public:
 39 |     // Returns the default global threadsafe scheduler
 40 |     static scheduler_t& instance() noexcept;
 41 | 
 42 |     scheduler_t();
 43 |     ~scheduler_t() noexcept;
 44 |     scheduler_t(scheduler_t const&) = delete;
 45 |     scheduler_t(scheduler_t&&)      = delete;
 46 |     scheduler_t& operator=(scheduler_t const&) = delete;
 47 |     scheduler_t& operator=(scheduler_t&&) = delete; // Conventional T& return type, matching the copy assignment above.
 48 | 
 49 |     // Schedules a coroutine to be resumed at a later time as soon as a thread
 50 |     // is available. If you wish to provide your own custom scheduler, you can
 51 |     // schedule the coroutine in a single-threaded context, or with different
 52 |     // runtime behavior.
 53 |     //
 54 |     // In addition, you are free to handle or ignore the cpu affinity and
 55 |     // priority parameters differently. The default scheduler here supports TWO
 56 |     // priorities: 0 and 1. Coroutines with priority 1 will (in a best-effort
 57 |     // sense), be scheduled ahead of coroutines with priority 0.
 58 |     void schedule(std::coroutine_handle<> coroutine,
 59 |                   uint64_t cpu_affinity             = 0,
 60 |                   uint32_t priority                 = 0,
 61 |                   source_location_t source_location = {});
 62 |     // Overload taking an event; presumably the coroutine is scheduled once `event` is signaled - confirm in the implementation.
 63 |     void schedule(std::coroutine_handle<> coroutine,
 64 |                   event_ref_t event,
 65 |                   uint64_t cpu_affinity,
 66 |                   uint32_t priority);
 67 | 
 68 | private:
 69 |     friend class detail::work_queue_t;
 70 |     // Mirrors the parameters of the event overload of schedule().
 71 |     struct event_continuation_t
 72 |     {
 73 |         std::coroutine_handle<> coroutine;
 74 |         event_ref_t event;
 75 |         uint64_t cpu_affinity;
 76 |         uint32_t priority;
 77 |     };
 78 | 
 79 |     std::thread event_thread_;
 80 |     size_t event_count_    = 0;
 81 |     size_t event_capacity_ = 0;
 82 |     event_t event_thread_signal_;
 83 |     event_ref_t* events_                       = nullptr;
 84 |     event_continuation_t* event_continuations_ = nullptr;
 85 |     size_t temp_storage_size_                  = 0;
 86 |     event_continuation_t* temp_storage_        = nullptr;
 87 |     moodycamel::ConcurrentQueue<event_continuation_t> pending_events_;
 88 | 
 89 |     std::atomic<bool> active_;
 90 | 
 91 |     // Allocated as an array. One queue is assigned to each CPU
 92 |     detail::work_queue_t* queues_ = nullptr;
 93 | 
 94 |     // Used to perform a low-discrepancy selection of work queue to enqueue a
 95 |     // coroutine to
 96 |     std::atomic<uint32_t> update_;
 97 | 
 98 |     // Specifically, this is the number of concurrent threads possible, which
 99 |     // may be double the physical CPU count if hyperthreading or similar
100 |     // technology is enabled
101 |     uint32_t cpu_count_;
102 |     uint32_t cpu_mask_;
103 | };
104 | } // namespace coop


--------------------------------------------------------------------------------
/include/coop/source_location.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "detail/api.hpp"
 4 | #include <cstddef>
 5 | 
 6 | namespace coop
 7 | {
 8 | // Temporary source location representation until <source_location> is more
 9 | // widely available. A default-constructed value denotes an unknown location.
10 | struct COOP_API source_location_t
11 | {
12 |     char const* file = nullptr; // Originating file name, nullptr if unknown
13 |     size_t line      = 0;       // Originating line number, 0 if unknown
14 | };
15 | } // namespace coop


--------------------------------------------------------------------------------
/include/coop/task.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "detail/api.hpp"
  4 | #include "detail/promise.hpp"
  5 | #include "detail/tracer.hpp"
  6 | #include "scheduler.hpp"
  7 | #include "source_location.hpp"
  8 | #include <cstdlib>
  9 | #include <functional>
 10 | #include <limits>
 11 | #if defined(__clang__)
 12 | #    include <experimental/coroutine>
 13 | namespace std
 14 | {
 15 | using experimental::coroutine_handle;
 16 | using experimental::noop_coroutine;
 17 | using experimental::suspend_never;
 18 | } // namespace std
 19 | #else
 20 | #    include <coroutine>
 21 | #endif
 22 | 
 23 | namespace coop
 24 | {
 25 | // Handle to a coroutine whose promise type is detail::promise_t. T is the
 26 | // type of the value the coroutine produces (void for none). Only tasks
 27 | // declared with Joinable = true may be blocked on with join().
 28 | template <typename T = void, bool Joinable = false>
 29 | class task_t
 30 | {
 31 | public:
 32 |     using promise_type = detail::promise_t<task_t, T, Joinable>;
 33 | 
 34 |     task_t() noexcept = default;
 35 |     task_t(std::coroutine_handle<promise_type> coroutine) noexcept
 36 |         : coroutine_{coroutine}
 37 |     {
 38 |     }
 39 |     task_t(task_t const&) = delete;
 40 |     task_t& operator=(task_t const&) = delete;
 41 | 
 42 |     // Moving transfers the coroutine handle and leaves `other` empty
 43 |     task_t(task_t&& other) noexcept
 44 |         : coroutine_{other.coroutine_}
 45 |     {
 46 |         other.coroutine_ = nullptr;
 47 |     }
 48 |     task_t& operator=(task_t&& other) noexcept
 49 |     {
 50 |         if (this != &other)
 51 |         {
 52 |             // For joinable tasks, the coroutine is destroyed in the final
 53 |             // awaiter to support fire-and-forget semantics
 54 |             if constexpr (!Joinable)
 55 |             {
 56 |                 if (coroutine_)
 57 |                 {
 58 |                     coroutine_.destroy();
 59 |                 }
 60 |             }
 61 |             coroutine_       = other.coroutine_;
 62 |             other.coroutine_ = nullptr;
 63 |         }
 64 |         return *this;
 65 |     }
 66 | 
 67 |     // Only non-joinable tasks destroy their coroutine from the handle
 68 |     ~task_t() noexcept
 69 |     {
 70 |         if constexpr (!Joinable)
 71 |         {
 72 |             if (coroutine_)
 73 |             {
 74 |                 coroutine_.destroy();
 75 |             }
 76 |         }
 77 |     }
 78 | 
 79 |     // The dereferencing operators below return the data contained in the
 80 |     // associated promise, wrapped in a std::reference_wrapper. They must not
 81 |     // be used on an empty task.
 82 |     [[nodiscard]] auto operator*() noexcept
 83 |     {
 84 |         static_assert(
 85 |             !std::is_same_v<T, void>, "This task doesn't contain any data");
 86 |         return std::ref(promise().data);
 87 |     }
 88 | 
 89 |     [[nodiscard]] auto operator*() const noexcept
 90 |     {
 91 |         static_assert(
 92 |             !std::is_same_v<T, void>, "This task doesn't contain any data");
 93 |         return std::cref(promise().data);
 94 |     }
 95 | 
 96 |     // A task_t is truthy if it is not associated with an outstanding
 97 |     // coroutine or the coroutine it is associated with is complete
 98 |     [[nodiscard]] operator bool() const noexcept
 99 |     {
100 |         return await_ready();
101 |     }
102 | 
103 |     [[nodiscard]] bool await_ready() const noexcept
104 |     {
105 |         return !coroutine_ || coroutine_.done();
106 |     }
107 | 
108 |     // Blocks the calling thread on the promise's join semaphore. Only
109 |     // available when Joinable is true.
110 |     void join()
111 |     {
112 |         static_assert(Joinable,
113 |                       "Cannot join a task without the Joinable type "
114 |                       "parameter "
115 |                       "set");
116 |         // An empty task (default-constructed or moved-from) has no promise
117 |         // to wait on. It already counts as ready, so there is nothing to
118 |         // join and dereferencing the null handle must be avoided.
119 |         if (!coroutine_)
120 |         {
121 |             return;
122 |         }
123 |         coroutine_.promise().join_sem.acquire();
124 |     }
125 | 
126 |     // When suspending from a coroutine *within* this task's coroutine, save
127 |     // the resume point (to be resumed when the inner coroutine finalizes)
128 |     std::coroutine_handle<> await_suspend(std::coroutine_handle<> coroutine) noexcept
129 |     {
130 |         return detail::await_suspend(coroutine_, coroutine);
131 |     }
132 | 
133 |     // The return value of await_resume is the final result of `co_await
134 |     // this_task` once the coroutine associated with this task completes.
135 |     // For non-void T the value is moved out of the promise.
136 |     auto await_resume() const noexcept
137 |     {
138 |         if constexpr (std::is_same_v<T, void>)
139 |         {
140 |             return;
141 |         }
142 |         else
143 |         {
144 |             return std::move(promise().data);
145 |         }
146 |     }
147 | 
148 | protected:
149 |     [[nodiscard]] promise_type& promise() const noexcept
150 |     {
151 |         return coroutine_.promise();
152 |     }
153 | 
154 |     std::coroutine_handle<promise_type> coroutine_ = nullptr;
155 | };
138 | 
139 | // Suspend the current coroutine to be scheduled for execution on a different
140 | // thread by the supplied scheduler. Remember to `co_await` this function's
141 | // returned value.
142 | //
143 | // The least significant bit of the CPU mask corresponds to CPU 0. With the
144 | // default scheduler_t, a non-zero mask restricts the coroutine to the CPUs
145 | // whose bits are set, and a mask of 0 allows any CPU.
146 | //
147 | // Threadsafe only if S::schedule is threadsafe (the default one provided is
148 | // threadsafe).
149 | template <Scheduler S = scheduler_t>
150 | inline auto suspend(S& scheduler                             = S::instance(),
151 |                     uint64_t cpu_mask                        = 0,
152 |                     uint32_t priority                        = 0,
153 |                     source_location_t const& source_location = {}) noexcept
154 | {
155 |     struct awaiter_t
156 |     {
157 |         S& scheduler; // Must be S (not scheduler_t) for custom schedulers
158 |         uint64_t cpu_mask;
159 |         uint32_t priority;
160 |         source_location_t source_location;
161 | 
162 |         bool await_ready() const noexcept
163 |         {
164 |             return false;
165 |         }
166 | 
167 |         void await_resume() const noexcept
168 |         {
169 |         }
170 | 
171 |         void await_suspend(std::coroutine_handle<> coroutine) const noexcept
172 |         {
173 |             scheduler.schedule(coroutine, cpu_mask, priority, source_location);
174 |         }
175 |     };
176 | 
177 |     return awaiter_t{scheduler, cpu_mask, priority, source_location};
178 | }
179 | 
180 | // Convenience wrappers around ::coop::suspend that record the call site as
181 | // {__FILE__, __LINE__}. COOP_SUSPEND3 is the general form; every other
182 | // variant forwards to it with the default scheduler instance and/or zeros
183 | // substituted for the arguments it does not take.
184 | #define COOP_SUSPEND3(scheduler, cpu_mask, priority) \
185 |     co_await ::coop::suspend(                        \
186 |         scheduler, cpu_mask, priority, {__FILE__, __LINE__})
187 | 
188 | #define COOP_SUSPEND() COOP_SUSPEND3(::coop::scheduler_t::instance(), 0, 0)
189 | 
190 | #define COOP_SUSPEND1(scheduler) COOP_SUSPEND3(scheduler, 0, 0)
191 | 
192 | #define COOP_SUSPEND2(scheduler, cpu_mask) \
193 |     COOP_SUSPEND3(scheduler, cpu_mask, 0)
194 | 
195 | #define COOP_SUSPEND4(cpu_mask) \
196 |     COOP_SUSPEND3(::coop::scheduler_t::instance(), cpu_mask, 0)
197 | 
198 | #define COOP_SUSPEND5(cpu_mask, priority) \
199 |     COOP_SUSPEND3(::coop::scheduler_t::instance(), cpu_mask, priority)
203 | } // namespace coop
204 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Headers and sources that make up the coop library target
 2 | set(COOP_SOURCES
 3 |     ../include/coop/event.hpp
 4 |     ../include/coop/scheduler.hpp
 5 |     ../include/coop/source_location.hpp
 6 |     ../include/coop/task.hpp
 7 |     ../include/coop/detail/api.hpp
 8 |     ../include/coop/detail/blockingconcurrentqueue.h
 9 |     ../include/coop/detail/concurrentqueue.h
10 |     ../include/coop/detail/lightweightsemaphore.h
11 |     ../include/coop/detail/promise.hpp
12 |     ../include/coop/detail/tracer.hpp
13 |     ../include/coop/detail/work_queue.hpp
14 |     event.cpp
15 |     scheduler.cpp
16 |     work_queue.cpp
17 | )
18 | 
19 | # Group the files by their layout relative to the parent of this directory
20 | source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR}/.. FILES ${COOP_SOURCES})
21 | 
22 | add_library(coop ${COOP_SOURCES})
23 | add_library(coop::coop ALIAS coop)
24 | 
25 | target_link_libraries(coop PUBLIC coop_core)
26 | 
27 | # COOP_IMPL is defined only while compiling coop itself as a shared library
28 | if(BUILD_SHARED_LIBS)
29 |     target_compile_definitions(coop PRIVATE COOP_IMPL)
30 | endif()


--------------------------------------------------------------------------------
/src/event.cpp:
--------------------------------------------------------------------------------
  1 | #include <coop/event.hpp>
  2 | 
  3 | #include <coop/scheduler.hpp>
  4 | #include <utility>
  5 | 
  6 | #if defined(_WIN32)
  7 | #    define WIN32_LEAN_AND_MEAN
  8 | #    include <Windows.h>
  9 | #elif defined(__linux__)
 10 | # include <pthread.h>
 11 | #elif (__APPLE__)
 12 | #endif
 13 | 
 14 | using namespace coop;
 15 | 
 16 | // Blocks until one of the `count` events starting at `events` is signaled and
 17 | // reports which one. For the `normal` and `abandoned` statuses the second
 18 | // member of the result is the zero-based position of that event in `events`.
 19 | // Always reports `failed` on platforms without an implementation.
 20 | event_ref_t::wait_result_t
 21 | event_ref_t::wait_many(event_ref_t* events, uint32_t count)
 22 | {
 23 | #if defined(_WIN32)
 24 |     // The array of refs is handed to the OS as an array of HANDLEs
 25 |     static_assert(sizeof(event_ref_t) == sizeof(HANDLE));
 26 |     uint32_t result = WaitForMultipleObjects(
 27 |         count, reinterpret_cast<HANDLE*>(events), false, INFINITE);
 28 | 
 29 |     assert(result != WAIT_FAILED && "Failed to await events");
 30 | 
 31 |     if (result == WAIT_FAILED)
 32 |     {
 33 |         return {status_e::failed};
 34 |     }
 35 |     else if (result == WAIT_TIMEOUT)
 36 |     {
 37 |         return {status_e::timeout};
 38 |     }
 39 | 
 40 |     if (result < WAIT_ABANDONED_0)
 41 |     {
 42 |         return {status_e::normal, result - WAIT_OBJECT_0};
 43 |     }
 44 |     else
 45 |     {
 46 |         // Abandoned results are offset from WAIT_ABANDONED_0, not from
 47 |         // WAIT_OBJECT_0
 48 |         return {status_e::abandoned, result - WAIT_ABANDONED_0};
 49 |     }
 50 | 
 51 | #elif defined(__linux__)
 52 |     // TODO: Android/Linux implementation
 53 | #elif (__APPLE__)
 54 |     // TODO: MacOS/iOS implementation
 55 | #endif
 56 | #if !defined(_WIN32)
 57 |     // Nothing can be waited on yet; report failure rather than flowing off
 58 |     // the end of a value-returning function
 59 |     static_cast<void>(events);
 60 |     static_cast<void>(count);
 61 |     return {status_e::failed};
 62 | #endif
 63 | }
 50 | 
 51 | // Creates the underlying OS event. Currently only implemented for Windows.
 52 | void event_ref_t::init(bool manual_reset, char const* label)
 53 | {
 54 | #if defined(_WIN32)
 55 |     // Default security attributes, initially non-signaled
 56 |     auto const created = CreateEventA(nullptr, manual_reset, false, label);
 57 |     handle_            = created;
 58 | #elif defined(__linux__)
 59 |     // TODO: Android/Linux implementation
 60 | #elif (__APPLE__)
 61 |     // TODO: MacOS/iOS implementation
 62 | #endif
 63 | }
 61 | 
 62 | // Polls the event without blocking. Returns true only if it is currently
 63 | // signaled; always false on platforms without an implementation.
 64 | bool event_ref_t::is_signaled() const
 65 | {
 66 | #if defined(_WIN32)
 67 |     // A zero timeout makes the wait return immediately
 68 |     uint32_t status = WaitForSingleObject(handle_, 0);
 69 |     return status == WAIT_OBJECT_0;
 70 | #elif defined(__linux__)
 71 |     // TODO: Android/Linux implementation
 72 |     return false;
 73 | #else
 74 |     // TODO: MacOS/iOS implementation
 75 |     return false;
 76 | #endif
 77 | }
 73 | 
 74 | // Blocks the calling thread until the event is signaled. Returns true if the
 75 | // wait completed because of a signal; always false on platforms without an
 76 | // implementation.
 77 | bool event_ref_t::wait() const
 78 | {
 79 | #if defined(_WIN32)
 80 |     uint32_t status = WaitForSingleObject(handle_, INFINITE);
 81 |     return status == WAIT_OBJECT_0;
 82 | #elif defined(__linux__)
 83 |     // TODO: Android/Linux implementation
 84 |     return false;
 85 | #else
 86 |     // TODO: MacOS/iOS implementation
 87 |     return false;
 88 | #endif
 89 | }
 85 | 
 86 | // Puts the event into the signaled state (Windows only for now)
 87 | void event_ref_t::signal()
 88 | {
 89 | #if defined(_WIN32)
 90 |     // The success flag returned by the OS is intentionally not inspected
 91 |     static_cast<void>(SetEvent(handle_));
 92 | #elif defined(__linux__)
 93 |     // TODO: Android/Linux implementation
 94 | #elif (__APPLE__)
 95 |     // TODO: MacOS/iOS implementation
 96 | #endif
 97 | }
 96 | 
 97 | // Puts the event back into the non-signaled state (Windows only for now)
 98 | void event_ref_t::reset()
 99 | {
100 | #if defined(_WIN32)
101 |     // The success flag returned by the OS is intentionally not inspected
102 |     static_cast<void>(ResetEvent(handle_));
103 | #elif defined(__linux__)
104 |     // TODO: Android/Linux implementation
105 | #elif (__APPLE__)
106 |     // TODO: MacOS/iOS implementation
107 | #endif
108 | }
107 | 
108 | // Hands the suspended coroutine to the global scheduler, which resumes it
109 | // once this event transitions to the signaled state
110 | void event_t::await_suspend(std::coroutine_handle<> coroutine) noexcept
111 | {
112 |     scheduler_t& scheduler = scheduler_t::instance();
113 |     scheduler.schedule(coroutine, ref(), cpu_affinity_, priority_);
114 | }
114 | 
115 | // Releases the OS event owned by this object, if any
116 | event_t::~event_t() noexcept
117 | {
118 | #if defined(_WIN32)
119 |     if (!handle_)
120 |     {
121 |         // Never initialized, or ownership was moved elsewhere
122 |         return;
123 |     }
124 |     CloseHandle(handle_);
125 | #elif defined(__linux__)
126 |     // TODO: Android/Linux implementation
127 | #elif (__APPLE__)
128 |     // TODO: MacOS/iOS implementation
129 | #endif
130 | }
128 | 
129 | // Move construction is expressed through move assignment.
130 | // NOTE(review): the assignment swaps handles, so this presumably relies on
131 | // handle_ having a default member initializer in the header - confirm.
132 | event_t::event_t(event_t&& other) noexcept
133 | {
134 |     operator=(std::move(other));
135 | }
133 | 
134 | // Move assignment exchanges the handles of the two events, so whatever this
135 | // object held before ends up in `other`
136 | event_t& event_t::operator=(event_t&& other) noexcept
137 | {
138 |     if (this == &other)
139 |     {
140 |         return *this;
141 |     }
142 | 
143 | #if defined(_WIN32)
144 |     std::swap(handle_, other.handle_);
145 | #elif defined(__linux__)
146 |     // TODO: Android/Linux implementation
147 | #elif (__APPLE__)
148 |     // TODO: MacOS/iOS implementation
149 | #endif
150 |     return *this;
151 | }
148 | 
149 | // Returns a reference object that shares this event's handle. On platforms
150 | // without an implementation a default-constructed reference is returned.
151 | event_ref_t event_t::ref() const noexcept
152 | {
153 |     event_ref_t out;
154 | #if defined(_WIN32)
155 |     out.handle_ = handle_;
156 | #elif defined(__linux__)
157 |     // TODO: Android/Linux implementation
158 | #elif (__APPLE__)
159 |     // TODO: MacOS/iOS implementation
160 | #endif
161 |     // Returning on every path avoids flowing off the end of a
162 |     // value-returning function on non-Windows builds
163 |     return out;
164 | }
161 | 


--------------------------------------------------------------------------------
/src/scheduler.cpp:
--------------------------------------------------------------------------------
  1 | #include <coop/scheduler.hpp>
  2 | 
  3 | #include <bit>
  4 | #include <cassert>
  5 | #include <coop/detail/tracer.hpp>
  6 | #include <cstdlib>
  7 | #include <cstring>
  8 | #include <numbers>
  9 | #include <thread>
 10 | 
 11 | using namespace coop;
 12 | 
 13 | // Returns the shared scheduler, which is constructed on the first call
 14 | scheduler_t& scheduler_t::instance() noexcept
 15 | {
 16 |     static scheduler_t singleton;
 17 |     return singleton;
 18 | }
 18 | 
 19 | // Creates one work queue per hardware thread and, on Windows, starts the
 20 | // thread that waits on events on behalf of suspended coroutines.
 21 | scheduler_t::scheduler_t()
 22 | {
 23 |     // Determine CPU count
 24 |     cpu_count_ = std::thread::hardware_concurrency();
 25 |     assert(cpu_count_ > 0 && cpu_count_ <= 64
 26 |            && "Coop does not yet support CPUs with more than 64 cores");
 27 | 
 28 |     // Exactly one bit per work queue. The shift is performed on an unsigned
 29 |     // 32-bit value and saturates, because shifting by the full width of the
 30 |     // type is undefined.
 31 |     cpu_mask_ = cpu_count_ >= 32 ? ~uint32_t{0}
 32 |                                  : (uint32_t{1} << cpu_count_) - 1;
 33 | 
 34 |     COOP_LOG("Spawning coop scheduler with %i threads\n", cpu_count_);
 35 | 
 36 |     // work_queue_t takes constructor arguments, so the array is allocated
 37 |     // raw and each element is constructed in place
 38 |     void* raw = operator new[](sizeof(detail::work_queue_t) * cpu_count_);
 39 |     queues_   = static_cast<detail::work_queue_t*>(raw);
 40 | 
 41 |     for (decltype(cpu_count_) i = 0; i != cpu_count_; ++i)
 42 |     {
 43 |         new (queues_ + i) detail::work_queue_t(*this, i);
 44 |     }
 45 | 
 46 |     // Initialize room for 32 events. Slot 0 of events_ is reserved for the
 47 |     // event thread's own wake-up signal, so the continuation for events_[i]
 48 |     // lives at event_continuations_[i - 1].
 49 |     event_capacity_      = 32;
 50 |     event_count_         = 1;
 51 |     events_              = new event_ref_t[event_capacity_];
 52 |     event_continuations_ = new event_continuation_t[event_capacity_ - 1];
 53 |     event_thread_signal_.init(false, "coop_main_event");
 54 |     events_[0] = event_thread_signal_;
 55 | 
 56 |     // A high quality PRNG number isn't needed here, as this update counter is
 57 |     // used to drive a low discrepancy sequence
 58 |     update_ = std::rand();
 59 | 
 60 |     // Set before the event thread exists. If the thread set this flag itself,
 61 |     // it could overwrite a shutdown request issued by the destructor before
 62 |     // the thread got to run, and the flag would stay uninitialized on
 63 |     // platforms that do not start the thread at all.
 64 |     active_ = true;
 65 | 
 66 | #ifdef _WIN32
 67 |     event_thread_ = std::thread([this] {
 68 |         while (active_)
 69 |         {
 70 |             auto [status, index] = event_t::wait_many(
 71 |                 events_, static_cast<uint32_t>(event_count_));
 72 | 
 73 |             if (status == event_ref_t::status_e::failed
 74 |                 || status == event_ref_t::status_e::timeout)
 75 |             {
 76 |                 continue;
 77 |             }
 78 | 
 79 |             if (index == 0)
 80 |             {
 81 |                 // The event at index 0 is special in that it is used to
 82 |                 // indicate the availability of additional events or to stop
 83 |                 // this thread
 84 |                 if (!active_)
 85 |                 {
 86 |                     return;
 87 |                 }
 88 | 
 89 |                 // Dequeue continuation requests from the concurrent queue in
 90 |                 // bulk
 91 |                 size_t size = pending_events_.size_approx();
 92 | 
 93 |                 // Resize arrays holding event refs and coroutines if necessary
 94 |                 if (size + event_count_ > event_capacity_)
 95 |                 {
 96 |                     event_capacity_     = size + event_count_ * 2;
 97 |                     event_ref_t* events = new event_ref_t[event_capacity_];
 98 |                     // Carry the live refs over from the old array into the
 99 |                     // new one before the old array is released
100 |                     for (size_t i = 0; i != event_count_; ++i)
101 |                     {
102 |                         events[i] = events_[i];
103 |                     }
104 |                     delete[] events_;
105 |                     events_ = events;
106 | 
107 |                     event_continuation_t* event_continuations
108 |                         = new event_continuation_t[event_capacity_ - 1];
109 |                     for (size_t i = 0; i != event_count_ - 1; ++i)
110 |                     {
111 |                         // Use moves here instead of a memcpy in case
112 |                         // std::coroutine_handle<> has a non-trivial move
113 |                         event_continuations[i]
114 |                             = std::move(event_continuations_[i]);
115 |                     }
116 |                     delete[] event_continuations_;
117 |                     event_continuations_ = event_continuations;
118 |                 }
119 | 
120 |                 // Note that the number of items we actually dequeue may be more
121 |                 // than originally advertised
122 |                 size = pending_events_.try_dequeue_bulk(
123 |                     event_continuations_ + event_count_ - 1,
124 |                     event_capacity_ - event_count_);
125 | 
126 |                 for (size_t i = 0; i != size; ++i)
127 |                 {
128 |                     events_[i + event_count_]
129 |                         = event_continuations_[i + event_count_ - 1].event;
130 |                 }
131 | 
132 |                 COOP_LOG(
133 |                     "Added %zu events to the event processing thread\n", size);
134 |                 event_count_ += size;
135 |             }
136 |             else
137 |             {
138 |                 COOP_LOG("Event %i signaled on the event processing thread\n",
139 |                          index);
140 | 
141 |                 // An event has been signaled. Enqueue its associated
142 |                 // continuation.
143 |                 event_continuation_t& continuation
144 |                     = event_continuations_[index - 1];
145 |                 schedule(continuation.coroutine,
146 |                          continuation.cpu_affinity,
147 |                          continuation.priority);
148 | 
149 |                 // Remove the entry by swapping it with the last live one.
150 |                 // The last event sits at event_count_ - 1 and, because of
151 |                 // the one-slot offset, its continuation at event_count_ - 2.
152 |                 // NOTE: if this event was the only event in the queue (aside
153 |                 // from the thread signaler), these swaps are in-place swaps and
154 |                 // thus no-ops
155 |                 std::swap(events_[index], events_[event_count_ - 1]);
156 |                 std::swap(event_continuations_[index - 1],
157 |                           event_continuations_[event_count_ - 2]);
158 |                 --event_count_;
159 |             }
160 |         }
161 |     });
162 | #endif
163 | }
139 | 
140 | // Stops the event thread (Windows only), frees the event bookkeeping and
141 | // then destroys the work queues in index order
142 | scheduler_t::~scheduler_t() noexcept
143 | {
144 |     active_ = false;
145 | #ifdef _WIN32
146 |     // Slot 0 is the wake-up signal the event thread is waiting on
147 |     events_[0].signal();
148 |     event_thread_.join();
149 | #endif
150 |     delete[] events_;
151 |     delete[] event_continuations_;
152 | 
153 |     // The queues were constructed in place, so each one is destroyed
154 |     // explicitly before the raw storage is returned
155 |     detail::work_queue_t* const queues_end = queues_ + cpu_count_;
156 |     for (detail::work_queue_t* queue = queues_; queue != queues_end; ++queue)
157 |     {
158 |         queue->~work_queue_t();
159 |     }
160 |     operator delete[](static_cast<void*>(queues_));
161 | }
156 | 
157 | // Enqueues `coroutine` on one of the work queues permitted by `cpu_affinity`
158 | // (bit i set means queue i is allowed; 0 means any queue). An empty permitted
159 | // queue is preferred; otherwise one of the permitted queues is picked from a
160 | // low-discrepancy sequence. `priority` and `source_location` are forwarded to
161 | // the chosen queue.
162 | void scheduler_t::schedule(std::coroutine_handle<> coroutine,
163 |                            uint64_t cpu_affinity,
164 |                            uint32_t priority,
165 |                            source_location_t source_location)
166 | {
167 |     // Bits at or above cpu_count_ have no backing work queue. Discard them so
168 |     // that they can never be used to index queues_.
169 |     uint64_t const valid_cpus
170 |         = cpu_count_ >= 64 ? ~0ull : (1ull << cpu_count_) - 1;
171 |     cpu_affinity &= valid_cpus;
172 |     if (cpu_affinity == 0)
173 |     {
174 |         // No usable preference was given: every queue is acceptable
175 |         cpu_affinity = valid_cpus;
176 |     }
177 | 
178 |     for (uint32_t i = 0; i != cpu_count_; ++i)
179 |     {
180 |         if ((cpu_affinity & (1ull << i)) && queues_[i].size_approx() == 0)
181 |         {
182 |             COOP_LOG("Empty work queue %i identified\n", i);
183 |             queues_[i].enqueue(coroutine, priority, source_location);
184 |             return;
185 |         }
186 |     }
187 | 
188 |     // All queues appear to be busy, pick a random one with reasonably low
189 |     // discrepancy (Kronecker recurrence sequence). The product is formed in
190 |     // double precision and converted to a 64-bit integer: a float cannot
191 |     // represent every 32-bit counter value and the product may exceed the
192 |     // range of uint32_t.
193 |     uint32_t index = static_cast<uint32_t>(
194 |         static_cast<uint64_t>(update_++ * std::numbers::phi_v<double>)
195 |         % std::popcount(cpu_affinity));
196 | 
197 |     // Iteratively unset the lowest set bit to determine the nth set bit
198 |     for (uint32_t i = 0; i != index; ++i)
199 |     {
200 |         cpu_affinity &= cpu_affinity - 1;
201 |     }
202 |     uint32_t queue = std::countr_zero(cpu_affinity);
203 |     COOP_LOG("Work queue %i identified\n", queue);
204 | 
205 |     queues_[queue].enqueue(coroutine, priority, source_location);
206 | }
195 | 
196 | // Registers `coroutine` to be scheduled once `event` is signaled. The request
197 | // is queued for the event thread, which is then woken through events_[0].
198 | void scheduler_t::schedule(std::coroutine_handle<> coroutine,
199 |                            event_ref_t event,
200 |                            uint64_t cpu_affinity,
201 |                            uint32_t priority)
202 | {
203 |     event_continuation_t request{coroutine, event, cpu_affinity, priority};
204 |     pending_events_.enqueue(request);
205 | 
206 |     event_ref_t& wake_signal = events_[0];
207 |     wake_signal.signal();
208 | }
204 | 


--------------------------------------------------------------------------------
/src/work_queue.cpp:
--------------------------------------------------------------------------------
  1 | #include <coop/detail/work_queue.hpp>
  2 | 
  3 | #include <algorithm>
  4 | #include <cassert>
  5 | #include <coop/detail/tracer.hpp>
  6 | #include <cstdio>
  7 | 
  8 | #ifdef _WIN32
  9 | #    define WIN32_LEAN_AND_MEAN
 10 | #    include <Windows.h>
 11 | #elif defined(__linux__)
 12 | #    include <pthread.h>
 13 | #elif (__APPLE__)
 14 | #endif
 15 | 
 16 | using namespace coop;
 17 | using namespace coop::detail;
 18 | 
 19 | // Starts the worker thread for queue `id`. The worker tries to pin itself to
 20 | // the CPU with the same index and then resumes one coroutine per semaphore
 21 | // token until the queue is deactivated.
 22 | work_queue_t::work_queue_t(scheduler_t& scheduler, uint32_t id)
 23 |     : scheduler_{scheduler}
 24 |     , id_{id}
 25 |     , sem_{0}
 26 | {
 27 |     snprintf(label_, sizeof(label_), "work_queue:%i", id);
 28 |     active_ = true;
 29 |     thread_ = std::thread([this] {
 30 | #if defined(_WIN32)
 31 |         // The mask parameter is pointer-sized. Narrowing it to 32 bits would
 32 |         // yield an empty mask for any queue id of 32 or above.
 33 |         SetThreadAffinityMask(
 34 |             GetCurrentThread(), static_cast<DWORD_PTR>(1ull << id_));
 35 | #elif defined(__linux__)
 36 |         // TODO: Android implementation
 37 |         pthread_t thread = pthread_self();
 38 |         cpu_set_t cpuset;
 39 |         CPU_ZERO(&cpuset);
 40 |         CPU_SET(id_, &cpuset);
 41 |         int result = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
 42 | 
 43 |         if (result != 0)
 44 |         {
 45 |             // Report the failure but keep servicing the queue unpinned.
 46 |             // Exiting here would leave every coroutine enqueued on this
 47 |             // queue suspended forever.
 48 |             errno = result;
 49 |             perror("Failed to set thread affinity");
 50 |         }
 51 | #elif (__APPLE__)
 52 |     // TODO: MacOS/iOS implementation
 53 | #endif
 54 | 
 55 |         while (true)
 56 |         {
 57 |             // One token is released per enqueued coroutine, plus one by the
 58 |             // destructor to wake the thread for shutdown
 59 |             sem_.acquire();
 60 |             if (!active_)
 61 |             {
 62 |                 return;
 63 |             }
 64 | 
 65 |             bool did_dequeue = false;
 66 | 
 67 |             // Dequeue in a loop because the concurrent queue isn't sequentially
 68 |             // consistent
 69 |             while (!did_dequeue)
 70 |             {
 71 |                 // Highest priority first
 72 |                 for (int i = COOP_PRIORITY_COUNT - 1; i >= 0; --i)
 73 |                 {
 74 |                     std::coroutine_handle<> coroutine;
 75 |                     if (queues_[i].try_dequeue(coroutine))
 76 |                     {
 77 |                         COOP_LOG("Dequeueing coroutine %p on thread %zu (%i)\n",
 78 |                                  coroutine.address(),
 79 |                                  detail::thread_id(),
 80 |                                  id_);
 81 |                         did_dequeue = true;
 82 |                         coroutine.resume();
 83 |                         break;
 84 |                     }
 85 |                 }
 86 |             }
 87 | 
 88 |             // TODO: Implement some sort of work stealing here
 89 |         }
 90 |     });
 91 | }
 82 | 
 83 | work_queue_t::~work_queue_t() noexcept // Stops the worker thread and waits for it to exit
 84 | {
 85 |     active_ = false; // Checked by the worker right after it acquires sem_
 86 |     sem_.release();  // Wakes the worker if it is blocked in sem_.acquire()
 87 |     thread_.join();
 88 | }
 89 | 
 90 | // Adds `coroutine` to the sub-queue for `priority` (clamped to the supported
 91 | // range) and releases one semaphore token so the worker thread picks it up.
 92 | // `source_location` is only used for tracing.
 93 | void work_queue_t::enqueue(std::coroutine_handle<> coroutine,
 94 |                            uint32_t priority,
 95 |                            source_location_t source_location)
 96 | {
 97 |     priority = std::clamp<uint32_t>(priority, 0, COOP_PRIORITY_COUNT - 1);
 98 |     // Callers that do not supply a location pass a null file name, which
 99 |     // must not reach a %s conversion
100 |     [[maybe_unused]] char const* const file
101 |         = source_location.file ? source_location.file : "<unknown>";
102 |     COOP_LOG("Enqueueing coroutine %p on thread %zu (%s:%zu)\n",
103 |              coroutine.address(),
104 |              detail::thread_id(),
105 |              file,
106 |              source_location.line);
107 |     queues_[priority].enqueue(coroutine);
108 |     sem_.release();
109 | }
103 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include(FetchContent)
 2 | 
 3 | # doctest is fetched at configure time, without its own tests or install rules
 4 | set(DOCTEST_WITH_TESTS OFF)
 5 | set(DOCTEST_NO_INSTALL ON)
 6 | FetchContent_Declare(
 7 |     doctest
 8 |     GIT_REPOSITORY https://github.com/onqtam/doctest.git
 9 |     GIT_TAG 2.4.5
10 |     GIT_SHALLOW ON
11 | )
12 | # doctest_POPULATED is only defined once the properties have been queried.
13 | # Without this call a second configure pass through this file would try to
14 | # populate doctest again.
15 | FetchContent_GetProperties(doctest)
16 | if(NOT doctest_POPULATED)
17 |     FetchContent_Populate(doctest)
18 |     add_subdirectory(${doctest_SOURCE_DIR} ${doctest_BINARY_DIR})
19 | endif()
20 | 
21 | list(APPEND CMAKE_MODULE_PATH ${doctest_SOURCE_DIR}/scripts/cmake)
22 | include(doctest)
23 | 
24 | add_executable(coop_test test.cpp)
25 | target_link_libraries(
26 |     coop_test
27 |     PUBLIC
28 |     coop
29 |     coop_scheduler
30 |     doctest::doctest
31 | )


--------------------------------------------------------------------------------
/test/test.cpp:
--------------------------------------------------------------------------------
  1 | #define DOCTEST_CONFIG_IMPLEMENT
  2 | #include <doctest/doctest.h>
  3 | 
  4 | #include <chrono>
  5 | #include <coop/task.hpp>
  6 | #include <thread>
  7 | 
  8 | // Measures how long a single suspend/resume round trip through the default
  9 | // scheduler takes and prints the result in microseconds
 10 | coop::task_t<void, true> suspend_time()
 11 | {
 12 |     using clock_t = std::chrono::system_clock;
 13 | 
 14 |     // std::printf("%zu start thread\n", coop::detail::thread_id());
 15 |     auto const before = clock_t::now();
 16 |     COOP_SUSPEND();
 17 |     auto const after = clock_t::now();
 18 |     // std::printf("%zu end thread\n", coop::detail::thread_id());
 19 | 
 20 |     auto const elapsed
 21 |         = std::chrono::duration_cast<std::chrono::microseconds>(after - before);
 22 |     size_t us = elapsed.count();
 23 |     std::printf("Duration for suspend test: %zu us\n", us);
 24 | }
 19 | 
 20 | // Runs the timing coroutine to completion on the calling thread's behalf
 21 | TEST_CASE("suspend overhead")
 22 | {
 23 |     std::printf("Calling suspend_test2 coroutine\n");
 24 |     auto task = suspend_time();
 25 |     task.join();
 26 |     std::printf("suspend_test2 joined\n");
 27 | }
 28 | 
 29 | // Records, in `id`, the thread on which the coroutine resumes after being
 30 | // handed to the default scheduler
 31 | coop::task_t<void, true> test_suspend(std::thread::id& id)
 32 | {
 33 |     COOP_SUSPEND();
 34 |     // From here on the coroutine runs wherever the scheduler resumed it
 35 |     id = std::this_thread::get_id();
 36 | }
 35 | 
 36 | // A suspended coroutine must resume on a thread other than the caller's
 37 | TEST_CASE("test suspend")
 38 | {
 39 |     std::thread::id const caller = std::this_thread::get_id();
 40 |     std::thread::id resumed_on;
 41 | 
 42 |     auto task = test_suspend(resumed_on);
 43 |     std::printf("Joining task\n");
 44 |     task.join();
 45 |     std::printf("Task joined\n");
 46 | 
 47 |     CHECK(caller != resumed_on);
 48 | }
 47 | 
 48 | // Innermost link of the chain: suspends with an affinity mask selecting CPU
 49 | // `core` and then yields the value 1
 50 | coop::task_t<int> chain1(int core)
 51 | {
 52 |     std::printf("chain1 suspending\n");
 53 |     COOP_SUSPEND4(1 << core);
 54 |     std::printf("chain1 resumed\n");
 55 | 
 56 |     constexpr int result = 1;
 57 |     co_return result;
 58 | }
 55 | 
 56 | // Middle link of the chain: starts two chain1 tasks and yields the sum of
 57 | // their results
 58 | coop::task_t<int> chain2()
 59 | {
 60 |     std::printf("chain2\n");
 61 |     COOP_SUSPEND4(1 << 3);
 62 | 
 63 |     auto first  = chain1(5);
 64 |     auto second = chain1(6);
 65 | 
 66 |     int const lhs = co_await first;
 67 |     int const rhs = co_await second;
 68 |     co_return lhs + rhs;
 69 | }
 64 | 
 65 | // Outermost, joinable link of the chain: stores the value produced by chain2
 66 | // in `result`
 67 | coop::task_t<void, true> chain3(int& result)
 68 | {
 69 |     std::printf("chain3 suspending\n");
 70 |     COOP_SUSPEND4(1 << 4);
 71 |     std::printf("chain3 resumed\n");
 72 | 
 73 |     auto inner = chain2();
 74 |     result     = co_await inner;
 75 | }
 72 | 
 73 | // Two chain1 results of 1 each must propagate up through chain2 and chain3
 74 | TEST_CASE("chained continuation")
 75 | {
 76 |     int total = 0;
 77 | 
 78 |     auto task = chain3(total);
 79 |     std::printf("Joining chained continuation task\n");
 80 |     task.join();
 81 |     std::printf("Task chained continuation joined\n");
 82 | 
 83 |     CHECK(total == 2);
 84 | }
 82 | 
 83 | // Moves onto a scheduler thread and then blocks that thread for 50 ms
 84 | coop::task_t<> in_flight1()
 85 | {
 86 |     COOP_SUSPEND();
 87 | 
 88 |     constexpr std::chrono::milliseconds nap{50};
 89 |     std::this_thread::sleep_for(nap);
 90 | }
 88 | 
 89 | // Starts eight in_flight1 tasks, awaits them all, and reports in
 90 | // `ms_elapsed` how many milliseconds the awaiting took
 91 | coop::task_t<void, true> in_flight2(size_t& ms_elapsed)
 92 | {
 93 |     using clock_t = std::chrono::system_clock;
 94 | 
 95 |     // The timing of this test will be off if you don't have at least 8
 96 |     // concurrent threads that can run on your machine
 97 |     constexpr size_t count = 8;
 98 |     coop::task_t<> tasks[count];
 99 | 
100 |     for (auto& task : tasks)
101 |     {
102 |         task = in_flight1();
103 |     }
104 | 
105 |     auto const started = clock_t::now();
106 |     for (auto& task : tasks)
107 |     {
108 |         co_await task;
109 |     }
110 |     auto const finished = clock_t::now();
111 | 
112 |     auto const elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
113 |         finished - started);
114 |     ms_elapsed = elapsed.count();
115 | }
111 | 
112 | // Eight 50 ms tasks must overlap enough to finish in under 150 ms
113 | TEST_CASE("multiple in flight")
114 | {
115 |     size_t elapsed_ms;
116 | 
117 |     auto task = in_flight2(elapsed_ms);
118 |     task.join();
119 | 
120 |     std::printf("Duration for in flight test: %zu ms\n", elapsed_ms);
121 |     CHECK(elapsed_ms < 150);
122 | }
120 | 
121 | #ifdef _WIN32
122 | // Completes once `event` has been awaited successfully
123 | coop::task_t<void, true> wait_for_event(coop::event_t& event)
124 | {
125 |     coop::event_t& awaited = event;
126 |     co_await awaited;
127 | }
126 | 
127 | // Signals `event` from a scheduler thread after a 50 ms delay
128 | coop::task_t<void, true> signal_event(coop::event_t& event)
129 | {
130 |     COOP_SUSPEND();
131 | 
132 |     constexpr std::chrono::milliseconds delay{50};
133 |     std::this_thread::sleep_for(delay);
134 |     event.signal();
135 | }
133 | 
134 | // A coroutine awaiting an event must complete once the event is signaled.
135 | // The elapsed time is measured and reported in microseconds.
136 | TEST_CASE("event completion")
137 | {
138 |     coop::event_t event;
139 |     event.init();
140 |     auto t1   = std::chrono::system_clock::now();
141 |     auto task = wait_for_event(event);
142 | 
143 |     // Fire and forget coroutine
144 |     // signal_event(event);
145 |     event.signal();
146 |     task.join();
147 |     auto t2 = std::chrono::system_clock::now();
148 |     // The unit printed below has to match the unit of this cast
149 |     size_t us
150 |         = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
151 |     std::printf("Duration for event_completion test: %zu us\n", us);
152 | }
150 | #endif
151 | 
152 | // Test entry point: constructs the scheduler up front, then runs doctest
153 | int main(int argc, char* argv[])
154 | {
155 |     // Spawn thread pool
156 |     coop::scheduler_t::instance();
157 | 
158 |     doctest::Context context{argc, argv};
159 |     return context.run();
160 | }
158 | 


--------------------------------------------------------------------------------