├── .gitignore
├── .package
├── LICENSE
├── README.md
├── llco.c
├── llco.h
├── test.c
└── unwind.c
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.dSYM
3 | *.out
4 | *.o
5 | *.out.*
6 |
--------------------------------------------------------------------------------
/.package:
--------------------------------------------------------------------------------
1 | file llco.c
2 | file llco.h
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is available as a choice of the following licenses. Choose
2 | whichever you prefer.
3 |
4 | ===============================================================================
5 | ALTERNATIVE 1 - Public Domain (www.unlicense.org)
6 | ===============================================================================
7 | This is free and unencumbered software released into the public domain.
8 |
9 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
10 | software, either in source code form or as a compiled binary, for any purpose,
11 | commercial or non-commercial, and by any means.
12 |
13 | In jurisdictions that recognize copyright laws, the author or authors of this
14 | software dedicate any and all copyright interest in the software to the public
15 | domain. We make this dedication for the benefit of the public at large and to
16 | the detriment of our heirs and successors. We intend this dedication to be an
17 | overt act of relinquishment in perpetuity of all present and future rights to
18 | this software under copyright law.
19 |
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 |
27 | For more information, please refer to <https://unlicense.org/>
28 |
29 | ===============================================================================
30 | ALTERNATIVE 2 - MIT No Attribution
31 | ===============================================================================
32 | Copyright (c) 2024 Joshua J Baker (https://github.com/tidwall/llco)
33 |
34 | Permission is hereby granted, free of charge, to any person obtaining a copy of
35 | this software and associated documentation files (the "Software"), to deal in
36 | the Software without restriction, including without limitation the rights to
37 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
38 | of the Software, and to permit persons to whom the Software is furnished to do
39 | so.
40 |
41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # llco
2 |
3 | A low-level coroutine library for C.
4 |
5 | The main purpose of this project is to power
6 | [sco](https://github.com/tidwall/sco) and
7 | [neco](https://github.com/tidwall/neco), which are more general purpose
8 | coroutine libraries.
9 |
10 | ## Features
11 |
12 | - Stackful coroutines.
13 | - No allocations (bring your own stack)
14 | - Cross-platform. Linux, Mac, Webassembly, iOS, Android, FreeBSD, Windows, RaspPi, RISC-V
15 | - Fast context switching. Uses assembly in most cases
16 | - No built-in scheduler. You are in charge of the coroutine priority
17 | - Single file amalgamation. No dependencies.
18 |
19 | ## Example
20 |
21 | ```C
22 |
23 | void cleanup(void *stack, size_t stack_size, void *udata) {
24 | // Free the coroutine stack
25 | free(stack);
26 | }
27 |
28 | void entry(void *udata) {
29 | printf("Coroutine started\n");
30 | // Switch back to the main thread and cleanup this coroutine
31 | llco_switch(0, true);
32 | }
33 |
34 | int main(void) {
35 | // Start a coroutine from the main function using a newly allocated stack.
36 | struct llco_desc desc = {
37 | .stack = malloc(LLCO_MINSTACKSIZE),
38 | .stack_size = LLCO_MINSTACKSIZE,
39 | .entry = entry,
40 | .cleanup = cleanup,
41 | };
42 | llco_start(&desc, false);
43 | printf("Back to main\n");
44 | }
45 |
46 | ```
47 |
48 | ## API
49 |
50 | ```C
51 | // Switch to another coroutine. Set the `co` param to NULL to switch to the
52 | // main function. Use the final param to tell the program that you are done
53 | // with the current coroutine, at which point its respective `cleanup`
54 | // callback will be called.
55 | void llco_switch(struct llco *co, bool final);
56 |
57 | // Start a new coroutine. This can be called from the main function or a
58 | // nested coroutine.
59 | void llco_start(struct llco_desc *desc, bool final);
60 |
61 |
62 | // Return the current coroutine or NULL if not currently running in a
63 | // coroutine.
64 | struct llco *llco_current(void);
65 |
66 | // Returns a string that indicates which coroutine method is being used by
67 | // the program. Such as "asm" or "ucontext", etc.
68 | const char *llco_method(void *caps);
69 | ```
70 |
71 | ## Caveats
72 |
73 | - Windows: Only x86_64 is supported at this time. The Windows Fibers API is not
74 | currently suitable as a fallback due to the `CreateFiber` call needing to
75 | allocate memory dynamically.
76 | - Webassembly: Must be compiled with Emscripten using the `-sASYNCIFY` flag.
77 | - All other platforms may fall back to using ucontext when the assembly method
78 | is not available. The `llco_method(0)` function can be used to see if assembly
79 | or ucontext is being used.
80 | - The ucontext fallback method only uses the ucontext API when starting a
81 | coroutine. Once the coroutine has been started, `setjmp` and `longjmp` take
82 | over the switching duties.
83 |
84 | ## Compiler Options
85 |
86 | - `-DLLCO_NOASM`: Disable assembly. Use ucontext fallback instead.
87 | - `-DLLCO_STACKJMP`: Use `setjmp` and `longjmp` for jumping between stacks,
88 | even with the assembly method.
89 | - `-DLLCO_VALGRIND`: Enable support for Valgrind
90 | - `-DLLCO_NOUNWIND`: Disable support for stack unwinding
91 |
92 | ## Running test
93 |
94 | ```sh
95 | # Basic test
96 | cc llco.c test.c && ./a.out
97 |
98 | # Using Valgrind
99 | cc -DLLCO_VALGRIND llco.c test.c && valgrind ./a.out
100 |
101 | # Using Emscripten (Web Assembly)
102 | emcc -sASYNCIFY llco.c test.c && node ./a.out.js
103 | ```
104 |
105 | ## Acknowledgements
106 |
107 | Much of the assembly code was adapted from the [minicoro](https://github.com/edubart/minicoro)
108 | project by [Eduardo Bart](https://github.com/edubart), which was originally
109 | adapted from the [Lua Coco](https://coco.luajit.org) project by
110 | [Mike Pall](https://github.com/MikePall).
111 |
112 | ## License
113 |
114 | Public Domain or MIT No Attribution, your choice.
115 |
--------------------------------------------------------------------------------
/llco.c:
--------------------------------------------------------------------------------
1 | // https://github.com/tidwall/llco
2 | //
3 | // Copyright (c) 2024 Joshua J Baker.
4 | // This software is available as a choice of Public Domain or MIT-0.
5 |
6 | #ifdef _FORTIFY_SOURCE
7 | #define LLCO_FORTIFY_SOURCE _FORTIFY_SOURCE
8 | // Disable __longjmp_chk validation so that we can jump between stacks.
9 | #pragma push_macro("_FORTIFY_SOURCE")
10 | #undef _FORTIFY_SOURCE
11 | #include
12 | #define _FORTIFY_SOURCE LLCO_FORTIFY_SOURCE
13 | #undef LLCO_FORTIFY_SOURCE
14 | #pragma pop_macro("_FORTIFY_SOURCE")
15 | #endif
16 |
17 | #ifndef LLCO_STATIC
18 | #include "llco.h"
19 | #else
20 | #include
21 | #include
22 | #define LLCO_MINSTACKSIZE 16384
23 | #define LLCO_EXTERN static
24 | struct llco_desc {
25 | void *stack;
26 | size_t stack_size;
27 | void (*entry)(void *udata);
28 | void (*cleanup)(void *stack, size_t stack_size, void *udata);
29 | void *udata;
30 | };
31 | struct llco_symbol {
32 | void *cfa;
33 | void *ip;
34 | const char *fname;
35 | void *fbase;
36 | const char *sname;
37 | void *saddr;
38 | };
39 | #endif
40 |
41 | #include
42 |
43 | #ifdef LLCO_VALGRIND
44 | #include
45 | #endif
46 |
47 | #ifndef LLCO_EXTERN
48 | #define LLCO_EXTERN
49 | #endif
50 |
51 | #if defined(__GNUC__)
52 | #ifdef noinline
53 | #define LLCO_NOINLINE noinline
54 | #else
55 | #define LLCO_NOINLINE __attribute__ ((noinline))
56 | #endif
57 | #ifdef noreturn
58 | #define LLCO_NORETURN noreturn
59 | #else
60 | #define LLCO_NORETURN __attribute__ ((noreturn))
61 | #endif
62 | #else
63 | #define LLCO_NOINLINE
64 | #define LLCO_NORETURN
65 | #endif
66 |
67 | #if defined(_MSC_VER)
68 | #define __thread __declspec(thread)
69 | #endif
70 |
71 | static void llco_entry(void *arg);
72 | // Terminate the whole process immediately with exit status 0; never returns.
73 | LLCO_NORETURN
74 | static void llco_exit(void) {
75 | _Exit(0); // _Exit does not run atexit() handlers
76 | }
77 |
78 | #ifdef LLCO_ASM
79 | #error LLCO_ASM must not be defined
80 | #endif
81 |
82 | #if defined(__COSMOCC__) && !defined(LLCO_NOASM)
83 | // Cosmopolitan has issues with asm code
84 | #define LLCO_NOASM
85 | #endif
86 |
87 | // Passing the entry function into assembly requires casting the function
88 | // pointer to an object pointer, which is forbidden in the ISO C spec but
89 | // allowed in posix. Ignore the warning attributed to this requirement when
90 | // the -pedantic compiler flag is provided.
91 | #if defined(__GNUC__)
92 | #pragma GCC diagnostic push
93 | #pragma GCC diagnostic ignored "-Wpedantic"
94 | #endif
95 |
96 | ////////////////////////////////////////////////////////////////////////////////
97 | // Below is various assembly code adapted from the Lua Coco [MIT] and Minicoro
98 | // [MIT-0] projects by Mike Pall and Eduardo Bart respectively.
99 | ////////////////////////////////////////////////////////////////////////////////
100 |
101 | /*
102 | Lua Coco (coco.luajit.org)
103 | Copyright (C) 2004-2016 Mike Pall. All rights reserved.
104 |
105 | Permission is hereby granted, free of charge, to any person obtaining
106 | a copy of this software and associated documentation files (the
107 | "Software"), to deal in the Software without restriction, including
108 | without limitation the rights to use, copy, modify, merge, publish,
109 | distribute, sublicense, and/or sell copies of the Software, and to
110 | permit persons to whom the Software is furnished to do so, subject to
111 | the following conditions:
112 |
113 | The above copyright notice and this permission notice shall be
114 | included in all copies or substantial portions of the Software.
115 |
116 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
117 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
118 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
119 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
120 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
121 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
122 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
123 | */
124 |
125 | ////////////////////////////////////////////////////////////////////////////////
126 | // ARM
127 | ////////////////////////////////////////////////////////////////////////////////
128 | #if defined(__ARM_EABI__) && !defined(LLCO_NOASM)
129 | #define LLCO_ASM
130 | #define LLCO_READY
131 | #define LLCO_METHOD "asm,arm_eabi"
132 | // Saved register context for ARM EABI; field order must match the store/load order in _llco_asm_switch.
133 | struct llco_asmctx {
134 | #ifndef __SOFTFP__
135 | void* f[16]; /* filled by "vstmia r0!, {d8-d15}" in _llco_asm_switch */
136 | #endif
137 | void *d[4]; /* first four words of "stmia r0, {r4-r11, lr}", i.e. r4-r7 (d8-d15 live in f[]) */
138 | void *r[4]; /* next four words of the same store, i.e. r8-r11 */
139 | void *lr; /* ninth word of the stmia store */
140 | void *sp; /* stored separately at offset 9*4 from d[0] */
141 | };
142 | // Prototypes for the two routines implemented in the file-scope assembly below.
143 | void _llco_asm_entry(void);
144 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to);
145 | // _llco_asm_switch: save the current registers through r0, then load the context at r1 and jump into it.
146 | __asm__(
147 | ".text\n"
148 | #ifdef __APPLE__
149 | ".globl __llco_asm_switch\n"
150 | "__llco_asm_switch:\n"
151 | #else
152 | ".globl _llco_asm_switch\n"
153 | ".type _llco_asm_switch #function\n"
154 | ".hidden _llco_asm_switch\n"
155 | "_llco_asm_switch:\n"
156 | #endif
157 | #ifndef __SOFTFP__
158 | " vstmia r0!, {d8-d15}\n" /* save d8-d15 and advance r0 past them */
159 | #endif
160 | " stmia r0, {r4-r11, lr}\n" /* save r4-r11 and lr as nine consecutive words */
161 | " str sp, [r0, #9*4]\n" /* save sp right after those nine words */
162 | #ifndef __SOFTFP__
163 | " vldmia r1!, {d8-d15}\n" /* restore d8-d15 from the target context */
164 | #endif
165 | " ldr sp, [r1, #9*4]\n" /* switch to the target stack pointer */
166 | " ldmia r1, {r4-r11, pc}\n" /* restore r4-r11; the saved lr slot is loaded into pc */
167 | #ifndef __APPLE__
168 | ".size _llco_asm_switch, .-_llco_asm_switch\n"
169 | #endif
170 | );
171 | // _llco_asm_entry: trampoline that moves the values seeded in r4-r6 by llco_asmctx_make into place and branches.
172 | __asm__(
173 | ".text\n"
174 | #ifdef __APPLE__
175 | ".globl __llco_asm_entry\n"
176 | "__llco_asm_entry:\n"
177 | #else
178 | ".globl _llco_asm_entry\n"
179 | ".type _llco_asm_entry #function\n"
180 | ".hidden _llco_asm_entry\n"
181 | "_llco_asm_entry:\n"
182 | #endif
183 | " mov r0, r4\n" /* r4 (ctx->d[0]) becomes the first argument */
184 | " mov ip, r5\n" /* r5 (ctx->d[1]) is the branch target */
185 | " mov lr, r6\n" /* r6 (ctx->d[2]) becomes the return address */
186 | " bx ip\n"
187 | #ifndef __APPLE__
188 | ".size _llco_asm_entry, .-_llco_asm_entry\n"
189 | #endif
190 | );
191 | // Seed a context so the first switch into it enters _llco_asm_entry with sp at the top of the given stack.
192 | static void llco_asmctx_make(struct llco_asmctx *ctx, void* stack_base,
193 | size_t stack_size, void *arg)
194 | {
195 | ctx->d[0] = (void*)(arg); /* loaded into r4, then copied to r0 by _llco_asm_entry */
196 | ctx->d[1] = (void*)(llco_entry); /* loaded into r5, the branch target of _llco_asm_entry */
197 | ctx->d[2] = (void*)(0xdeaddead); /* Dummy return address. */
198 | ctx->lr = (void*)(_llco_asm_entry); /* the switch loads this slot into pc */
199 | ctx->sp = (void*)((size_t)stack_base + stack_size); /* one past the highest byte of the stack */
200 | }
201 |
202 | #endif
203 |
204 | ////////////////////////////////////////////////////////////////////////////////
205 | // ARM 64-bit
206 | ////////////////////////////////////////////////////////////////////////////////
207 | #if defined(__aarch64__) && !defined(LLCO_NOASM)
208 | #define LLCO_ASM
209 | #define LLCO_READY
210 | #define LLCO_METHOD "asm,aarch64"
211 | // Saved register context for aarch64; the #(n*16) offsets in _llco_asm_switch index into this layout.
212 | struct llco_asmctx {
213 | void *x[12]; /* x19-x30 */
214 | void *sp; /* stored together with lr as one pair at #(6*16) */
215 | void *lr; /* address the switch branches to when this context is resumed */
216 | void *d[8]; /* d8-d15 */
217 | };
218 | // Prototypes for the two routines implemented in the file-scope assembly below.
219 | void _llco_asm_entry(void);
220 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to);
221 | // _llco_asm_switch: store the current registers through x0, load the context at x1, then branch to its lr slot.
222 | __asm__(
223 | ".text\n"
224 | #ifdef __APPLE__
225 | ".globl __llco_asm_switch\n"
226 | "__llco_asm_switch:\n"
227 | #else
228 | ".globl _llco_asm_switch\n"
229 | ".type _llco_asm_switch #function\n"
230 | ".hidden _llco_asm_switch\n"
231 | "_llco_asm_switch:\n"
232 | #endif
233 |
234 | " mov x10, sp\n" /* sp is copied to x10 so it can be stored with stp */
235 | " mov x11, x30\n" /* x30 is the address to resume at */
236 | " stp x19, x20, [x0, #(0*16)]\n"
237 | " stp x21, x22, [x0, #(1*16)]\n"
238 | " stp d8, d9, [x0, #(7*16)]\n"
239 | " stp x23, x24, [x0, #(2*16)]\n"
240 | " stp d10, d11, [x0, #(8*16)]\n"
241 | " stp x25, x26, [x0, #(3*16)]\n"
242 | " stp d12, d13, [x0, #(9*16)]\n"
243 | " stp x27, x28, [x0, #(4*16)]\n"
244 | " stp d14, d15, [x0, #(10*16)]\n"
245 | " stp x29, x30, [x0, #(5*16)]\n"
246 | " stp x10, x11, [x0, #(6*16)]\n" /* the sp and lr fields */
247 | " ldp x19, x20, [x1, #(0*16)]\n"
248 | " ldp x21, x22, [x1, #(1*16)]\n"
249 | " ldp d8, d9, [x1, #(7*16)]\n"
250 | " ldp x23, x24, [x1, #(2*16)]\n"
251 | " ldp d10, d11, [x1, #(8*16)]\n"
252 | " ldp x25, x26, [x1, #(3*16)]\n"
253 | " ldp d12, d13, [x1, #(9*16)]\n"
254 | " ldp x27, x28, [x1, #(4*16)]\n"
255 | " ldp d14, d15, [x1, #(10*16)]\n"
256 | " ldp x29, x30, [x1, #(5*16)]\n"
257 | " ldp x10, x11, [x1, #(6*16)]\n" /* target sp and lr */
258 | " mov sp, x10\n"
259 | " br x11\n"
260 | #ifndef __APPLE__
261 | ".size _llco_asm_switch, .-_llco_asm_switch\n"
262 | #endif
263 | );
264 | // _llco_asm_entry: trampoline that moves the values seeded in x19-x21 by llco_asmctx_make into place and branches.
265 | __asm__(
266 | ".text\n"
267 | #ifdef __APPLE__
268 | ".globl __llco_asm_entry\n"
269 | "__llco_asm_entry:\n"
270 | #else
271 | ".globl _llco_asm_entry\n"
272 | ".type _llco_asm_entry #function\n"
273 | ".hidden _llco_asm_entry\n"
274 | "_llco_asm_entry:\n"
275 | #endif
276 | " mov x0, x19\n" /* x19 (ctx->x[0]) becomes the first argument */
277 | " mov x30, x21\n" /* x21 (ctx->x[2]) becomes the return address */
278 | " br x20\n" /* x20 (ctx->x[1]) is the branch target */
279 | #ifndef __APPLE__
280 | ".size _llco_asm_entry, .-_llco_asm_entry\n"
281 | #endif
282 | );
283 | // Seed a context so the first switch into it enters _llco_asm_entry with sp at the top of the given stack.
284 | static void llco_asmctx_make(struct llco_asmctx *ctx, void* stack_base,
285 | size_t stack_size, void *arg)
286 | {
287 | ctx->x[0] = (void*)(arg); /* loaded into x19, then copied to x0 by _llco_asm_entry */
288 | ctx->x[1] = (void*)(llco_entry); /* loaded into x20, the branch target of _llco_asm_entry */
289 | ctx->x[2] = (void*)(0xdeaddeaddeaddead); /* Dummy return address. */
290 | ctx->sp = (void*)((size_t)stack_base + stack_size); /* one past the highest byte of the stack */
291 | ctx->lr = (void*)(_llco_asm_entry); /* the switch branches to this slot */
292 | }
293 | #endif
294 |
295 | ////////////////////////////////////////////////////////////////////////////////
296 | // RISC-V (rv64/rv32)
297 | ////////////////////////////////////////////////////////////////////////////////
298 | #if defined(__riscv) && !defined(LLCO_NOASM)
299 | #define LLCO_ASM
300 | #define LLCO_READY
301 | #define LLCO_METHOD "asm,riscv"
302 | // Saved register context for RISC-V; _llco_asm_switch addresses these fields by hard-coded byte offsets.
303 | struct llco_asmctx {
304 | void* s[12]; /* s0-s11 */
305 | void* ra;
306 | void* pc; /* address the switch jumps to when this context is resumed */
307 | void* sp;
308 | #ifdef __riscv_flen
309 | #if __riscv_flen == 64
310 | double fs[12]; /* fs0-fs11; NOTE(review): the rv32 switch code addresses fs0 at 0x3c, directly after sp - confirm this matches the compiler's alignment of a double array */
311 | #elif __riscv_flen == 32
312 | float fs[12]; /* fs0-fs11 */
313 | #endif
314 | #endif /* __riscv_flen */
315 | };
316 | // Prototypes for the two routines implemented in the file-scope assembly below.
317 | void _llco_asm_entry(void);
318 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to);
319 | // _llco_asm_entry: trampoline that passes s0 as the first argument and jumps to the address held in s1.
320 | __asm__(
321 | ".text\n"
322 | ".globl _llco_asm_entry\n"
323 | ".type _llco_asm_entry @function\n"
324 | ".hidden _llco_asm_entry\n"
325 | "_llco_asm_entry:\n"
326 | " mv a0, s0\n" /* s0 (ctx->s[0]) becomes the first argument */
327 | " jr s1\n" /* s1 (ctx->s[1]) is the jump target */
328 | ".size _llco_asm_entry, .-_llco_asm_entry\n"
329 | );
330 | // _llco_asm_switch: store s0-s11, ra, sp (and fs0-fs11 when present) through a0, load them through a1, then jump to the loaded pc.
331 | __asm__(
332 | ".text\n"
333 | ".globl _llco_asm_switch\n"
334 | ".type _llco_asm_switch @function\n"
335 | ".hidden _llco_asm_switch\n"
336 | "_llco_asm_switch:\n"
337 | #if __riscv_xlen == 64
338 | " sd s0, 0x00(a0)\n"
339 | " sd s1, 0x08(a0)\n"
340 | " sd s2, 0x10(a0)\n"
341 | " sd s3, 0x18(a0)\n"
342 | " sd s4, 0x20(a0)\n"
343 | " sd s5, 0x28(a0)\n"
344 | " sd s6, 0x30(a0)\n"
345 | " sd s7, 0x38(a0)\n"
346 | " sd s8, 0x40(a0)\n"
347 | " sd s9, 0x48(a0)\n"
348 | " sd s10, 0x50(a0)\n"
349 | " sd s11, 0x58(a0)\n"
350 | " sd ra, 0x60(a0)\n"
351 | " sd ra, 0x68(a0)\n" /* pc: the caller's return address doubles as the resume address */
352 | " sd sp, 0x70(a0)\n"
353 | #ifdef __riscv_flen
354 | #if __riscv_flen == 64
355 | " fsd fs0, 0x78(a0)\n"
356 | " fsd fs1, 0x80(a0)\n"
357 | " fsd fs2, 0x88(a0)\n"
358 | " fsd fs3, 0x90(a0)\n"
359 | " fsd fs4, 0x98(a0)\n"
360 | " fsd fs5, 0xa0(a0)\n"
361 | " fsd fs6, 0xa8(a0)\n"
362 | " fsd fs7, 0xb0(a0)\n"
363 | " fsd fs8, 0xb8(a0)\n"
364 | " fsd fs9, 0xc0(a0)\n"
365 | " fsd fs10, 0xc8(a0)\n"
366 | " fsd fs11, 0xd0(a0)\n"
367 | " fld fs0, 0x78(a1)\n"
368 | " fld fs1, 0x80(a1)\n"
369 | " fld fs2, 0x88(a1)\n"
370 | " fld fs3, 0x90(a1)\n"
371 | " fld fs4, 0x98(a1)\n"
372 | " fld fs5, 0xa0(a1)\n"
373 | " fld fs6, 0xa8(a1)\n"
374 | " fld fs7, 0xb0(a1)\n"
375 | " fld fs8, 0xb8(a1)\n"
376 | " fld fs9, 0xc0(a1)\n"
377 | " fld fs10, 0xc8(a1)\n"
378 | " fld fs11, 0xd0(a1)\n"
379 | #else
380 | #error "Unsupported RISC-V FLEN"
381 | #endif
382 | #endif /* __riscv_flen */
383 | " ld s0, 0x00(a1)\n"
384 | " ld s1, 0x08(a1)\n"
385 | " ld s2, 0x10(a1)\n"
386 | " ld s3, 0x18(a1)\n"
387 | " ld s4, 0x20(a1)\n"
388 | " ld s5, 0x28(a1)\n"
389 | " ld s6, 0x30(a1)\n"
390 | " ld s7, 0x38(a1)\n"
391 | " ld s8, 0x40(a1)\n"
392 | " ld s9, 0x48(a1)\n"
393 | " ld s10, 0x50(a1)\n"
394 | " ld s11, 0x58(a1)\n"
395 | " ld ra, 0x60(a1)\n"
396 | " ld a2, 0x68(a1)\n" /* pc */
397 | " ld sp, 0x70(a1)\n"
398 | " jr a2\n"
399 | #elif __riscv_xlen == 32
400 | " sw s0, 0x00(a0)\n"
401 | " sw s1, 0x04(a0)\n"
402 | " sw s2, 0x08(a0)\n"
403 | " sw s3, 0x0c(a0)\n"
404 | " sw s4, 0x10(a0)\n"
405 | " sw s5, 0x14(a0)\n"
406 | " sw s6, 0x18(a0)\n"
407 | " sw s7, 0x1c(a0)\n"
408 | " sw s8, 0x20(a0)\n"
409 | " sw s9, 0x24(a0)\n"
410 | " sw s10, 0x28(a0)\n"
411 | " sw s11, 0x2c(a0)\n"
412 | " sw ra, 0x30(a0)\n"
413 | " sw ra, 0x34(a0)\n" /* pc: the caller's return address doubles as the resume address */
414 | " sw sp, 0x38(a0)\n"
415 | #ifdef __riscv_flen
416 | #if __riscv_flen == 64
417 | " fsd fs0, 0x3c(a0)\n"
418 | " fsd fs1, 0x44(a0)\n"
419 | " fsd fs2, 0x4c(a0)\n"
420 | " fsd fs3, 0x54(a0)\n"
421 | " fsd fs4, 0x5c(a0)\n"
422 | " fsd fs5, 0x64(a0)\n"
423 | " fsd fs6, 0x6c(a0)\n"
424 | " fsd fs7, 0x74(a0)\n"
425 | " fsd fs8, 0x7c(a0)\n"
426 | " fsd fs9, 0x84(a0)\n"
427 | " fsd fs10, 0x8c(a0)\n"
428 | " fsd fs11, 0x94(a0)\n"
429 | " fld fs0, 0x3c(a1)\n"
430 | " fld fs1, 0x44(a1)\n"
431 | " fld fs2, 0x4c(a1)\n"
432 | " fld fs3, 0x54(a1)\n"
433 | " fld fs4, 0x5c(a1)\n"
434 | " fld fs5, 0x64(a1)\n"
435 | " fld fs6, 0x6c(a1)\n"
436 | " fld fs7, 0x74(a1)\n"
437 | " fld fs8, 0x7c(a1)\n"
438 | " fld fs9, 0x84(a1)\n"
439 | " fld fs10, 0x8c(a1)\n"
440 | " fld fs11, 0x94(a1)\n"
441 | #elif __riscv_flen == 32
442 | " fsw fs0, 0x3c(a0)\n"
443 | " fsw fs1, 0x40(a0)\n"
444 | " fsw fs2, 0x44(a0)\n"
445 | " fsw fs3, 0x48(a0)\n"
446 | " fsw fs4, 0x4c(a0)\n"
447 | " fsw fs5, 0x50(a0)\n"
448 | " fsw fs6, 0x54(a0)\n"
449 | " fsw fs7, 0x58(a0)\n"
450 | " fsw fs8, 0x5c(a0)\n"
451 | " fsw fs9, 0x60(a0)\n"
452 | " fsw fs10, 0x64(a0)\n"
453 | " fsw fs11, 0x68(a0)\n"
454 | " flw fs0, 0x3c(a1)\n"
455 | " flw fs1, 0x40(a1)\n"
456 | " flw fs2, 0x44(a1)\n"
457 | " flw fs3, 0x48(a1)\n"
458 | " flw fs4, 0x4c(a1)\n"
459 | " flw fs5, 0x50(a1)\n"
460 | " flw fs6, 0x54(a1)\n"
461 | " flw fs7, 0x58(a1)\n"
462 | " flw fs8, 0x5c(a1)\n"
463 | " flw fs9, 0x60(a1)\n"
464 | " flw fs10, 0x64(a1)\n"
465 | " flw fs11, 0x68(a1)\n"
466 | #else
467 | #error "Unsupported RISC-V FLEN"
468 | #endif
469 | #endif /* __riscv_flen */
470 | " lw s0, 0x00(a1)\n"
471 | " lw s1, 0x04(a1)\n"
472 | " lw s2, 0x08(a1)\n"
473 | " lw s3, 0x0c(a1)\n"
474 | " lw s4, 0x10(a1)\n"
475 | " lw s5, 0x14(a1)\n"
476 | " lw s6, 0x18(a1)\n"
477 | " lw s7, 0x1c(a1)\n"
478 | " lw s8, 0x20(a1)\n"
479 | " lw s9, 0x24(a1)\n"
480 | " lw s10, 0x28(a1)\n"
481 | " lw s11, 0x2c(a1)\n"
482 | " lw ra, 0x30(a1)\n"
483 | " lw a2, 0x34(a1)\n" /* pc */
484 | " lw sp, 0x38(a1)\n"
485 | " jr a2\n"
486 | #else
487 | #error "Unsupported RISC-V XLEN"
488 | #endif /* __riscv_xlen */
489 | ".size _llco_asm_switch, .-_llco_asm_switch\n"
490 | );
491 | // Seed a context so the first switch into it jumps to _llco_asm_entry with sp at the top of the given stack.
492 | static void llco_asmctx_make(struct llco_asmctx *ctx,
493 | void* stack_base, size_t stack_size, void *arg)
494 | {
495 | ctx->s[0] = (void*)(arg); /* loaded into s0, then copied to a0 by _llco_asm_entry */
496 | ctx->s[1] = (void*)(llco_entry); /* loaded into s1, the jump target of _llco_asm_entry */
497 | ctx->pc = (void*)(_llco_asm_entry); /* the switch jumps to this slot */
498 | #if __riscv_xlen == 64
499 | ctx->ra = (void*)(0xdeaddeaddeaddead); /* dummy return address (64-bit) */
500 | #elif __riscv_xlen == 32
501 | ctx->ra = (void*)(0xdeaddead); /* dummy return address (32-bit) */
502 | #endif
503 | ctx->sp = (void*)((size_t)stack_base + stack_size); /* one past the highest byte of the stack */
504 | }
505 |
506 | #endif // riscv
507 |
508 | ////////////////////////////////////////////////////////////////////////////////
509 | // x86
510 | ////////////////////////////////////////////////////////////////////////////////
511 | #if (defined(__i386) || defined(__i386__)) && !defined(LLCO_NOASM)
512 | #define LLCO_ASM
513 | #define LLCO_READY
514 | #define LLCO_METHOD "asm,i386"
515 | // Saved register context for i386; _llco_asm_switch addresses the fields at byte offsets 0, 4, 8, 12, 16 and 20 in this order.
516 | struct llco_asmctx {
517 | void *eip, *esp, *ebp, *ebx, *esi, *edi;
518 | };
519 | // Prototype for the routine implemented in the file-scope assembly below.
520 | void _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to);
521 | // _llco_asm_switch: save eip/esp/ebp/ebx/esi/edi into `from`, load them from `to`, and jump to the loaded eip.
522 | __asm__(
523 | #ifdef __DJGPP__ /* DOS compiler */
524 | "__llco_asm_switch:\n"
525 | #else
526 | ".text\n"
527 | ".globl _llco_asm_switch\n"
528 | ".type _llco_asm_switch @function\n"
529 | ".hidden _llco_asm_switch\n"
530 | "_llco_asm_switch:\n"
531 | #endif
532 | " call 1f\n" /* call/pop pair reads the current address without an absolute reference */
533 | " 1:\n"
534 | " popl %ecx\n"
535 | " addl $(2f-1b), %ecx\n" /* ecx = address of label 2, the resume point */
536 | " movl 4(%esp), %eax\n" /* eax = from */
537 | " movl 8(%esp), %edx\n" /* edx = to */
538 | " movl %ecx, (%eax)\n"
539 | " movl %esp, 4(%eax)\n"
540 | " movl %ebp, 8(%eax)\n"
541 | " movl %ebx, 12(%eax)\n"
542 | " movl %esi, 16(%eax)\n"
543 | " movl %edi, 20(%eax)\n"
544 | " movl 20(%edx), %edi\n"
545 | " movl 16(%edx), %esi\n"
546 | " movl 12(%edx), %ebx\n"
547 | " movl 8(%edx), %ebp\n"
548 | " movl 4(%edx), %esp\n"
549 | " jmp *(%edx)\n" /* continue at the target's saved eip */
550 | " 2:\n"
551 | " ret\n"
552 | #ifndef __DJGPP__
553 | ".size _llco_asm_switch, .-_llco_asm_switch\n"
554 | #endif
555 | );
556 | // Seed a context so the first switch into it jumps straight to llco_entry with `arg` placed on the new stack.
557 | static void llco_asmctx_make(struct llco_asmctx *ctx,
558 | void* stack_base, size_t stack_size, void *arg)
559 | {
560 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size - 16 -
561 | 1*sizeof(size_t)); // 16 bytes plus one word below the top of the stack
562 | stack_high_ptr[0] = (void*)(0xdeaddead); // Dummy return address.
563 | stack_high_ptr[1] = (void*)(arg); // word just above the return address slot
564 | ctx->eip = (void*)(llco_entry); // no trampoline here: the switch jumps directly to llco_entry
565 | ctx->esp = (void*)(stack_high_ptr);
566 | }
567 | #endif // __i386__
568 |
569 | ////////////////////////////////////////////////////////////////////////////////
570 | // x64
571 | ////////////////////////////////////////////////////////////////////////////////
572 | #if (defined(__x86_64__) || defined(_M_X64)) && !defined(LLCO_NOASM)
573 | #define LLCO_ASM
574 | #define LLCO_READY
575 | #define LLCO_METHOD "asm,x64"
576 |
577 | #ifdef _WIN32
578 | // Saved context for Windows x64; the machine code in llco_asm_switch_code addresses these fields by byte offset.
579 | struct llco_asmctx {
580 | void *rip, *rsp, *rbp, *rbx, *r12, *r13, *r14, *r15, *rdi, *rsi;
581 | void* xmm[20]; /* xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13,
582 | xmm14, xmm15 */
583 | void* fiber_storage; /* offset 0xf0; exchanged with 0x20(%r10) by the switch code */
584 | void* dealloc_stack; /* offset 0xf8; exchanged with 0x1478(%r10) */
585 | void* stack_limit; /* offset 0x100; exchanged with 0x10(%r10) */
586 | void* stack_base; /* offset 0x108; exchanged with 0x8(%r10) */
587 | };
588 |
589 | #if defined(__GNUC__)
590 | #define LLCO_ASM_BLOB __attribute__((section(".text")))
591 | #elif defined(_MSC_VER)
592 | #define LLCO_ASM_BLOB __declspec(allocate(".text"))
593 | #pragma section(".text")
594 | #endif
595 | // Machine code for the entry trampoline: pass r13 as the first argument (rcx) and jump to the address in r12.
596 | LLCO_ASM_BLOB static unsigned char llco_wrap_main_code_entry[] = {
597 | 0x4c,0x89,0xe9, // mov %r13,%rcx
598 | 0x41,0xff,0xe4, // jmpq *%r12
599 | 0xc3, // retq
600 | 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90 // nop
601 | };
602 | // Machine code for the context switch: rcx = context to save into, rdx = context to load; 0x13e is the byte distance from the end of the lea to the trailing retq.
603 | LLCO_ASM_BLOB static unsigned char llco_asm_switch_code[] = {
604 | 0x48,0x8d,0x05,0x3e,0x01,0x00,0x00, // lea 0x13e(%rip),%rax
605 | 0x48,0x89,0x01, // mov %rax,(%rcx)
606 | 0x48,0x89,0x61,0x08, // mov %rsp,0x8(%rcx)
607 | 0x48,0x89,0x69,0x10, // mov %rbp,0x10(%rcx)
608 | 0x48,0x89,0x59,0x18, // mov %rbx,0x18(%rcx)
609 | 0x4c,0x89,0x61,0x20, // mov %r12,0x20(%rcx)
610 | 0x4c,0x89,0x69,0x28, // mov %r13,0x28(%rcx)
611 | 0x4c,0x89,0x71,0x30, // mov %r14,0x30(%rcx)
612 | 0x4c,0x89,0x79,0x38, // mov %r15,0x38(%rcx)
613 | 0x48,0x89,0x79,0x40, // mov %rdi,0x40(%rcx)
614 | 0x48,0x89,0x71,0x48, // mov %rsi,0x48(%rcx)
615 | 0x0f,0x11,0x71,0x50, // movups %xmm6,0x50(%rcx)
616 | 0x0f,0x11,0x79,0x60, // movups %xmm7,0x60(%rcx)
617 | 0x44,0x0f,0x11,0x41,0x70, // movups %xmm8,0x70(%rcx)
618 | 0x44,0x0f,0x11,0x89,0x80,0x00,0x00,0x00, // movups %xmm9,0x80(%rcx)
619 | 0x44,0x0f,0x11,0x91,0x90,0x00,0x00,0x00, // movups %xmm10,0x90(%rcx)
620 | 0x44,0x0f,0x11,0x99,0xa0,0x00,0x00,0x00, // movups %xmm11,0xa0(%rcx)
621 | 0x44,0x0f,0x11,0xa1,0xb0,0x00,0x00,0x00, // movups %xmm12,0xb0(%rcx)
622 | 0x44,0x0f,0x11,0xa9,0xc0,0x00,0x00,0x00, // movups %xmm13,0xc0(%rcx)
623 | 0x44,0x0f,0x11,0xb1,0xd0,0x00,0x00,0x00, // movups %xmm14,0xd0(%rcx)
624 | 0x44,0x0f,0x11,0xb9,0xe0,0x00,0x00,0x00, // movups %xmm15,0xe0(%rcx)
625 | 0x65,0x4c,0x8b,0x14,0x25,0x30,0x00,0x00,0x00, // mov %gs:0x30,%r10
626 | 0x49,0x8b,0x42,0x20, // mov 0x20(%r10),%rax
627 | 0x48,0x89,0x81,0xf0,0x00,0x00,0x00, // mov %rax,0xf0(%rcx)
628 | 0x49,0x8b,0x82,0x78,0x14,0x00,0x00, // mov 0x1478(%r10),%rax
629 | 0x48,0x89,0x81,0xf8,0x00,0x00,0x00, // mov %rax,0xf8(%rcx)
630 | 0x49,0x8b,0x42,0x10, // mov 0x10(%r10),%rax
631 | 0x48,0x89,0x81,0x00,0x01,0x00,0x00, // mov %rax,0x100(%rcx)
632 | 0x49,0x8b,0x42,0x08, // mov 0x8(%r10),%rax
633 | 0x48,0x89,0x81,0x08,0x01,0x00,0x00, // mov %rax,0x108(%rcx)
634 | 0x48,0x8b,0x82,0x08,0x01,0x00,0x00, // mov 0x108(%rdx),%rax
635 | 0x49,0x89,0x42,0x08, // mov %rax,0x8(%r10)
636 | 0x48,0x8b,0x82,0x00,0x01, 0x00, 0x00, // mov 0x100(%rdx),%rax
637 | 0x49,0x89,0x42,0x10, // mov %rax,0x10(%r10)
638 | 0x48,0x8b,0x82,0xf8,0x00, 0x00, 0x00, // mov 0xf8(%rdx),%rax
639 | 0x49,0x89,0x82,0x78,0x14, 0x00, 0x00, // mov %rax,0x1478(%r10)
640 | 0x48,0x8b,0x82,0xf0,0x00, 0x00, 0x00, // mov 0xf0(%rdx),%rax
641 | 0x49,0x89,0x42,0x20, // mov %rax,0x20(%r10)
642 | 0x44,0x0f,0x10,0xba,0xe0,0x00,0x00,0x00, // movups 0xe0(%rdx),%xmm15
643 | 0x44,0x0f,0x10,0xb2,0xd0,0x00,0x00,0x00, // movups 0xd0(%rdx),%xmm14
644 | 0x44,0x0f,0x10,0xaa,0xc0,0x00,0x00,0x00, // movups 0xc0(%rdx),%xmm13
645 | 0x44,0x0f,0x10,0xa2,0xb0,0x00,0x00,0x00, // movups 0xb0(%rdx),%xmm12
646 | 0x44,0x0f,0x10,0x9a,0xa0,0x00,0x00,0x00, // movups 0xa0(%rdx),%xmm11
647 | 0x44,0x0f,0x10,0x92,0x90,0x00,0x00,0x00, // movups 0x90(%rdx),%xmm10
648 | 0x44,0x0f,0x10,0x8a,0x80,0x00,0x00,0x00, // movups 0x80(%rdx),%xmm9
649 | 0x44,0x0f,0x10,0x42,0x70, // movups 0x70(%rdx),%xmm8
650 | 0x0f,0x10,0x7a,0x60, // movups 0x60(%rdx),%xmm7
651 | 0x0f,0x10,0x72,0x50, // movups 0x50(%rdx),%xmm6
652 | 0x48,0x8b,0x72,0x48, // mov 0x48(%rdx),%rsi
653 | 0x48,0x8b,0x7a,0x40, // mov 0x40(%rdx),%rdi
654 | 0x4c,0x8b,0x7a,0x38, // mov 0x38(%rdx),%r15
655 | 0x4c,0x8b,0x72,0x30, // mov 0x30(%rdx),%r14
656 | 0x4c,0x8b,0x6a,0x28, // mov 0x28(%rdx),%r13
657 | 0x4c,0x8b,0x62,0x20, // mov 0x20(%rdx),%r12
658 | 0x48,0x8b,0x5a,0x18, // mov 0x18(%rdx),%rbx
659 | 0x48,0x8b,0x6a,0x10, // mov 0x10(%rdx),%rbp
660 | 0x48,0x8b,0x62,0x08, // mov 0x8(%rdx),%rsp
661 | 0xff,0x22, // jmpq *(%rdx)
662 | 0xc3, // retq
663 | 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, // nop
664 | 0x90,0x90, // nop
665 | };
666 | // Callable function pointers that alias the two machine-code byte arrays above.
667 | void (*_llco_asm_entry)(void) =
668 | (void(*)(void))(void*)llco_wrap_main_code_entry;
669 | void (*_llco_asm_switch)(struct llco_asmctx *from,
670 | struct llco_asmctx *to) = (void(*)(struct llco_asmctx *from,
671 | struct llco_asmctx *to))(void*)llco_asm_switch_code;
672 | // Seed a context so the first switch into it runs the entry trampoline on the given stack, and record the stack bounds.
673 | static void llco_asmctx_make(struct llco_asmctx *ctx,
674 | void* stack_base, size_t stack_size, void *arg)
675 | {
676 | stack_size = stack_size - 32; // Reserve 32 bytes for the shadow space.
677 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size -
678 | sizeof(size_t)); // one word below the reduced top of the stack
679 | stack_high_ptr[0] = (void*)(0xdeaddeaddeaddead); // Dummy return address.
680 | ctx->rip = (void*)(_llco_asm_entry); // the switch code jumps to this slot
681 | ctx->rsp = (void*)(stack_high_ptr);
682 | ctx->r12 = (void*)(llco_entry); // jump target of the entry trampoline
683 | ctx->r13 = (void*)(arg); // moved to rcx by the entry trampoline
684 | void* stack_top = (void*)((size_t)stack_base + stack_size); // top after the 32-byte reservation
685 | ctx->stack_base = stack_top; // note: the stack_base field holds the high address
686 | ctx->stack_limit = stack_base; // low address of the caller-provided stack
687 | ctx->dealloc_stack = stack_base;
688 | }
689 |
690 | #else
691 | // Saved register context for non-Windows x64; _llco_asm_switch addresses the fields at 8-byte steps in this order.
692 | struct llco_asmctx {
693 | void *rip, *rsp, *rbp, *rbx, *r12, *r13, *r14, *r15;
694 | };
695 | // Prototypes for the two routines implemented in the file-scope assembly below.
696 | void _llco_asm_entry(void);
697 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to);
698 | // _llco_asm_entry: trampoline that passes r13 as the first argument (rdi) and jumps to the address in r12.
699 | __asm__(
700 | ".text\n"
701 | #ifdef __MACH__ /* Mac OS X assembler */
702 | ".globl __llco_asm_entry\n"
703 | "__llco_asm_entry:\n"
704 | #else /* Linux assembler */
705 | ".globl _llco_asm_entry\n"
706 | ".type _llco_asm_entry @function\n"
707 | ".hidden _llco_asm_entry\n"
708 | "_llco_asm_entry:\n"
709 | #endif
710 | " movq %r13, %rdi\n" /* r13 (ctx->r13) becomes the first argument */
711 | " jmpq *%r12\n" /* r12 (ctx->r12) is the jump target */
712 | #ifndef __MACH__
713 | ".size _llco_asm_entry, .-_llco_asm_entry\n"
714 | #endif
715 | );
716 | // _llco_asm_switch: save rip/rsp/rbp/rbx/r12-r15 into `from` (rdi), load them from `to` (rsi), and jump to the loaded rip.
717 | __asm__(
718 | ".text\n"
719 | #ifdef __MACH__ /* Mac OS assembler */
720 | ".globl __llco_asm_switch\n"
721 | "__llco_asm_switch:\n"
722 | #else /* Linux assembler */
723 | ".globl _llco_asm_switch\n"
724 | ".type _llco_asm_switch @function\n"
725 | ".hidden _llco_asm_switch\n"
726 | "_llco_asm_switch:\n"
727 | #endif
728 | " leaq 0x3d(%rip), %rax\n" /* resume address: the trailing ret; 0x3d depends on the encoded size of the instructions below */
729 | " movq %rax, (%rdi)\n"
730 | " movq %rsp, 8(%rdi)\n"
731 | " movq %rbp, 16(%rdi)\n"
732 | " movq %rbx, 24(%rdi)\n"
733 | " movq %r12, 32(%rdi)\n"
734 | " movq %r13, 40(%rdi)\n"
735 | " movq %r14, 48(%rdi)\n"
736 | " movq %r15, 56(%rdi)\n"
737 | " movq 56(%rsi), %r15\n"
738 | " movq 48(%rsi), %r14\n"
739 | " movq 40(%rsi), %r13\n"
740 | " movq 32(%rsi), %r12\n"
741 | " movq 24(%rsi), %rbx\n"
742 | " movq 16(%rsi), %rbp\n"
743 | " movq 8(%rsi), %rsp\n"
744 | " jmpq *(%rsi)\n" /* continue at the target's saved rip */
745 | " ret\n"
746 | #ifndef __MACH__
747 | ".size _llco_asm_switch, .-_llco_asm_switch\n"
748 | #endif
749 | );
750 |
751 | static void llco_asmctx_make(struct llco_asmctx *ctx,
752 | void* stack_base, size_t stack_size, void *arg)
753 | {
754 | // Reserve 128 bytes for the Red Zone space (System V AMD64 ABI).
755 | stack_size = stack_size - 128;
756 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size -
757 | sizeof(size_t));
758 | stack_high_ptr[0] = (void*)(0xdeaddeaddeaddead); // Dummy return address.
759 | ctx->rip = (void*)(_llco_asm_entry);
760 | ctx->rsp = (void*)(stack_high_ptr);
761 | ctx->r12 = (void*)(llco_entry);
762 | ctx->r13 = (void*)(arg);
763 | }
764 |
765 | #endif
766 | #endif // x64
767 |
768 | // --- END ASM Code --- //
769 |
770 | #if defined(__GNUC__)
771 | #pragma GCC diagnostic pop
772 | #endif
773 |
774 | ////////////////////////////////////////////////////////////////////////////////
775 | // ASM with stackjmp activated
776 | ////////////////////////////////////////////////////////////////////////////////
777 | #if defined(LLCO_READY) && defined(LLCO_STACKJMP)
778 | LLCO_NOINLINE LLCO_NORETURN
779 | static void llco_stackjmp(void *stack, size_t stack_size,
780 | void(*entry)(void *arg))
781 | {
782 | struct llco_asmctx ctx = { 0 };
783 | llco_asmctx_make(&ctx, stack, stack_size, 0);
784 | struct llco_asmctx ctx0 = { 0 };
785 | _llco_asm_switch(&ctx0, &ctx);
786 | llco_exit();
787 | }
788 | #endif
789 |
790 | ////////////////////////////////////////////////////////////////////////////////
791 | // Windows Fibers
792 | ////////////////////////////////////////////////////////////////////////////////
793 | #if defined(_WIN32) && !defined(LLCO_READY)
794 | #define LLCO_WINDOWS
795 | #define LLCO_READY
796 | #define LLCO_METHOD "fibers,windows"
797 |
798 | #ifndef _WIN32_WINNT
799 | #define _WIN32_WINNT 0x0400
800 | #endif
801 | #ifndef WIN32_LEAN_AND_MEAN
802 | #define WIN32_LEAN_AND_MEAN
803 | #endif
804 | #include <windows.h>
805 |
806 | #error Windows fibers unsupported
807 |
808 | #endif
809 |
810 | ////////////////////////////////////////////////////////////////////////////////
811 | // Webassembly Fibers
812 | ////////////////////////////////////////////////////////////////////////////////
813 | #if defined(__EMSCRIPTEN__) && !defined(LLCO_READY)
814 | #define LLCO_WASM
815 | #define LLCO_READY
816 | #define LLCO_METHOD "fibers,emscripten"
817 |
818 | #include
819 | #include
820 |
821 | #ifndef LLCO_ASYNCIFY_STACK_SIZE
822 | #define LLCO_ASYNCIFY_STACK_SIZE 4096
823 | #endif
824 |
825 | static __thread char llco_main_stack[LLCO_ASYNCIFY_STACK_SIZE];
826 |
827 | #endif
828 |
829 | ////////////////////////////////////////////////////////////////////////////////
830 | // Ucontext
831 | ////////////////////////////////////////////////////////////////////////////////
832 | #if !defined(LLCO_READY)
833 | #define LLCO_UCONTEXT
834 | #define LLCO_READY
835 | #define LLCO_METHOD "ucontext"
836 | #ifndef LLCO_STACKJMP
837 | #define LLCO_STACKJMP
838 | #endif
839 |
840 | #if defined(__FreeBSD__) || defined(__APPLE__)
841 | #ifdef __clang__
842 | #pragma clang diagnostic ignored "-Wdeprecated-declarations"
843 | #endif
844 | #define _XOPEN_SOURCE
845 | #endif
846 | #include <ucontext.h>
847 |
848 | static __thread ucontext_t stackjmp_ucallee;
849 | static __thread int stackjmp_ucallee_gotten = 0;
850 |
851 | #if defined(__APPLE__) && defined(__aarch64__) && !defined(LLCO_NOSTACKADJUST)
// Workaround so that the initial context switch does *not* page the whole
// stack into process memory before the entry point runs, a behavior that
// can be observed on Mac OS with Apple Silicon. Defining LLCO_NOSTACKADJUST
// removes it, at the cost of slower initial jumps into large stacks.
enum llco_stack_grows { DOWNWARDS, UPWARDS };

// Compare the address of a caller's local (`addr0`) with the address of a
// local in this deeper frame to decide the direction of stack growth.
static enum llco_stack_grows llco_stack_grows0(int *addr0) {
    int inner;
    if (addr0 < &inner) {
        return UPWARDS;
    }
    return DOWNWARDS;
}

// Report the direction in which the current stack grows.
static enum llco_stack_grows llco_stack_grows(void) {
    int outer;
    enum llco_stack_grows dir = llco_stack_grows0(&outer);
    return dir;
}
868 |
869 | static void llco_adjust_ucontext_stack(ucontext_t *ucp) {
870 | if (llco_stack_grows() == UPWARDS) {
871 | ucp->uc_stack.ss_sp = (char*)ucp->uc_stack.ss_sp+ucp->uc_stack.ss_size;
872 | ucp->uc_stack.ss_size = 0;
873 | }
874 | }
875 | #else
876 | #define llco_adjust_ucontext_stack(ucp)
877 | #endif
878 |
879 | // Ucontext always uses stackjmp with setjmp/longjmp, instead of swapcontext
880 | // becuase it's much faster.
881 | LLCO_NOINLINE LLCO_NORETURN
882 | static void llco_stackjmp(void *stack, size_t stack_size,
883 | void(*entry)(void *arg))
884 | {
885 | if (!stackjmp_ucallee_gotten) {
886 | stackjmp_ucallee_gotten = 1;
887 | getcontext(&stackjmp_ucallee);
888 | }
889 | stackjmp_ucallee.uc_stack.ss_sp = stack;
890 | stackjmp_ucallee.uc_stack.ss_size = stack_size;
891 | llco_adjust_ucontext_stack(&stackjmp_ucallee);
892 | makecontext(&stackjmp_ucallee, (void(*)(void))entry, 0);
893 | setcontext(&stackjmp_ucallee);
894 | llco_exit();
895 | }
896 |
897 | #endif // Ucontext
898 |
899 | #if defined(LLCO_STACKJMP)
900 | #include <setjmp.h>
901 | #ifdef _WIN32
902 | // For reasons outside of my understanding, Windows does not allow for jumping
903 | // between stacks using the setjmp/longjmp mechanism.
904 | #error Windows stackjmp not supported
905 | #endif
906 | #endif
907 |
908 | ////////////////////////////////////////////////////////////////////////////////
909 | // llco switching code
910 | ////////////////////////////////////////////////////////////////////////////////
911 |
912 | #include
913 |
914 | struct llco {
915 | struct llco_desc desc;
916 | #if defined(LLCO_STACKJMP)
917 | jmp_buf buf;
918 | #elif defined(LLCO_ASM)
919 | struct llco_asmctx ctx;
920 | #elif defined(LLCO_WASM)
921 | emscripten_fiber_t fiber;
922 | #elif defined(LLCO_WINDOWS)
923 | LPVOID fiber;
924 | #endif
925 | #ifdef LLCO_VALGRIND
926 | int valgrind_stack_id;
927 | #endif
928 | #if defined(__GNUC__)
929 | void *uw_stop_ip; // record of the last unwind ip.
930 | #endif
931 | };
932 |
933 | #ifdef LLCO_VALGRIND
934 | static __thread unsigned int llco_valgrind_stack_id = 0;
935 | static __thread unsigned int llco_cleanup_valgrind_stack_id = 0;
936 | #endif
937 |
938 | static __thread struct llco llco_thread = { 0 };
939 | static __thread struct llco *llco_cur = NULL;
940 | static __thread struct llco_desc llco_desc;
941 | static __thread volatile bool llco_cleanup_needed = false;
942 | static __thread volatile struct llco_desc llco_cleanup_desc;
943 | static __thread volatile bool llco_cleanup_active = false;
944 |
// Abort with a diagnostic naming the calling function when it is invoked
// while a cleanup callback is running (llco_cleanup_active is set).
// Wrapped in do/while(0) so the expansion is exactly one statement and can
// be used safely as the body of an unbraced if/else.
#define llco_cleanup_guard() do { \
    if (llco_cleanup_active) { \
        fprintf(stderr, "%s not available during cleanup\n", __func__); \
        abort(); \
    } \
} while (0)
951 |
952 | static void llco_cleanup_last(void) {
953 | if (llco_cleanup_needed) {
954 | if (llco_cleanup_desc.cleanup) {
955 | llco_cleanup_active = true;
956 | #ifdef LLCO_VALGRIND
957 | VALGRIND_STACK_DEREGISTER(llco_cleanup_valgrind_stack_id);
958 | #endif
959 | llco_cleanup_desc.cleanup(llco_cleanup_desc.stack,
960 | llco_cleanup_desc.stack_size, llco_cleanup_desc.udata);
961 | llco_cleanup_active = false;
962 | }
963 | llco_cleanup_needed = false;
964 | }
965 | }
966 |
967 | LLCO_NOINLINE
968 | static void llco_entry_wrap(void *arg) {
969 | llco_cleanup_last();
970 | #if defined(LLCO_WASM)
971 | llco_cur = arg;
972 | llco_cur->desc = llco_desc;
973 | #else
974 | (void)arg;
975 | struct llco self = { .desc = llco_desc };
976 | llco_cur = &self;
977 | #endif
978 | #ifdef LLCO_VALGRIND
979 | llco_cur->valgrind_stack_id = llco_valgrind_stack_id;
980 | #endif
981 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__)
982 | llco_cur->uw_stop_ip = __builtin_return_address(0);
983 | #endif
984 | llco_cur->desc.entry(llco_cur->desc.udata);
985 | }
986 |
987 |
988 | LLCO_NOINLINE LLCO_NORETURN
989 | static void llco_entry(void *arg) {
990 | llco_entry_wrap(arg);
991 | llco_exit();
992 | }
993 |
994 | LLCO_NOINLINE
995 | static void llco_switch1(struct llco *from, struct llco *to,
996 | void *stack, size_t stack_size)
997 | {
998 | #ifdef LLCO_VALGRIND
999 | llco_valgrind_stack_id = VALGRIND_STACK_REGISTER(stack, stack + stack_size);
1000 | #endif
1001 | #if defined(LLCO_STACKJMP)
1002 | if (to) {
1003 | if (!_setjmp(from->buf)) {
1004 | _longjmp(to->buf, 1);
1005 | }
1006 | } else {
1007 | if (!_setjmp(from->buf)) {
1008 | llco_stackjmp(stack, stack_size, llco_entry);
1009 | }
1010 | }
1011 | #elif defined(LLCO_ASM)
1012 | if (to) {
1013 | _llco_asm_switch(&from->ctx, &to->ctx);
1014 | } else {
1015 | struct llco_asmctx ctx = { 0 };
1016 | llco_asmctx_make(&ctx, stack, stack_size, 0);
1017 | _llco_asm_switch(&from->ctx, &ctx);
1018 | }
1019 | #elif defined(LLCO_WASM)
1020 | if (to) {
1021 | emscripten_fiber_swap(&from->fiber, &to->fiber);
1022 | } else {
1023 | if (from == &llco_thread) {
1024 | emscripten_fiber_init_from_current_context(&from->fiber,
1025 | llco_main_stack, LLCO_ASYNCIFY_STACK_SIZE);
1026 | }
1027 | stack_size -= LLCO_ASYNCIFY_STACK_SIZE;
1028 | char *astack = ((char*)stack) + stack_size;
1029 | size_t astack_size = LLCO_ASYNCIFY_STACK_SIZE - sizeof(struct llco);
1030 | struct llco *self = (void*)(astack + astack_size);
1031 | memset(self, 0, sizeof(struct llco));
1032 | emscripten_fiber_init(&self->fiber, llco_entry,
1033 | self, stack, stack_size, astack, astack_size);
1034 | emscripten_fiber_swap(&from->fiber, &self->fiber);
1035 | }
1036 | #elif defined(LLCO_WINDOWS)
1037 | // Unsupported
1038 | #endif
1039 | }
1040 |
1041 | static void llco_switch0(struct llco_desc *desc, struct llco *co,
1042 | bool final)
1043 | {
1044 | struct llco *from = llco_cur ? llco_cur : &llco_thread;
1045 | struct llco *to = desc ? NULL : co ? co : &llco_thread;
1046 | if (from != to) {
1047 | if (final) {
1048 | llco_cleanup_needed = true;
1049 | llco_cleanup_desc = from->desc;
1050 | #ifdef LLCO_VALGRIND
1051 | llco_cleanup_valgrind_stack_id = from->valgrind_stack_id;
1052 | #endif
1053 | }
1054 | if (desc) {
1055 | llco_desc = *desc;
1056 | llco_switch1(from, 0, desc->stack, desc->stack_size);
1057 | } else {
1058 | llco_cur = to;
1059 | llco_switch1(from, to, 0, 0);
1060 | }
1061 | llco_cleanup_last();
1062 | }
1063 | }
1064 |
1065 |
1066 |
1067 | ////////////////////////////////////////////////////////////////////////////////
1068 | // Exported methods
1069 | ////////////////////////////////////////////////////////////////////////////////
1070 |
1071 | // Start a new coroutine.
1072 | LLCO_EXTERN
1073 | void llco_start(struct llco_desc *desc, bool final) {
1074 | if (!desc || desc->stack_size < LLCO_MINSTACKSIZE) {
1075 | fprintf(stderr, "stack too small\n");
1076 | abort();
1077 | }
1078 | llco_cleanup_guard();
1079 | llco_switch0(desc, 0, final);
1080 | }
1081 |
1082 | // Switch to another coroutine.
1083 | LLCO_EXTERN
1084 | void llco_switch(struct llco *co, bool final) {
1085 | #if defined(LLCO_ASM)
1086 | // fast track context switch. Saves a few nanoseconds by checking the
1087 | // exception condition first.
1088 | if (!llco_cleanup_active && llco_cur && co && llco_cur != co && !final) {
1089 | struct llco *from = llco_cur;
1090 | llco_cur = co;
1091 | _llco_asm_switch(&from->ctx, &co->ctx);
1092 | llco_cleanup_last();
1093 | return;
1094 | }
1095 | #endif
1096 | llco_cleanup_guard();
1097 | llco_switch0(0, co, final);
1098 | }
1099 |
1100 | // Return the current coroutine or NULL if not currently running in a
1101 | // coroutine.
1102 | LLCO_EXTERN
1103 | struct llco *llco_current(void) {
1104 | llco_cleanup_guard();
1105 | return llco_cur == &llco_thread ? 0 : llco_cur;
1106 | }
1107 |
1108 | // Returns a string that indicates which coroutine method is being used by
1109 | // the program. Such as "asm" or "ucontext", etc.
1110 | LLCO_EXTERN
1111 | const char *llco_method(void *caps) {
1112 | (void)caps;
1113 | return LLCO_METHOD
1114 | #ifdef LLCO_STACKJMP
1115 | ",stackjmp"
1116 | #endif
1117 | ;
1118 | }
1119 |
1120 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(_WIN32) && \
1121 | !defined(LLCO_NOUNWIND) && !defined(__COSMOCC__)
1122 |
1123 | #include
1124 | #include
1125 | #include
1126 |
// Local declaration of the result record filled in by dladdr().
struct llco_dlinfo {
    const char *dli_fname;  // Pathname of shared object
    void *dli_fbase;        // Base address of shared object
    const char *dli_sname;  // Name of nearest symbol
    void *dli_saddr;        // Address of nearest symbol
};

// Declare dladdr ourselves on Linux when _GNU_SOURCE is not defined.
#if defined(__linux__) && !defined(_GNU_SOURCE)
int dladdr(const void *, void *);
#endif
1137 |
1138 | static void llco_getsymbol(struct _Unwind_Context *uwc,
1139 | struct llco_symbol *sym)
1140 | {
1141 | memset(sym, 0, sizeof(struct llco_symbol));
1142 | sym->cfa = (void*)_Unwind_GetCFA(uwc);
1143 | int ip_before; /* unused */
1144 | sym->ip = (void*)_Unwind_GetIPInfo(uwc, &ip_before);
1145 | struct llco_dlinfo dlinfo = { 0 };
1146 | if (sym->ip && dladdr(sym->ip, (void*)&dlinfo)) {
1147 | sym->fname = dlinfo.dli_fname;
1148 | sym->fbase = dlinfo.dli_fbase;
1149 | sym->sname = dlinfo.dli_sname;
1150 | sym->saddr = dlinfo.dli_saddr;
1151 | }
1152 | }
1153 |
1154 | struct llco_unwind_context {
1155 | void *udata;
1156 | void *start_ip;
1157 | bool started;
1158 | int nsymbols;
1159 | int nsymbols_actual;
1160 | struct llco_symbol last;
1161 | bool (*func)(struct llco_symbol *, void *);
1162 | void *unwind_addr;
1163 | };
1164 |
1165 | static _Unwind_Reason_Code llco_func(struct _Unwind_Context *uwc, void *ptr) {
1166 | struct llco_unwind_context *ctx = ptr;
1167 |
1168 | struct llco *cur = llco_current();
1169 | if (cur && !cur->uw_stop_ip) {
1170 | return _URC_END_OF_STACK;
1171 | }
1172 | struct llco_symbol sym;
1173 | llco_getsymbol(uwc, &sym);
1174 | if (ctx->start_ip && !ctx->started && sym.ip != ctx->start_ip) {
1175 | return _URC_NO_REASON;
1176 | }
1177 | ctx->started = true;
1178 | if (!sym.ip || (cur && sym.ip == cur->uw_stop_ip)) {
1179 | return _URC_END_OF_STACK;
1180 | }
1181 | ctx->nsymbols++;
1182 | if (!cur) {
1183 | ctx->nsymbols_actual++;
1184 | if (ctx->func && !ctx->func(&sym, ctx->udata)) {
1185 | return _URC_END_OF_STACK;
1186 | }
1187 | } else {
1188 | if (ctx->nsymbols > 1) {
1189 | ctx->nsymbols_actual++;
1190 | if (ctx->func && !ctx->func(&ctx->last, ctx->udata)) {
1191 | return _URC_END_OF_STACK;
1192 | }
1193 | }
1194 | ctx->last = sym;
1195 | }
1196 | return _URC_NO_REASON;
1197 | }
1198 |
1199 | LLCO_EXTERN
1200 | int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata){
1201 | struct llco_unwind_context ctx = {
1202 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__)
1203 | .start_ip = __builtin_return_address(0),
1204 | #endif
1205 | .func = func,
1206 | .udata = udata
1207 | };
1208 | _Unwind_Backtrace(llco_func, &ctx);
1209 | return ctx.nsymbols_actual;
1210 | }
1211 |
1212 | #else
1213 |
1214 | LLCO_EXTERN
1215 | int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata){
1216 | (void)func; (void)udata;
1217 | /* Unsupported */
1218 | return 0;
1219 | }
1220 |
1221 | #endif
1222 |
--------------------------------------------------------------------------------
/llco.h:
--------------------------------------------------------------------------------
#ifndef LLCO_H
#define LLCO_H

#include <stdbool.h>  // bool
#include <stddef.h>   // size_t

// Smallest stack size, in bytes, accepted by llco_start().
#define LLCO_MINSTACKSIZE 16384

// Describes a coroutine to be started with llco_start().
struct llco_desc {
    void *stack;        // memory used as the coroutine's stack
    size_t stack_size;  // size of `stack` in bytes (>= LLCO_MINSTACKSIZE)
    void (*entry)(void *udata);  // function run on the new stack
    // Optional; called with the fields of this descriptor after the
    // coroutine has switched away with `final` set.
    void (*cleanup)(void *stack, size_t stack_size, void *udata);
    void *udata;        // user data passed to `entry` and `cleanup`
};

struct llco;

// Return the current coroutine, or NULL if not currently running in a
// coroutine.
struct llco *llco_current(void);

// Start a new coroutine. Aborts if `desc` is NULL or its stack is smaller
// than LLCO_MINSTACKSIZE. `final` marks the calling context for cleanup.
void llco_start(struct llco_desc *desc, bool final);

// Switch to coroutine `co`, or to the thread's original context when `co`
// is NULL. `final` marks the calling context for cleanup.
void llco_switch(struct llco *co, bool final);

// Return a string naming the coroutine method in use. `caps` is ignored.
const char *llco_method(void *caps);

// Coroutine stack unwinding
struct llco_symbol {
    void *cfa;          // Canonical Frame Address
    void *ip;           // Instruction Pointer
    const char *fname;  // Pathname of shared object
    void *fbase;        // Base address of shared object
    const char *sname;  // Name of nearest symbol
    void *saddr;        // Address of nearest symbol
};

// Walk the call stack, calling `func` for each frame until it returns
// false. Returns the number of frames reported (0 where unsupported).
int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata);

#endif // LLCO_H
37 |
--------------------------------------------------------------------------------
/test.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include "llco.h"
9 |
#define STKSZ 32768

char tstr[512];              // scratch buffer for one formatted message
char tresult[4096] = { 0 };  // everything printed so far, concatenated

// Print a formatted message to stdout and append the same text to tresult
// so the complete output can be compared at the end of the test.
// The arguments are evaluated twice, so they must be free of side effects.
// The append is bounded: text that does not fit into tresult is dropped
// rather than written past the end of the buffer.
#define tprintf(...) do { \
    printf(__VA_ARGS__); \
    snprintf(tstr, sizeof(tstr), __VA_ARGS__); \
    strncat(tresult, tstr, sizeof(tresult) - strlen(tresult) - 1); \
} while (0)
20 |
// Cleanup callback: logs which coroutine (identified by udata) is being
// cleaned up and releases its stack.
void cleanup(void *stk, size_t stksz, void *udata) {
    int id = (int)(intptr_t)udata;
    tprintf("(cleanup %d)\n", id);
    free(stk);
}
25 |
// Handles of the three test coroutines, each set by its own entry function.
struct llco *co1;
struct llco *co2;
struct llco *co3;

// Third coroutine: hands control to co1, and once resumed switches to co1
// for good.
void entry3(void *udata) {
    co3 = llco_current();
    int id = (int)(intptr_t)udata;
    tprintf("(entry %d)\n", id);
    llco_switch(co1, false);
    tprintf("(mark C)\n");
    llco_switch(co1, true);
}
35 |
36 | void entry2(void *udata) {
37 | co2 = llco_current();
38 | tprintf("(entry %d)\n", (int)(intptr_t)udata);
39 | struct llco_desc desc = {
40 | .stack = malloc(STKSZ),
41 | .stack_size = STKSZ,
42 | .entry = entry3,
43 | .cleanup = cleanup,
44 | .udata = (void*)3,
45 | };
46 | llco_start(&desc, true);
47 | }
48 |
49 | void entry1(void *udata) {
50 | co1 = llco_current();
51 | tprintf("(entry %d)\n", (int)(intptr_t)udata);
52 | struct llco_desc desc = {
53 | .stack = malloc(STKSZ),
54 | .stack_size = STKSZ,
55 | .entry = entry2,
56 | .cleanup = cleanup,
57 | .udata = (void*)2,
58 | };
59 | llco_start(&desc, false);
60 | tprintf("(mark B)\n");
61 | llco_switch(co3, false);
62 | tprintf("(mark D)\n");
63 | llco_switch(0, true);
64 | }
65 |
// Cleanup callback for the benchmark coroutines: just frees the stack.
void perfcleanup(void *stk, size_t stksz, void *udata) {
    (void)stksz;
    (void)udata;
    free(stk);
}
69 |
70 | // Return monotonic nanoseconds of the CPU clock.
71 | static int64_t getnow(void) {
72 | struct timespec now = { 0 };
73 | clock_gettime(CLOCK_MONOTONIC, &now);
74 | return now.tv_sec * INT64_C(1000000000) + now.tv_nsec;
75 | }
76 |
// Benchmark timestamps in nanoseconds, taken by perf2.
int64_t perfstart;
int64_t perfend;

// Handles of the two benchmark coroutines.
struct llco *perf1co;
struct llco *perf2co;

// Total number of switches performed by the benchmark.
#define NSWITCHES 10000000
// Switches performed so far, shared by both coroutines.
int perf_count = 0;
84 |
85 | void perf1(void *udata) {
86 | perf1co = llco_current();
87 | llco_switch(0, 0);
88 | while (perf_count < NSWITCHES) {
89 | perf_count++;
90 | llco_switch(perf2co, 0);
91 | }
92 | llco_switch(0, 1);
93 | }
94 |
95 | void perf2(void *udata) {
96 | perf2co = llco_current();
97 | perfstart = getnow();
98 | while (perf_count < NSWITCHES) {
99 | perf_count++;
100 | llco_switch(perf1co, 0);
101 | }
102 | perfend = getnow();
103 | llco_switch(perf1co, 1);
104 | }
105 |
106 | void test_perf() {
107 | struct llco_desc desc1 = {
108 | .stack = malloc(STKSZ),
109 | .stack_size = STKSZ,
110 | .entry = perf1,
111 | .cleanup = perfcleanup,
112 | .udata = (void*)1,
113 | };
114 | llco_start(&desc1, false);
115 |
116 | struct llco_desc desc2 = {
117 | .stack = malloc(STKSZ),
118 | .stack_size = STKSZ,
119 | .entry = perf2,
120 | .cleanup = perfcleanup,
121 | .udata = (void*)2,
122 | };
123 | llco_start(&desc2, false);
124 |
125 | printf("perf: %ld switches in %.3f secs, %ld ns / switch\n",
126 | (long)perf_count, (double)(perfend-perfstart)/1e9,
127 | (long)((perfend-perfstart) / perf_count));
128 |
129 | }
130 |
131 | int main(void) {
132 | printf("%s\n", llco_method(0));
133 | assert(!llco_current());
134 | tprintf("(mark A)\n");
135 | struct llco_desc desc = {
136 | .stack = malloc(STKSZ),
137 | .stack_size = STKSZ,
138 | .entry = entry1,
139 | .cleanup = cleanup,
140 | .udata = (void*)1,
141 | };
142 | llco_start(&desc, false);
143 | tprintf("(mark E)\n");
144 | assert(!llco_current());
145 | const char *exp =
146 | "(mark A)\n"
147 | "(entry 1)\n"
148 | "(entry 2)\n"
149 | "(cleanup 2)\n"
150 | "(entry 3)\n"
151 | "(mark B)\n"
152 | "(mark C)\n"
153 | "(cleanup 3)\n"
154 | "(mark D)\n"
155 | "(cleanup 1)\n"
156 | "(mark E)\n";
157 | if (strcmp(tresult, exp) != 0) {
158 | printf("== expected ==\n%s", exp);
159 | printf("FAILED\n");
160 | } else {
161 | test_perf();
162 | printf("PASSED\n");
163 | }
164 | return 0;
165 | }
166 |
167 |
--------------------------------------------------------------------------------
/unwind.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "llco.h"
6 |
7 | #define STKSZ 32768
8 |
// Cleanup callback: announces the cleanup and frees the coroutine's stack.
void cleanup(void *stk, size_t stksz, void *udata) {
    (void)stksz;
    (void)udata;
    printf("== CLEANUP ==\n");
    free(stk);
}
13 |
14 |
15 | bool symbol(struct llco_symbol *sym, void *udata) {
16 | printf("== UNWIND ==\n");
17 | printf(" cfa: %p\n", sym->cfa);
18 | printf(" ip: %p\n", sym->ip);
19 | printf(" fname: %s\n", sym->fname);
20 | printf(" fbase: %p\n", sym->fbase);
21 | printf(" sname: %s\n", sym->sname);
22 | printf(" saddr: %p\n", sym->saddr);
23 | return true;
24 | }
25 |
26 | __attribute__((noinline)) void func4(int x) {
27 | printf("func4 (%d)\n", x);
28 | printf("nsymbols: %d\n", llco_unwind(symbol, 0));
29 | // _Unwind_Backtrace(btfnc, NULL);
30 | }
31 |
// Chain of non-inlined calls that gives the unwinder distinct frames to
// report: func1 -> func2 -> func3 -> func4.
__attribute__((noinline)) void func3(int x) {
    printf("func3\n");
    func4(x);
}

__attribute__((noinline)) void func2(int x) {
    printf("func2\n");
    func3(x);
}

__attribute__((noinline)) void func1(int x) {
    printf("func1\n");
    func2(x);
}
35 |
36 |
37 | // __attribute__((noinline))
// Coroutine body: runs the call chain that triggers the unwind, then
// switches back to the main context for good.
void entry(void *udata) {
    (void)udata;
    printf("== COROUTINE ==\n");
    func1(10);
    printf("== SWITCH TO MAIN ==\n");
    llco_switch(0, true);
}
45 |
46 | int main(void) {
47 | printf("%s\n", llco_method(0));
48 | printf("== MAIN ==\n");
49 | struct llco_desc desc = {
50 | .stack = malloc(STKSZ),
51 | .stack_size = STKSZ,
52 | .entry = entry,
53 | .cleanup = cleanup,
54 | .udata = (void*)1,
55 | };
56 | llco_start(&desc, false);
57 | printf("== EXIT ==\n");
58 | printf("nsymbols: %d\n", llco_unwind(symbol, 0));
59 | return 0;
60 | }
61 |
62 |
--------------------------------------------------------------------------------