├── .gitignore ├── .package ├── LICENSE ├── README.md ├── llco.c ├── llco.h ├── test.c └── unwind.c /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.dSYM 3 | *.out 4 | *.o 5 | *.out.* 6 | -------------------------------------------------------------------------------- /.package: -------------------------------------------------------------------------------- 1 | file llco.c 2 | file llco.h 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is available as a choice of the following licenses. Choose 2 | whichever you prefer. 3 | 4 | =============================================================================== 5 | ALTERNATIVE 1 - Public Domain (www.unlicense.org) 6 | =============================================================================== 7 | This is free and unencumbered software released into the public domain. 8 | 9 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 10 | software, either in source code form or as a compiled binary, for any purpose, 11 | commercial or non-commercial, and by any means. 12 | 13 | In jurisdictions that recognize copyright laws, the author or authors of this 14 | software dedicate any and all copyright interest in the software to the public 15 | domain. We make this dedication for the benefit of the public at large and to 16 | the detriment of our heirs and successors. We intend this dedication to be an 17 | overt act of relinquishment in perpetuity of all present and future rights to 18 | this software under copyright law. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 23 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 24 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | 27 | For more information, please refer to <http://unlicense.org/> 28 | 29 | =============================================================================== 30 | ALTERNATIVE 2 - MIT No Attribution 31 | =============================================================================== 32 | Copyright (c) 2024 Joshua J Baker (https://github.com/tidwall/llco) 33 | 34 | Permission is hereby granted, free of charge, to any person obtaining a copy of 35 | this software and associated documentation files (the "Software"), to deal in 36 | the Software without restriction, including without limitation the rights to 37 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 38 | of the Software, and to permit persons to whom the Software is furnished to do 39 | so. 40 | 41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 47 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # llco 2 | 3 | A low-level coroutine library for C. 4 | 5 | The main purpose of this project is to power 6 | [sco](https://github.com/tidwall/sco) and 7 | [neco](https://github.com/tidwall/neco), which are more general purpose 8 | coroutine libraries. 
9 | 10 | ## Features 11 | 12 | - Stackful coroutines. 13 | - No allocations (bring your own stack) 14 | - Cross-platform. Linux, Mac, WebAssembly, iOS, Android, FreeBSD, Windows, RaspPi, RISC-V 15 | - Fast context switching. Uses assembly in most cases 16 | - No built-in scheduler. You are in charge of the coroutine priority 17 | - Single file amalgamation. No dependencies. 18 | 19 | ## Example 20 | 21 | ```C 22 | 23 | void cleanup(void *stack, size_t stack_size, void *udata) { 24 | // Free the coroutine stack 25 | free(stack); 26 | } 27 | 28 | void entry(void *udata) { 29 | printf("Coroutine started\n"); 30 | // Switch back to the main thread and clean up this coroutine 31 | llco_switch(0, true); 32 | } 33 | 34 | int main(void) { 35 | // Start a coroutine from the main function using a newly allocated stack. 36 | struct llco_desc desc = { 37 | .stack = malloc(LLCO_MINSTACKSIZE), 38 | .stack_size = LLCO_MINSTACKSIZE, 39 | .entry = entry, 40 | .cleanup = cleanup, 41 | }; 42 | llco_start(&desc, false); 43 | printf("Back to main\n"); 44 | } 45 | 46 | ``` 47 | 48 | ## API 49 | 50 | ```C 51 | // Switch to another coroutine. Set the `co` param to NULL to switch to the 52 | // main function. Use the final param to tell the program that you are done 53 | // with the current coroutine, at which point its respective `cleanup` 54 | // callback will be called. 55 | void llco_switch(struct llco *co, bool final); 56 | 57 | // Start a new coroutine. This can be called from the main function or a 58 | // nested coroutine. 59 | void llco_start(struct llco_desc *desc, bool final); 60 | 61 | 62 | // Return the current coroutine or NULL if not currently running in a 63 | // coroutine. 64 | struct llco *llco_current(void); 65 | 66 | // Returns a string that indicates which coroutine method is being used by 67 | // the program. Such as "asm" or "ucontext", etc. 
68 | const char *llco_method(void *caps); 69 | ``` 70 | 71 | ## Caveats 72 | 73 | - Windows: Only x86_64 is supported at this time. The Windows Fibers API is not 74 | currently suitable as a fallback due to the `CreateFiber` call needing to 75 | allocate memory dynamically. 76 | - WebAssembly: Must be compiled with Emscripten using the `-sASYNCIFY` flag. 77 | - All other platforms may fall back to using ucontext when the assembly method 78 | is not available. The `llco_method(0)` function can be used to see if assembly 79 | or ucontext is being used. 80 | - The ucontext fallback method only uses the ucontext API when starting a 81 | coroutine. Once the coroutine has been started, `setjmp` and `longjmp` take 82 | over the switching duties. 83 | 84 | ## Compiler Options 85 | 86 | - `-DLLCO_NOASM`: Disable assembly. Use ucontext fallback instead. 87 | - `-DLLCO_STACKJMP`: Use `setjmp` and `longjmp` for jumping between stacks, 88 | even with the assembly method. 89 | - `-DLLCO_VALGRIND`: Enable support for Valgrind 90 | - `-DLLCO_NOUNWIND`: Disable support for stack unwinding 91 | 92 | ## Running tests 93 | 94 | ```sh 95 | # Basic test 96 | cc llco.c test.c && ./a.out 97 | 98 | # Using Valgrind 99 | cc -DLLCO_VALGRIND llco.c test.c && valgrind ./a.out 100 | 101 | # Using Emscripten (WebAssembly) 102 | emcc -sASYNCIFY llco.c test.c && node ./a.out.js 103 | ``` 104 | 105 | ## Acknowledgements 106 | 107 | Much of the assembly code was adapted from the [minicoro](https://github.com/edubart/minicoro) 108 | project by [Eduardo Bart](https://github.com/edubart), which was originally 109 | adapted from the [Lua Coco](https://coco.luajit.org) project by 110 | [Mike Pall](https://github.com/MikePall). 111 | 112 | ## License 113 | 114 | Public Domain or MIT No Attribution, your choice. 
115 | -------------------------------------------------------------------------------- /llco.c: -------------------------------------------------------------------------------- 1 | // https://github.com/tidwall/llco 2 | // 3 | // Copyright (c) 2024 Joshua J Baker. 4 | // This software is available as a choice of Public Domain or MIT-0. 5 | 6 | #ifdef _FORTIFY_SOURCE 7 | #define LLCO_FORTIFY_SOURCE _FORTIFY_SOURCE 8 | // Disable __longjmp_chk validation so that we can jump between stacks. 9 | #pragma push_macro("_FORTIFY_SOURCE") 10 | #undef _FORTIFY_SOURCE 11 | #include 12 | #define _FORTIFY_SOURCE LLCO_FORTIFY_SOURCE 13 | #undef LLCO_FORTIFY_SOURCE 14 | #pragma pop_macro("_FORTIFY_SOURCE") 15 | #endif 16 | 17 | #ifndef LLCO_STATIC 18 | #include "llco.h" 19 | #else 20 | #include 21 | #include 22 | #define LLCO_MINSTACKSIZE 16384 23 | #define LLCO_EXTERN static 24 | struct llco_desc { 25 | void *stack; 26 | size_t stack_size; 27 | void (*entry)(void *udata); 28 | void (*cleanup)(void *stack, size_t stack_size, void *udata); 29 | void *udata; 30 | }; 31 | struct llco_symbol { 32 | void *cfa; 33 | void *ip; 34 | const char *fname; 35 | void *fbase; 36 | const char *sname; 37 | void *saddr; 38 | }; 39 | #endif 40 | 41 | #include 42 | 43 | #ifdef LLCO_VALGRIND 44 | #include 45 | #endif 46 | 47 | #ifndef LLCO_EXTERN 48 | #define LLCO_EXTERN 49 | #endif 50 | 51 | #if defined(__GNUC__) 52 | #ifdef noinline 53 | #define LLCO_NOINLINE noinline 54 | #else 55 | #define LLCO_NOINLINE __attribute__ ((noinline)) 56 | #endif 57 | #ifdef noreturn 58 | #define LLCO_NORETURN noreturn 59 | #else 60 | #define LLCO_NORETURN __attribute__ ((noreturn)) 61 | #endif 62 | #else 63 | #define LLCO_NOINLINE 64 | #define LLCO_NORETURN 65 | #endif 66 | 67 | #if defined(_MSC_VER) 68 | #define __thread __declspec(thread) 69 | #endif 70 | 71 | static void llco_entry(void *arg); 72 | 73 | LLCO_NORETURN 74 | static void llco_exit(void) { 75 | _Exit(0); 76 | } 77 | 78 | #ifdef LLCO_ASM 79 | #error 
LLCO_ASM must not be defined 80 | #endif 81 | 82 | #if defined(__COSMOCC__) && !defined(LLCO_NOASM) 83 | // Cosmopolitan has issues with asm code 84 | #define LLCO_NOASM 85 | #endif 86 | 87 | // Passing the entry function into assembly requires casting the function 88 | // pointer to an object pointer, which is forbidden in the ISO C spec but 89 | // allowed in POSIX. Ignore the warning attributed to this requirement when 90 | // the -pedantic compiler flag is provided. 91 | #if defined(__GNUC__) 92 | #pragma GCC diagnostic push 93 | #pragma GCC diagnostic ignored "-Wpedantic" 94 | #endif 95 | 96 | //////////////////////////////////////////////////////////////////////////////// 97 | // Below is various assembly code adapted from the Lua Coco [MIT] and Minicoro 98 | // [MIT-0] projects by Mike Pall and Eduardo Bart respectively. 99 | //////////////////////////////////////////////////////////////////////////////// 100 | 101 | /* 102 | Lua Coco (coco.luajit.org) 103 | Copyright (C) 2004-2016 Mike Pall. All rights reserved. 104 | 105 | Permission is hereby granted, free of charge, to any person obtaining 106 | a copy of this software and associated documentation files (the 107 | "Software"), to deal in the Software without restriction, including 108 | without limitation the rights to use, copy, modify, merge, publish, 109 | distribute, sublicense, and/or sell copies of the Software, and to 110 | permit persons to whom the Software is furnished to do so, subject to 111 | the following conditions: 112 | 113 | The above copyright notice and this permission notice shall be 114 | included in all copies or substantial portions of the Software. 115 | 116 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 117 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 118 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
119 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 120 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 121 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 122 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 123 | */ 124 | 125 | //////////////////////////////////////////////////////////////////////////////// 126 | // ARM 127 | //////////////////////////////////////////////////////////////////////////////// 128 | #if defined(__ARM_EABI__) && !defined(LLCO_NOASM) 129 | #define LLCO_ASM 130 | #define LLCO_READY 131 | #define LLCO_METHOD "asm,arm_eabi" 132 | 133 | struct llco_asmctx { 134 | #ifndef __SOFTFP__ 135 | void* f[16]; 136 | #endif 137 | void *d[4]; /* d8-d15 */ 138 | void *r[4]; /* r4-r11 */ 139 | void *lr; 140 | void *sp; 141 | }; 142 | 143 | void _llco_asm_entry(void); 144 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to); 145 | 146 | __asm__( 147 | ".text\n" 148 | #ifdef __APPLE__ 149 | ".globl __llco_asm_switch\n" 150 | "__llco_asm_switch:\n" 151 | #else 152 | ".globl _llco_asm_switch\n" 153 | ".type _llco_asm_switch #function\n" 154 | ".hidden _llco_asm_switch\n" 155 | "_llco_asm_switch:\n" 156 | #endif 157 | #ifndef __SOFTFP__ 158 | " vstmia r0!, {d8-d15}\n" 159 | #endif 160 | " stmia r0, {r4-r11, lr}\n" 161 | " str sp, [r0, #9*4]\n" 162 | #ifndef __SOFTFP__ 163 | " vldmia r1!, {d8-d15}\n" 164 | #endif 165 | " ldr sp, [r1, #9*4]\n" 166 | " ldmia r1, {r4-r11, pc}\n" 167 | #ifndef __APPLE__ 168 | ".size _llco_asm_switch, .-_llco_asm_switch\n" 169 | #endif 170 | ); 171 | 172 | __asm__( 173 | ".text\n" 174 | #ifdef __APPLE__ 175 | ".globl __llco_asm_entry\n" 176 | "__llco_asm_entry:\n" 177 | #else 178 | ".globl _llco_asm_entry\n" 179 | ".type _llco_asm_entry #function\n" 180 | ".hidden _llco_asm_entry\n" 181 | "_llco_asm_entry:\n" 182 | #endif 183 | " mov r0, r4\n" 184 | " mov ip, r5\n" 185 | " mov lr, r6\n" 186 | " bx ip\n" 187 | #ifndef __APPLE__ 188 | ".size 
_llco_asm_entry, .-_llco_asm_entry\n" 189 | #endif 190 | ); 191 | 192 | static void llco_asmctx_make(struct llco_asmctx *ctx, void* stack_base, 193 | size_t stack_size, void *arg) 194 | { 195 | ctx->d[0] = (void*)(arg); 196 | ctx->d[1] = (void*)(llco_entry); 197 | ctx->d[2] = (void*)(0xdeaddead); /* Dummy return address. */ 198 | ctx->lr = (void*)(_llco_asm_entry); 199 | ctx->sp = (void*)((size_t)stack_base + stack_size); 200 | } 201 | 202 | #endif 203 | 204 | //////////////////////////////////////////////////////////////////////////////// 205 | // ARM 64-bit 206 | //////////////////////////////////////////////////////////////////////////////// 207 | #if defined(__aarch64__) && !defined(LLCO_NOASM) 208 | #define LLCO_ASM 209 | #define LLCO_READY 210 | #define LLCO_METHOD "asm,aarch64" 211 | 212 | struct llco_asmctx { 213 | void *x[12]; /* x19-x30 */ 214 | void *sp; 215 | void *lr; 216 | void *d[8]; /* d8-d15 */ 217 | }; 218 | 219 | void _llco_asm_entry(void); 220 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to); 221 | 222 | __asm__( 223 | ".text\n" 224 | #ifdef __APPLE__ 225 | ".globl __llco_asm_switch\n" 226 | "__llco_asm_switch:\n" 227 | #else 228 | ".globl _llco_asm_switch\n" 229 | ".type _llco_asm_switch #function\n" 230 | ".hidden _llco_asm_switch\n" 231 | "_llco_asm_switch:\n" 232 | #endif 233 | 234 | " mov x10, sp\n" 235 | " mov x11, x30\n" 236 | " stp x19, x20, [x0, #(0*16)]\n" 237 | " stp x21, x22, [x0, #(1*16)]\n" 238 | " stp d8, d9, [x0, #(7*16)]\n" 239 | " stp x23, x24, [x0, #(2*16)]\n" 240 | " stp d10, d11, [x0, #(8*16)]\n" 241 | " stp x25, x26, [x0, #(3*16)]\n" 242 | " stp d12, d13, [x0, #(9*16)]\n" 243 | " stp x27, x28, [x0, #(4*16)]\n" 244 | " stp d14, d15, [x0, #(10*16)]\n" 245 | " stp x29, x30, [x0, #(5*16)]\n" 246 | " stp x10, x11, [x0, #(6*16)]\n" 247 | " ldp x19, x20, [x1, #(0*16)]\n" 248 | " ldp x21, x22, [x1, #(1*16)]\n" 249 | " ldp d8, d9, [x1, #(7*16)]\n" 250 | " ldp x23, x24, [x1, #(2*16)]\n" 251 | " ldp d10, d11, 
[x1, #(8*16)]\n" 252 | " ldp x25, x26, [x1, #(3*16)]\n" 253 | " ldp d12, d13, [x1, #(9*16)]\n" 254 | " ldp x27, x28, [x1, #(4*16)]\n" 255 | " ldp d14, d15, [x1, #(10*16)]\n" 256 | " ldp x29, x30, [x1, #(5*16)]\n" 257 | " ldp x10, x11, [x1, #(6*16)]\n" 258 | " mov sp, x10\n" 259 | " br x11\n" 260 | #ifndef __APPLE__ 261 | ".size _llco_asm_switch, .-_llco_asm_switch\n" 262 | #endif 263 | ); 264 | 265 | __asm__( 266 | ".text\n" 267 | #ifdef __APPLE__ 268 | ".globl __llco_asm_entry\n" 269 | "__llco_asm_entry:\n" 270 | #else 271 | ".globl _llco_asm_entry\n" 272 | ".type _llco_asm_entry #function\n" 273 | ".hidden _llco_asm_entry\n" 274 | "_llco_asm_entry:\n" 275 | #endif 276 | " mov x0, x19\n" 277 | " mov x30, x21\n" 278 | " br x20\n" 279 | #ifndef __APPLE__ 280 | ".size _llco_asm_entry, .-_llco_asm_entry\n" 281 | #endif 282 | ); 283 | 284 | static void llco_asmctx_make(struct llco_asmctx *ctx, void* stack_base, 285 | size_t stack_size, void *arg) 286 | { 287 | ctx->x[0] = (void*)(arg); 288 | ctx->x[1] = (void*)(llco_entry); 289 | ctx->x[2] = (void*)(0xdeaddeaddeaddead); /* Dummy return address. 
*/ 290 | ctx->sp = (void*)((size_t)stack_base + stack_size); 291 | ctx->lr = (void*)(_llco_asm_entry); 292 | } 293 | #endif 294 | 295 | //////////////////////////////////////////////////////////////////////////////// 296 | // RISC-V (rv64/rv32) 297 | //////////////////////////////////////////////////////////////////////////////// 298 | #if defined(__riscv) && !defined(LLCO_NOASM) 299 | #define LLCO_ASM 300 | #define LLCO_READY 301 | #define LLCO_METHOD "asm,riscv" 302 | 303 | struct llco_asmctx { 304 | void* s[12]; /* s0-s11 */ 305 | void* ra; 306 | void* pc; 307 | void* sp; 308 | #ifdef __riscv_flen 309 | #if __riscv_flen == 64 310 | double fs[12]; /* fs0-fs11 */ 311 | #elif __riscv_flen == 32 312 | float fs[12]; /* fs0-fs11 */ 313 | #endif 314 | #endif /* __riscv_flen */ 315 | }; 316 | 317 | void _llco_asm_entry(void); 318 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to); 319 | 320 | __asm__( 321 | ".text\n" 322 | ".globl _llco_asm_entry\n" 323 | ".type _llco_asm_entry @function\n" 324 | ".hidden _llco_asm_entry\n" 325 | "_llco_asm_entry:\n" 326 | " mv a0, s0\n" 327 | " jr s1\n" 328 | ".size _llco_asm_entry, .-_llco_asm_entry\n" 329 | ); 330 | 331 | __asm__( 332 | ".text\n" 333 | ".globl _llco_asm_switch\n" 334 | ".type _llco_asm_switch @function\n" 335 | ".hidden _llco_asm_switch\n" 336 | "_llco_asm_switch:\n" 337 | #if __riscv_xlen == 64 338 | " sd s0, 0x00(a0)\n" 339 | " sd s1, 0x08(a0)\n" 340 | " sd s2, 0x10(a0)\n" 341 | " sd s3, 0x18(a0)\n" 342 | " sd s4, 0x20(a0)\n" 343 | " sd s5, 0x28(a0)\n" 344 | " sd s6, 0x30(a0)\n" 345 | " sd s7, 0x38(a0)\n" 346 | " sd s8, 0x40(a0)\n" 347 | " sd s9, 0x48(a0)\n" 348 | " sd s10, 0x50(a0)\n" 349 | " sd s11, 0x58(a0)\n" 350 | " sd ra, 0x60(a0)\n" 351 | " sd ra, 0x68(a0)\n" /* pc */ 352 | " sd sp, 0x70(a0)\n" 353 | #ifdef __riscv_flen 354 | #if __riscv_flen == 64 355 | " fsd fs0, 0x78(a0)\n" 356 | " fsd fs1, 0x80(a0)\n" 357 | " fsd fs2, 0x88(a0)\n" 358 | " fsd fs3, 0x90(a0)\n" 359 | " fsd fs4, 
0x98(a0)\n" 360 | " fsd fs5, 0xa0(a0)\n" 361 | " fsd fs6, 0xa8(a0)\n" 362 | " fsd fs7, 0xb0(a0)\n" 363 | " fsd fs8, 0xb8(a0)\n" 364 | " fsd fs9, 0xc0(a0)\n" 365 | " fsd fs10, 0xc8(a0)\n" 366 | " fsd fs11, 0xd0(a0)\n" 367 | " fld fs0, 0x78(a1)\n" 368 | " fld fs1, 0x80(a1)\n" 369 | " fld fs2, 0x88(a1)\n" 370 | " fld fs3, 0x90(a1)\n" 371 | " fld fs4, 0x98(a1)\n" 372 | " fld fs5, 0xa0(a1)\n" 373 | " fld fs6, 0xa8(a1)\n" 374 | " fld fs7, 0xb0(a1)\n" 375 | " fld fs8, 0xb8(a1)\n" 376 | " fld fs9, 0xc0(a1)\n" 377 | " fld fs10, 0xc8(a1)\n" 378 | " fld fs11, 0xd0(a1)\n" 379 | #else 380 | #error "Unsupported RISC-V FLEN" 381 | #endif 382 | #endif /* __riscv_flen */ 383 | " ld s0, 0x00(a1)\n" 384 | " ld s1, 0x08(a1)\n" 385 | " ld s2, 0x10(a1)\n" 386 | " ld s3, 0x18(a1)\n" 387 | " ld s4, 0x20(a1)\n" 388 | " ld s5, 0x28(a1)\n" 389 | " ld s6, 0x30(a1)\n" 390 | " ld s7, 0x38(a1)\n" 391 | " ld s8, 0x40(a1)\n" 392 | " ld s9, 0x48(a1)\n" 393 | " ld s10, 0x50(a1)\n" 394 | " ld s11, 0x58(a1)\n" 395 | " ld ra, 0x60(a1)\n" 396 | " ld a2, 0x68(a1)\n" /* pc */ 397 | " ld sp, 0x70(a1)\n" 398 | " jr a2\n" 399 | #elif __riscv_xlen == 32 400 | " sw s0, 0x00(a0)\n" 401 | " sw s1, 0x04(a0)\n" 402 | " sw s2, 0x08(a0)\n" 403 | " sw s3, 0x0c(a0)\n" 404 | " sw s4, 0x10(a0)\n" 405 | " sw s5, 0x14(a0)\n" 406 | " sw s6, 0x18(a0)\n" 407 | " sw s7, 0x1c(a0)\n" 408 | " sw s8, 0x20(a0)\n" 409 | " sw s9, 0x24(a0)\n" 410 | " sw s10, 0x28(a0)\n" 411 | " sw s11, 0x2c(a0)\n" 412 | " sw ra, 0x30(a0)\n" 413 | " sw ra, 0x34(a0)\n" /* pc */ 414 | " sw sp, 0x38(a0)\n" 415 | #ifdef __riscv_flen 416 | #if __riscv_flen == 64 417 | " fsd fs0, 0x3c(a0)\n" 418 | " fsd fs1, 0x44(a0)\n" 419 | " fsd fs2, 0x4c(a0)\n" 420 | " fsd fs3, 0x54(a0)\n" 421 | " fsd fs4, 0x5c(a0)\n" 422 | " fsd fs5, 0x64(a0)\n" 423 | " fsd fs6, 0x6c(a0)\n" 424 | " fsd fs7, 0x74(a0)\n" 425 | " fsd fs8, 0x7c(a0)\n" 426 | " fsd fs9, 0x84(a0)\n" 427 | " fsd fs10, 0x8c(a0)\n" 428 | " fsd fs11, 0x94(a0)\n" 429 | " fld fs0, 0x3c(a1)\n" 430 | " fld fs1, 
0x44(a1)\n" 431 | " fld fs2, 0x4c(a1)\n" 432 | " fld fs3, 0x54(a1)\n" 433 | " fld fs4, 0x5c(a1)\n" 434 | " fld fs5, 0x64(a1)\n" 435 | " fld fs6, 0x6c(a1)\n" 436 | " fld fs7, 0x74(a1)\n" 437 | " fld fs8, 0x7c(a1)\n" 438 | " fld fs9, 0x84(a1)\n" 439 | " fld fs10, 0x8c(a1)\n" 440 | " fld fs11, 0x94(a1)\n" 441 | #elif __riscv_flen == 32 442 | " fsw fs0, 0x3c(a0)\n" 443 | " fsw fs1, 0x40(a0)\n" 444 | " fsw fs2, 0x44(a0)\n" 445 | " fsw fs3, 0x48(a0)\n" 446 | " fsw fs4, 0x4c(a0)\n" 447 | " fsw fs5, 0x50(a0)\n" 448 | " fsw fs6, 0x54(a0)\n" 449 | " fsw fs7, 0x58(a0)\n" 450 | " fsw fs8, 0x5c(a0)\n" 451 | " fsw fs9, 0x60(a0)\n" 452 | " fsw fs10, 0x64(a0)\n" 453 | " fsw fs11, 0x68(a0)\n" 454 | " flw fs0, 0x3c(a1)\n" 455 | " flw fs1, 0x40(a1)\n" 456 | " flw fs2, 0x44(a1)\n" 457 | " flw fs3, 0x48(a1)\n" 458 | " flw fs4, 0x4c(a1)\n" 459 | " flw fs5, 0x50(a1)\n" 460 | " flw fs6, 0x54(a1)\n" 461 | " flw fs7, 0x58(a1)\n" 462 | " flw fs8, 0x5c(a1)\n" 463 | " flw fs9, 0x60(a1)\n" 464 | " flw fs10, 0x64(a1)\n" 465 | " flw fs11, 0x68(a1)\n" 466 | #else 467 | #error "Unsupported RISC-V FLEN" 468 | #endif 469 | #endif /* __riscv_flen */ 470 | " lw s0, 0x00(a1)\n" 471 | " lw s1, 0x04(a1)\n" 472 | " lw s2, 0x08(a1)\n" 473 | " lw s3, 0x0c(a1)\n" 474 | " lw s4, 0x10(a1)\n" 475 | " lw s5, 0x14(a1)\n" 476 | " lw s6, 0x18(a1)\n" 477 | " lw s7, 0x1c(a1)\n" 478 | " lw s8, 0x20(a1)\n" 479 | " lw s9, 0x24(a1)\n" 480 | " lw s10, 0x28(a1)\n" 481 | " lw s11, 0x2c(a1)\n" 482 | " lw ra, 0x30(a1)\n" 483 | " lw a2, 0x34(a1)\n" /* pc */ 484 | " lw sp, 0x38(a1)\n" 485 | " jr a2\n" 486 | #else 487 | #error "Unsupported RISC-V XLEN" 488 | #endif /* __riscv_xlen */ 489 | ".size _llco_asm_switch, .-_llco_asm_switch\n" 490 | ); 491 | 492 | static void llco_asmctx_make(struct llco_asmctx *ctx, 493 | void* stack_base, size_t stack_size, void *arg) 494 | { 495 | ctx->s[0] = (void*)(arg); 496 | ctx->s[1] = (void*)(llco_entry); 497 | ctx->pc = (void*)(_llco_asm_entry); 498 | #if __riscv_xlen == 64 499 | ctx->ra = 
(void*)(0xdeaddeaddeaddead); 500 | #elif __riscv_xlen == 32 501 | ctx->ra = (void*)(0xdeaddead); 502 | #endif 503 | ctx->sp = (void*)((size_t)stack_base + stack_size); 504 | } 505 | 506 | #endif // riscv 507 | 508 | //////////////////////////////////////////////////////////////////////////////// 509 | // x86 510 | //////////////////////////////////////////////////////////////////////////////// 511 | #if (defined(__i386) || defined(__i386__)) && !defined(LLCO_NOASM) 512 | #define LLCO_ASM 513 | #define LLCO_READY 514 | #define LLCO_METHOD "asm,i386" 515 | 516 | struct llco_asmctx { 517 | void *eip, *esp, *ebp, *ebx, *esi, *edi; 518 | }; 519 | 520 | void _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to); 521 | 522 | __asm__( 523 | #ifdef __DJGPP__ /* DOS compiler */ 524 | "__llco_asm_switch:\n" 525 | #else 526 | ".text\n" 527 | ".globl _llco_asm_switch\n" 528 | ".type _llco_asm_switch @function\n" 529 | ".hidden _llco_asm_switch\n" 530 | "_llco_asm_switch:\n" 531 | #endif 532 | " call 1f\n" 533 | " 1:\n" 534 | " popl %ecx\n" 535 | " addl $(2f-1b), %ecx\n" 536 | " movl 4(%esp), %eax\n" 537 | " movl 8(%esp), %edx\n" 538 | " movl %ecx, (%eax)\n" 539 | " movl %esp, 4(%eax)\n" 540 | " movl %ebp, 8(%eax)\n" 541 | " movl %ebx, 12(%eax)\n" 542 | " movl %esi, 16(%eax)\n" 543 | " movl %edi, 20(%eax)\n" 544 | " movl 20(%edx), %edi\n" 545 | " movl 16(%edx), %esi\n" 546 | " movl 12(%edx), %ebx\n" 547 | " movl 8(%edx), %ebp\n" 548 | " movl 4(%edx), %esp\n" 549 | " jmp *(%edx)\n" 550 | " 2:\n" 551 | " ret\n" 552 | #ifndef __DJGPP__ 553 | ".size _llco_asm_switch, .-_llco_asm_switch\n" 554 | #endif 555 | ); 556 | 557 | static void llco_asmctx_make(struct llco_asmctx *ctx, 558 | void* stack_base, size_t stack_size, void *arg) 559 | { 560 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size - 16 - 561 | 1*sizeof(size_t)); 562 | stack_high_ptr[0] = (void*)(0xdeaddead); // Dummy return address. 
563 | stack_high_ptr[1] = (void*)(arg); 564 | ctx->eip = (void*)(llco_entry); 565 | ctx->esp = (void*)(stack_high_ptr); 566 | } 567 | #endif // __i386__ 568 | 569 | //////////////////////////////////////////////////////////////////////////////// 570 | // x64 571 | //////////////////////////////////////////////////////////////////////////////// 572 | #if (defined(__x86_64__) || defined(_M_X64)) && !defined(LLCO_NOASM) 573 | #define LLCO_ASM 574 | #define LLCO_READY 575 | #define LLCO_METHOD "asm,x64" 576 | 577 | #ifdef _WIN32 578 | 579 | struct llco_asmctx { 580 | void *rip, *rsp, *rbp, *rbx, *r12, *r13, *r14, *r15, *rdi, *rsi; 581 | void* xmm[20]; /* xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, 582 | xmm14, xmm15 */ 583 | void* fiber_storage; 584 | void* dealloc_stack; 585 | void* stack_limit; 586 | void* stack_base; 587 | }; 588 | 589 | #if defined(__GNUC__) 590 | #define LLCO_ASM_BLOB __attribute__((section(".text"))) 591 | #elif defined(_MSC_VER) 592 | #define LLCO_ASM_BLOB __declspec(allocate(".text")) 593 | #pragma section(".text") 594 | #endif 595 | 596 | LLCO_ASM_BLOB static unsigned char llco_wrap_main_code_entry[] = { 597 | 0x4c,0x89,0xe9, // mov %r13,%rcx 598 | 0x41,0xff,0xe4, // jmpq *%r12 599 | 0xc3, // retq 600 | 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90 // nop 601 | }; 602 | 603 | LLCO_ASM_BLOB static unsigned char llco_asm_switch_code[] = { 604 | 0x48,0x8d,0x05,0x3e,0x01,0x00,0x00, // lea 0x13e(%rip),%rax 605 | 0x48,0x89,0x01, // mov %rax,(%rcx) 606 | 0x48,0x89,0x61,0x08, // mov %rsp,0x8(%rcx) 607 | 0x48,0x89,0x69,0x10, // mov %rbp,0x10(%rcx) 608 | 0x48,0x89,0x59,0x18, // mov %rbx,0x18(%rcx) 609 | 0x4c,0x89,0x61,0x20, // mov %r12,0x20(%rcx) 610 | 0x4c,0x89,0x69,0x28, // mov %r13,0x28(%rcx) 611 | 0x4c,0x89,0x71,0x30, // mov %r14,0x30(%rcx) 612 | 0x4c,0x89,0x79,0x38, // mov %r15,0x38(%rcx) 613 | 0x48,0x89,0x79,0x40, // mov %rdi,0x40(%rcx) 614 | 0x48,0x89,0x71,0x48, // mov %rsi,0x48(%rcx) 615 | 0x0f,0x11,0x71,0x50, // movups %xmm6,0x50(%rcx) 
616 | 0x0f,0x11,0x79,0x60, // movups %xmm7,0x60(%rcx) 617 | 0x44,0x0f,0x11,0x41,0x70, // movups %xmm8,0x70(%rcx) 618 | 0x44,0x0f,0x11,0x89,0x80,0x00,0x00,0x00, // movups %xmm9,0x80(%rcx) 619 | 0x44,0x0f,0x11,0x91,0x90,0x00,0x00,0x00, // movups %xmm10,0x90(%rcx) 620 | 0x44,0x0f,0x11,0x99,0xa0,0x00,0x00,0x00, // movups %xmm11,0xa0(%rcx) 621 | 0x44,0x0f,0x11,0xa1,0xb0,0x00,0x00,0x00, // movups %xmm12,0xb0(%rcx) 622 | 0x44,0x0f,0x11,0xa9,0xc0,0x00,0x00,0x00, // movups %xmm13,0xc0(%rcx) 623 | 0x44,0x0f,0x11,0xb1,0xd0,0x00,0x00,0x00, // movups %xmm14,0xd0(%rcx) 624 | 0x44,0x0f,0x11,0xb9,0xe0,0x00,0x00,0x00, // movups %xmm15,0xe0(%rcx) 625 | 0x65,0x4c,0x8b,0x14,0x25,0x30,0x00,0x00,0x00, // mov %gs:0x30,%r10 626 | 0x49,0x8b,0x42,0x20, // mov 0x20(%r10),%rax 627 | 0x48,0x89,0x81,0xf0,0x00,0x00,0x00, // mov %rax,0xf0(%rcx) 628 | 0x49,0x8b,0x82,0x78,0x14,0x00,0x00, // mov 0x1478(%r10),%rax 629 | 0x48,0x89,0x81,0xf8,0x00,0x00,0x00, // mov %rax,0xf8(%rcx) 630 | 0x49,0x8b,0x42,0x10, // mov 0x10(%r10),%rax 631 | 0x48,0x89,0x81,0x00,0x01,0x00,0x00, // mov %rax,0x100(%rcx) 632 | 0x49,0x8b,0x42,0x08, // mov 0x8(%r10),%rax 633 | 0x48,0x89,0x81,0x08,0x01,0x00,0x00, // mov %rax,0x108(%rcx) 634 | 0x48,0x8b,0x82,0x08,0x01,0x00,0x00, // mov 0x108(%rdx),%rax 635 | 0x49,0x89,0x42,0x08, // mov %rax,0x8(%r10) 636 | 0x48,0x8b,0x82,0x00,0x01, 0x00, 0x00, // mov 0x100(%rdx),%rax 637 | 0x49,0x89,0x42,0x10, // mov %rax,0x10(%r10) 638 | 0x48,0x8b,0x82,0xf8,0x00, 0x00, 0x00, // mov 0xf8(%rdx),%rax 639 | 0x49,0x89,0x82,0x78,0x14, 0x00, 0x00, // mov %rax,0x1478(%r10) 640 | 0x48,0x8b,0x82,0xf0,0x00, 0x00, 0x00, // mov 0xf0(%rdx),%rax 641 | 0x49,0x89,0x42,0x20, // mov %rax,0x20(%r10) 642 | 0x44,0x0f,0x10,0xba,0xe0,0x00,0x00,0x00, // movups 0xe0(%rdx),%xmm15 643 | 0x44,0x0f,0x10,0xb2,0xd0,0x00,0x00,0x00, // movups 0xd0(%rdx),%xmm14 644 | 0x44,0x0f,0x10,0xaa,0xc0,0x00,0x00,0x00, // movups 0xc0(%rdx),%xmm13 645 | 0x44,0x0f,0x10,0xa2,0xb0,0x00,0x00,0x00, // movups 0xb0(%rdx),%xmm12 646 | 
0x44,0x0f,0x10,0x9a,0xa0,0x00,0x00,0x00, // movups 0xa0(%rdx),%xmm11 647 | 0x44,0x0f,0x10,0x92,0x90,0x00,0x00,0x00, // movups 0x90(%rdx),%xmm10 648 | 0x44,0x0f,0x10,0x8a,0x80,0x00,0x00,0x00, // movups 0x80(%rdx),%xmm9 649 | 0x44,0x0f,0x10,0x42,0x70, // movups 0x70(%rdx),%xmm8 650 | 0x0f,0x10,0x7a,0x60, // movups 0x60(%rdx),%xmm7 651 | 0x0f,0x10,0x72,0x50, // movups 0x50(%rdx),%xmm6 652 | 0x48,0x8b,0x72,0x48, // mov 0x48(%rdx),%rsi 653 | 0x48,0x8b,0x7a,0x40, // mov 0x40(%rdx),%rdi 654 | 0x4c,0x8b,0x7a,0x38, // mov 0x38(%rdx),%r15 655 | 0x4c,0x8b,0x72,0x30, // mov 0x30(%rdx),%r14 656 | 0x4c,0x8b,0x6a,0x28, // mov 0x28(%rdx),%r13 657 | 0x4c,0x8b,0x62,0x20, // mov 0x20(%rdx),%r12 658 | 0x48,0x8b,0x5a,0x18, // mov 0x18(%rdx),%rbx 659 | 0x48,0x8b,0x6a,0x10, // mov 0x10(%rdx),%rbp 660 | 0x48,0x8b,0x62,0x08, // mov 0x8(%rdx),%rsp 661 | 0xff,0x22, // jmpq *(%rdx) 662 | 0xc3, // retq 663 | 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, // nop 664 | 0x90,0x90, // nop 665 | }; 666 | 667 | void (*_llco_asm_entry)(void) = 668 | (void(*)(void))(void*)llco_wrap_main_code_entry; 669 | void (*_llco_asm_switch)(struct llco_asmctx *from, 670 | struct llco_asmctx *to) = (void(*)(struct llco_asmctx *from, 671 | struct llco_asmctx *to))(void*)llco_asm_switch_code; 672 | 673 | static void llco_asmctx_make(struct llco_asmctx *ctx, 674 | void* stack_base, size_t stack_size, void *arg) 675 | { 676 | stack_size = stack_size - 32; // Reserve 32 bytes for the shadow space. 677 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size - 678 | sizeof(size_t)); 679 | stack_high_ptr[0] = (void*)(0xdeaddeaddeaddead); // Dummy return address. 
680 | ctx->rip = (void*)(_llco_asm_entry); 681 | ctx->rsp = (void*)(stack_high_ptr); 682 | ctx->r12 = (void*)(llco_entry); 683 | ctx->r13 = (void*)(arg); 684 | void* stack_top = (void*)((size_t)stack_base + stack_size); 685 | ctx->stack_base = stack_top; 686 | ctx->stack_limit = stack_base; 687 | ctx->dealloc_stack = stack_base; 688 | } 689 | 690 | #else 691 | 692 | struct llco_asmctx { 693 | void *rip, *rsp, *rbp, *rbx, *r12, *r13, *r14, *r15; 694 | }; 695 | 696 | void _llco_asm_entry(void); 697 | int _llco_asm_switch(struct llco_asmctx *from, struct llco_asmctx *to); 698 | 699 | __asm__( 700 | ".text\n" 701 | #ifdef __MACH__ /* Mac OS X assembler */ 702 | ".globl __llco_asm_entry\n" 703 | "__llco_asm_entry:\n" 704 | #else /* Linux assembler */ 705 | ".globl _llco_asm_entry\n" 706 | ".type _llco_asm_entry @function\n" 707 | ".hidden _llco_asm_entry\n" 708 | "_llco_asm_entry:\n" 709 | #endif 710 | " movq %r13, %rdi\n" 711 | " jmpq *%r12\n" 712 | #ifndef __MACH__ 713 | ".size _llco_asm_entry, .-_llco_asm_entry\n" 714 | #endif 715 | ); 716 | 717 | __asm__( 718 | ".text\n" 719 | #ifdef __MACH__ /* Mac OS assembler */ 720 | ".globl __llco_asm_switch\n" 721 | "__llco_asm_switch:\n" 722 | #else /* Linux assembler */ 723 | ".globl _llco_asm_switch\n" 724 | ".type _llco_asm_switch @function\n" 725 | ".hidden _llco_asm_switch\n" 726 | "_llco_asm_switch:\n" 727 | #endif 728 | " leaq 0x3d(%rip), %rax\n" 729 | " movq %rax, (%rdi)\n" 730 | " movq %rsp, 8(%rdi)\n" 731 | " movq %rbp, 16(%rdi)\n" 732 | " movq %rbx, 24(%rdi)\n" 733 | " movq %r12, 32(%rdi)\n" 734 | " movq %r13, 40(%rdi)\n" 735 | " movq %r14, 48(%rdi)\n" 736 | " movq %r15, 56(%rdi)\n" 737 | " movq 56(%rsi), %r15\n" 738 | " movq 48(%rsi), %r14\n" 739 | " movq 40(%rsi), %r13\n" 740 | " movq 32(%rsi), %r12\n" 741 | " movq 24(%rsi), %rbx\n" 742 | " movq 16(%rsi), %rbp\n" 743 | " movq 8(%rsi), %rsp\n" 744 | " jmpq *(%rsi)\n" 745 | " ret\n" 746 | #ifndef __MACH__ 747 | ".size _llco_asm_switch, .-_llco_asm_switch\n" 748 | 
#endif 749 | ); 750 | 751 | static void llco_asmctx_make(struct llco_asmctx *ctx, 752 | void* stack_base, size_t stack_size, void *arg) 753 | { 754 | // Reserve 128 bytes for the Red Zone space (System V AMD64 ABI). 755 | stack_size = stack_size - 128; 756 | void** stack_high_ptr = (void**)((size_t)stack_base + stack_size - 757 | sizeof(size_t)); 758 | stack_high_ptr[0] = (void*)(0xdeaddeaddeaddead); // Dummy return address. 759 | ctx->rip = (void*)(_llco_asm_entry); 760 | ctx->rsp = (void*)(stack_high_ptr); 761 | ctx->r12 = (void*)(llco_entry); 762 | ctx->r13 = (void*)(arg); 763 | } 764 | 765 | #endif 766 | #endif // x64 767 | 768 | // --- END ASM Code --- // 769 | 770 | #if defined(__GNUC__) 771 | #pragma GCC diagnostic pop 772 | #endif 773 | 774 | //////////////////////////////////////////////////////////////////////////////// 775 | // ASM with stackjmp activated 776 | //////////////////////////////////////////////////////////////////////////////// 777 | #if defined(LLCO_READY) && defined(LLCO_STACKJMP) 778 | LLCO_NOINLINE LLCO_NORETURN 779 | static void llco_stackjmp(void *stack, size_t stack_size, 780 | void(*entry)(void *arg)) 781 | { 782 | struct llco_asmctx ctx = { 0 }; 783 | llco_asmctx_make(&ctx, stack, stack_size, 0); 784 | struct llco_asmctx ctx0 = { 0 }; 785 | _llco_asm_switch(&ctx0, &ctx); 786 | llco_exit(); 787 | } 788 | #endif 789 | 790 | //////////////////////////////////////////////////////////////////////////////// 791 | // Windows Fibers 792 | //////////////////////////////////////////////////////////////////////////////// 793 | #if defined(_WIN32) && !defined(LLCO_READY) 794 | #define LLCO_WINDOWS 795 | #define LLCO_READY 796 | #define LLCO_METHOD "fibers,windows" 797 | 798 | #ifndef _WIN32_WINNT 799 | #define _WIN32_WINNT 0x0400 800 | #endif 801 | #ifndef WIN32_LEAN_AND_MEAN 802 | #define WIN32_LEAN_AND_MEAN 803 | #endif 804 | #include <windows.h> 805 | 806 | #error Windows fibers unsupported 807 | 808 | #endif 809 | 810 |
//////////////////////////////////////////////////////////////////////////////// 811 | // Webassembly Fibers 812 | //////////////////////////////////////////////////////////////////////////////// 813 | #if defined(__EMSCRIPTEN__) && !defined(LLCO_READY) 814 | #define LLCO_WASM 815 | #define LLCO_READY 816 | #define LLCO_METHOD "fibers,emscripten" 817 | 818 | #include <emscripten/fiber.h> 819 | #include <string.h> 820 | 821 | #ifndef LLCO_ASYNCIFY_STACK_SIZE 822 | #define LLCO_ASYNCIFY_STACK_SIZE 4096 823 | #endif 824 | 825 | static __thread char llco_main_stack[LLCO_ASYNCIFY_STACK_SIZE]; 826 | 827 | #endif 828 | 829 | //////////////////////////////////////////////////////////////////////////////// 830 | // Ucontext 831 | //////////////////////////////////////////////////////////////////////////////// 832 | #if !defined(LLCO_READY) 833 | #define LLCO_UCONTEXT 834 | #define LLCO_READY 835 | #define LLCO_METHOD "ucontext" 836 | #ifndef LLCO_STACKJMP 837 | #define LLCO_STACKJMP 838 | #endif 839 | 840 | #if defined(__FreeBSD__) || defined(__APPLE__) 841 | #ifdef __clang__ 842 | #pragma clang diagnostic ignored "-Wdeprecated-declarations" 843 | #endif 844 | #define _XOPEN_SOURCE 845 | #endif 846 | #include <ucontext.h> 847 | 848 | static __thread ucontext_t stackjmp_ucallee; 849 | static __thread int stackjmp_ucallee_gotten = 0; 850 | 851 | #if defined(__APPLE__) && defined(__aarch64__) && !defined(LLCO_NOSTACKADJUST) 852 | // Here we ensure that the initial context switch will *not* page the 853 | // entire stack into process memory before executing the entry point 854 | // function, which is a behavior that can be observed on Mac OS with 855 | // Apple Silicon. This "trick" can be optionally removed at the expense 856 | // of slower initial jumping into large stacks. 857 | enum llco_stack_grows { DOWNWARDS, UPWARDS }; 858 | 859 | static enum llco_stack_grows llco_stack_grows0(int *addr0) { 860 | int addr1; 861 | return addr0 < &addr1 ?
UPWARDS : DOWNWARDS; 862 | } 863 | 864 | static enum llco_stack_grows llco_stack_grows(void) { 865 | int addr0; 866 | return llco_stack_grows0(&addr0); 867 | } 868 | 869 | static void llco_adjust_ucontext_stack(ucontext_t *ucp) { 870 | if (llco_stack_grows() == UPWARDS) { 871 | ucp->uc_stack.ss_sp = (char*)ucp->uc_stack.ss_sp+ucp->uc_stack.ss_size; 872 | ucp->uc_stack.ss_size = 0; 873 | } 874 | } 875 | #else 876 | #define llco_adjust_ucontext_stack(ucp) 877 | #endif 878 | 879 | // Ucontext always uses stackjmp with setjmp/longjmp, instead of swapcontext 880 | // because it's much faster. 881 | LLCO_NOINLINE LLCO_NORETURN 882 | static void llco_stackjmp(void *stack, size_t stack_size, 883 | void(*entry)(void *arg)) 884 | { 885 | if (!stackjmp_ucallee_gotten) { 886 | stackjmp_ucallee_gotten = 1; 887 | getcontext(&stackjmp_ucallee); 888 | } 889 | stackjmp_ucallee.uc_stack.ss_sp = stack; 890 | stackjmp_ucallee.uc_stack.ss_size = stack_size; 891 | llco_adjust_ucontext_stack(&stackjmp_ucallee); 892 | makecontext(&stackjmp_ucallee, (void(*)(void))entry, 0); 893 | setcontext(&stackjmp_ucallee); 894 | llco_exit(); 895 | } 896 | 897 | #endif // Ucontext 898 | 899 | #if defined(LLCO_STACKJMP) 900 | #include <setjmp.h> 901 | #ifdef _WIN32 902 | // For reasons outside of my understanding, Windows does not allow for jumping 903 | // between stacks using the setjmp/longjmp mechanism.
904 | #error Windows stackjmp not supported 905 | #endif 906 | #endif 907 | 908 | //////////////////////////////////////////////////////////////////////////////// 909 | // llco switching code 910 | //////////////////////////////////////////////////////////////////////////////// 911 | 912 | #include <stdio.h> 913 | 914 | struct llco { 915 | struct llco_desc desc; 916 | #if defined(LLCO_STACKJMP) 917 | jmp_buf buf; 918 | #elif defined(LLCO_ASM) 919 | struct llco_asmctx ctx; 920 | #elif defined(LLCO_WASM) 921 | emscripten_fiber_t fiber; 922 | #elif defined(LLCO_WINDOWS) 923 | LPVOID fiber; 924 | #endif 925 | #ifdef LLCO_VALGRIND 926 | int valgrind_stack_id; 927 | #endif 928 | #if defined(__GNUC__) 929 | void *uw_stop_ip; // record of the last unwind ip. 930 | #endif 931 | }; 932 | 933 | #ifdef LLCO_VALGRIND 934 | static __thread unsigned int llco_valgrind_stack_id = 0; 935 | static __thread unsigned int llco_cleanup_valgrind_stack_id = 0; 936 | #endif 937 | 938 | static __thread struct llco llco_thread = { 0 }; 939 | static __thread struct llco *llco_cur = NULL; 940 | static __thread struct llco_desc llco_desc; 941 | static __thread volatile bool llco_cleanup_needed = false; 942 | static __thread volatile struct llco_desc llco_cleanup_desc; 943 | static __thread volatile bool llco_cleanup_active = false; 944 | 945 | #define llco_cleanup_guard() { \ 946 | if (llco_cleanup_active) { \ 947 | fprintf(stderr, "%s not available during cleanup\n", __func__); \ 948 | abort(); \ 949 | } \ 950 | } 951 | 952 | static void llco_cleanup_last(void) { 953 | if (llco_cleanup_needed) { 954 | if (llco_cleanup_desc.cleanup) { 955 | llco_cleanup_active = true; 956 | #ifdef LLCO_VALGRIND 957 | VALGRIND_STACK_DEREGISTER(llco_cleanup_valgrind_stack_id); 958 | #endif 959 | llco_cleanup_desc.cleanup(llco_cleanup_desc.stack, 960 | llco_cleanup_desc.stack_size, llco_cleanup_desc.udata); 961 | llco_cleanup_active = false; 962 | } 963 | llco_cleanup_needed = false; 964 | } 965 | } 966 | 967 | LLCO_NOINLINE
968 | static void llco_entry_wrap(void *arg) { 969 | llco_cleanup_last(); 970 | #if defined(LLCO_WASM) 971 | llco_cur = arg; 972 | llco_cur->desc = llco_desc; 973 | #else 974 | (void)arg; 975 | struct llco self = { .desc = llco_desc }; 976 | llco_cur = &self; 977 | #endif 978 | #ifdef LLCO_VALGRIND 979 | llco_cur->valgrind_stack_id = llco_valgrind_stack_id; 980 | #endif 981 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__) 982 | llco_cur->uw_stop_ip = __builtin_return_address(0); 983 | #endif 984 | llco_cur->desc.entry(llco_cur->desc.udata); 985 | } 986 | 987 | 988 | LLCO_NOINLINE LLCO_NORETURN 989 | static void llco_entry(void *arg) { 990 | llco_entry_wrap(arg); 991 | llco_exit(); 992 | } 993 | 994 | LLCO_NOINLINE 995 | static void llco_switch1(struct llco *from, struct llco *to, 996 | void *stack, size_t stack_size) 997 | { 998 | #ifdef LLCO_VALGRIND 999 | llco_valgrind_stack_id = VALGRIND_STACK_REGISTER(stack, stack + stack_size); 1000 | #endif 1001 | #if defined(LLCO_STACKJMP) 1002 | if (to) { 1003 | if (!_setjmp(from->buf)) { 1004 | _longjmp(to->buf, 1); 1005 | } 1006 | } else { 1007 | if (!_setjmp(from->buf)) { 1008 | llco_stackjmp(stack, stack_size, llco_entry); 1009 | } 1010 | } 1011 | #elif defined(LLCO_ASM) 1012 | if (to) { 1013 | _llco_asm_switch(&from->ctx, &to->ctx); 1014 | } else { 1015 | struct llco_asmctx ctx = { 0 }; 1016 | llco_asmctx_make(&ctx, stack, stack_size, 0); 1017 | _llco_asm_switch(&from->ctx, &ctx); 1018 | } 1019 | #elif defined(LLCO_WASM) 1020 | if (to) { 1021 | emscripten_fiber_swap(&from->fiber, &to->fiber); 1022 | } else { 1023 | if (from == &llco_thread) { 1024 | emscripten_fiber_init_from_current_context(&from->fiber, 1025 | llco_main_stack, LLCO_ASYNCIFY_STACK_SIZE); 1026 | } 1027 | stack_size -= LLCO_ASYNCIFY_STACK_SIZE; 1028 | char *astack = ((char*)stack) + stack_size; 1029 | size_t astack_size = LLCO_ASYNCIFY_STACK_SIZE - sizeof(struct llco); 1030 | struct llco *self = (void*)(astack + astack_size); 1031 | memset(self, 0, 
sizeof(struct llco)); 1032 | emscripten_fiber_init(&self->fiber, llco_entry, 1033 | self, stack, stack_size, astack, astack_size); 1034 | emscripten_fiber_swap(&from->fiber, &self->fiber); 1035 | } 1036 | #elif defined(LLCO_WINDOWS) 1037 | // Unsupported 1038 | #endif 1039 | } 1040 | 1041 | static void llco_switch0(struct llco_desc *desc, struct llco *co, 1042 | bool final) 1043 | { 1044 | struct llco *from = llco_cur ? llco_cur : &llco_thread; 1045 | struct llco *to = desc ? NULL : co ? co : &llco_thread; 1046 | if (from != to) { 1047 | if (final) { 1048 | llco_cleanup_needed = true; 1049 | llco_cleanup_desc = from->desc; 1050 | #ifdef LLCO_VALGRIND 1051 | llco_cleanup_valgrind_stack_id = from->valgrind_stack_id; 1052 | #endif 1053 | } 1054 | if (desc) { 1055 | llco_desc = *desc; 1056 | llco_switch1(from, 0, desc->stack, desc->stack_size); 1057 | } else { 1058 | llco_cur = to; 1059 | llco_switch1(from, to, 0, 0); 1060 | } 1061 | llco_cleanup_last(); 1062 | } 1063 | } 1064 | 1065 | 1066 | 1067 | //////////////////////////////////////////////////////////////////////////////// 1068 | // Exported methods 1069 | //////////////////////////////////////////////////////////////////////////////// 1070 | 1071 | // Start a new coroutine. 1072 | LLCO_EXTERN 1073 | void llco_start(struct llco_desc *desc, bool final) { 1074 | if (!desc || desc->stack_size < LLCO_MINSTACKSIZE) { 1075 | fprintf(stderr, "stack too small\n"); 1076 | abort(); 1077 | } 1078 | llco_cleanup_guard(); 1079 | llco_switch0(desc, 0, final); 1080 | } 1081 | 1082 | // Switch to another coroutine. 1083 | LLCO_EXTERN 1084 | void llco_switch(struct llco *co, bool final) { 1085 | #if defined(LLCO_ASM) 1086 | // fast track context switch. Saves a few nanoseconds by checking the 1087 | // exception condition first. 
1088 | if (!llco_cleanup_active && llco_cur && co && llco_cur != co && !final) { 1089 | struct llco *from = llco_cur; 1090 | llco_cur = co; 1091 | _llco_asm_switch(&from->ctx, &co->ctx); 1092 | llco_cleanup_last(); 1093 | return; 1094 | } 1095 | #endif 1096 | llco_cleanup_guard(); 1097 | llco_switch0(0, co, final); 1098 | } 1099 | 1100 | // Return the current coroutine or NULL if not currently running in a 1101 | // coroutine. 1102 | LLCO_EXTERN 1103 | struct llco *llco_current(void) { 1104 | llco_cleanup_guard(); 1105 | return llco_cur == &llco_thread ? 0 : llco_cur; 1106 | } 1107 | 1108 | // Returns a string that indicates which coroutine method is being used by 1109 | // the program, such as "asm" or "ucontext". 1110 | LLCO_EXTERN 1111 | const char *llco_method(void *caps) { 1112 | (void)caps; 1113 | return LLCO_METHOD 1114 | #ifdef LLCO_STACKJMP 1115 | ",stackjmp" 1116 | #endif 1117 | ; 1118 | } 1119 | 1120 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__) && !defined(_WIN32) && \ 1121 | !defined(LLCO_NOUNWIND) && !defined(__COSMOCC__) 1122 | 1123 | #include <unwind.h> 1124 | #include <string.h> 1125 | #include <dlfcn.h> 1126 | 1127 | struct llco_dlinfo { 1128 | const char *dli_fname; /* Pathname of shared object */ 1129 | void *dli_fbase; /* Base address of shared object */ 1130 | const char *dli_sname; /* Name of nearest symbol */ 1131 | void *dli_saddr; /* Address of nearest symbol */ 1132 | }; 1133 | 1134 | #if defined(__linux__) && !defined(_GNU_SOURCE) 1135 | int dladdr(const void *, void *); 1136 | #endif 1137 | 1138 | static void llco_getsymbol(struct _Unwind_Context *uwc, 1139 | struct llco_symbol *sym) 1140 | { 1141 | memset(sym, 0, sizeof(struct llco_symbol)); 1142 | sym->cfa = (void*)_Unwind_GetCFA(uwc); 1143 | int ip_before; /* unused */ 1144 | sym->ip = (void*)_Unwind_GetIPInfo(uwc, &ip_before); 1145 | struct llco_dlinfo dlinfo = { 0 }; 1146 | if (sym->ip && dladdr(sym->ip, (void*)&dlinfo)) { 1147 | sym->fname = dlinfo.dli_fname; 1148 | sym->fbase = dlinfo.dli_fbase; 1149
| sym->sname = dlinfo.dli_sname; 1150 | sym->saddr = dlinfo.dli_saddr; 1151 | } 1152 | } 1153 | 1154 | struct llco_unwind_context { 1155 | void *udata; 1156 | void *start_ip; 1157 | bool started; 1158 | int nsymbols; 1159 | int nsymbols_actual; 1160 | struct llco_symbol last; 1161 | bool (*func)(struct llco_symbol *, void *); 1162 | void *unwind_addr; 1163 | }; 1164 | 1165 | static _Unwind_Reason_Code llco_func(struct _Unwind_Context *uwc, void *ptr) { 1166 | struct llco_unwind_context *ctx = ptr; 1167 | 1168 | struct llco *cur = llco_current(); 1169 | if (cur && !cur->uw_stop_ip) { 1170 | return _URC_END_OF_STACK; 1171 | } 1172 | struct llco_symbol sym; 1173 | llco_getsymbol(uwc, &sym); 1174 | if (ctx->start_ip && !ctx->started && sym.ip != ctx->start_ip) { 1175 | return _URC_NO_REASON; 1176 | } 1177 | ctx->started = true; 1178 | if (!sym.ip || (cur && sym.ip == cur->uw_stop_ip)) { 1179 | return _URC_END_OF_STACK; 1180 | } 1181 | ctx->nsymbols++; 1182 | if (!cur) { 1183 | ctx->nsymbols_actual++; 1184 | if (ctx->func && !ctx->func(&sym, ctx->udata)) { 1185 | return _URC_END_OF_STACK; 1186 | } 1187 | } else { 1188 | if (ctx->nsymbols > 1) { 1189 | ctx->nsymbols_actual++; 1190 | if (ctx->func && !ctx->func(&ctx->last, ctx->udata)) { 1191 | return _URC_END_OF_STACK; 1192 | } 1193 | } 1194 | ctx->last = sym; 1195 | } 1196 | return _URC_NO_REASON; 1197 | } 1198 | 1199 | LLCO_EXTERN 1200 | int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata){ 1201 | struct llco_unwind_context ctx = { 1202 | #if defined(__GNUC__) && !defined(__EMSCRIPTEN__) 1203 | .start_ip = __builtin_return_address(0), 1204 | #endif 1205 | .func = func, 1206 | .udata = udata 1207 | }; 1208 | _Unwind_Backtrace(llco_func, &ctx); 1209 | return ctx.nsymbols_actual; 1210 | } 1211 | 1212 | #else 1213 | 1214 | LLCO_EXTERN 1215 | int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata){ 1216 | (void)func; (void)udata; 1217 | /* Unsupported */ 1218 | return 0; 
1219 | } 1220 | 1221 | #endif 1222 | -------------------------------------------------------------------------------- /llco.h: -------------------------------------------------------------------------------- 1 | #ifndef LLCO_H 2 | #define LLCO_H 3 | 4 | #include 5 | #include 6 | 7 | #define LLCO_MINSTACKSIZE 16384 8 | 9 | struct llco_desc { 10 | void *stack; 11 | size_t stack_size; 12 | void (*entry)(void *udata); 13 | void (*cleanup)(void *stack, size_t stack_size, void *udata); 14 | void *udata; 15 | }; 16 | 17 | struct llco; 18 | 19 | struct llco *llco_current(void); 20 | void llco_start(struct llco_desc *desc, bool final); 21 | void llco_switch(struct llco *co, bool final); 22 | const char *llco_method(void *caps); 23 | 24 | // Coroutine stack unwinding 25 | struct llco_symbol { 26 | void *cfa; // Canonical Frame Address 27 | void *ip; // Instruction Pointer 28 | const char *fname; // Pathname of shared object 29 | void *fbase; // Base address of shared object 30 | const char *sname; // Name of nearest symbol 31 | void *saddr; // Address of nearest symbol 32 | }; 33 | 34 | int llco_unwind(bool(*func)(struct llco_symbol *sym, void *udata), void *udata); 35 | 36 | #endif // LLCO_H 37 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "llco.h" 9 | 10 | #define STKSZ 32768 11 | 12 | char tstr[512]; 13 | char tresult[4096] = { 0 }; 14 | 15 | #define tprintf(...) 
{ \ 16 | printf(__VA_ARGS__); \ 17 | snprintf(tstr, sizeof(tstr), __VA_ARGS__); \ 18 | strcat(tresult, tstr); \ 19 | } \ 20 | 21 | void cleanup(void *stk, size_t stksz, void *udata) { 22 | tprintf("(cleanup %d)\n", (int)(intptr_t)udata); 23 | free(stk); 24 | } 25 | 26 | struct llco *co1, *co2, *co3; 27 | 28 | void entry3(void *udata) { 29 | co3 = llco_current(); 30 | tprintf("(entry %d)\n", (int)(intptr_t)udata); 31 | llco_switch(co1, false); 32 | tprintf("(mark C)\n"); 33 | llco_switch(co1, true); 34 | } 35 | 36 | void entry2(void *udata) { 37 | co2 = llco_current(); 38 | tprintf("(entry %d)\n", (int)(intptr_t)udata); 39 | struct llco_desc desc = { 40 | .stack = malloc(STKSZ), 41 | .stack_size = STKSZ, 42 | .entry = entry3, 43 | .cleanup = cleanup, 44 | .udata = (void*)3, 45 | }; 46 | llco_start(&desc, true); 47 | } 48 | 49 | void entry1(void *udata) { 50 | co1 = llco_current(); 51 | tprintf("(entry %d)\n", (int)(intptr_t)udata); 52 | struct llco_desc desc = { 53 | .stack = malloc(STKSZ), 54 | .stack_size = STKSZ, 55 | .entry = entry2, 56 | .cleanup = cleanup, 57 | .udata = (void*)2, 58 | }; 59 | llco_start(&desc, false); 60 | tprintf("(mark B)\n"); 61 | llco_switch(co3, false); 62 | tprintf("(mark D)\n"); 63 | llco_switch(0, true); 64 | } 65 | 66 | void perfcleanup(void *stk, size_t stksz, void *udata) { 67 | free(stk); 68 | } 69 | 70 | // Return monotonic nanoseconds of the CPU clock. 
71 | static int64_t getnow(void) { 72 | struct timespec now = { 0 }; 73 | clock_gettime(CLOCK_MONOTONIC, &now); 74 | return now.tv_sec * INT64_C(1000000000) + now.tv_nsec; 75 | } 76 | 77 | int64_t perfstart, perfend; 78 | 79 | struct llco *perf1co; 80 | struct llco *perf2co; 81 | 82 | #define NSWITCHES 10000000 83 | int perf_count = 0; 84 | 85 | void perf1(void *udata) { 86 | perf1co = llco_current(); 87 | llco_switch(0, 0); 88 | while (perf_count < NSWITCHES) { 89 | perf_count++; 90 | llco_switch(perf2co, 0); 91 | } 92 | llco_switch(0, 1); 93 | } 94 | 95 | void perf2(void *udata) { 96 | perf2co = llco_current(); 97 | perfstart = getnow(); 98 | while (perf_count < NSWITCHES) { 99 | perf_count++; 100 | llco_switch(perf1co, 0); 101 | } 102 | perfend = getnow(); 103 | llco_switch(perf1co, 1); 104 | } 105 | 106 | void test_perf() { 107 | struct llco_desc desc1 = { 108 | .stack = malloc(STKSZ), 109 | .stack_size = STKSZ, 110 | .entry = perf1, 111 | .cleanup = perfcleanup, 112 | .udata = (void*)1, 113 | }; 114 | llco_start(&desc1, false); 115 | 116 | struct llco_desc desc2 = { 117 | .stack = malloc(STKSZ), 118 | .stack_size = STKSZ, 119 | .entry = perf2, 120 | .cleanup = perfcleanup, 121 | .udata = (void*)2, 122 | }; 123 | llco_start(&desc2, false); 124 | 125 | printf("perf: %ld switches in %.3f secs, %ld ns / switch\n", 126 | (long)perf_count, (double)(perfend-perfstart)/1e9, 127 | (long)((perfend-perfstart) / perf_count)); 128 | 129 | } 130 | 131 | int main(void) { 132 | printf("%s\n", llco_method(0)); 133 | assert(!llco_current()); 134 | tprintf("(mark A)\n"); 135 | struct llco_desc desc = { 136 | .stack = malloc(STKSZ), 137 | .stack_size = STKSZ, 138 | .entry = entry1, 139 | .cleanup = cleanup, 140 | .udata = (void*)1, 141 | }; 142 | llco_start(&desc, false); 143 | tprintf("(mark E)\n"); 144 | assert(!llco_current()); 145 | const char *exp = 146 | "(mark A)\n" 147 | "(entry 1)\n" 148 | "(entry 2)\n" 149 | "(cleanup 2)\n" 150 | "(entry 3)\n" 151 | "(mark B)\n" 152 | 
"(mark C)\n" 153 | "(cleanup 3)\n" 154 | "(mark D)\n" 155 | "(cleanup 1)\n" 156 | "(mark E)\n"; 157 | if (strcmp(tresult, exp) != 0) { 158 | printf("== expected ==\n%s", exp); 159 | printf("FAILED\n"); 160 | } else { 161 | test_perf(); 162 | printf("PASSED\n"); 163 | } 164 | return 0; 165 | } 166 | 167 | -------------------------------------------------------------------------------- /unwind.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "llco.h" 6 | 7 | #define STKSZ 32768 8 | 9 | void cleanup(void *stk, size_t stksz, void *udata) { 10 | printf("== CLEANUP ==\n"); 11 | free(stk); 12 | } 13 | 14 | 15 | bool symbol(struct llco_symbol *sym, void *udata) { 16 | printf("== UNWIND ==\n"); 17 | printf(" cfa: %p\n", sym->cfa); 18 | printf(" ip: %p\n", sym->ip); 19 | printf(" fname: %s\n", sym->fname); 20 | printf(" fbase: %p\n", sym->fbase); 21 | printf(" sname: %s\n", sym->sname); 22 | printf(" saddr: %p\n", sym->saddr); 23 | return true; 24 | } 25 | 26 | __attribute__((noinline)) void func4(int x) { 27 | printf("func4 (%d)\n", x); 28 | printf("nsymbols: %d\n", llco_unwind(symbol, 0)); 29 | // _Unwind_Backtrace(btfnc, NULL); 30 | } 31 | 32 | __attribute__((noinline)) void func3(int x) { printf("func3\n"); func4(x); } 33 | __attribute__((noinline)) void func2(int x) { printf("func2\n"); func3(x); } 34 | __attribute__((noinline)) void func1(int x) { printf("func1\n"); func2(x); } 35 | 36 | 37 | // __attribute__((noinline)) 38 | void entry(void *udata) { 39 | 40 | printf("== COROUTINE ==\n"); 41 | func1(10); 42 | printf("== SWITCH TO MAIN ==\n"); 43 | llco_switch(0, true); 44 | } 45 | 46 | int main(void) { 47 | printf("%s\n", llco_method(0)); 48 | printf("== MAIN ==\n"); 49 | struct llco_desc desc = { 50 | .stack = malloc(STKSZ), 51 | .stack_size = STKSZ, 52 | .entry = entry, 53 | .cleanup = cleanup, 54 | .udata = (void*)1, 55 | }; 56 | llco_start(&desc, false); 57 | 
printf("== EXIT ==\n"); 58 | printf("nsymbols: %d\n", llco_unwind(symbol, 0)); 59 | return 0; 60 | } 61 | 62 | --------------------------------------------------------------------------------