├── .gitattributes ├── include ├── arm │ └── bunki_ctx.h ├── aarch64 │ └── bunki_ctx.h ├── x86_64 │ └── bunki_ctx.h ├── bunki.h └── bunki_common.h ├── .gitignore ├── LICENSE ├── example.c ├── src ├── bunki_common.c ├── arm │ ├── bunki_ctx.S │ └── bunki_native.c ├── x86_64 │ ├── bunki_native.c │ └── bunki_ctx.S ├── bunki_setup.c └── aarch64 │ ├── bunki_ctx.S │ └── bunki_native.c ├── makefile └── readme.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /include/arm/bunki_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef BUNKI_ARM_CTX_FILE_H 2 | #define BUNKI_ARM_CTX_FILE_H 3 | 4 | #if !defined(__arm__) 5 | #error "This is not an ARM architecture" 6 | #endif 7 | 8 | #include 9 | 10 | struct stack_ctx_s { 11 | uint32_t r4; 12 | uint32_t r5; 13 | uint32_t r6; 14 | uint32_t r7; 15 | uint32_t r8; 16 | uint32_t r9; 17 | uint32_t r10; 18 | uint32_t fp; 19 | uint32_t lr; 20 | }; 21 | 22 | #define ARCH_STK_ALIGN 0x08U 23 | 24 | unsigned bunki_patch_call_yield(uint32_t stack_size); 25 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end); 26 | uintptr_t bunki_ctx_stack_start(void); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # Prerequisites 17 | *.d 18 | 19 | # Object files 20 | *.o 21 | *.ko 22 | *.obj 23 | *.elf 24 | 25 | # Linker output 26 | *.ilk 27 | *.map 28 | *.exp 29 | 30 | # Precompiled Headers 31 | *.gch 32 | *.pch 33 | 34 | # Libraries 35 | *.lib 36 | *.a 37 | *.la 38 | *.lo 39 | 40 | # Shared objects (inc. 
Windows DLLs) 41 | *.dll 42 | *.so 43 | *.so.* 44 | *.dylib 45 | 46 | # Executables 47 | *.exe 48 | *.out 49 | *.app 50 | *.i*86 51 | *.x86_64 52 | *.hex 53 | tinyhost 54 | 55 | # Debug files 56 | *.dSYM/ 57 | *.su 58 | *.idb 59 | *.pdb 60 | # Directories 61 | /libs 62 | /build 63 | -------------------------------------------------------------------------------- /include/aarch64/bunki_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef BUNKI_AARCH64_CTX_FILE_H 2 | #define BUNKI_AARCH64_CTX_FILE_H 3 | 4 | #if !defined(__aarch64__) 5 | #error "This is not an ARM64 architecture" 6 | #endif 7 | 8 | #include 9 | 10 | struct stack_ctx_s { 11 | uint64_t lr; 12 | uint64_t x29; 13 | uint64_t x28; 14 | uint64_t x27; 15 | uint64_t x26; 16 | uint64_t x25; 17 | uint64_t x24; 18 | uint64_t x23; 19 | uint64_t x22; 20 | uint64_t x21; 21 | uint64_t x20; 22 | uint64_t x19; 23 | #if !defined(BUNKI_AARCH64_NO_VEC_FLOAT) 24 | uint64_t d15; 25 | uint64_t d14; 26 | uint64_t d13; 27 | uint64_t d12; 28 | uint64_t d11; 29 | uint64_t d10; 30 | uint64_t d9; 31 | uint64_t d8; 32 | #endif 33 | }; 34 | 35 | #define ARCH_STK_ALIGN 0x10U 36 | 37 | unsigned bunki_patch_call_yield(uint32_t stack_size); 38 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end); 39 | uintptr_t bunki_ctx_stack_start(void); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Keith-Cancel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /include/x86_64/bunki_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef BUNKI_x64_86_CTX_FILE_H 2 | #define BUNKI_x64_86_CTX_FILE_H 3 | 4 | #if !defined(__x86_64__) && !defined(_M_X64) 5 | #error "This is not an x86_64 architecture" 6 | #endif 7 | 8 | #include 9 | 10 | struct stack_ctx_s { 11 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 12 | struct fcw_mxcsr_s { 13 | uint32_t mxcsr; 14 | uint16_t fcw; 15 | uint16_t pad; 16 | } both_csrs; 17 | #endif 18 | #ifdef _WIN64 19 | uint64_t fiber_data; 20 | #if !defined(BUNKI_NO_WIN64_XMM) 21 | uint8_t xmm8_15[128]; 22 | #endif 23 | uint64_t stk_base; 24 | uint64_t stk_limit; 25 | uint64_t stk_dealloc; 26 | uint64_t rsi; 27 | uint64_t rdi; 28 | #endif 29 | // System V and Windows x64 30 | uint64_t r15; 31 | uint64_t r14; 32 | uint64_t r13; 33 | uint64_t r12; 34 | uint64_t rbp; 35 | uint64_t rbx; 36 | uint64_t rip; // The rip or return ptr 37 | // Windows shadow/home space 38 | #ifdef _WIN64 39 | uint8_t home[32]; // Also store xmm6 and xmm7 here 40 | #endif 41 | }; 42 | 43 | #define ARCH_STK_ALIGN 0x10U 44 | 45 | unsigned bunki_patch_call_yield(uint32_t stack_size); 46 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end); 47 | uintptr_t bunki_ctx_stack_start(void); 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /example.c: -------------------------------------------------------------------------------- 1 | #include "bunki.h" 2 | #include 3 | #include 4 | 5 | uintptr_t print_unsigned(void* arg) { 6 | uintptr_t val = (uintptr_t)arg; 7 | printf("The value is: 0x%lx\n", val); 8 | return 0; 9 | } 10 | 11 | uintptr_t my_coroutine(void* arg) { 12 | // Save a value to the thread context storage 13 | bunki_ctx_data_set((void*)0xcafe); 14 | // Call a function on the current threads stack 15 | bunki_ctx_call((void*)0xf00d, print_unsigned); 16 | bunki_yield(10); 17 | 18 | // print out the value we save before last yield 19 | bunki_ctx_call((void*)bunki_ctx_data_get(), print_unsigned); 20 | // print the arg 21 | bunki_ctx_call(arg, print_unsigned); 22 | bunki_yield(12); 23 | } 24 | 25 | int main() { 26 | // only call once before using the library 27 | // The library will now expect stack memory to 28 | // aligned by 256 bytes and be 256 bytes in size. 
29 | if(bunki_init(256)) { 30 | fprintf(stderr, "Failed to initialize!\n"); 31 | return 1; 32 | } 33 | void* stack_mem = aligned_alloc(256, 256); 34 | if(stack_mem == NULL) { 35 | fprintf(stderr, "Failed to allocate memory!\n"); 36 | return 1; 37 | } 38 | bunki_t ctx = bunki_init_prepare_ctx(stack_mem, my_coroutine, (void*)0xbeef); 39 | printf("Returned: %u\n", bunki_resume(ctx)); 40 | printf("Returned: %u\n", bunki_resume(ctx)); 41 | free(stack_mem); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /include/bunki.h: -------------------------------------------------------------------------------- 1 | #ifndef BUNKI_FILE_H 2 | #define BUNKI_FILE_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct stack_ctx_s* bunki_t; 8 | 9 | unsigned bunki_init(uint32_t stack_size); 10 | uint32_t bunki_stack_min_size(void); 11 | 12 | uint32_t bunki_stack_alignment(void); 13 | void* bunki_stack_ptr(bunki_t ctx); 14 | void* bunki_stack_push(bunki_t* ctx, size_t allocation_length); 15 | void* bunki_stack_push_data(bunki_t* ctx, size_t data_length, void* data); 16 | 17 | void* bunki_data_get(bunki_t ctx); 18 | void bunki_data_set(bunki_t ctx, void* data); 19 | 20 | bunki_t bunki_init_stack_ctx(void* stack_mem); 21 | void bunki_prepare_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg); 22 | bunki_t bunki_init_prepare_ctx(void* stack_mem, uintptr_t (*func)(void*), void* arg); 23 | 24 | uintptr_t bunki_resume(bunki_t ctx); 25 | void bunki_yield(uintptr_t ret); 26 | 27 | // Functions that can only be called with a coroutine ctx 28 | uintptr_t bunki_ctx_resume(bunki_t ctx); 29 | // Context local Storage functions 30 | void* bunki_ctx_data_get(void); 31 | void bunki_ctx_data_set(void* data); 32 | // Call a function on the thread stack 33 | uintptr_t bunki_ctx_call(void* arg, uintptr_t (*func)(void*)); 34 | uintptr_t bunki_ctx_call_arg2(void* arg0, void* arg1, uintptr_t (*func)(void*, void*)); 35 | uintptr_t bunki_ctx_call_arg3(void* arg0, void* arg1, void* arg2, uintptr_t (*func)(void*, void*, void*)); 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /include/bunki_common.h: -------------------------------------------------------------------------------- 1 | #ifndef BUNKI_COMMON_FILE_H 2 | #define BUNKI_COMMON_FILE_H 3 | 4 | #include 5 | #include 6 | 7 | #if defined(__GNUC__) 8 | #if UINT_MAX >= UINT32_MAX 9 | #define BUNKI_NATIVE_POPCNT(x) __builtin_popcount(x) 10 | #elif ULONG_MAX >= UINT32_MAX 11 | #define BUNKI_NATIVE_POPCNT(x) __builtin_popcountl(x) 12 | #endif 13 | #elif defined(_MSC_VER) 14 | #define BUNKI_NATIVE_POPCNT(x) __popcnt(x) 15 | #endif 16 | 17 | static inline uint32_t bunki_pop_cnt(uint32_t number) { 18 | #if defined(BUNKI_NATIVE_POPCNT) 19 | return BUNKI_NATIVE_POPCNT(number); 20 | #else 21 | uint32_t tmp = number & 0x55555555; 22 | tmp += (number >> 1) & 0x55555555; 23 | tmp = (tmp & 0x33333333) + ((tmp >> 2) & 0x33333333); 24 | // No need to mask here we have a zero to carry into 25 | tmp += (tmp + (tmp >> 4)); 26 | // Clean up any junk in the upper nibbles of each byte 27 | tmp &= 0x0f0f0f0f; 28 | tmp += (tmp >> 8); 29 | // Still have zeros to carry into 30 | tmp += (tmp >> 16); 31 | // Cleanup the junk 32 | return tmp & 0x3f; 33 | #endif 34 | } 35 | 36 | static inline unsigned bunki_is_power2(uint32_t number) { 37 | #if defined(BUNKI_NATIVE_POPCNT) 38 | return BUNKI_NATIVE_POPCNT(number) == 1; 39 | #else 40 | if(number == 0) { 41 | return 0; 42 | } 43 | return (number & 
(number - 1)) == 0; 44 | #endif 45 | } 46 | 47 | #if defined(_MSC_VER) && !defined(__clang__) 48 | #define BUNKI_BSWAP32(val) _byteswap_ulong(val) 49 | #elif defined(__GNUC__) 50 | #define BUNKI_BSWAP32(val) __builtin_bswap32(val) 51 | #else 52 | #define BUNKI_BSWAP32(val) ((val >> 24) | ((val >> 8) & 0xff00) | ((val << 8) & 0xff0000) | (val << 24)) 53 | #endif 54 | 55 | #define BUNKI_ALIGN_MASK(val) (~((size_t)val - 1)) 56 | 57 | // Function prototypes 58 | unsigned bunki_patch_obj_mprotect_exec(void* ptr, uint8_t obj_size, unsigned write); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/bunki_common.c: -------------------------------------------------------------------------------- 1 | #include "bunki_common.h" 2 | 3 | #if defined(_WIN32) 4 | #include 5 | static unsigned get_page_size(void) { 6 | #if defined(__x86_64__) 7 | return 4096; 8 | #else 9 | SYSTEM_INFO si; 10 | GetSystemInfo(&si); 11 | return si.dwPageSize; 12 | #endif 13 | } 14 | 15 | unsigned bunki_patch_obj_mprotect_exec(void* ptr, uint8_t obj_size, unsigned write) { 16 | uintptr_t pg_sz = get_page_size(); 17 | uintptr_t shift = 0; 18 | uintptr_t ptr_beg = (uintptr_t)ptr; 19 | uintptr_t ptr_end = ptr_beg + obj_size - 1; 20 | ptr_beg &= -pg_sz; 21 | ptr_end &= -pg_sz; 22 | // straddles page boundary 23 | if(ptr_beg != ptr_end) { 24 | shift += 1; 25 | } 26 | int prot = write ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ; 27 | DWORD old; 28 | return VirtualProtect((void*)ptr_beg, pg_sz << shift, prot, &old) == 0; 29 | } 30 | 31 | #elif defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) 32 | #include 33 | #include 34 | 35 | static unsigned get_page_size(void) { 36 | #if defined(__x86_64__) 37 | return 4096; 38 | #else 39 | long size = sysconf(_SC_PAGESIZE); 40 | return size; 41 | #endif 42 | } 43 | // Currently as written the object must only straddle two pages at most. 44 | unsigned bunki_patch_obj_mprotect_exec(void* ptr, uint8_t obj_size, unsigned write) { 45 | uintptr_t pg_sz = get_page_size(); 46 | uintptr_t shift = 0; 47 | uintptr_t ptr_beg = (uintptr_t)ptr; 48 | uintptr_t ptr_end = ptr_beg + obj_size - 1; 49 | ptr_beg &= -pg_sz; 50 | ptr_end &= -pg_sz; 51 | // straddles page boundary 52 | if(ptr_beg != ptr_end) { 53 | shift += 1; 54 | } 55 | int prot = PROT_READ | PROT_EXEC; 56 | prot |= write ? 
PROT_WRITE : 0; 57 | return mprotect((void*)ptr_beg, pg_sz << shift, prot) != 0; 58 | } 59 | #endif 60 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | define NEW_LINE 2 | 3 | 4 | endef 5 | CC := gcc 6 | CFLAGS := -O2 -falign-functions -march=native 7 | LDFLAGS := 8 | LDLIBS := 9 | LIBS := 10 | INCLUDES := -I./include 11 | C_WARN := -pedantic-errors -Wall -Wextra -Wparentheses -Wdouble-promotion \ 12 | -Warith-conversion -Wduplicated-branches -Wduplicated-cond -Wshadow \ 13 | -Wunsafe-loop-optimizations -Wbad-function-cast -Wunsuffixed-float-constants \ 14 | -fanalyzer -Wanalyzer-too-complex 15 | 16 | SRC_DIR := src 17 | OBJ_DIR := obj 18 | 19 | SRC_C := $(wildcard $(SRC_DIR)/*.c) 20 | SRC_ASM := 21 | SRC := $(SRC_C) $(SRC_ASM) 22 | 23 | ifneq (,$(findstring x86_64, $(shell $(CC) -dumpmachine))) 24 | INCLUDES += -I./include/x86_64 25 | SRC_C += $(wildcard $(SRC_DIR)/x86_64/*.c) 26 | SRC_ASM += $(wildcard $(SRC_DIR)/x86_64/*.S) 27 | endif 28 | 29 | ifneq (,$(findstring aarch64, $(shell $(CC) -dumpmachine))) 30 | INCLUDES += -I./include/aarch64 31 | SRC_C += $(wildcard $(SRC_DIR)/aarch64/*.c) 32 | SRC_ASM += $(wildcard $(SRC_DIR)/aarch64/*.S) 33 | endif 34 | 35 | ifneq (,$(findstring arm, $(shell $(CC) -dumpmachine))) 36 | INCLUDES += -I./include/arm 37 | SRC_C += $(wildcard $(SRC_DIR)/arm/*.c) 38 | SRC_ASM += $(wildcard $(SRC_DIR)/arm/*.S) 39 | endif 40 | 41 | $(info INCLUDES = $(INCLUDES)) 42 | $(info SRC_C = $(SRC_C)) 43 | $(info SRC_ASM = $(SRC_ASM)) 44 | 45 | OBJ_C := $(SRC_C:$(SRC_DIR)/%.c=$(OBJ_DIR)/%.o) 46 | OBJ_ASM := $(SRC_ASM:$(SRC_DIR)/%.S=$(OBJ_DIR)/%.o) 47 | OBJ := $(OBJ_C) $(OBJ_ASM) 48 | OBJ_DIRS := $(sort $(dir $(OBJ))) 49 | 50 | $(info OBJ = $(OBJ)) 51 | $(info OBJ_DIRS = $(OBJ_DIRS)$(NEW_LINE)) 52 | $(info Beginning Build) 53 | 54 | .PHONY: all clean 55 | 56 | all: ./build $(OBJ_DIRS) libbunki.a 57 | 58 | libbunki.a: $(OBJ) 59 | @echo "Building Library: $@" 60 | ar -r ./$@ $^ 61 | cp ./$@ ./build/lib/$@ 62 | cp ./include/bunki.h ./build/include/bunki.h 63 | @echo "" 64 | 65 | # include all the new rules that depend on the header 66 | -include $(OBJ_C:.o=.d) 67 | 68 | $(OBJ_C): $(SRC_C) 69 | @echo "Compiling: $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.c)" 70 | $(CC) -MMD $(C_WARN) $(CFLAGS) $(INCLUDES) -c $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.c) -o $@ 71 | @echo "" 72 | 73 | $(OBJ_ASM): $(SRC_ASM) 74 | @echo "Compiling: $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.S)" 75 | $(CC) $(CFLAGS) -c $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.S) -o $@ 76 | @echo "" 77 | 78 | clean: 79 | rm libbunki.a 80 | rm -rv $(OBJ_DIR) 81 | rm -rv ./build 82 | 83 | ./build: 84 | mkdir -p $@ 85 | mkdir -p $@/include 86 | mkdir -p $@/lib 87 | @echo "" 88 | 89 | $(OBJ_DIRS): 90 | mkdir -p $@ 91 | @echo "" 92 | -------------------------------------------------------------------------------- /src/arm/bunki_ctx.S: -------------------------------------------------------------------------------- 1 | #if !defined(BUNKI_STACK_CONST) 2 | #define BUNKI_STACK_CONST 0x2 3 | #endif 4 | 5 | .macro save_ctx save_ptr, save_off 6 | push { r4-r11, lr } 7 | str sp, [\save_ptr, #-\save_off] 8 | .endm 9 | 10 | .macro load_ctx load_ptr, load_off 11 | ldr sp, [\load_ptr, #-\load_off] 12 | pop { r4-r11, lr } 13 | .endm 14 | 15 | .text 16 | 17 | .global bunki_resume 18 | .global __bunki_patch0_r0_r0__ 19 | .align 4 20 | bunki_resume: 21 | __bunki_patch0_r0_r0__: 22 | orr r0, r0, #(BUNKI_STACK_CONST - 1) 23 | save_ctx r0, 0x7 24 | str 
sp, [r0, #-0xb] 25 | load_ctx r0, 0x3 26 | mov pc, lr 27 | 28 | .global bunki_ctx_resume 29 | .global __bunki_patch1_r3_sp__ 30 | .global __bunki_patch2_r0_r0__ 31 | .align 4 32 | bunki_ctx_resume: 33 | __bunki_patch1_r3_sp__: 34 | orr r3, sp, #(BUNKI_STACK_CONST - 1) 35 | __bunki_patch2_r0_r0__: 36 | orr r0, r0, #(BUNKI_STACK_CONST - 1) 37 | ldr r3, [r3, #-0xb] 38 | str r3, [r0, #-0xb] 39 | save_ctx r0, 0x7 40 | load_ctx r0, 0x3 41 | mov pc, lr 42 | 43 | .global bunki_yield 44 | .global __bunki_patch3_r3_sp__ 45 | .align 4 46 | bunki_yield: 47 | __bunki_patch3_r3_sp__: 48 | orr r3, sp, #(BUNKI_STACK_CONST - 1) 49 | save_ctx r3, 0x3 50 | load_ctx r3, 0x7 51 | mov pc, lr 52 | 53 | .global bunki_ctx_stack_start 54 | .global __bunki_patch4_r0_sp__ 55 | .align 4 56 | bunki_ctx_stack_start: 57 | __bunki_patch4_r0_sp__: 58 | orr r0, sp, #(BUNKI_STACK_CONST - 1) 59 | add r0, r0, #1 60 | mov pc, lr 61 | 62 | .global bunki_init_ctx 63 | .global __bunki_patch5_r11_sp__ 64 | .align 4 65 | bunki_init_ctx: 66 | mov r0, r5 67 | blx r4 68 | __bunki_patch5_r11_sp__: 69 | orr r11, sp, #(BUNKI_STACK_CONST - 1) 70 | ldr r5, =bunki_set_ret 71 | sub r4, sp, #0x24 72 | str r4, [r11, #-0x3] 73 | stm r4, { r5, r11 } 74 | bunki_done: 75 | load_ctx r11, 0x7 76 | mov pc, lr 77 | bunki_set_ret: 78 | eor r0, r0 79 | b bunki_done 80 | 81 | .global bunki_ctx_call 82 | .align 6 83 | bunki_ctx_call: 84 | mov r3, r1 85 | b bunki_ctx_call_arg3 86 | 87 | .global bunki_ctx_call_arg2 88 | .align 4 89 | bunki_ctx_call_arg2: 90 | mov r3, r2 91 | b bunki_ctx_call_arg3 92 | 93 | .global bunki_ctx_call_arg3 94 | .global __bunki_patch6_r4_sp__ 95 | .align 4 96 | bunki_ctx_call_arg3: 97 | push { r4, r5 } 98 | __bunki_patch6_r4_sp__: 99 | orr r4, sp, #(BUNKI_STACK_CONST - 1) 100 | ldr r4, [r4, #-0xb] 101 | and r4, r4, #-8 102 | mov r5, sp 103 | mov sp, r4 104 | push { r5, lr } 105 | blx r3 106 | pop { r5, lr } 107 | mov sp, r5 108 | pop { r4, r5 } 109 | mov pc, lr 110 | -------------------------------------------------------------------------------- /src/x86_64/bunki_native.c: -------------------------------------------------------------------------------- 1 | #include "bunki.h" 2 | #include "bunki_ctx.h" 3 | #include "bunki_common.h" 4 | #include 5 | 6 | void bunki_init_ctx(void); 7 | extern uint32_t __bunki_patch0__; 8 | extern uint32_t __bunki_patch1__; 9 | extern uint32_t __bunki_patch2__; 10 | extern uint32_t __bunki_patch3__; 11 | extern uint32_t __bunki_patch4__; 12 | 13 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end) { 14 | 15 | uintptr_t stk = (uintptr_t)ctx; 16 | // If stack alignment does not match use a little space to fix that. 17 | stk &= BUNKI_ALIGN_MASK(ARCH_STK_ALIGN); 18 | struct stack_ctx_s* new_ctx = (struct stack_ctx_s*)(stk - sizeof(struct stack_ctx_s)); 19 | // init_ctx moves arg where needed for calling argument then jumps to func. 
20 | new_ctx->rip = (uintptr_t)bunki_init_ctx; 21 | new_ctx->rbx = (uintptr_t)func; 22 | new_ctx->r15 = (uintptr_t)arg; 23 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 24 | #if defined(_WIN32) 25 | // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=vs-2019#fpcsr 26 | new_ctx->both_csrs.fcw = 0x17F; 27 | #else 28 | // https://github.com/torvalds/linux/blob/63355b9884b3d1677de6bd1517cd2b8a9bf53978/arch/x86/kernel/fpu/core.c#L483 29 | new_ctx->both_csrs.fcw = 0x37f; 30 | #endif 31 | 32 | // https://www.amd.com/system/files/TechDocs/24592.pdf page 113 use default reset value 33 | // Bits 7-12 should be set 34 | // MS uses same reset value 35 | // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=vs-2019#mxcsr 36 | new_ctx->both_csrs.mxcsr = 0x1F80; 37 | #endif 38 | #ifdef _WIN64 39 | new_ctx->stk_base = stk; 40 | new_ctx->stk_limit = stack_end; 41 | new_ctx->stk_dealloc = stack_end; 42 | #else 43 | (void)stack_end; // Suppress unused warning 44 | #endif 45 | return new_ctx; 46 | } 47 | 48 | #if !defined(BUNKI_STACK_CONST) 49 | unsigned bunki_patch_call_yield(uint32_t stack_size) { 50 | unsigned ret = 0; 51 | // Probably could do with less calls since I know some these patch points 52 | // are within a single page, but if I refactor any of the assembly at least 53 | // I do not need to update this. 54 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch0__, sizeof(uint32_t), 1); 55 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch1__, sizeof(uint32_t), 1); 56 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch2__, sizeof(uint32_t), 1); 57 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch3__, sizeof(uint32_t), 1); 58 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch4__, sizeof(uint32_t), 1); 59 | if(ret) { 60 | goto done; 61 | } 62 | stack_size -= 1; 63 | memcpy(&__bunki_patch0__, &stack_size, sizeof(uint32_t)); 64 | memcpy(&__bunki_patch1__, &stack_size, sizeof(uint32_t)); 65 | memcpy(&__bunki_patch2__, &stack_size, sizeof(uint32_t)); 66 | memcpy(&__bunki_patch3__, &stack_size, sizeof(uint32_t)); 67 | memcpy(&__bunki_patch4__, &stack_size, sizeof(uint32_t)); 68 | done: 69 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch0__, sizeof(uint32_t), 0); 70 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch1__, sizeof(uint32_t), 0); 71 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch2__, sizeof(uint32_t), 0); 72 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch3__, sizeof(uint32_t), 0); 73 | ret |= bunki_patch_obj_mprotect_exec(&__bunki_patch4__, sizeof(uint32_t), 0); 74 | return ret; 75 | } 76 | #endif 77 | -------------------------------------------------------------------------------- /src/bunki_setup.c: -------------------------------------------------------------------------------- 1 | #include "bunki.h" 2 | #include "bunki_ctx.h" 3 | #include "bunki_common.h" 4 | 5 | #include 6 | #include 7 | 8 | #define MULT_OF_ALIGN(x, align) (((x) + ((align) - 1)) & (-(align))) 9 | 10 | #if defined(BUNKI_STACK_CONST) 11 | static const uint32_t global_stack_size = (BUNKI_STACK_CONST); 12 | #else 13 | static uint32_t global_stack_size = 0; 14 | #endif 15 | 16 | static uintptr_t get_stack_start(bunki_t ctx) { 17 | uintptr_t ret = (uintptr_t)ctx; 18 | ret |= (uintptr_t)(global_stack_size - 1); 19 | ret += 1; 20 | return ret; 21 | } 22 | 23 | static uintptr_t get_stack_base(bunki_t ctx) { 24 | uintptr_t ret = (uintptr_t)ctx; 25 | ret &= -((uintptr_t)global_stack_size); 26 | return ret; 27 | } 28 | 29 | uint32_t bunki_stack_min_size(void) { 30 | 
uint32_t size = 5 * sizeof(void*) + sizeof(struct stack_ctx_s); 31 | // round up to nearest power of 2 32 | size--; 33 | size |= size >> 1; 34 | size |= size >> 2; 35 | size |= size >> 4; 36 | size |= size >> 8; 37 | size |= size >> 16; 38 | size++; 39 | return size; 40 | } 41 | 42 | unsigned bunki_init(uint32_t stack_size) { 43 | #if defined(BUNKI_STACK_CONST) 44 | (void) stack_size; 45 | return 0; 46 | #else 47 | if(!bunki_is_power2(stack_size) || stack_size < bunki_stack_min_size()) { 48 | return 1; 49 | } 50 | global_stack_size = stack_size; 51 | return (bunki_patch_call_yield(stack_size) << 1); 52 | #endif 53 | } 54 | 55 | uint32_t bunki_stack_alignment(void) { 56 | return global_stack_size; 57 | } 58 | 59 | void* bunki_stack_ptr(bunki_t ctx) { 60 | uintptr_t ptr = get_stack_base(ctx); 61 | return (void*)ptr; 62 | } 63 | 64 | void* bunki_stack_push(bunki_t* ctx, size_t allocation_length) { 65 | uintptr_t stk = (uintptr_t)(*ctx); 66 | stk -= allocation_length; 67 | stk &= BUNKI_ALIGN_MASK(ARCH_STK_ALIGN); 68 | *ctx = (bunki_t)stk; 69 | return (void*)stk; 70 | } 71 | 72 | // Push data onto a stack before finalizing a ctx with this. Handy in 73 | // conjuction with the arg parameter for a make ctx function. 74 | void* bunki_stack_push_data(bunki_t* ctx, size_t data_length, void* data) { 75 | void* stk = bunki_stack_push(ctx, data_length); 76 | memcpy(stk, data, data_length); 77 | return stk; 78 | } 79 | 80 | void* bunki_data_get(bunki_t ctx) { 81 | uintptr_t ptr = get_stack_start(ctx) - 4 * sizeof(void*); 82 | void* ret; 83 | memcpy(&ret, (void*)ptr, sizeof(void*)); 84 | return ret; 85 | } 86 | 87 | void bunki_data_set(bunki_t ctx, void* data) { 88 | uintptr_t ptr = get_stack_start(ctx) - 4 * sizeof(void*); 89 | memcpy((void*)ptr, &data, sizeof(void*)); 90 | } 91 | 92 | bunki_t bunki_init_stack_ctx(void* stack_mem) { 93 | uintptr_t stk = (uintptr_t)stack_mem; 94 | stk += global_stack_size; 95 | bunki_t ctx = (bunki_t)stk; 96 | bunki_stack_push(&ctx, 4 * sizeof(void*)); 97 | //void** ptrs = bunki_stack_push(&ctx, 4 * sizeof(void*)); 98 | // ptrs[0] user data 99 | // ptrs[1] the thread stack pointer 100 | // ptrs[2] caller ctx 101 | // ptrs[3] callee ctx 102 | return ctx; 103 | } 104 | 105 | void bunki_prepare_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg) { 106 | uintptr_t ptr = get_stack_base(ctx); 107 | bunki_t ret = bunki_native_finalize_ctx(ctx, func, arg, ptr); 108 | ptr = get_stack_start(ret) - sizeof(void*); 109 | memcpy((void*)ptr, &ret, sizeof(void*)); 110 | } 111 | 112 | bunki_t bunki_init_prepare_ctx(void* stack, uintptr_t (*func)(void*), void* arg) { 113 | bunki_t ctx = bunki_init_stack_ctx(stack); 114 | bunki_prepare_ctx(ctx, func, arg); 115 | return ctx; 116 | } 117 | -------------------------------------------------------------------------------- /src/aarch64/bunki_ctx.S: -------------------------------------------------------------------------------- 1 | #if !defined(BUNKI_STACK_CONST) 2 | #define BUNKI_STACK_CONST 0x2 3 | #endif 4 | 5 | .macro save_ctx save_ptr, save_off 6 | #if !defined(BUNKI_AARCH64_NO_VEC_FLOAT) 7 | stp d9, d8, [sp, -16]! 8 | stp d11, d10, [sp, -16]! 9 | stp d13, d12, [sp, -16]! 10 | stp d15, d14, [sp, -16]! 11 | #endif 12 | stp x20, x19, [sp, -16]! 13 | stp x22, x21, [sp, -16]! 14 | stp x24, x23, [sp, -16]! 15 | stp x26, x25, [sp, -16]! 16 | stp x28, x27, [sp, -16]! 17 | stp lr, x29, [sp, -16]! 
18 | mov x7, sp 19 | str x7, [\save_ptr, -\save_off] 20 | .endm 21 | 22 | .macro load_ctx load_ptr, load_off 23 | ldr x7, [\load_ptr, -\load_off] 24 | mov sp, x7 25 | ldp lr, x29, [sp], 16 26 | ldp x28, x27, [sp], 16 27 | ldp x26, x25, [sp], 16 28 | ldp x24, x23, [sp], 16 29 | ldp x22, x21, [sp], 16 30 | ldp x20, x19, [sp], 16 31 | #if !defined(BUNKI_AARCH64_NO_VEC_FLOAT) 32 | ldp d15, d14, [sp], 16 33 | ldp d13, d12, [sp], 16 34 | ldp d11, d10, [sp], 16 35 | ldp d9, d8, [sp], 16 36 | #endif 37 | .endm 38 | 39 | .text 40 | .global bunki_resume 41 | .global __bunki_patch0_x0__ 42 | .align 4 43 | bunki_resume: 44 | __bunki_patch0_x0__: 45 | orr x0, x0, (BUNKI_STACK_CONST - 1) 46 | save_ctx x0, 0xf 47 | str x7, [x0, -0x17] 48 | load_ctx x0, 0x7 49 | ret 50 | 51 | .global bunki_ctx_resume 52 | .global __bunki_patch1_x6__ 53 | .global __bunki_patch2_x0__ 54 | .align 4 55 | bunki_ctx_resume: 56 | mov x6, sp 57 | __bunki_patch1_x6__: 58 | orr x6, x6, (BUNKI_STACK_CONST - 1) 59 | __bunki_patch2_x0__: 60 | orr x0, x0, (BUNKI_STACK_CONST - 1) 61 | ldr x6, [x6, -0x17] 62 | str x6, [x0, -0x17] 63 | save_ctx x0, 0xf 64 | load_ctx x0, 0x7 65 | ret 66 | 67 | 68 | .global bunki_yield 69 | .global __bunki_patch3_x6__ 70 | .align 4 71 | bunki_yield: 72 | mov x6, sp 73 | __bunki_patch3_x6__: 74 | orr x6, x6, (BUNKI_STACK_CONST - 1) 75 | save_ctx x6, 0x7 76 | load_ctx x6, 0xf 77 | ret 78 | 79 | .global bunki_ctx_stack_start 80 | .global __bunki_patch4_x0__ 81 | .align 4 82 | bunki_ctx_stack_start: 83 | mov x0, sp 84 | __bunki_patch4_x0__: 85 | orr x0, x0, (BUNKI_STACK_CONST - 1) 86 | add x0, x0, 1 87 | ret 88 | 89 | .global bunki_init_ctx 90 | .global __bunki_patch5_x29__ 91 | .align 4 92 | bunki_init_ctx: 93 | mov x0, x20 94 | blr x19 95 | mov x29, sp 96 | __bunki_patch5_x29__: 97 | orr x29, x29, (BUNKI_STACK_CONST - 1) 98 | adrp x5, bunki_set_ret 99 | add x5, x5, #:lo12:bunki_set_ret 100 | #if defined(BUNKI_AARCH64_NO_VEC_FLOAT) 101 | sub x4, x29, 0x7f 102 | #else 103 | sub x4, x29, 0xbf 104 | #endif 105 | str x4, [x29, -0x7] 106 | stp x5, x29, [x4] 107 | bunki_done: 108 | load_ctx x29, 0xf 109 | ret 110 | bunki_set_ret: 111 | mov x0, xzr 112 | b bunki_done 113 | 114 | .global bunki_ctx_call 115 | .align 6 116 | bunki_ctx_call: 117 | mov x3, x1 118 | b bunki_ctx_call_arg3 119 | 120 | .global bunki_ctx_call_arg2 121 | .align 4 122 | bunki_ctx_call_arg2: 123 | mov x3, x2 124 | b bunki_ctx_call_arg3 125 | 126 | .global bunki_ctx_call_arg3 127 | .global __bunki_patch6_x6__ 128 | .align 4 129 | bunki_ctx_call_arg3: 130 | mov x6, sp 131 | __bunki_patch6_x6__: 132 | orr x6, x6, (BUNKI_STACK_CONST - 1) 133 | ldr x6, [x6, -0x17] 134 | and x6, x6, -16 // align stack 135 | mov x5, sp // save stack pointer 136 | mov sp, x6 // switch stacks 137 | stp lr, x5, [sp, -16]! 
// save ret and stack pointer 138 | blr x3 139 | ldp lr, x5, [sp], 16 140 | mov sp, x5 // restore stack 141 | ret 142 | -------------------------------------------------------------------------------- /src/aarch64/bunki_native.c: -------------------------------------------------------------------------------- 1 | #include "bunki.h" 2 | #include "bunki_ctx.h" 3 | #include "bunki_common.h" 4 | 5 | #include 6 | 7 | void bunki_init_ctx(void); 8 | extern uint32_t __bunki_patch0_x0__; 9 | extern uint32_t __bunki_patch1_x6__; 10 | extern uint32_t __bunki_patch2_x0__; 11 | extern uint32_t __bunki_patch3_x6__; 12 | extern uint32_t __bunki_patch4_x0__; 13 | extern uint32_t __bunki_patch5_x29__; 14 | extern uint32_t __bunki_patch6_x6__; 15 | 16 | static uint32_t* patch_ptrs[7] = { 17 | &__bunki_patch0_x0__, 18 | &__bunki_patch1_x6__, 19 | &__bunki_patch2_x0__, 20 | &__bunki_patch3_x6__, 21 | &__bunki_patch4_x0__, 22 | &__bunki_patch5_x29__, 23 | &__bunki_patch6_x6__ 24 | }; 25 | 26 | // stack_size must be greater than 1 and a power of 2 27 | static uint32_t gen_orr(uint32_t dest, uint32_t src, uint32_t stack_size) { 28 | unsigned bits = bunki_pop_cnt(stack_size - 1) - 1; 29 | uint32_t ins = 0xb2400000; // sf = 1, OP = orr, N = 1 for 64 bit pattern 30 | ins |= bits << 10; // Set how many 1s we want to OR with src 31 | ins |= (dest & 0x1f); // Set dest register 32 | ins |= (src & 0x1f) << 5; // Set src register 33 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 34 | return BUNKI_BSWAP32(ins); 35 | #else 36 | return ins; 37 | #endif 38 | } 39 | 40 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end) { 41 | (void)stack_end; // Suppress unused warning 42 | uintptr_t stk = (uintptr_t)ctx; 43 | // If stack alignment does not match use a little space to fix that. 44 | stk &= BUNKI_ALIGN_MASK(ARCH_STK_ALIGN); 45 | struct stack_ctx_s* new_ctx = (struct stack_ctx_s*)(stk - sizeof(struct stack_ctx_s)); 46 | // init_ctx moves arg where needed for calling argument then jumps to func. 47 | new_ctx->lr = (uintptr_t)bunki_init_ctx; 48 | new_ctx->x19 = (uintptr_t)func; 49 | new_ctx->x20 = (uintptr_t)arg; 50 | return new_ctx; 51 | } 52 | 53 | void* bunki_ctx_data_get(void) { 54 | uintptr_t ptr = bunki_ctx_stack_start() - 0x20; 55 | void* ret = *((void**)ptr); 56 | return ret; 57 | } 58 | 59 | void bunki_ctx_data_set(void* data) { 60 | uintptr_t ptr = bunki_ctx_stack_start() - 0x20; 61 | *((void**)ptr) = data; 62 | } 63 | 64 | 65 | #if !defined(BUNKI_STACK_CONST) || 1 66 | unsigned bunki_patch_call_yield(uint32_t stack_size) { 67 | // Probably could do with less calls since I know some these patch points 68 | // are within a single page, but if I refactor any of the assembly at least 69 | // I do not need to update this. 
70 | unsigned ret = 0; 71 | for(unsigned i = 0; i < 7; i++) { 72 | ret |= bunki_patch_obj_mprotect_exec(patch_ptrs[i], sizeof(uint32_t), 1); 73 | } 74 | if(ret) { 75 | goto done; 76 | } 77 | uint32_t x0 = gen_orr(0, 0, stack_size); 78 | uint32_t x6 = gen_orr(6, 6, stack_size); 79 | uint32_t x29 = gen_orr(29, 29, stack_size); 80 | memcpy(&__bunki_patch0_x0__, &x0, sizeof(uint32_t)); 81 | memcpy(&__bunki_patch1_x6__, &x6, sizeof(uint32_t)); 82 | memcpy(&__bunki_patch2_x0__, &x0, sizeof(uint32_t)); 83 | memcpy(&__bunki_patch3_x6__, &x6, sizeof(uint32_t)); 84 | memcpy(&__bunki_patch4_x0__, &x0, sizeof(uint32_t)); 85 | memcpy(&__bunki_patch5_x29__, &x29, sizeof(uint32_t)); 86 | memcpy(&__bunki_patch6_x6__, &x6, sizeof(uint32_t)); 87 | done: 88 | uintptr_t lowest = UINTPTR_MAX; 89 | uintptr_t highest = 0; 90 | for(unsigned i = 0; i < 7; i++) { 91 | uintptr_t ptr = (uintptr_t)(patch_ptrs[i]); 92 | if(ptr < lowest) { 93 | lowest = ptr; 94 | } 95 | if(ptr > highest) { 96 | highest = ptr; 97 | } 98 | ret |= bunki_patch_obj_mprotect_exec(patch_ptrs[i], sizeof(uint32_t), 0); 99 | } 100 | lowest &= -(uintptr_t)4096; 101 | highest &= -(uintptr_t)4096; 102 | highest += 4096; 103 | // important since ARM64 has an icache 104 | // mprotect does flush caches on linux for quite sometime 105 | // but that is not a given for other *nix systems 106 | __builtin___clear_cache((char*)lowest, (char*)highest); 107 | return ret; 108 | } 109 | #endif 110 | -------------------------------------------------------------------------------- /src/arm/bunki_native.c: -------------------------------------------------------------------------------- 1 | #include "bunki.h" 2 | #include "bunki_ctx.h" 3 | #include "bunki_common.h" 4 | 5 | #include 6 | 7 | void bunki_init_ctx(void); 8 | extern uint32_t __bunki_patch0_r0_r0__; 9 | extern uint32_t __bunki_patch1_r3_sp__; 10 | extern uint32_t __bunki_patch2_r0_r0__; 11 | extern uint32_t __bunki_patch3_r3_sp__; 12 | extern uint32_t __bunki_patch4_r0_sp__; 13 | extern uint32_t __bunki_patch5_r11_sp__; 14 | extern uint32_t __bunki_patch6_r4_sp__; 15 | 16 | static uint32_t* patch_ptrs[] = { 17 | &__bunki_patch0_r0_r0__, 18 | &__bunki_patch1_r3_sp__, 19 | &__bunki_patch2_r0_r0__, 20 | &__bunki_patch3_r3_sp__, 21 | &__bunki_patch4_r0_sp__, 22 | &__bunki_patch5_r11_sp__, 23 | &__bunki_patch6_r4_sp__ 24 | }; 25 | 26 | // stack_size must be greater than 1 and a power of 2 27 | static uint32_t gen_orr(uint32_t dest, uint32_t src, uint32_t stack_size) { 28 | uint32_t ins = 0xe3800000; // S = 0 29 | ins |= (stack_size - 1) & 0x0fff; 30 | ins |= (src & 0x000f) << 16; 31 | ins |= (dest & 0x000f) << 12; 32 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 33 | return BUNKI_BSWAP32(ins); 34 | #else 35 | return ins; 36 | #endif 37 | } 38 | 39 | // 40 | 41 | bunki_t bunki_native_finalize_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg, uintptr_t stack_end) { 42 | (void)stack_end; // Suppress unused warning 43 | uintptr_t stk = (uintptr_t)ctx; 44 | // If stack alignment does not match use a little space to fix that. 45 | stk &= BUNKI_ALIGN_MASK(ARCH_STK_ALIGN); 46 | struct stack_ctx_s* new_ctx = (struct stack_ctx_s*)(stk - sizeof(struct stack_ctx_s)); 47 | // init_ctx moves arg where needed for calling argument then jumps to func. 
48 | new_ctx->lr = (uintptr_t)bunki_init_ctx; 49 | new_ctx->r4 = (uintptr_t)func; 50 | new_ctx->r5 = (uintptr_t)arg; 51 | return new_ctx; 52 | } 53 | 54 | void* bunki_ctx_data_get(void) { 55 | uintptr_t ptr = bunki_ctx_stack_start() - 0x10; 56 | void* ret = *((void**)ptr); 57 | return ret; 58 | } 59 | 60 | void bunki_ctx_data_set(void* data) { 61 | uintptr_t ptr = bunki_ctx_stack_start() - 0x10; 62 | *((void**)ptr) = data; 63 | } 64 | 65 | 66 | #if !defined(BUNKI_STACK_CONST) || 1 67 | unsigned bunki_patch_call_yield(uint32_t stack_size) { 68 | // Probably could do with less calls since I know some these patch points 69 | // are within a single page, but if I refactor any of the assembly at least 70 | // I do not need to update this. 71 | unsigned ret = 0; 72 | for(unsigned i = 0; i < 7; i++) { 73 | ret |= bunki_patch_obj_mprotect_exec(patch_ptrs[i], sizeof(uint32_t), 1); 74 | } 75 | if(ret) { 76 | goto done; 77 | } 78 | uint32_t r0_r0 = gen_orr(0, 0, stack_size); 79 | uint32_t r3_sp = gen_orr(3, 13, stack_size); 80 | uint32_t r0_sp = gen_orr(0, 13, stack_size); 81 | uint32_t r11_sp = gen_orr(11, 13, stack_size); 82 | uint32_t r4_sp = gen_orr(4, 13, stack_size); 83 | memcpy(&__bunki_patch0_r0_r0__, &r0_r0, sizeof(uint32_t)); 84 | memcpy(&__bunki_patch1_r3_sp__, &r3_sp, sizeof(uint32_t)); 85 | memcpy(&__bunki_patch2_r0_r0__, &r0_r0, sizeof(uint32_t)); 86 | memcpy(&__bunki_patch3_r3_sp__, &r3_sp, sizeof(uint32_t)); 87 | memcpy(&__bunki_patch4_r0_sp__, &r0_sp, sizeof(uint32_t)); 88 | memcpy(&__bunki_patch5_r11_sp__, &r11_sp, sizeof(uint32_t)); 89 | memcpy(&__bunki_patch6_r4_sp__, &r4_sp, sizeof(uint32_t)); 90 | 91 | done: ; 92 | uintptr_t lowest = UINTPTR_MAX; 93 | uintptr_t highest = 0; 94 | for(unsigned i = 0; i < 7; i++) { 95 | uintptr_t ptr = (uintptr_t)(patch_ptrs[i]); 96 | if(ptr < lowest) { 97 | lowest = ptr; 98 | } 99 | if(ptr > highest) { 100 | highest = ptr; 101 | } 102 | ret |= bunki_patch_obj_mprotect_exec(patch_ptrs[i], sizeof(uint32_t), 0); 103 | } 104 | lowest &= -(uintptr_t)4096; 105 | highest &= -(uintptr_t)4096; 106 | highest += 4096; 107 | // important since ARM has an icache 108 | // mprotect does flush caches on linux for quite sometime 109 | // but that is not a given for other *nix systems 110 | __builtin___clear_cache((char*)lowest, (char*)highest); 111 | return ret; 112 | } 113 | #endif 114 | -------------------------------------------------------------------------------- /src/x86_64/bunki_ctx.S: -------------------------------------------------------------------------------- 1 | .intel_syntax noprefix 2 | 3 | #if defined(__linux__) 4 | #define SYSTEM_V_ABI 1 5 | #elif defined(__APPLE__) && defined(__MACH__) 6 | #define SYSTEM_V_ABI 1 7 | #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) 8 | #define SYSTEM_V_ABI 1 9 | #endif 10 | 11 | #if defined(SYSTEM_V_ABI) 12 | #define ARG1 rdi 13 | #define ARG2 rsi 14 | #define ARG3 rdx 15 | #define ARG4 rcx 16 | #elif defined(_WIN64) 17 | #define ARG1 rcx 18 | #define ARG2 rdx 19 | #define ARG3 r8 20 | #define ARG4 r9 21 | #else 22 | #error Unknown x86_64 ABI. Sadly not ported to your system. 
23 | #endif 24 | 25 | #if !defined(BUNKI_STACK_CONST) 26 | #define BUNKI_STACK_CONST 0x01 27 | #endif 28 | 29 | #if !defined(BUNKI_NO_WIN64_XMM) 30 | #define WIN_XMM_SZ 128 31 | #else 32 | #define WIN_XMM_SZ 0 33 | #endif 34 | 35 | #if defined(_WIN64) 36 | #define WIN_SZ (48 + WIN_XMM_SZ) 37 | #else 38 | #define WIN_SZ 0 39 | #endif 40 | 41 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 42 | #define FCW_SAVE 8 43 | #else 44 | #define FCW_SAVE 0 45 | #endif 46 | 47 | #define CTX_SZ (56 + FCW_SAVE + WIN_SZ) 48 | 49 | .macro save_ctx save_ptr, save_off 50 | # save the current context on the stack 51 | push rbx 52 | push rbp 53 | push r12 54 | push r13 55 | push r14 56 | push r15 57 | #if defined(_WIN64) 58 | push rdi 59 | push rsi 60 | # Get TIB pointer 61 | mov rdi, gs:0x30 62 | push [rdi + 0x1478] 63 | push [rdi + 0x0010] 64 | push [rdi + 0x0008] 65 | #if !defined(BUNKI_NO_WIN64_XMM) 66 | # Store xmm6 and xmm7 in the shadow/home space 67 | movaps [rsp + 0x60], xmm6 68 | movaps [rsp + 0x70], xmm7 69 | sub rsp, 0x80 70 | movaps [rsp + 0x00], xmm15 71 | movaps [rsp + 0x10], xmm14 72 | movaps [rsp + 0x20], xmm13 73 | movaps [rsp + 0x30], xmm12 74 | movaps [rsp + 0x40], xmm11 75 | movaps [rsp + 0x50], xmm10 76 | movaps [rsp + 0x60], xmm9 77 | movaps [rsp + 0x70], xmm8 78 | #endif 79 | # save fiber data 80 | push [rdi + 0x00020] # save the thread 81 | #endif 82 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 83 | sub rsp, 8 84 | stmxcsr [rsp] 85 | fnstcw [rsp + 0x04] 86 | #endif 87 | mov [\save_ptr - \save_off], rsp 88 | .endm 89 | 90 | .macro load_ctx load_ptr, load_off 91 | # load the context we are swapping too 92 | mov rsp, [\load_ptr - \load_off] 93 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 94 | // fnclex maybe? 95 | ldmxcsr [rsp] 96 | fldcw [rsp + 0x04] 97 | add rsp, 8 98 | #endif 99 | #if defined(_WIN64) 100 | mov rdi, gs:0x30 101 | # restore fiber data 102 | pop [rdi + 0x0020] 103 | #if !defined(BUNKI_NO_WIN64_XMM) 104 | movaps xmm15, [rsp + 0x00] 105 | movaps xmm14, [rsp + 0x10] 106 | movaps xmm13, [rsp + 0x20] 107 | movaps xmm12, [rsp + 0x30] 108 | movaps xmm11, [rsp + 0x40] 109 | movaps xmm10, [rsp + 0x50] 110 | movaps xmm9, [rsp + 0x60] 111 | movaps xmm8, [rsp + 0x70] 112 | add rsp, 0x80 113 | # load xmm6 and xmm7 from the shadow/home space 114 | movaps xmm7, [rsp + 0x70] 115 | movaps xmm6, [rsp + 0x60] 116 | #endif 117 | # needs to be set for things like __chkstk to work properly 118 | pop [rdi + 0x0008] 119 | pop [rdi + 0x0010] 120 | # from what I gather not setting this can trigger some 121 | # stack corruption protection code on windows and other problems. 
122 | pop [rdi + 0x1478] 123 | pop rsi 124 | pop rdi 125 | #endif 126 | pop r15 127 | pop r14 128 | pop r13 129 | pop r12 130 | pop rbp 131 | pop rbx 132 | .endm 133 | 134 | # bunki_resume function 135 | .text 136 | .global bunki_resume 137 | .global __bunki_patch0__ 138 | .align 16 139 | bunki_resume: 140 | #if defined(_WIN64) 141 | .byte 0x48, 0x81, 0xc9 142 | #endif 143 | #if defined(SYSTEM_V_ABI) 144 | .byte 0x48, 0x81, 0xcf 145 | #endif 146 | __bunki_patch0__: 147 | .long ((BUNKI_STACK_CONST) - 1) 148 | save_ctx ARG1, 0xf 149 | # save the thread stack 150 | mov [ARG1 - 0x17], rsp 151 | load_ctx ARG1, 0x7 152 | ret 153 | 154 | # bunki_ctx_resume function 155 | .text 156 | .global bunki_ctx_resume 157 | .global __bunki_patch1__ 158 | .align 16 159 | bunki_ctx_resume: 160 | .byte 0x48, 0xc7, 0xc0 161 | __bunki_patch1__: 162 | .long ((BUNKI_STACK_CONST) - 1) 163 | or ARG1, rax 164 | save_ctx ARG1, 0xf 165 | # get the the thread stack from 166 | # the callers ctx. 167 | or rax, rsp 168 | mov rax, [rax - 0x17] 169 | mov [ARG1 - 0x17], rax 170 | load_ctx ARG1, 0x7 171 | ret 172 | 173 | # bunki_yield function 174 | .global bunki_yield 175 | .global __bunki_patch2__ 176 | .align 16 177 | bunki_yield: 178 | mov rax, rsp 179 | .byte 0x48, 0x0d 180 | __bunki_patch2__: 181 | .long ((BUNKI_STACK_CONST) - 1) 182 | save_ctx rax, 0x7 183 | load_ctx rax, 0xf 184 | mov rax, ARG1 185 | ret 186 | 187 | # bunki_init_ctx function 188 | .global bunki_init_ctx 189 | .global __bunki_patch3__ 190 | .align 16 191 | bunki_init_ctx: 192 | mov ARG1, r15 193 | call rbx 194 | mov rbx, rsp 195 | .byte 0x48, 0x81, 0xcb 196 | __bunki_patch3__: 197 | .long ((BUNKI_STACK_CONST) - 1) 198 | lea ARG1, [rip + bunki_set_rax] 199 | push ARG1 200 | push rbx 201 | sub rsp, (CTX_SZ - 16) 202 | #if !defined(BUNKI_SHARE_FCW_MXCSR) 203 | stmxcsr [rsp] 204 | fnstcw [rsp + 0x04] 205 | #endif 206 | mov [rbx - 0x7], rsp 207 | bunki_done: 208 | load_ctx rbx, 0xf 209 | ret 210 | 211 | .align 16 212 | bunki_set_rax: 213 | xor eax, eax 214 | jmp bunki_done 215 | 216 | # bunki_ctx_stack_start 217 | .global bunki_ctx_stack_start 218 | .global __bunki_patch4__ 219 | .align 16 220 | bunki_ctx_stack_start: 221 | mov rax, rsp 222 | .byte 0x48, 0x0d 223 | __bunki_patch4__: 224 | .long ((BUNKI_STACK_CONST) - 1) 225 | inc rax 226 | ret 227 | 228 | # bunki_ctx_data_get 229 | .global bunki_ctx_data_get 230 | .align 16 231 | bunki_ctx_data_get: 232 | call bunki_ctx_stack_start 233 | mov rax, [rax - 0x20] 234 | ret 235 | 236 | # bunki_ctx_data_set 237 | .global bunki_ctx_data_set 238 | .align 16 239 | bunki_ctx_data_set: 240 | call bunki_ctx_stack_start 241 | mov [rax - 0x20], ARG1 242 | ret 243 | 244 | # bunki_ctx_call 245 | .global bunki_ctx_call 246 | .align 16 247 | bunki_ctx_call: 248 | mov ARG4, ARG2 249 | jmp bunki_ctx_call_arg3 250 | 251 | # bunki_ctx_call_arg2 252 | .global bunki_ctx_call_arg2 253 | .align 16 254 | bunki_ctx_call_arg2: 255 | mov ARG4, ARG3 256 | jmp bunki_ctx_call_arg3 257 | 258 | # bunki_ctx_call_arg3 259 | .global bunki_ctx_call_arg3 260 | .align 16 261 | bunki_ctx_call_arg3: 262 | call bunki_ctx_stack_start 263 | mov rax, [rax - 0x18] 264 | #if defined(_WIN64) 265 | mov r10, rax # save thread stack before re-alignment 266 | #endif 267 | and rax, -16 # align the thread stack pointer 268 | xchg rax, rsp 269 | #if defined(_WIN64) 270 | # save old stack pointer 271 | push rax 272 | # save the old thread stack info 273 | mov rax, gs:0x30 274 | push [rax + 0x1478] 275 | push [rax + 0x0010] 276 | push [rax + 0x0008] 277 | # load the 
the tread stack's TEB info 278 | mov r11, [r10 + (FCW_SAVE + WIN_XMM_SZ)] # stack base 279 | mov [rax + 0x0008], r11 280 | mov r11, [r10 + (FCW_SAVE + WIN_XMM_SZ + 8)] # stack limit 281 | mov [rax + 0x0010], r11 282 | mov r11, [r10 + (FCW_SAVE + WIN_XMM_SZ + 16)] # stack dealloc 283 | mov [rax + 0x1478], r11 284 | # Make room for shadow/home space 285 | sub rsp, 32 286 | #else 287 | # keep stack aligned 288 | sub rsp, 8 289 | # save old stack pointer 290 | push rax 291 | #endif 292 | call ARG4 # Call the function 293 | #if defined(_WIN64) 294 | add rsp, 32 295 | # restore the TEB info for this context 296 | mov ARG1, gs:0x30 297 | pop [ARG1 + 0x0008] 298 | pop [ARG1 + 0x0010] 299 | pop [ARG1 + 0x1478] 300 | #endif 301 | pop rsp 302 | ret 303 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Bunki 🛤️ 2 | A simple to use C stackful coroutine library. 3 | 4 | The name is the Japanese word bunki (分岐) which means to branch off. I consider the name quite fitting for a coroutine library just [google image search (分岐)](https://www.google.com/search?q=%E5%88%86%E5%B2%90&tbm=isch) and you will see what I mean. 5 | 6 | Currently supports the Sys-V calling convention for x86_64 and the Win64 x64_86 calling convention, aarch64, and arm. 7 | 8 | Issues and PRs are welcome 😃 9 | 10 | # Table of Contents 11 | * [Example](#example) 12 | * [Building](#building) 13 | * [Defines](#defines) 14 | * [API](#api) 15 | * [bunki_init](#bunki_init) 16 | * [bunki_stack_min_size](#bunki_stack_min_size) 17 | * [bunki_init_stack_ctx](#bunki_init_stack_ctx) 18 | * [bunki_prepare_ctx](#bunki_prepare_ctx) 19 | * [bunki_init_prepare_ctx](#bunki_init_prepare_ctx) 20 | * [bunki_resume](#bunki_resume) 21 | * [bunki_ctx_resume](#bunki_ctx_resume) 22 | * [bunki_yield](#bunki_yield) 23 | * [bunki_stack_alignment](#bunki_stack_alignment) 24 | * [bunki_stack_ptr](#bunki_stack_ptr) 25 | * [bunki_stack_push](#bunki_stack_push) 26 | * [bunki_stack_push_data](#bunki_stack_push_data) 27 | * [bunki_data_get](#bunki_data_get) 28 | * [bunki_data_set](#bunki_data_set) 29 | * [bunki_ctx_data_get](#bunki_ctx_data_get) 30 | * [bunki_ctx_data_set](#bunki_ctx_data_set) 31 | * [bunki_ctx_call](#bunki_ctx_call) 32 | * [Notes](#notes) 33 | * [TODO](#todo) 34 | * [Copyright and License](#copyright-and-license) 35 | 36 | 37 | # Example 38 | 39 | ```c 40 | #include "bunki.h" 41 | #include 42 | #include 43 | 44 | uintptr_t print_unsigned(void* arg) { 45 | uintptr_t val = (uintptr_t)arg; 46 | printf("The value is: 0x%lx\n", val); 47 | return 0; 48 | } 49 | 50 | uintptr_t my_coroutine(void* arg) { 51 | // Save a value to the thread context storage 52 | bunki_ctx_data_set((void*)0xcafe); 53 | // Call a function on the current threads stack 54 | bunki_ctx_call((void*)0xf00d, print_unsigned); 55 | bunki_yield(10); 56 | 57 | // print out the value we save before last yield 58 | bunki_ctx_call((void*)bunki_ctx_data_get(), print_unsigned); 59 | // print the arg 60 | bunki_ctx_call(arg, print_unsigned); 61 | bunki_yield(12); 62 | } 63 | 64 | int main() { 65 | // only call once before using the library 66 | // The library will now expect stack memory to 67 | // aligned by 256 bytes and be 256 bytes in size. 
68 |     if(bunki_init(256)) {
69 |         fprintf(stderr, "Failed to initialize!\n");
70 |         return 1;
71 |     }
72 |     void* stack_mem = aligned_alloc(256, 256);
73 |     if(stack_mem == NULL) {
74 |         fprintf(stderr, "Failed to allocate memory!\n");
75 |         return 1;
76 |     }
77 |     bunki_t ctx = bunki_init_prepare_ctx(stack_mem, my_coroutine, (void*)0xbeef);
78 |     printf("Returned: %u\n", (unsigned)bunki_resume(ctx));
79 |     printf("Returned: %u\n", (unsigned)bunki_resume(ctx));
80 |     free(stack_mem);
81 |     return 0;
82 | }
83 | ```
84 | # Building
85 | 
86 | The project has a makefile that builds the library. Simply run `make` and a static library will be built and copied to the build directory. The build directory will contain two folders, lib and include. The lib directory contains a static library you can link against, and the include directory contains the header that applications using this library should include.
87 | 
88 | If not using the makefile, compile `bunki_setup.c`, `bunki_common.c`, and the `bunki_native.c` and `bunki_ctx.S` for your architecture. That's it, just four files.
89 | 
90 | ## Defines
91 | 
92 | * `BUNKI_STACK_CONST`
93 | 
94 | The `BUNKI_STACK_CONST` define lets you build a version of the library that can be used without calling `bunki_init()`. The main advantage is that it avoids the runtime code patching and lets the compiler optimize with a constant for the stack sizes. Simply add `-DBUNKI_STACK_CONST=<stack size>` to the CFLAGS in the makefile. The stack size must be a power of 2.
95 | 
96 | * `BUNKI_SHARE_FCW_MXCSR`
97 | 
98 | This define only applies to x86_64 builds. Adding `BUNKI_SHARE_FCW_MXCSR` as a define makes the saved context state a bit smaller. It also means that your coroutines will use whatever the current values of the MXCSR register and the FPU control word happen to be.
99 | 
100 | * `BUNKI_NO_WIN64_XMM`
101 | 
102 | This define only applies to x86_64 on Windows and should be used with **CAUTION**. Defining `BUNKI_NO_WIN64_XMM` means that the non-volatile `xmm` registers will no longer be saved during a coroutine context switch. If you're familiar with Windows fibers, this define is similar to not passing the `FIBER_FLAG_FLOAT_SWITCH` flag. **See**: [Microsoft Fiber Parameters](https://web.archive.org/web/20230313065255/https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-createfiberex#parameters) It is still possible to use floating-point arithmetic with this define, but one must ensure that no stack frames above any context switch keep floating-point or vector state in the registers `xmm6`-`xmm15`.
103 | 
104 | * `BUNKI_AARCH64_NO_VEC_FLOAT`
105 | 
106 | This define only applies to aarch64. It is similar to `BUNKI_NO_WIN64_XMM`, although the reduction in saved context data is not as large as on Windows. I only recommend defining this if the hardware you're targeting does not have hardware support for SIMD **AND** floating point. If using it just to make the context switch smaller/faster, proceed with **CAUTION**.
107 | 
108 | # API
109 | 
110 | ## bunki_init
111 | 
112 | ```c
113 | unsigned bunki_init(uint32_t stack_size);
114 | ```
115 | 
116 | Initializes the environment for the library to be used. The argument to this function is the size, in bytes, of the stacks you wish to use. The size must be a power of 2, since the library relies on that to quickly locate a coroutine's context. This function shall **NOT** be called if any coroutines that will still be resumed were created before the call to this function. Furthermore, defining `BUNKI_STACK_CONST` makes this function a no-op.
117 | 
118 | * The function returns 0 on success.
119 | * The function returns 1 if the stack size is too small or not a power of 2.
120 | * The function returns 2 if it cannot patch code with the needed values.
121 | 
122 | ## bunki_stack_min_size
123 | 
124 | ```c
125 | uint32_t bunki_stack_min_size(void);
126 | ```
127 | 
128 | This function returns the smallest power-of-2 stack size in bytes that can be used. Any memory allocated to make a context must be greater than or equal to this amount. Further, this is the smallest size [bunki_init()](#bunki_init) will accept, as in the sketch below.
129 | 
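For example, a minimal sketch (assuming C11 `aligned_alloc()` is available and that a 4096-byte stack is large enough for your coroutines) of choosing a stack size that satisfies both the minimum and the power-of-2 requirement:

```c
#include "bunki.h"
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // Use 4096 bytes, or the library minimum if that happens to be larger.
    uint32_t stack_size = 4096;
    if(stack_size < bunki_stack_min_size()) {
        stack_size = bunki_stack_min_size();
    }
    if(bunki_init(stack_size)) {
        fprintf(stderr, "Failed to initialize!\n");
        return 1;
    }
    // Stacks must be aligned to their own size (see bunki_init_stack_ctx below).
    void* stack_mem = aligned_alloc(stack_size, stack_size);
    if(stack_mem == NULL) {
        return 1;
    }
    // ... create, prepare, and resume coroutines here ...
    free(stack_mem);
    return 0;
}
```

Because `bunki_stack_min_size()` itself returns a power of 2, taking the larger of the two values keeps the size valid for `bunki_init()`.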
130 | 
131 | ## bunki_init_stack_ctx
132 | 
133 | ```c
134 | bunki_t bunki_init_stack_ctx(void* stack_mem);
135 | ```
136 | 
137 | Creates a bunki_t context from the given memory. The size of the `stack_mem` buffer **MUST** be equal to the size provided to [bunki_init()](#bunki_init), and this memory **MUST** also be aligned to that size. For example, if the stacks are 256 bytes, `stack_mem` must be on a 256-byte alignment.
138 | 
139 | 
140 | ## bunki_prepare_ctx
141 | 
142 | ```c
143 | void bunki_prepare_ctx(bunki_t ctx, uintptr_t (*func)(void*), void* arg);
144 | ```
145 | 
146 | This function readies your `bunki_t` context so it can be resumed with [bunki_resume()](#bunki_resume) or [bunki_ctx_resume()](#bunki_ctx_resume). The reason for separate init and prepare functions is so that [bunki_stack_push()](#bunki_stack_push) and [bunki_stack_push_data()](#bunki_stack_push_data) can be used to store data on the stack before using the coroutine.
147 | 
148 | * The `ctx` parameter is your coroutine's context.
149 | * The `func` parameter is a function pointer to the starting function of your coroutine.
150 | * The `arg` parameter is the starting argument that will be passed to your starting function.
151 | 
152 | ## bunki_init_prepare_ctx
153 | 
154 | ```c
155 | bunki_t bunki_init_prepare_ctx(void* stack_mem, uintptr_t (*func)(void*), void* arg);
156 | ```
157 | 
158 | This function is equivalent to calling [bunki_init_stack_ctx()](#bunki_init_stack_ctx) and [bunki_prepare_ctx()](#bunki_prepare_ctx) in succession.
159 | 
160 | ```c
161 | // ... code ...
162 | bunki_t ctx = bunki_init_prepare_ctx(mem, begin_func, NULL);
163 | // Is equivalent to:
164 | bunki_t ctx = bunki_init_stack_ctx(mem);
165 | bunki_prepare_ctx(ctx, begin_func, NULL);
166 | // ... more code ...
167 | ```
168 | 
169 | ## bunki_resume
170 | 
171 | ```c
172 | uintptr_t bunki_resume(bunki_t ctx);
173 | ```
174 | 
175 | Yields from the main thread and begins/resumes the execution of the coroutine passed to it. The returned value is the value yielded or returned from the resumed coroutine. The type uintptr_t was chosen to make returning integer codes more ergonomic while still allowing coroutines to return pointers if needed. Do **NOT** call this function inside a coroutine; if nested coroutines are needed, call [bunki_ctx_resume()](#bunki_ctx_resume) instead. Otherwise, your coroutines cannot use the family of `bunki_ctx_call` functions safely.
176 | 
177 | ## bunki_ctx_resume
178 | 
179 | ```c
180 | uintptr_t bunki_ctx_resume(bunki_t ctx);
181 | ```
182 | 
183 | This function behaves just like [bunki_resume()](#bunki_resume), but **MUST** only be called inside a running coroutine. Calling it outside of a coroutine will likely crash your program. This function exists to allow for nested coroutine use, as in the sketch below.
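A minimal sketch of nesting (the 4096-byte stack size and the `inner`/`outer` names are illustrative assumptions, not part of the library): the main thread resumes the outer coroutine with `bunki_resume()`, while the outer coroutine resumes the inner one with `bunki_ctx_resume()`.

```c
#include "bunki.h"
#include <stdio.h>
#include <stdlib.h>

uintptr_t inner(void* arg) {
    (void)arg;
    bunki_yield(1); // hand 1 back to whoever resumed us (the outer coroutine)
    return 2;       // the second resume gets 2
}

uintptr_t outer(void* arg) {
    // The caller passed the inner coroutine's context as the starting argument.
    bunki_t inner_ctx = (bunki_t)arg;
    // Inside a coroutine, nested resumes must go through bunki_ctx_resume().
    uintptr_t a = bunki_ctx_resume(inner_ctx); // 1 (yielded)
    uintptr_t b = bunki_ctx_resume(inner_ctx); // 2 (returned)
    return a + b;
}

int main(void) {
    if(bunki_init(4096)) {
        return 1;
    }
    void* inner_stack = aligned_alloc(4096, 4096);
    void* outer_stack = aligned_alloc(4096, 4096);
    if(inner_stack == NULL || outer_stack == NULL) {
        return 1;
    }
    bunki_t inner_ctx = bunki_init_prepare_ctx(inner_stack, inner, NULL);
    bunki_t outer_ctx = bunki_init_prepare_ctx(outer_stack, outer, inner_ctx);
    // Only the main thread uses bunki_resume().
    printf("outer returned: %u\n", (unsigned)bunki_resume(outer_ctx)); // prints 3
    free(inner_stack);
    free(outer_stack);
    return 0;
}
```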
184 | 
185 | 
186 | ## bunki_yield
187 | 
188 | ```c
189 | void bunki_yield(uintptr_t ret);
190 | ```
191 | Yields execution of the coroutine and resumes the caller. The caller gets the value passed as `ret` returned to them. This function **MUST** not be called outside of a coroutine.
192 | 
193 | ## bunki_stack_alignment
194 | ```c
195 | uint32_t bunki_stack_alignment(void);
196 | ```
197 | `bunki_stack_alignment()` returns the current alignment all stacks must have. This will be the same value you passed to [bunki_init()](#bunki_init) or set via the `BUNKI_STACK_CONST` define.
198 | 
199 | ## bunki_stack_ptr
200 | ```c
201 | void* bunki_stack_ptr(bunki_t ctx);
202 | ```
203 | `bunki_stack_ptr()` lets one get back the stack memory pointer that was provided to [bunki_init_stack_ctx()](#bunki_init_stack_ctx) or [bunki_init_prepare_ctx()](#bunki_init_prepare_ctx). This allows one to store just the context and retrieve the pointer later, for instance to free it.
204 | 
205 | 
206 | ## bunki_stack_push
207 | ```c
208 | void* bunki_stack_push(bunki_t* ctx, size_t allocation_length);
209 | ```
210 | `bunki_stack_push()` lets you reserve memory on a coroutine's stack to store information there. The parameter `allocation_length` is how many bytes to reserve. This can be handy, for instance, to store a structure that you then provide as the starting argument for your coroutine's starting function. Keep in mind that this reduces the amount of stack space available to the coroutine. This function **MUST** not be called after calling [bunki_prepare_ctx()](#bunki_prepare_ctx) or [bunki_init_prepare_ctx()](#bunki_init_prepare_ctx); it is meant to be used after calling [bunki_init_stack_ctx()](#bunki_init_stack_ctx), but before the coroutine has been fully prepared.
211 | 
212 | * The return value is a pointer to the reserved memory on the coroutine's stack.
213 | 
214 | ## bunki_stack_push_data
215 | ```c
216 | void* bunki_stack_push_data(bunki_t* ctx, size_t data_length, void* data);
217 | ```
218 | 
219 | This function behaves almost exactly like [bunki_stack_push()](#bunki_stack_push), but in addition it copies the data pointed to by the 3rd parameter into the memory reserved on the coroutine's stack, as in the sketch below.
220 | 
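A minimal sketch (the `struct config_s` type and the 4096-byte stack size are made up for illustration) of parking a structure on a coroutine's stack and handing its address to the starting function:

```c
#include "bunki.h"
#include <stdio.h>
#include <stdlib.h>

struct config_s {
    int id;
    int limit;
};

uintptr_t worker(void* arg) {
    // arg points at the copy of the struct living on this coroutine's stack.
    struct config_s* cfg = arg;
    return (uintptr_t)(cfg->id + cfg->limit);
}

int main(void) {
    if(bunki_init(4096)) {
        return 1;
    }
    void* stack_mem = aligned_alloc(4096, 4096);
    if(stack_mem == NULL) {
        return 1;
    }
    struct config_s cfg = { .id = 7, .limit = 35 };
    // Init first, then push data, then prepare. Pushing after prepare is not allowed.
    bunki_t ctx = bunki_init_stack_ctx(stack_mem);
    void* cfg_on_stack = bunki_stack_push_data(&ctx, sizeof(cfg), &cfg);
    bunki_prepare_ctx(ctx, worker, cfg_on_stack);
    printf("worker returned: %u\n", (unsigned)bunki_resume(ctx)); // prints 42
    free(stack_mem);
    return 0;
}
```

Note the order: the data is pushed after `bunki_init_stack_ctx()` but before `bunki_prepare_ctx()`, matching the restriction described above.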
221 | ## bunki_data_get
222 | ```c
223 | void* bunki_data_get(bunki_t ctx);
224 | ```
225 | 
226 | This function does the same thing as [bunki_ctx_data_get()](#bunki_ctx_data_get), but allows you to get the value of the coroutine-local variable from outside the coroutine. This can be helpful when the caller decides it is done with a coroutine: if the stored value is a pointer to allocated memory, the caller can then free that memory.
227 | 
228 | ## bunki_data_set
229 | ```c
230 | void bunki_data_set(bunki_t ctx, void* data);
231 | ```
232 | 
233 | This function does the same thing as [bunki_ctx_data_set()](#bunki_ctx_data_set), but allows you to set the value of the coroutine-local variable from outside the coroutine.
234 | 
235 | 
236 | ## bunki_ctx_data_get
237 | ```c
238 | void* bunki_ctx_data_get(void);
239 | ```
240 | 
241 | This function lets you get the value previously stored in the coroutine's context storage. This function **MUST** only be called inside a coroutine.
242 | 
243 | ## bunki_ctx_data_set
244 | ```c
245 | void bunki_ctx_data_set(void* data);
246 | ```
247 | 
248 | This function and [bunki_ctx_data_get()](#bunki_ctx_data_get) are the building blocks for creating coroutine-local storage. If you need more data than what can be stored in a `void*`, feel free to allocate memory and pass a pointer to that memory instead. Just don't forget to free it when your coroutine is done 😉. This function **MUST** only be called inside a coroutine.
249 | 
250 | ## bunki_ctx_call
251 | ```c
252 | uintptr_t bunki_ctx_call(void* arg, uintptr_t (*func)(void*));
253 | ```
254 | When using stackful coroutines you ideally want small stacks. The drawback is that you can't call functions that generate deep call stacks. This function lets you get around that by running the function pointer provided as the second argument on the thread's stack instead.
255 | 
256 | The first parameter `arg` is passed to `func` when it is called. The return value is the value returned from the function pointer.
257 | 
258 | This function **MUST** only be called inside a coroutine, and while running on the thread's stack, [bunki_yield](#bunki_yield) and any function prefixed with `bunki_ctx` **MUST** not be called.
259 | 
260 | ## bunki_ctx_call_arg2
261 | ```c
262 | uintptr_t bunki_ctx_call_arg2(void* arg0, void* arg1, uintptr_t (*func)(void*, void*));
263 | ```
264 | This function behaves just like [bunki_ctx_call()](#bunki_ctx_call), but 2 arguments are passed to `func` when called instead.
265 | 
266 | ## bunki_ctx_call_arg3
267 | ```c
268 | uintptr_t bunki_ctx_call_arg3(void* arg0, void* arg1, void* arg2, uintptr_t (*func)(void*, void*, void*));
269 | ```
270 | This function behaves just like [bunki_ctx_call()](#bunki_ctx_call), but 3 arguments are passed to `func` when called instead.
271 | 
272 | # Notes
273 | 
274 | Unlike most other coroutine libraries, Bunki allows you to return from a coroutine with the `return` keyword. You can use this to create a one-time/one-shot function. For example:
275 | 
276 | ```c
277 | #include "bunki.h"
278 | #include <stdio.h>
279 | #include <stdlib.h>
280 | 
281 | uintptr_t my_one_shot(void* arg) {
282 |     return 0xbeef;
283 | }
284 | 
285 | int main() {
286 |     if(bunki_init(256)) {
287 |         fprintf(stderr, "Failed to initialize!\n");
288 |         return 1;
289 |     }
290 |     void* stack_mem = aligned_alloc(256, 256);
291 |     if(stack_mem == NULL) {
292 |         fprintf(stderr, "Failed to allocate memory!\n");
293 |         return 1;
294 |     }
295 |     bunki_t ctx = bunki_init_prepare_ctx(stack_mem, my_one_shot, (void*)0xbeef);
296 |     printf("Returned: 0x%x\n", (unsigned)bunki_resume(ctx));
297 |     printf("Returned: 0x%x\n", (unsigned)bunki_resume(ctx));
298 |     free(stack_mem);
299 |     return 0;
300 | }
301 | ```
302 | 
303 | 
304 | The output of this will be:
305 | ```bash
306 | Returned: 0xbeef
307 | Returned: 0x0
308 | ```
309 | 
310 | After the coroutine `my_one_shot` returns, it will never run again, and calling [bunki_resume](#bunki_resume) on it afterwards will always return 0. This makes it a one-time/one-shot function.
311 | 
312 | 
313 | # TODO
314 | 
315 | Nothing here at the moment, but new ideas are welcome!
316 | 
317 | # Copyright and License
318 | 
319 | Copyright (C) 2023, by Keith Cancel [](mailto:admin@keith.pro).
320 | 
321 | Under the MIT License
322 | --------------------------------------------------------------------------------