├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── Cargo.toml
├── Readme.md
├── benches
│   └── async_benchmark.rs
├── examples
│   ├── async.rs
│   └── panic.rs
├── src
│   └── lib.rs
├── switcheroo
│   ├── Cargo.toml
│   ├── Readme.md
│   ├── benches
│   │   └── switcheroo_benchmark.rs
│   ├── examples
│   │   └── generator_with_drop.rs
│   ├── src
│   │   ├── arch
│   │   │   ├── mod.rs
│   │   │   ├── unix_aarch64.rs
│   │   │   ├── unix_x64.rs
│   │   │   └── windows_x64.rs
│   │   ├── lib.rs
│   │   └── stack
│   │       ├── eight_mb.rs
│   │       ├── mod.rs
│   │       └── one_mb.rs
│   └── tests
│       ├── stack_test.rs
│       └── switch_test.rs
└── tests
    └── async_test.rs

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: [master]
 4 |   pull_request:
 5 |     branches: [master]
 6 | 
 7 | name: Test
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ${{ matrix.os }}
12 |     strategy:
13 |       matrix:
14 |         include:
15 |           - os: ubuntu-latest
16 |           - os: macos-latest
17 |           - os: windows-latest
18 |     steps:
19 |       - name: Checkout code
20 |         uses: actions/checkout@v2
21 |       - name: Install latest nightly
22 |         uses: actions-rs/toolchain@v1
23 |         with:
24 |           toolchain: nightly
25 |           override: true
26 |           components: rustfmt, clippy
27 |       - name: Run tests in Debug Build
28 |         run: cargo +nightly test --all
29 |       - name: Run tests in Release Build
30 |         run: cargo +nightly test --all --release
31 |       - name: Run benchmarks
32 |         run: cargo +nightly bench --all

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /*/target
3 | Cargo.lock
4 | .vscode

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "async-wormhole"
 3 | version = "0.3.7"
 4 | authors = ["Bernard Kolobara "]
 5 | edition = "2018"
 6 | license = "Apache-2.0/MIT"
 7 | description = "Async calls across non-async functions"
 8 | readme = "Readme.md"
 9 | repository = "https://github.com/bkolobara/async-wormhole/"
10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
12 | 
13 | [dependencies]
14 | switcheroo = { path = "./switcheroo", version = "0.2" }
15 | 
16 | [dev-dependencies]
17 | async-executor = "1.4"
18 | futures = "0.3"
19 | criterion = "0.3"
20 | libc = "0.2"
21 | backtrace = "0.3"
22 | 
23 | [[bench]]
24 | name = "async_benchmark"
25 | harness = false
26 | 
27 | [workspace]
28 | members = [
29 |     ".",
30 |     "switcheroo",
31 | ]

--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # async-wormhole
 2 | 
 3 | [Documentation](https://docs.rs/async-wormhole/latest/async_wormhole/)
 4 | 
 5 | > This library is experimental; I use it to prototype the foundation for [Lunatic](https://lunatic.solutions/).
 6 | >
 7 | > **Currently only works on Rust nightly, as it depends on [switcheroo](https://github.com/bkolobara/async-wormhole/tree/master/switcheroo).**
 8 | 
 9 | async-wormhole allows you to `.await` async calls in non-async functions, like extern "C" or JIT generated code.
10 | It runs on Windows, macOS and Linux (x64 & AArch64).
11 | 
12 | ## Motivation
13 | 
14 | Sometimes, when running inside an async environment you need to call into JIT generated code (e.g. wasm)
15 | and .await from there. Because the JIT code is not available at compile time, the Rust compiler can't
16 | do its "create a state machine" magic. In the end you can't have `.await` statements in non-async
17 | functions.
18 | 
19 | This library creates a special stack for executing the JIT code, so it's possible to suspend it at any
20 | point of the execution. Once you pass it a closure inside `AsyncWormhole::new` you will get back a future
21 | that you can `.await` on. The passed in closure is going to be executed on a new stack.
22 | 
23 | ## Example
24 | 
25 | ```rust
26 | use async_wormhole::{AsyncWormhole, AsyncYielder};
27 | use switcheroo::stack::*;
28 | 
29 | // non-async function
30 | #[allow(improper_ctypes_definitions)]
31 | extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
32 |     // Suspend the runtime until async value is ready.
33 |     // Can contain .await calls.
34 |     yielder.async_suspend(async { 42 })
35 | }
36 | 
37 | fn main() {
38 |     let stack = EightMbStack::new().unwrap();
39 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
40 |         let result = non_async(yielder);
41 |         assert_eq!(result, 42);
42 |         64
43 |     })
44 |     .unwrap();
45 | 
46 |     let outside = futures::executor::block_on(task);
47 |     assert_eq!(outside, 64);
48 | }
49 | ```
50 | 
51 | ## Performance
52 | 
53 | There should be almost no performance overhead to `.await` calls inside the closure passed to
54 | `AsyncWormhole::new` and caught by `async_suspend`.
55 | But instantiating a new AsyncWormhole will require one memory allocation.
56 | And of course you are not going to get [perfectly sized stacks](https://without.boats/blog/futures-and-segmented-stacks/#futures-as-a-perfectly-sized-stack).
57 | 
58 | ## License
59 | 
60 | Licensed under either of
61 | 
62 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
63 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
64 | 
65 | at your option.
66 | 
67 | ### Contribution
68 | 
69 | Unless you explicitly state otherwise, any contribution intentionally
70 | submitted for inclusion in the work by you, as defined in the Apache-2.0
71 | license, shall be dual licensed as above, without any additional terms or
72 | conditions.
73 | 

--------------------------------------------------------------------------------
/benches/async_benchmark.rs:
--------------------------------------------------------------------------------
 1 | use std::cell::Cell;
 2 | use std::ptr;
 3 | 
 4 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
 5 | 
 6 | use async_wormhole::AsyncWormhole;
 7 | use switcheroo::stack::*;
 8 | 
 9 | thread_local!(
10 |     /// Mock TLS
11 |     pub static TLS: Cell<*const usize> = Cell::new(ptr::null())
12 | );
13 | 
14 | fn async_bench(c: &mut Criterion) {
15 |     c.bench_function("async_wormhole creation", |b| {
16 |         b.iter(|| {
17 |             let stack = EightMbStack::new().unwrap();
18 |             AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
19 |                 yielder.async_suspend(async { 42 });
20 |             })
21 |             .unwrap();
22 |         })
23 |     });
24 | 
25 |     c.bench_function("async switch", |b| {
26 |         b.iter_batched(
27 |             || {
28 |                 let stack = EightMbStack::new().unwrap();
29 |                 let async_ = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
30 |                     yielder.async_suspend(async { 42 });
31 |                 })
32 |                 .unwrap();
33 |                 async_
34 |             },
35 |             |mut task| {
36 |                 futures::executor::block_on(&mut task);
37 |                 task
38 |             },
39 |             BatchSize::SmallInput,
40 |         );
41 |     });
42 | 
43 |     c.bench_function("async switch with pre and post poll hooks", |b| {
44 |         b.iter_batched(
45 |             || {
46 |                 let stack = EightMbStack::new().unwrap();
47 |                 let mut async_ = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
48 |                     yielder.async_suspend(async { 42 });
49 |                 })
50 |                 .unwrap();
51 |                 async_.set_pre_post_poll(|| {
52 |                     let _ = 33 + 34;
53 |                 });
54 |                 async_
55 |             },
56 |             |mut task| {
57 |                 futures::executor::block_on(&mut task);
58 |                 task
59 |             },
60 |             BatchSize::SmallInput,
61 |         );
62 |     });
63 | }
64 | 
65 | criterion_group!(benches, async_bench);
66 | criterion_main!(benches);

--------------------------------------------------------------------------------
/examples/async.rs:
--------------------------------------------------------------------------------
 1 | use async_wormhole::{AsyncWormhole, AsyncYielder};
 2 | use switcheroo::stack::*;
 3 | 
 4 | // non-async function
 5 | #[allow(improper_ctypes_definitions)]
 6 | extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
 7 |     // Suspend the runtime until async value is ready
 8 |     yielder.async_suspend(async { 42 })
 9 | }
10 | 
11 | fn main() {
12 |     let stack = EightMbStack::new().unwrap();
13 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
14 |         let result = non_async(yielder);
15 |         assert_eq!(result, 42);
16 |         64
17 |     })
18 |     .unwrap();
19 | 
20 |     let outside = futures::executor::block_on(task);
21 |     assert_eq!(outside, 64);
22 | }

--------------------------------------------------------------------------------
/examples/panic.rs:
--------------------------------------------------------------------------------
 1 | use async_wormhole::AsyncWormhole;
 2 | use backtrace::Backtrace;
 3 | use switcheroo::stack::*;
 4 | 
 5 | fn main() {
 6 |     let stack = EightMbStack::new().unwrap();
 7 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |_yielder| {
 8 |         let b = Backtrace::new();
 9 |         println!("{:?}", b);
10 |         panic!("Panic inside wormhole")
11 |     })
12 |     .unwrap();
13 | 
14 |     futures::executor::block_on(task);
15 | }

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! async-wormhole allows you to `.await` async calls across non-async functions, like extern "C" or JIT
  2 | //! generated code.
  3 | //!
  4 | //! ## Motivation
  5 | //!
  6 | //! Sometimes, when running inside an async environment you need to call into JIT generated code (e.g. wasm)
  7 | //! and .await from there. Because the JIT code is not available at compile time, the Rust compiler can't
  8 | //! do its "create a state machine" magic. In the end you can't have `.await` statements in non-async
  9 | //! functions.
 10 | //!
 11 | //! This library creates a special stack for executing the JIT code, so it's possible to suspend it at any
 12 | //! point of the execution. Once you pass it a closure inside [AsyncWormhole::new](struct.AsyncWormhole.html#method.new)
 13 | //! you will get back a future that you can `.await` on. The passed in closure is going to be executed on a
 14 | //! new stack.
 15 | //!
 16 | //! ## Example
 17 | //!
 18 | //! ```rust
 19 | //! use async_wormhole::{AsyncWormhole, AsyncYielder};
 20 | //! use switcheroo::stack::*;
 21 | //!
 22 | //! // non-async function
 23 | //! #[allow(improper_ctypes_definitions)]
 24 | //! extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
 25 | //!     // Suspend the runtime until async value is ready.
 26 | //!     // Can contain .await calls.
 27 | //!     yielder.async_suspend(async { 42 })
 28 | //! }
 29 | //!
 30 | //! fn main() {
 31 | //!     let stack = EightMbStack::new().unwrap();
 32 | //!     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
 33 | //!         let result = non_async(yielder);
 34 | //!         assert_eq!(result, 42);
 35 | //!         64
 36 | //!     })
 37 | //!     .unwrap();
 38 | //!
 39 | //!     let outside = futures::executor::block_on(task);
 40 | //!     assert_eq!(outside, 64);
 41 | //! }
 42 | //! ```
 43 | 
 44 | use switcheroo::Generator;
 45 | use switcheroo::Yielder;
 46 | 
 47 | use std::cell::Cell;
 48 | use std::future::Future;
 49 | use std::io::Error;
 50 | use std::pin::Pin;
 51 | use std::task::{Context, Poll, Waker};
 52 | 
 53 | pub use switcheroo::stack;
 54 | 
 55 | /// AsyncWormhole represents a Future that uses a generator with a separate stack to execute a closure.
 56 | ///
 57 | /// It has the capability to .await on other Futures in the closure using the received
 58 | /// [AsyncYielder](struct.AsyncYielder.html). Once all Futures have been awaited on, AsyncWormhole will resolve
 59 | /// to the return value of the provided closure.
 60 | ///
 61 | /// For dealing with thread local storage,
 62 | /// [AsyncWormhole::set_pre_post_poll](struct.AsyncWormhole.html#method.set_pre_post_poll) is provided.
 63 | ///
 64 | /// Every time an executor polls AsyncWormhole, the `pre_post_poll` function will be called, and every time
 65 | /// AsyncWormhole returns `Poll::Pending`, `pre_post_poll` will be called again. Between these two calls we
 66 | /// have a guarantee that the executor will not be able to move the execution to another thread, and we
 67 | /// can use this guarantee to our advantage in specific scenarios.
 68 | pub struct AsyncWormhole<'a, Stack, Output, P>
 69 | where
 70 |     Stack: stack::Stack + Send,
 71 |     P: FnMut() + Send,
 72 | {
 73 |     generator: Option<Cell<Generator<'a, Waker, Option<Output>, Stack>>>,
 74 |     pre_post_poll: Option<P>,
 75 | }
 76 | 
 77 | impl<'a, Stack, Output, P> AsyncWormhole<'a, Stack, Output, P>
 78 | where
 79 |     Stack: stack::Stack + Send,
 80 |     P: FnMut() + Send,
 81 | {
 82 |     /// Returns a new AsyncWormhole, using the passed `stack` to execute the closure `f` on.
 83 |     /// The closure will not be executed right away, only if you pass AsyncWormhole to an
 84 |     /// async executor (.await on it).
 85 |     pub fn new<F>(stack: Stack, f: F) -> Result<Self, Error>
 86 |     where
 87 |         F: FnOnce(AsyncYielder<Output>) -> Output + 'a + Send,
 88 |     {
 89 |         let generator = Generator::new(stack, |yielder, waker| {
 90 |             let async_yielder = AsyncYielder::new(yielder, waker);
 91 |             let finished = Some(f(async_yielder));
 92 |             yielder.suspend(finished);
 93 |         });
 94 | 
 95 |         Ok(Self {
 96 |             generator: Some(Cell::new(generator)),
 97 |             pre_post_poll: None,
 98 |         })
 99 |     }
100 | 
101 |     /// Every time the executor polls `AsyncWormhole` we may end up on another thread. Here we can set a function
102 |     /// that swaps some thread local storage and a context that can travel with `AsyncWormhole` between threads.
103 |     pub fn set_pre_post_poll(&mut self, f: P) {
104 |         self.pre_post_poll = Some(f);
105 |     }
106 | 
107 |     /// Get the stack from the internal generator.
108 |     pub fn stack(mut self) -> Stack {
109 |         let generator = self.generator.take().unwrap().into_inner();
110 |         // If the generator didn't finish yet, the stack is going to be unwound on drop().
111 |         // Fire a last pre_post_poll before this happens.
112 |         if generator.started() && !generator.finished() {
113 |             if let Some(pre_post_poll) = &mut self.pre_post_poll {
114 |                 pre_post_poll();
115 |             }
116 |         }
117 |         generator.stack()
118 |     }
119 | }
120 | 
121 | impl<'a, Stack, Output, P> Future for AsyncWormhole<'a, Stack, Output, P>
122 | where
123 |     Stack: stack::Stack + Unpin + Send,
124 |     P: FnMut() + Unpin + Send,
125 | {
126 |     type Output = Output;
127 | 
128 |     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
129 |         // If pre_post_poll is provided execute it before entering the separate stack.
130 |         if let Some(pre_post_poll) = &mut self.pre_post_poll {
131 |             pre_post_poll()
132 |         }
133 | 
134 |         match self
135 |             .generator
136 |             .as_mut()
137 |             .unwrap()
138 |             .get_mut()
139 |             .resume(cx.waker().clone())
140 |         {
141 |             // If we poll the future after it completed it will always return Poll::Pending.
142 |             // But polling a completed future is undefined behaviour anyway.
143 |             None | Some(None) => {
144 |                 // If pre_post_poll is provided execute it before returning Poll::Pending.
145 |                 if let Some(pre_post_poll) = &mut self.pre_post_poll {
146 |                     pre_post_poll()
147 |                 }
148 |                 Poll::Pending
149 |             }
150 |             Some(Some(out)) => {
151 |                 // Poll one last time to finish the generator.
152 |                 self.generator
153 |                     .as_mut()
154 |                     .unwrap()
155 |                     .get_mut()
156 |                     .resume(cx.waker().clone());
157 |                 Poll::Ready(out)
158 |             }
159 |         }
160 |     }
161 | }
162 | 
163 | impl<'a, Stack, Output, P> Drop for AsyncWormhole<'a, Stack, Output, P>
164 | where
165 |     Stack: stack::Stack + Send,
166 |     P: FnMut() + Send,
167 | {
168 |     fn drop(&mut self) {
169 |         // Dropping a generator can cause an unwind and execute code inside of the separate context.
170 |         // In this regard it's similar to a `poll` call and we need to fire the pre and post poll hooks.
171 |         // Note that we **don't** do a last `post_poll` call once the generator is dropped.
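        // (An added note: the hooks only need to fire when the closure has started but not
        // finished, because only then does dropping the generator unwind frames on the other stack.)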
172 |         if let Some(pre_post_poll) = &mut self.pre_post_poll {
173 |             if let Some(generator) = self.generator.as_mut() {
174 |                 if generator.get_mut().started() && !generator.get_mut().finished() {
175 |                     pre_post_poll()
176 |                 }
177 |             }
178 |         }
179 |     }
180 | }
181 | 
182 | #[derive(Clone)]
183 | pub struct AsyncYielder<'a, Output> {
184 |     yielder: &'a Yielder<Waker, Option<Output>>,
185 |     waker: Waker,
186 | }
187 | 
188 | impl<'a, Output> AsyncYielder<'a, Output> {
189 |     pub(crate) fn new(yielder: &'a Yielder<Waker, Option<Output>>, waker: Waker) -> Self {
190 |         Self { yielder, waker }
191 |     }
192 | 
193 |     /// Takes an `impl Future` and awaits it, returning the value from it once ready.
194 |     pub fn async_suspend<Fut, R>(&mut self, mut future: Fut) -> R
195 |     where
196 |         Fut: Future<Output = R>,
197 |     {
198 |         let mut future = unsafe { Pin::new_unchecked(&mut future) };
199 |         loop {
200 |             let mut cx = Context::from_waker(&mut self.waker);
201 |             self.waker = match future.as_mut().poll(&mut cx) {
202 |                 Poll::Pending => self.yielder.suspend(None),
203 |                 Poll::Ready(result) => return result,
204 |             };
205 |         }
206 |     }
207 | }

--------------------------------------------------------------------------------
/switcheroo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "switcheroo"
 3 | version = "0.2.9"
 4 | authors = ["Bernard Kolobara "]
 5 | edition = "2018"
 6 | license = "Apache-2.0/MIT"
 7 | description = "Lightweight userland context switches"
 8 | readme = "Readme.md"
 9 | repository = "https://github.com/bkolobara/async-wormhole/tree/master/switcheroo"
10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
12 | 
13 | [target.'cfg(unix)'.dependencies]
14 | libc = "0.2"
15 | 
16 | [target.'cfg(windows)'.dependencies]
17 | winapi = { version = "0.3", features = ["winbase", "memoryapi", "errhandlingapi"] }
18 | 
19 | [dev-dependencies]
20 | criterion = "0.3"
21 | 
22 | [[bench]]
23 | name = "switcheroo_benchmark"
24 | harness = false

--------------------------------------------------------------------------------
/switcheroo/Readme.md:
--------------------------------------------------------------------------------
 1 | # Switcheroo
 2 | 
 3 | [Documentation](https://docs.rs/switcheroo/latest/switcheroo/)
 4 | 
 5 | > This library is heavily inspired by https://github.com/edef1c/libfringe.
 6 | 
 7 | > **Currently only works on Rust nightly.**
 8 | 
 9 | Switcheroo provides lightweight context switches in Rust. It runs on Windows, macOS and Linux (x64 & AArch64).
10 | 
11 | ## Example
12 | 
13 | ```rust
14 | use switcheroo::stack::*;
15 | use switcheroo::Generator;
16 | 
17 | fn main() {
18 |     let stack = EightMbStack::new().unwrap();
19 |     let mut add_one = Generator::new(stack, |yielder, mut input| {
20 |         loop {
21 |             if input == 0 {
22 |                 break;
23 |             }
24 |             input = yielder.suspend(input + 1);
25 |         }
26 |     });
27 | 
28 |     assert_eq!(add_one.resume(2), Some(3));
29 |     assert_eq!(add_one.resume(127), Some(128));
30 |     assert_eq!(add_one.resume(0), None);
31 |     assert_eq!(add_one.resume(0), None);
32 | }
33 | ```
34 | 
35 | ## Performance
36 | 
37 | On my Macbook Pro 15" (Late 2013) each context switch is comparable to a function call (sub-nanosecond).
38 | 
39 | ## Developer Experience
40 | 
41 | Switcheroo **tries** hard not to let the context switching disturb default Rust behaviour on panics and unwinds.
42 | The displayed backtrace should stretch across the context switch boundary.
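
A minimal sketch of what this means in practice (an illustrative example, not from the original docs; it only uses the public API shown above). A panic raised on the generator's stack resumes on the caller's side, where it can be caught as usual:

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

use switcheroo::stack::*;
use switcheroo::{Generator, Yielder};

fn main() {
    let stack = EightMbStack::new().unwrap();
    let mut gen = Generator::new(stack, |_yielder: &Yielder<(), ()>, _input| {
        panic!("panic on the generator stack");
    });
    // The panic crosses the context switch boundary and unwinds here.
    let result = catch_unwind(AssertUnwindSafe(|| gen.resume(())));
    assert!(result.is_err());
}
```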
43 | 
44 | When dropping a non-empty stack, it will be unwound to free any resources allocated on it.
45 | 
46 | ## License
47 | 
48 | Licensed under either of
49 | 
50 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
51 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
52 | 
53 | at your option.
54 | 
55 | ### Contribution
56 | 
57 | Unless you explicitly state otherwise, any contribution intentionally
58 | submitted for inclusion in the work by you, as defined in the Apache-2.0
59 | license, shall be dual licensed as above, without any additional terms or
60 | conditions.

--------------------------------------------------------------------------------
/switcheroo/benches/switcheroo_benchmark.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 2 | 
 3 | use switcheroo::stack::*;
 4 | use switcheroo::Generator;
 5 | 
 6 | fn switcheroo(c: &mut Criterion) {
 7 |     // Bench allocation
 8 |     c.bench_function("create 8 MB stack", |b| b.iter(|| EightMbStack::new()));
 9 | 
10 |     c.bench_function("switch stacks", |b| {
11 |         let stack = EightMbStack::new().unwrap();
12 |         let mut gen = Generator::new(stack, |yielder, input| {
13 |             black_box(yielder.suspend(input + 1));
14 |         });
15 |         b.iter(|| black_box(gen.resume(2)))
16 |     });
17 | }
18 | 
19 | criterion_group!(benches, switcheroo);
20 | criterion_main!(benches);

--------------------------------------------------------------------------------
/switcheroo/examples/generator_with_drop.rs:
--------------------------------------------------------------------------------
 1 | use switcheroo::Generator;
 2 | use switcheroo::{stack::*, Yielder};
 3 | 
 4 | struct DropMarker {}
 5 | 
 6 | impl Drop for DropMarker {
 7 |     fn drop(&mut self) {
 8 |         println!("Dropped");
 9 |     }
10 | }
11 | 
12 | fn main() {
13 |     let stack = EightMbStack::new().unwrap();
14 |     let mut add_one = Generator::new(stack, |yielder: &Yielder<i32, i32>, mut input| {
15 |         let _marker = DropMarker {};
16 |         input = yielder.suspend(input + 1);
17 |         input = yielder.suspend(input + 1);
18 |         input = yielder.suspend(input + 1);
19 |         yielder.suspend(input + 1);
20 |     });
21 | 
22 |     assert_eq!(add_one.resume(2), Some(3));
23 |     assert_eq!(add_one.resume(2), Some(3));
24 |     assert_eq!(add_one.resume(127), Some(128));
25 |     // assert_eq!(add_one.resume(0), Some(1));
26 |     assert_eq!(add_one.finished(), false);
27 | }

--------------------------------------------------------------------------------
/switcheroo/src/arch/mod.rs:
--------------------------------------------------------------------------------
 1 | // All architectures expose a similar API. Here I just want to take some time explaining the general
 2 | // idea behind all of them.
 3 | //
 4 | // At the core of the implementation there are 3 functions:
 5 | // * `init(stack: &Stack, f: unsafe extern "C" fn(usize, *mut usize)) -> *mut usize`
 6 | // * `swap_and_link_stacks(arg: usize, new_sp: *mut usize, sp: *const usize) -> (usize, *mut usize)`
 7 | // * `swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize)`
 8 | //
 9 | // ### init
10 | // `init` takes a **stack** and a **pointer to a function**. It will prepare the stack so it is ready
11 | // to be switched to. Once we switch to it, the function we set up here will be called.
12 | //
13 | // Unix and Windows operating systems require different stack setups.
Here is an illustration on how 14 | // the stacks look after the call to `init`: 15 | // ``` 16 | // + + 17 | // | ....... | 18 | // | | 19 | // |Deallocation stack| 20 | // +------------------+ 21 | // |Stack limit | 22 | // +------------------+ 23 | // |Stack base | + + 24 | // +------------------+ | | 25 | // +----+Stack frame ptr | | | 26 | // | +------------------+ | ......... | 27 | // | |Trampoline | | | 28 | // | +------------------+ +----+Stack frame ptr | 29 | // +---->Caller frame | | +------------------+ 30 | // +------------------+ | |Trampoline 2 ptr | 31 | // |Function ptr | | +------------------+ 32 | // +------------------+ +---->Caller frame | 33 | // +------------------+ 34 | // |Trampoline 1 ptr | 35 | // +------------------+ 36 | // |Function ptr | 37 | // +------------------+ 38 | // |Alignment | 39 | // +------------------+ 40 | // 41 | // Windows Unix 42 | // ``` 43 | // Windows needs to preserve some extra information across context switches, like the stack base, top 44 | // and deallocation values. If they are not present Windows will not know how to grow the stack. 45 | // The [Boost.Context](https://www.boost.org/doc/libs/1_61_0/libs/context/doc/html/context/overview.html) 46 | // library also preserves some other information, like the current 47 | // [Fiber](https://docs.microsoft.com/en-us/windows/win32/procthread/fibers) data, but I don't expect 48 | // anyone to use switcheroo and Windows Fibers in the same app. 49 | // 50 | // The **Caller frame** value will be filled in by the `swap_and_link_stacks` function to link the 2 51 | // stacks from different contexts. At this point of time we can't know from where we are jumping to 52 | // the stack. 53 | // 54 | // ### swap_and_link_stacks 55 | // This function is really similar to `swap`, but it's expected to be the first one called when jumping 56 | // to a new stack. It will write the **Caller frame** data inside the new stack, basically linking them 57 | // together. Once this data exists on the new stack we don't need to call it anymore and can switch 58 | // stacks with just the `swap` function. 59 | // 60 | // The swap functions will: 61 | // 1. Preserve the frame pointer and instruction pointer of the current context. 62 | // On Windows, deallocation stack, stack limit and base stack are also preserved. 63 | // 2. Change the stack pointer to the new stack. 64 | // 3. Pop the frame pointer and instruction pointer from the new stack. 65 | // 4. Jump to the instruction. 66 | // 67 | // Notice that the instruction pointer points to a cryptic **Trampoline 2** function and not to the 68 | // passed in **Function**. Trampoline 1 and 2 contain some extra assembler information so that it's 69 | // possible to re-create a backtrace across contexts if we panic inside the new context. 
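//
// A rough usage sketch of how the three functions compose (this is not real code from
// this module; it mirrors what `Generator::new` and `Generator::resume` in src/lib.rs do):
//
//     // Prepare the new stack so that jumping to it calls `wrapper`.
//     let stack_ptr = init(&stack, wrapper);
//     // First jump: also writes the **Caller frame** into the new stack, linking them.
//     let (value, sp) = swap_and_link_stacks(arg, stack_ptr, stack.bottom());
//     // Every subsequent switch, in either direction, just uses `swap`.
//     let (value, sp) = swap(next_arg, sp);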
70 | 71 | #[cfg(all(target_family = "unix", target_arch = "x86_64"))] 72 | mod unix_x64; 73 | #[cfg(all(target_family = "unix", target_arch = "x86_64"))] 74 | pub use self::unix_x64::*; 75 | 76 | #[cfg(all(target_family = "unix", target_arch = "aarch64"))] 77 | mod unix_aarch64; 78 | #[cfg(all(target_family = "unix", target_arch = "aarch64"))] 79 | pub use self::unix_aarch64::*; 80 | 81 | #[cfg(all(target_family = "windows", target_arch = "x86_64"))] 82 | mod windows_x64; 83 | #[cfg(all(target_family = "windows", target_arch = "x86_64"))] 84 | pub use self::windows_x64::*; 85 | -------------------------------------------------------------------------------- /switcheroo/src/arch/unix_aarch64.rs: -------------------------------------------------------------------------------- 1 | use crate::stack; 2 | use core::arch::asm; 3 | 4 | pub unsafe fn init( 5 | stack: &S, 6 | f: unsafe extern "C" fn(usize, *mut usize), 7 | ) -> *mut usize { 8 | unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize { 9 | sp = sp.offset(-1); 10 | *sp = val; 11 | sp 12 | } 13 | 14 | let mut sp = stack.bottom(); 15 | 16 | // Save the (generator_wrapper) function on the stack. 17 | sp = push(sp, f as usize); 18 | sp = push(sp, 0xdeaddeaddead0cfa); 19 | 20 | #[naked] 21 | unsafe extern "C" fn trampoline() { 22 | asm!( 23 | // Stops unwinding/backtracing at this function. 24 | ".cfi_undefined lr", 25 | "ldr x2, [sp, #8]", 26 | "blr x2", 27 | options(noreturn) 28 | ) 29 | } 30 | 31 | // Save frame pointer 32 | let frame = sp; 33 | sp = push(sp, trampoline as usize); 34 | sp = push(sp, frame as usize); 35 | 36 | // x18 & x 19 37 | sp = push(sp, 0); sp = push(sp, 0); 38 | 39 | sp 40 | } 41 | 42 | #[inline(always)] 43 | pub unsafe fn swap_and_link_stacks( 44 | arg: usize, 45 | new_sp: *mut usize, 46 | sp: *const usize, 47 | ) -> (usize, *mut usize) { 48 | let ret_val: usize; 49 | let ret_sp: *mut usize; 50 | 51 | asm!( 52 | "adr lr, 1337f", 53 | "stp x29, x30, [sp, #-16]!", 54 | "stp x18, x19, [sp, #-16]!", 55 | "mov x1, sp", 56 | "str x1, [x3, #-32]", 57 | "mov sp, x2", 58 | "ldp x18, x19, [sp], #16", 59 | "ldp x29, x30, [sp], #16", 60 | "br x30", 61 | "1337:", 62 | 63 | inout("x3") sp => _, 64 | inout("x2") new_sp => _, 65 | inout("x0") arg => ret_val, 66 | out("x1") ret_sp, 67 | 68 | out("x4") _, out("x5") _, out("x6") _, out("x7") _, 69 | out("x8") _, out("x9") _, out("x10") _, out("x11") _, 70 | out("x12") _, out("x13") _, out("x14") _, out("x15") _, 71 | out("x16") _, out("x17") _, 72 | out("x20") _, out("x21") _, out("x22") _, out("x23") _, 73 | out("x24") _, out("x25") _, out("x26") _, out("x27") _, 74 | out("x28") _, out("lr") _, 75 | 76 | out("v0") _, out("v1") _, out("v2") _, out("v3") _, 77 | out("v4") _, out("v5") _, out("v6") _, out("v7") _, 78 | out("v8") _, out("v9") _, out("v10") _, out("v11") _, 79 | out("v12") _, out("v13") _, out("v14") _, out("v15") _, 80 | out("v16") _, out("v17") _, out("v18") _, out("v19") _, 81 | out("v20") _, out("v21") _, out("v22") _, out("v23") _, 82 | out("v24") _, out("v25") _, out("v26") _, out("v27") _, 83 | out("v28") _, out("v29") _, out("v30") _, out("v31") _, 84 | ); 85 | 86 | (ret_val, ret_sp) 87 | } 88 | 89 | #[inline(always)] 90 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 91 | let ret_val: usize; 92 | let ret_sp: *mut usize; 93 | 94 | asm!( 95 | "adr lr, 1337f", 96 | "stp x29, x30, [sp, #-16]!", 97 | "stp x18, x19, [sp, #-16]!", 98 | "mov x1, sp", 99 | "mov sp, x2", 100 | "ldp x18, x19, [sp], #16", 101 | "ldp x29, x30, [sp], #16", 
102 | "br x30", 103 | "1337:", 104 | 105 | inout("x2") new_sp => _, 106 | inout("x0") arg => ret_val, 107 | out("x1") ret_sp, out("x3") _, 108 | 109 | out("x4") _, out("x5") _, out("x6") _, out("x7") _, 110 | out("x8") _, out("x9") _, out("x10") _, out("x11") _, 111 | out("x12") _, out("x13") _, out("x14") _, out("x15") _, 112 | out("x16") _, out("x17") _, 113 | out("x20") _, out("x21") _, out("x22") _, out("x23") _, 114 | out("x24") _, out("x25") _, out("x26") _, out("x27") _, 115 | out("x28") _, out("lr") _, 116 | 117 | out("v0") _, out("v1") _, out("v2") _, out("v3") _, 118 | out("v4") _, out("v5") _, out("v6") _, out("v7") _, 119 | out("v8") _, out("v9") _, out("v10") _, out("v11") _, 120 | out("v12") _, out("v13") _, out("v14") _, out("v15") _, 121 | out("v16") _, out("v17") _, out("v18") _, out("v19") _, 122 | out("v20") _, out("v21") _, out("v22") _, out("v23") _, 123 | out("v24") _, out("v25") _, out("v26") _, out("v27") _, 124 | out("v28") _, out("v29") _, out("v30") _, out("v31") _, 125 | ); 126 | 127 | (ret_val, ret_sp) 128 | } 129 | -------------------------------------------------------------------------------- /switcheroo/src/arch/unix_x64.rs: -------------------------------------------------------------------------------- 1 | use crate::stack; 2 | use core::arch::asm; 3 | 4 | pub unsafe fn init( 5 | stack: &S, 6 | f: unsafe extern "C" fn(usize, *mut usize), 7 | ) -> *mut usize { 8 | unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize { 9 | sp = sp.offset(-1); 10 | *sp = val; 11 | sp 12 | } 13 | 14 | let mut sp = stack.bottom(); 15 | 16 | // Save the (generator_wrapper) function on the stack. 17 | sp = push(sp, f as usize); 18 | sp = push(sp, 0xdeaddeaddead0cfa); 19 | 20 | #[naked] 21 | unsafe extern "C" fn trampoline() { 22 | asm!( 23 | // Stops unwinding/backtracing at this function. 24 | ".cfi_undefined rip", 25 | "call [rsp + 8]", 26 | options(noreturn) 27 | ) 28 | } 29 | 30 | // Save frame pointer 31 | let frame = sp; 32 | sp = push(sp, trampoline as usize); // call instruction 33 | sp = push(sp, frame as usize); 34 | 35 | // Set rbx starting value to 0 36 | sp = push(sp, 0); 37 | 38 | sp 39 | } 40 | 41 | #[inline(always)] 42 | pub unsafe fn swap_and_link_stacks( 43 | arg: usize, 44 | new_sp: *mut usize, 45 | sp: *const usize, 46 | ) -> (usize, *mut usize) { 47 | let ret_val: usize; 48 | let ret_sp: *mut usize; 49 | 50 | asm!( 51 | // Save the continuation spot after we jump back here to be after this asm block. 52 | "lea rax, [rip + 1337f]", 53 | "push rax", 54 | // Save the frame pointer as it can't be marked as an output register. 55 | "push rbp", 56 | // rbx is is used internally by LLVM and can't be marked as an output register. 57 | "push rbx", 58 | // Link stacks by swapping the CFA value 59 | "mov [rcx - 16], rsp", 60 | // Set the current pointer as the 2nd element (rsi) of the function we are jumping to. 61 | "mov rsi, rsp", 62 | // Change the stack pointer to the passed value. 63 | "mov rsp, rdx", 64 | // Restore rbx 65 | "pop rbx", 66 | // Set the frame pointer according to the new stack. 67 | "pop rbp", 68 | // Get the next instruction to jump to. 69 | "pop rax", 70 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 71 | "jmp rax", 72 | "1337:", 73 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 74 | // The compiler will optimise this out and just save the registers it actually knows it must. 
75 | inout("rcx") sp => _, 76 | inout("rdx") new_sp => _, 77 | inout("rdi") arg => ret_val, // 1st argument to called function 78 | out("rsi") ret_sp, // 2nd argument to called function 79 | out("rax") _, 80 | 81 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 82 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 83 | 84 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 85 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 86 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 87 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 88 | ); 89 | 90 | (ret_val, ret_sp) 91 | } 92 | 93 | /// Swap between two stacks. 94 | /// `new_sp` is the stack we are jumping to. This stack needs to have at the top: 95 | /// 1. Stack frame pointer 96 | /// 2. Pointer to the next instruction to execute on the new stack 97 | /// If the pointer points to an `extern "C"` function then the `arg` element is forwarded to it 98 | /// through the `rdi` register. 99 | /// 100 | /// This function also pushes the stack pointer and next instruction to the current stack. 101 | /// When we jump back to it, it will return the content of the new `arg` as ret_val. 102 | /// TODO: Document in more detail the exact flow as this is super confusing. 103 | #[inline(always)] 104 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 105 | let ret_val: usize; 106 | let ret_sp: *mut usize; 107 | 108 | asm!( 109 | // Save the continuation spot after we jump back here to be after this asm block. 110 | "lea rax, [rip + 1337f]", 111 | "push rax", 112 | // Save the frame pointer as it can't be marked as an output register. 113 | "push rbp", 114 | // rbx is is used internally by LLVM and can't be marked as an output register. 115 | "push rbx", 116 | // Set the current pointer as the 2nd element (rsi) of the function we are jumping to. 117 | "mov rsi, rsp", 118 | // Change the stack pointer to the passed value. 119 | "mov rsp, rdx", 120 | // Restore rbx 121 | "pop rbx", 122 | // Set the frame pointer according to the new stack. 123 | "pop rbp", 124 | // Get the next instruction to jump to. 125 | "pop rax", 126 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 127 | "jmp rax", 128 | "1337:", 129 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 130 | // The compiler will optimise this out and just save the registers it actually knows it must. 
131 |         inout("rdx") new_sp => _,
132 |         inout("rdi") arg => ret_val, // 1st argument to called function
133 |         out("rsi") ret_sp, // 2nd argument to called function
134 |         out("rax") _, out("rcx") _,
135 | 
136 |         out("r8") _, out("r9") _, out("r10") _, out("r11") _,
137 |         out("r12") _, out("r13") _, out("r14") _, out("r15") _,
138 | 
139 |         out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
140 |         out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
141 |         out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
142 |         out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
143 |     );
144 | 
145 |     (ret_val, ret_sp)
146 | }

--------------------------------------------------------------------------------
/switcheroo/src/arch/windows_x64.rs:
--------------------------------------------------------------------------------
 1 | use crate::stack;
 2 | use core::arch::asm;
 3 | 
 4 | pub unsafe fn init<S: stack::Stack>(
 5 |     stack: &S,
 6 |     f: unsafe extern "C" fn(usize, *mut usize),
 7 | ) -> *mut usize {
 8 |     unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize {
 9 |         sp = sp.offset(-1);
10 |         *sp = val;
11 |         sp
12 |     }
13 | 
14 |     let mut sp = stack.bottom();
15 | 
16 |     // Save the (generator_wrapper) function on the stack.
17 |     sp = push(sp, f as usize);
18 |     sp = push(sp, 0xdeaddeaddead0cfa);
19 | 
20 |     #[naked]
21 |     #[no_mangle]
22 |     unsafe extern "C" fn trampoline() {
23 |         asm!(
24 |             // These directives will create unwind codes to link the two stacks together during stack traces.
25 |             // The assembly was carefully crafted by a painfully long process of trial and error. For the most
26 |             // part I was guessing how the stack tracing uses the Windows unwind codes and then went ahead and
27 |             // constructed appropriate seh_* directives to generate these unwind codes. The desired outcome can
28 |             // be described in different ways with seh_* directives, but after many tests this was established
29 |             // to be the most reliable one under debug and release builds. The produced unwind codes are:
30 |             //
31 |             // 0x04: UOP_PushNonVol RSP - Restore the RSP by pointing it to the previous stack and increment it
32 |             //       by 8, jumping over the stack slot holding the deallocation stack.
33 |             // 0x03: UOP_AllocSmall 16 - Increment the RSP by 16, jumping over 2 stack slots: stack limit & base.
34 |             // 0x02: UOP_PushNonVol RBX - Restore the RBX register that is used internally by LLVM and can't be
35 |             //       marked as clobbered.
36 |             // 0x01: UOP_PushNonVol RBP - Pop the previous RBP from the stack.
37 |             //
38 |             // Once the unwinder reaches this function the value on the stack is going to be the value of the
39 |             // previous RSP. After it processes the unwind codes it will look like `trampoline` was called from
40 |             // the `swap` function, because the next value on the stack is the IP value pointing back inside
41 |             // `swap`.
42 |             //
43 |             // Unlike on Unix systems, here we only need one trampoline function to achieve the same outcome.
44 |             //
45 |             // NOTE: To get the unwind codes from a Windows executable run:
46 |             // 1. rabin2.exe -P .\target\debug\examples\async.pdb > pdb.txt
47 |             // 2. Search inside the pdb.txt file to locate the `trampoline` function and note the address.
48 |             // 3. llvm-objdump -u target\debug\examples\async.exe > unwind_info.txt
49 |             // 4. Use the address from step 2 to locate the unwind codes of the `trampoline` function.
50 |             //
51 |             // TODO: Create ASCII art showing how exactly the stack looks.
52 | ".seh_proc trampoline", 53 | "nop", 54 | ".seh_pushreg rbp", 55 | "nop", 56 | ".seh_pushreg rbx", 57 | "nop", 58 | ".seh_stackalloc 16", 59 | "nop", 60 | ".seh_pushreg rsp", 61 | ".seh_endprologue", 62 | "call [rsp + 8]", 63 | "nop", 64 | "nop", 65 | ".seh_endproc", 66 | options(noreturn) 67 | ) 68 | } 69 | 70 | // Save frame pointer 71 | let frame = sp; 72 | sp = push(sp, trampoline as usize + 4); // "call [rsp + 8]" instruction 73 | sp = push(sp, frame as usize); 74 | 75 | // Set rbx starting value to 0 76 | sp = push(sp, 0); 77 | 78 | // The next few values are not really documented in Windows and we rely on this Wiki page: 79 | // https://en.wikipedia.org/wiki/Win32_Thread_Information_Block 80 | // and this file from Boost's Context library: 81 | // https://github.com/boostorg/context/blob/develop/src/asm/jump_x86_64_ms_pe_masm.asm 82 | // to preserve all needed information for Windows to be able to automatically extend the stack and 83 | // move the stack guard page. 84 | 85 | // Stack base 86 | sp = push(sp, stack.bottom() as usize); 87 | 88 | // Stack limit, 4 pages under the deallocation stack on Windows. 89 | sp = push(sp, stack.top() as usize); 90 | 91 | // Deallocation stack, where the actual memory address of the stack starts. 92 | // There are a few pages between the limit and here for the exception handler to have enough stack in case 93 | // of a stack overflow exception. 94 | sp = push(sp, stack.deallocation() as usize); 95 | 96 | sp 97 | } 98 | 99 | #[inline(always)] 100 | pub unsafe fn swap_and_link_stacks( 101 | arg: usize, 102 | new_sp: *mut usize, 103 | sp: *const usize, 104 | ) -> (usize, *mut usize) { 105 | let ret_val: usize; 106 | let ret_sp: *mut usize; 107 | 108 | asm!( 109 | // Save the continuation spot after we jump back here to be after this asm block. 110 | "lea rax, [rip + 1337f]", 111 | "push rax", 112 | // Save the frame pointer as it can't be marked as an output register. 113 | "push rbp", 114 | // rbx is is used internally by LLVM and can't be marked as an output register. 115 | "push rbx", 116 | 117 | // Load NT_TIB 118 | "mov r10, gs:[030h]", 119 | // Save stack base 120 | "mov rax, [r10+08h]", 121 | "push rax", 122 | // Save stack limit 123 | "mov rax, [r10+010h]", 124 | "push rax", 125 | // Save deallocation stack 126 | "mov rax, [r10+01478h]", 127 | "push rax", 128 | 129 | // Link stacks 130 | "mov [rdi - 16], rsp", 131 | 132 | // Set the current pointer as the 2nd element (rdx) of the function we are jumping to. 133 | "mov rdx, rsp", 134 | // Change the stack pointer to the passed value. 135 | "mov rsp, rsi", 136 | 137 | // Set deallocation stack 138 | "pop rax", 139 | "mov [r10+01478h], rax", 140 | // Set stack limit 141 | "pop rax", 142 | "mov [r10+010h], rax", 143 | // Set stack base 144 | "pop rax", 145 | "mov [r10+08h], rax", 146 | 147 | // Restore rbx 148 | "pop rbx", 149 | // Set the frame pointer according to the new stack. 150 | "pop rbp", 151 | // Get the next instruction to jump to. 152 | "pop rax", 153 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 154 | "jmp rax", 155 | "1337:", 156 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 157 | // The compiler will optimise this out and just save the registers it actually knows it must. 
158 | inout("rdi") sp => _, 159 | inout("rsi") new_sp => _, 160 | inout("rcx") arg => ret_val, // 1st argument to called function 161 | out("rdx") ret_sp, // 2nd argument to called function 162 | out("rax") _, 163 | 164 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 165 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 166 | 167 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 168 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 169 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 170 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 171 | ); 172 | 173 | (ret_val, ret_sp) 174 | } 175 | 176 | #[inline(always)] 177 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 178 | let ret_val: usize; 179 | let ret_sp: *mut usize; 180 | 181 | asm!( 182 | // Save the continuation spot after we jump back here to be after this asm block. 183 | "lea rax, [rip + 1337f]", 184 | "push rax", 185 | // Save the frame pointer as it can't be marked as an output register. 186 | "push rbp", 187 | // rbx is is used internally by LLVM can't be marked as an output register. 188 | "push rbx", 189 | 190 | // Load NT_TIB 191 | "mov r10, gs:[030h]", 192 | // Save stack base 193 | "mov rax, [r10+08h]", 194 | "push rax", 195 | // Save stack limit 196 | "mov rax, [r10+010h]", 197 | "push rax", 198 | // Save deallocation stack 199 | "mov rax, [r10+01478h]", 200 | "push rax", 201 | 202 | // Set the current pointer as the 2nd element (rdx) of the function we are jumping to. 203 | "mov rdx, rsp", 204 | // Change the stack pointer to the passed value. 205 | "mov rsp, rsi", 206 | 207 | // Set deallocation stack 208 | "pop rax", 209 | "mov [r10+01478h], rax", 210 | // Set stack limit 211 | "pop rax", 212 | "mov [r10+010h], rax", 213 | // Set stack base 214 | "pop rax", 215 | "mov [r10+08h], rax", 216 | 217 | // Restore rbx 218 | "pop rbx", 219 | // Set the frame pointer according to the new stack. 220 | "pop rbp", 221 | // Get the next instruction to jump to. 222 | "pop rax", 223 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 224 | "jmp rax", 225 | "1337:", 226 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 227 | // The compiler will optimise this out and just save the registers it actually knows it must. 228 | inout("rsi") new_sp => _, 229 | inout("rcx") arg => ret_val, // 1st argument to called function 230 | out("rdx") ret_sp, // 2nd argument to called function 231 | out("rax") _, out("rdi") _, 232 | 233 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 234 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 235 | 236 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 237 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 238 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 239 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 240 | ); 241 | 242 | (ret_val, ret_sp) 243 | } 244 | -------------------------------------------------------------------------------- /switcheroo/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(naked_functions)] 2 | 3 | //! Switcheroo provides lightweight context switches in Rust. 4 | //! 5 | //! It consists of two parts: 6 | //! 1. A stack implementation (currently only providing a [fixed 8Mb stack](stack/struct.EightMbStack.html)). 7 | //! 2. 
A [generator](struct.Generator.html) implementation.
 8 | //! ## Example
 9 | //! ```
10 | //! use switcheroo::stack::*;
11 | //! use switcheroo::Generator;
12 | //!
13 | //! fn main() {
14 | //!     let stack = EightMbStack::new().unwrap();
15 | //!     let mut add_one = Generator::new(stack, |yielder, mut input| {
16 | //!         loop {
17 | //!             if input == 0 {
18 | //!                 break;
19 | //!             }
20 | //!             input = yielder.suspend(input + 1);
21 | //!         }
22 | //!     });
23 | //!
24 | //!     assert_eq!(add_one.resume(2), Some(3));
25 | //!     assert_eq!(add_one.resume(127), Some(128));
26 | //!     assert_eq!(add_one.resume(0), None);
27 | //!     assert_eq!(add_one.resume(0), None);
28 | //! }
29 | //! ```
30 | 
31 | mod arch;
32 | pub mod stack;
33 | 
34 | use std::any::Any;
35 | use std::cell::Cell;
36 | use std::marker::PhantomData;
37 | use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
38 | use std::{mem, ptr::NonNull};
39 | 
40 | // Communicates the return of the Generator.
41 | enum GeneratorOutput<Output> {
42 |     // The generator returned a regular value.
43 |     Value(Output),
44 |     // The generator finished and there are no more values to be returned.
45 |     Finished,
46 |     // The generator panicked. This value is passed to `resume_unwind` to continue the unwind
47 |     // across contexts.
48 |     Panic(Box<dyn Any + Send + 'static>), // Err part of std::thread::Result
49 | }
50 | 
51 | /// Generator wraps a closure and allows suspending its execution more than once, returning
52 | /// a value each time.
53 | ///
54 | /// If the closure finishes, each later call to [resume](struct.Generator.html#method.resume)
55 | /// will yield `None`. If the closure panics, the unwind will happen correctly across contexts.
56 | pub struct Generator<'a, Input: 'a, Output: 'a, Stack: stack::Stack> {
57 |     started: bool,
58 |     stack: Option<Stack>,
59 |     stack_ptr: Option<NonNull<usize>>,
60 |     phantom: PhantomData<(&'a (), *mut Input, *const Output)>,
61 | }
62 | 
63 | unsafe impl<'a, Input, Output, Stack> Send for Generator<'a, Input, Output, Stack>
64 | where
65 |     Input: 'a,
66 |     Output: 'a,
67 |     Stack: stack::Stack,
68 | {
69 | }
70 | 
71 | impl<'a, Input, Output, Stack> Generator<'a, Input, Output, Stack>
72 | where
73 |     Input: 'a,
74 |     Output: 'a,
75 |     Stack: stack::Stack,
76 | {
77 |     /// Create a new generator from a stack and closure.
78 |     pub fn new<F>(stack: Stack, f: F) -> Generator<'a, Input, Output, Stack>
79 |     where
80 |         F: FnOnce(&Yielder<Input, Output>, Input) + 'a,
81 |     {
82 |         // This function will be written to the new stack (by `arch::init`) as the initial
83 |         // entry point. During the `arch::swap_and_link_stacks` call it will be called with
84 |         // the correct closure passed as the first argument. This function will never return.
85 |         // Yielding back into it after `yielder.suspend_(GeneratorOutput::Finished)` was
86 |         // called would be undefined behavior.
87 |         unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(
88 |             f_ptr: usize,
89 |             stack_ptr: *mut usize,
90 |         ) where
91 |             Stack: stack::Stack,
92 |             F: FnOnce(&Yielder<Input, Output>, Input),
93 |         {
94 |             let f = std::ptr::read(f_ptr as *const F);
95 |             let (data, stack_ptr) = arch::swap(0, stack_ptr);
96 |             let input = std::ptr::read(data as *const Input);
97 |             let yielder = Yielder::new(stack_ptr);
98 | 
99 |             // It is not safe to unwind across the context switch.
100 |             // The unwind will continue in the original context.
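            // (An added note: `AssertUnwindSafe` is needed because `catch_unwind` normally
            // requires an `UnwindSafe` closure; the caught panic payload is handed straight
            // back to the original context and re-thrown there by `resume`.)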
101 |             match catch_unwind(AssertUnwindSafe(|| {
102 |                 f(&yielder, input);
103 |             })) {
104 |                 Ok(_) => yielder.suspend_(GeneratorOutput::Finished),
105 |                 Err(panic) => yielder.suspend_(GeneratorOutput::Panic(panic)),
106 |             };
107 |         }
108 | 
109 |         // Prepare the stack
110 |         let stack_ptr = unsafe { arch::init(&stack, generator_wrapper::<Input, Output, Stack, F>) };
111 | 
112 |         // f needs to live on after this function, it is part of the new context. This prevents it
113 |         // from being dropped. The drop happens inside of the `generator_wrapper()` function.
114 |         let f = mem::ManuallyDrop::new(f);
115 | 
116 |         // This call will link the stacks together with assembly directives magic, but once the
117 |         // first `arch::swap` inside `generator_wrapper` is reached it will yield back before the
118 |         // execution of the closure `f`.
119 |         // Only the next call to `resume` will start executing the closure.
120 |         let stack_ptr = unsafe {
121 |             arch::swap_and_link_stacks(
122 |                 &f as *const mem::ManuallyDrop<F> as usize,
123 |                 stack_ptr,
124 |                 stack.bottom(),
125 |             )
126 |             .1
127 |         };
128 | 
129 |         Generator {
130 |             started: false,
131 |             stack: Some(stack),
132 |             stack_ptr: Some(NonNull::new(stack_ptr).unwrap()),
133 |             phantom: PhantomData,
134 |         }
135 |     }
136 | 
137 |     /// Resume the generator, yielding the next value.
138 |     #[inline(always)]
139 |     pub fn resume(&mut self, input: Input) -> Option<Output> {
140 |         if self.stack_ptr.is_none() {
141 |             return None;
142 |         };
143 |         let stack_ptr = self.stack_ptr.unwrap();
144 | 
145 |         unsafe {
146 |             let input = mem::ManuallyDrop::new(input);
147 |             // Mark the `Generator` as started
148 |             self.started = true;
149 |             let (data_out, stack_ptr) = arch::swap(
150 |                 &input as *const mem::ManuallyDrop<Input> as usize,
151 |                 stack_ptr.as_ptr(),
152 |             );
153 | 
154 |             let output = std::ptr::read(data_out as *const GeneratorOutput<Output>);
155 |             match output {
156 |                 GeneratorOutput::Value(value) => {
157 |                     self.stack_ptr = Some(NonNull::new(stack_ptr).unwrap());
158 |                     Some(value)
159 |                 }
160 |                 GeneratorOutput::Finished => {
161 |                     self.stack_ptr = None;
162 |                     None
163 |                 }
164 |                 GeneratorOutput::Panic(panic) => {
165 |                     self.stack_ptr = None;
166 |                     resume_unwind(panic);
167 |                 }
168 |             }
169 |         }
170 |     }
171 | 
172 |     /// Returns true if the execution of the passed in closure has started.
173 |     #[inline(always)]
174 |     pub fn started(&self) -> bool {
175 |         self.started
176 |     }
177 | 
178 |     /// Returns true if the generator finished running.
179 |     #[inline(always)]
180 |     pub fn finished(&self) -> bool {
181 |         self.stack_ptr.is_none()
182 |     }
183 | 
184 |     /// Consume the generator and extract the stack.
185 |     pub fn stack(mut self) -> Stack {
186 |         self.stack.take().unwrap()
187 |         // Drop for Generator is executed here while the stack is still alive.
188 |     }
189 | }
190 | 
191 | impl<'a, Input, Output, Stack> Drop for Generator<'a, Input, Output, Stack>
192 | where
193 |     Input: 'a,
194 |     Output: 'a,
195 |     Stack: stack::Stack,
196 | {
197 |     fn drop(&mut self) {
198 |         // If there is still data on the stack unwind it.
199 |         if self.started() && !self.finished() {
200 |             unsafe {
201 |                 let (data, _stack_ptr) = arch::swap(0, self.stack_ptr.unwrap().as_ptr());
202 |                 // We catch the unwind in the other context, but don't resume it here (just drop the panic value).
203 |                 let _panic = std::ptr::read(data as *const GeneratorOutput<Output>);
204 |             };
205 |         }
206 |     }
207 | }
208 | 
209 | /// Yielder is an interface provided to every generator through which it returns a value.
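///
/// A short sketch of the typical flow (an illustrative example added here, not part of
/// the original docs):
///
/// ```
/// use switcheroo::stack::*;
/// use switcheroo::Generator;
///
/// let stack = OneMbStack::new().unwrap();
/// let mut doubler = Generator::new(stack, |yielder, input: i32| {
///     // Each `suspend` hands a value back to `resume` and waits for the next input.
///     yielder.suspend(input * 2);
/// });
/// assert_eq!(doubler.resume(4), Some(8));
/// assert_eq!(doubler.resume(0), None);
/// ```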
210 | pub struct Yielder<Input, Output> {
211 |     stack_ptr: Cell<*mut usize>,
212 |     phantom: PhantomData<(*const Input, *mut Output)>,
213 | }
214 | 
215 | impl<Input, Output> Yielder<Input, Output> {
216 |     fn new(stack_ptr: *mut usize) -> Yielder<Input, Output> {
217 |         Yielder {
218 |             stack_ptr: Cell::new(stack_ptr),
219 |             phantom: PhantomData,
220 |         }
221 |     }
222 | 
223 |     /// Suspends the generator and returns `Some(val)` from the `resume()` invocation that resumed
224 |     /// the generator.
225 |     #[inline(always)]
226 |     pub fn suspend(&self, val: Output) -> Input {
227 |         unsafe { self.suspend_(GeneratorOutput::Value(val)) }
228 |     }
229 | 
230 |     #[inline(always)]
231 |     unsafe fn suspend_(&self, out: GeneratorOutput<Output>) -> Input {
232 |         let out = mem::ManuallyDrop::new(out);
233 |         let (data, stack_ptr) = arch::swap(
234 |             &out as *const mem::ManuallyDrop<GeneratorOutput<Output>> as usize,
235 |             self.stack_ptr.get(),
236 |         );
237 | 
238 |         // Set the return point. This needs to happen before an unwind is triggered.
239 |         self.stack_ptr.set(stack_ptr);
240 | 
241 |         // We use the data pointer to signal an unwind trigger.
242 |         // It should never be 0 otherwise.
243 |         if data == 0 {
244 |             resume_unwind(Box::new(()));
245 |         }
246 | 
247 |         std::ptr::read(data as *const Input)
248 |     }
249 | }

--------------------------------------------------------------------------------
/switcheroo/src/stack/eight_mb.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | use std::mem::size_of;
 3 | use std::ptr;
 4 | 
 5 | #[cfg(target_family = "unix")]
 6 | use libc::{mmap, MAP_ANON, MAP_FAILED, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, PROT_WRITE};
 7 | 
 8 | #[cfg(target_family = "windows")]
 9 | use winapi::ctypes::c_void;
10 | #[cfg(target_family = "windows")]
11 | use winapi::um::memoryapi::{VirtualAlloc, VirtualFree, VirtualProtect};
12 | #[cfg(target_family = "windows")]
13 | use winapi::um::winnt::{
14 |     MEM_COMMIT, MEM_RELEASE, MEM_RESERVE, PAGE_GUARD, PAGE_NOACCESS, PAGE_READWRITE,
15 | };
16 | 
17 | use super::Stack;
18 | 
19 | /// An 8 Mb stack.
20 | ///
21 | /// On Unix platforms this will simply reserve 8 Mb of memory to be used as a stack (without a
22 | /// guard page). Mmap will be called with the MAP_NORESERVE flag to allow us to overcommit on stack
23 | /// allocations.
24 | ///
25 | /// On Windows it will reserve 8 Mb of memory + 4 pages on top for the exception handler. Only the
26 | /// bottom of the stack will be marked as committed, while the rest will be reserved. This allows us
27 | /// to overcommit on stack allocations. The memory is specifically set up with guard pages in a way
28 | /// that Windows expects it to be, so that the OS can automatically grow and commit memory.
29 | ///
30 | /// Even though 8 Mb may sound like a lot, on all modern operating systems only pages that have
31 | /// something written to them consume physical memory; the rest is cheap virtual memory.
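///
/// A small usage sketch (an illustrative example added here, not from the original docs):
///
/// ```
/// use switcheroo::stack::*;
///
/// let stack = EightMbStack::new().unwrap();
/// // Stacks grow down, so the bottom pointer sits above the top pointer.
/// assert!(stack.bottom() > stack.top());
/// ```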
32 | pub struct EightMbStack(*mut usize); 33 | 34 | unsafe impl Send for EightMbStack {} 35 | 36 | const EIGHT_MB: usize = 8 * 1024 * 1024; 37 | #[cfg(target_family = "windows")] 38 | const EXCEPTION_ZONE: usize = 4 * 4096; 39 | 40 | impl Stack for EightMbStack { 41 | #[cfg(target_family = "unix")] 42 | fn new() -> Result { 43 | let ptr = unsafe { 44 | mmap( 45 | ptr::null_mut(), 46 | EIGHT_MB, 47 | PROT_READ | PROT_WRITE, 48 | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, 49 | -1, 50 | 0, 51 | ) 52 | }; 53 | if ptr == MAP_FAILED { 54 | Err(Error::last_os_error()) 55 | } else { 56 | Ok(Self(ptr as *mut usize)) 57 | } 58 | } 59 | 60 | #[cfg(target_family = "unix")] 61 | fn bottom(&self) -> *mut usize { 62 | unsafe { self.0.add(EIGHT_MB / size_of::()) } 63 | } 64 | #[cfg(target_family = "unix")] 65 | fn top(&self) -> *mut usize { 66 | self.0 67 | } 68 | #[cfg(target_family = "unix")] 69 | fn deallocation(&self) -> *mut usize { 70 | panic!("Not used on unix"); 71 | } 72 | 73 | // Windows 74 | #[cfg(target_family = "windows")] 75 | fn new() -> Result { 76 | unsafe { 77 | // Add extra 16 Kb on top of the stack to be used by the exception handler in case of a stack overflow. 78 | // Cast pointer to `usize`, because calculating offsets with `c_void` is impossible. Sometimes it has a 79 | // size of 0, sometimes it decides to be 1 byte. 80 | let ptr = VirtualAlloc( 81 | ptr::null_mut(), 82 | EIGHT_MB + EXCEPTION_ZONE, 83 | MEM_RESERVE, 84 | PAGE_NOACCESS, 85 | ) as *mut usize; 86 | if ptr.is_null() { 87 | return Err(Error::last_os_error()); 88 | } 89 | // Commit 3 bottom pages (1 read/write and 2 guard pages) 90 | let bottom_2 = VirtualAlloc( 91 | ptr.add((EIGHT_MB + EXCEPTION_ZONE - 3 * 4096) / size_of::()) as *mut c_void, 92 | 3 * 4096, 93 | MEM_COMMIT, 94 | PAGE_GUARD | PAGE_READWRITE, 95 | ); 96 | if bottom_2.is_null() { 97 | return Err(Error::last_os_error()); 98 | } 99 | 100 | let old_protect: u32 = 0; 101 | let bottom_1 = VirtualProtect( 102 | ptr.add((EIGHT_MB + EXCEPTION_ZONE - 1 * 4096) / size_of::()) as *mut c_void, 103 | 1 * 4096, 104 | PAGE_READWRITE, 105 | &old_protect as *const u32 as *mut u32, 106 | ); 107 | if bottom_1 == 0 { 108 | return Err(Error::last_os_error()); 109 | } 110 | 111 | Ok(Self(ptr as *mut usize)) 112 | } 113 | } 114 | 115 | #[cfg(target_family = "windows")] 116 | fn bottom(&self) -> *mut usize { 117 | unsafe { self.0.add((EIGHT_MB + EXCEPTION_ZONE) / size_of::()) } 118 | } 119 | #[cfg(target_family = "windows")] 120 | fn top(&self) -> *mut usize { 121 | unsafe { self.0.add(EXCEPTION_ZONE / size_of::()) } 122 | } 123 | #[cfg(target_family = "windows")] 124 | fn deallocation(&self) -> *mut usize { 125 | self.0 126 | } 127 | } 128 | 129 | #[cfg(target_family = "unix")] 130 | impl Drop for EightMbStack { 131 | fn drop(&mut self) { 132 | let result = unsafe { libc::munmap(self.0 as *mut libc::c_void, EIGHT_MB) }; 133 | debug_assert_eq!(result, 0); 134 | } 135 | } 136 | 137 | #[cfg(target_family = "windows")] 138 | impl Drop for EightMbStack { 139 | fn drop(&mut self) { 140 | let result = unsafe { VirtualFree(self.0 as *mut winapi::ctypes::c_void, 0, MEM_RELEASE) }; 141 | debug_assert_ne!(result, 0); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /switcheroo/src/stack/mod.rs: -------------------------------------------------------------------------------- 1 | //! Different stack implementations (currently only contains a 8 Mb stack). 
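//!
//! A short sketch of allocating the stacks exported from this module (an illustrative
//! example added here; both `EightMbStack` and `OneMbStack` are re-exported below):
//!
//! ```
//! use switcheroo::stack::*;
//!
//! let big = EightMbStack::new().unwrap();
//! let small = OneMbStack::new().unwrap();
//! ```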

mod eight_mb;
mod one_mb;
pub use eight_mb::EightMbStack;
pub use one_mb::OneMbStack;

/// An implementation of this trait will be accepted by a [generator](struct.Generator.html) as a
/// valid stack. Most of the functions provided here are straightforward, except for
/// [deallocation](trait.Stack.html#tymethod.deallocation), which is a Windows-only construct.
///
/// Windows reserves a few pages above the stack top, so that if a stack overflow exception is
/// triggered the handler still has enough stack left to process it. The name comes from the fact
/// that it points to the topmost address of the memory area designated to the stack and is used
/// as the pointer when freeing/deallocating the stack.
pub trait Stack: Sized + Send {
    /// Returns a new stack.
    fn new() -> Result<Self, std::io::Error>;

    /// Returns a pointer to the bottom of the stack.
    fn bottom(&self) -> *mut usize;

    /// Returns a pointer to the top of the stack.
    fn top(&self) -> *mut usize;

    /// Returns a pointer to the deallocation stack (a Windows construct).
    fn deallocation(&self) -> *mut usize;
}
--------------------------------------------------------------------------------
/switcheroo/src/stack/one_mb.rs:
--------------------------------------------------------------------------------
use std::io::Error;
use std::mem::size_of;
use std::ptr;

#[cfg(target_family = "unix")]
use libc::{mmap, MAP_ANON, MAP_FAILED, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, PROT_WRITE};

#[cfg(target_family = "windows")]
use winapi::ctypes::c_void;
#[cfg(target_family = "windows")]
use winapi::um::memoryapi::{VirtualAlloc, VirtualFree, VirtualProtect};
#[cfg(target_family = "windows")]
use winapi::um::winnt::{
    MEM_COMMIT, MEM_RELEASE, MEM_RESERVE, PAGE_GUARD, PAGE_NOACCESS, PAGE_READWRITE,
};

use super::Stack;

/// A 1 Mb stack (1 Mb + 4 Kb).
///
/// On Unix platforms this will simply reserve 1 Mb + 4 Kb of memory to be used as a stack (without
/// a guard page). Mmap will be called with the MAP_NORESERVE flag to allow us to overcommit on
/// stack allocations.
///
/// On Windows it will reserve 1 Mb + 4 Kb of memory + 4 pages on top for the exception handler.
/// Only the bottom of the stack will be marked as committed, while the rest will be reserved. This
/// allows us to overcommit on stack allocations. The memory is specifically set up with guard
/// pages in the way that Windows expects it to be, so that the OS can automatically grow and
/// commit memory.
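///
/// # Example
///
/// A minimal, illustrative sketch; the API is identical to `EightMbStack`:
///
/// ```
/// use switcheroo::stack::{OneMbStack, Stack};
///
/// let stack = OneMbStack::new().expect("could not reserve stack memory");
/// // The stack grows downwards, so `bottom()` is the highest usable address.
/// assert!(stack.bottom() > stack.top());
/// ```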
pub struct OneMbStack(*mut usize);

unsafe impl Send for OneMbStack {}

const ONE_MB: usize = 1024 * 1024 + 4096;
#[cfg(target_family = "windows")]
const EXCEPTION_ZONE: usize = 4 * 4096;

impl Stack for OneMbStack {
    #[cfg(target_family = "unix")]
    fn new() -> Result<Self, Error> {
        let ptr = unsafe {
            mmap(
                ptr::null_mut(),
                ONE_MB,
                PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
                -1,
                0,
            )
        };
        if ptr == MAP_FAILED {
            Err(Error::last_os_error())
        } else {
            Ok(Self(ptr as *mut usize))
        }
    }

    #[cfg(target_family = "unix")]
    fn bottom(&self) -> *mut usize {
        unsafe { self.0.add(ONE_MB / size_of::<usize>()) }
    }
    #[cfg(target_family = "unix")]
    fn top(&self) -> *mut usize {
        self.0
    }
    #[cfg(target_family = "unix")]
    fn deallocation(&self) -> *mut usize {
        panic!("Not used on unix");
    }

    // Windows
    #[cfg(target_family = "windows")]
    fn new() -> Result<Self, Error> {
        unsafe {
            // Add an extra 16 Kb on top of the stack to be used by the exception handler in case
            // of a stack overflow. Cast the pointer to `usize`, because calculating offsets with
            // `c_void` is impossible. Sometimes it has a size of 0, sometimes it decides to be 1 byte.
            let ptr = VirtualAlloc(
                ptr::null_mut(),
                ONE_MB + EXCEPTION_ZONE,
                MEM_RESERVE,
                PAGE_NOACCESS,
            ) as *mut usize;
            if ptr.is_null() {
                return Err(Error::last_os_error());
            }
            // Commit the 3 bottom pages (1 read/write and 2 guard pages)
            let bottom_2 = VirtualAlloc(
                ptr.add((ONE_MB + EXCEPTION_ZONE - 3 * 4096) / size_of::<usize>()) as *mut c_void,
                3 * 4096,
                MEM_COMMIT,
                PAGE_GUARD | PAGE_READWRITE,
            );
            if bottom_2.is_null() {
                return Err(Error::last_os_error());
            }

            let mut old_protect: u32 = 0;
            let bottom_1 = VirtualProtect(
                ptr.add((ONE_MB + EXCEPTION_ZONE - 4096) / size_of::<usize>()) as *mut c_void,
                4096,
                PAGE_READWRITE,
                &mut old_protect as *mut u32,
            );
            if bottom_1 == 0 {
                return Err(Error::last_os_error());
            }

            Ok(Self(ptr as *mut usize))
        }
    }

    #[cfg(target_family = "windows")]
    fn bottom(&self) -> *mut usize {
        unsafe { self.0.add((ONE_MB + EXCEPTION_ZONE) / size_of::<usize>()) }
    }
    #[cfg(target_family = "windows")]
    fn top(&self) -> *mut usize {
        unsafe { self.0.add(EXCEPTION_ZONE / size_of::<usize>()) }
    }
    #[cfg(target_family = "windows")]
    fn deallocation(&self) -> *mut usize {
        self.0
    }
}

#[cfg(target_family = "unix")]
impl Drop for OneMbStack {
    fn drop(&mut self) {
        let result = unsafe { libc::munmap(self.0 as *mut libc::c_void, ONE_MB) };
        debug_assert_eq!(result, 0);
    }
}

#[cfg(target_family = "windows")]
impl Drop for OneMbStack {
    fn drop(&mut self) {
        let result = unsafe { VirtualFree(self.0 as *mut winapi::ctypes::c_void, 0, MEM_RELEASE) };
        debug_assert_ne!(result, 0);
    }
}
--------------------------------------------------------------------------------
/switcheroo/tests/stack_test.rs:
--------------------------------------------------------------------------------
use std::io::Error;

use switcheroo::stack::*;

#[test]
fn create_8_mb_stack() -> Result<(), Error> {
    EightMbStack::new()?;
    Ok(())
}

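// An illustrative companion sketch, mirroring the 8 Mb smoke test above
// for the 1 Mb stack exported by the same module.
#[test]
fn create_1_mb_stack() -> Result<(), Error> {
    OneMbStack::new()?;
    Ok(())
}
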
#[test]
fn create_300k_8_mb_stacks() {
    // Uses around 4 Gb of committed memory
    let mut stacks = vec![];
    for _i in 0..300_000 {
        let stack = EightMbStack::new();
        assert!(stack.is_ok());
        stacks.push(stack);
    }
}
--------------------------------------------------------------------------------
/switcheroo/tests/switch_test.rs:
--------------------------------------------------------------------------------
use switcheroo::stack::*;
use switcheroo::Generator;

#[test]
fn switch_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |yielder, mut input| {
        println!("Sometimes println doesn't touch all pages on windows");
        loop {
            if input == 0 {
                break;
            }
            input = yielder.suspend(input + 1);
        }
    });
    assert_eq!(add_one.resume(2), Some(3));
    assert_eq!(add_one.resume(127), Some(128));
    assert_eq!(add_one.resume(-1), Some(0));
    assert_eq!(add_one.resume(0), None);
    assert_eq!(add_one.resume(0), None);
    assert_eq!(add_one.resume(0), None);
}

#[test]
fn extend_small_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut blow_stack = Generator::new(stack, |yielder, input| {
        rec(input);
        yielder.suspend(Some(0));
    });
    // This will use 7 Mb of stack, more than the first 4 Kb of committed memory on Windows
    blow_stack.resume(7_000);
}

// Uses 1 Kb of stack per iteration
fn rec(n: u64) -> u8 {
    let x: [u8; 1024] = [1; 1024];
    if n < 1 {
        x[0]
    } else {
        rec(n - 1)
    }
}

#[test]
#[should_panic]
fn panic_on_different_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |_yielder, mut _input| {
        panic!("Oops");
    });
    let _: u32 = add_one.resume(0).unwrap();
}

#[test]
fn drop_stack_with_unwind() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |yielder, mut _input| {
        let _local_variable = Box::new(0);
        yielder.suspend(());
        yielder.suspend(());
        yielder.suspend(());
    });
    let _: () = add_one.resume(()).unwrap();
}
--------------------------------------------------------------------------------
/tests/async_test.rs:
--------------------------------------------------------------------------------
use async_executor::LocalExecutor;
use async_wormhole::AsyncWormhole;
use backtrace::Backtrace;
use switcheroo::stack::*;

#[test]
fn async_yield() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        println!("The println function blows up the stack more than 4 Kb.");
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        42
    })
    .unwrap();
    let output = futures::executor::block_on(task);
    assert_eq!(output, 42);
}

#[test]
#[should_panic]
fn async_yield_panics() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        panic!();
    })
    .unwrap();
    futures::executor::block_on(task);
}

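// An illustrative sketch (assumed, hand-rolled yield-once future): the future handed to
// `async_suspend` can itself be pending before completing, exercising a genuine
// suspension and wake-up of the wormhole task.
#[test]
fn async_yield_pending_once() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async {
            let mut ready = false;
            futures::future::poll_fn(move |cx| {
                if ready {
                    std::task::Poll::Ready(7)
                } else {
                    // Pend once, but schedule an immediate wake-up so the
                    // executor polls us again.
                    ready = true;
                    cx.waker().wake_by_ref();
                    std::task::Poll::Pending
                }
            })
            .await
        });
        assert_eq!(x, 7);
        x
    })
    .unwrap();
    assert_eq!(futures::executor::block_on(task), 7);
}
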
#[test]
fn async_yield_drop_without_poll() {
    let stack = EightMbStack::new().unwrap();
    AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        42
    })
    .unwrap();
}

#[test]
fn async_yield_drop_with_one_poll() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        // `pending!` returns `Poll::Pending` without scheduling a wake-up,
        // so the task is left suspended here.
        yielder.async_suspend(async { futures::pending!() });
        println!("Never gets here");
    })
    .unwrap();

    let ex = LocalExecutor::new();
    ex.spawn(task).detach();
    // Poll the task exactly once, then drop it while it is still suspended.
    ex.try_tick();
}

#[test]
fn backtrace_test() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |_yielder| {
        let _ = Backtrace::new_unresolved();
    })
    .unwrap();

    futures::executor::block_on(task);
}
--------------------------------------------------------------------------------