├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── Cargo.toml
├── Readme.md
├── benches
│   └── async_benchmark.rs
├── examples
│   ├── async.rs
│   └── panic.rs
├── src
│   └── lib.rs
├── switcheroo
│   ├── Cargo.toml
│   ├── Readme.md
│   ├── benches
│   │   └── switcheroo_benchmark.rs
│   ├── examples
│   │   └── generator_with_drop.rs
│   ├── src
│   │   ├── arch
│   │   │   ├── mod.rs
│   │   │   ├── unix_aarch64.rs
│   │   │   ├── unix_x64.rs
│   │   │   └── windows_x64.rs
│   │   ├── lib.rs
│   │   └── stack
│   │       ├── eight_mb.rs
│   │       ├── mod.rs
│   │       └── one_mb.rs
│   └── tests
│       ├── stack_test.rs
│       └── switch_test.rs
└── tests
    └── async_test.rs

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: [master]
 4 |   pull_request:
 5 |     branches: [master]
 6 | 
 7 | name: Test
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ${{ matrix.os }}
12 |     strategy:
13 |       matrix:
14 |         include:
15 |           - os: ubuntu-latest
16 |           - os: macos-latest
17 |           - os: windows-latest
18 |     steps:
19 |       - name: Checkout code
20 |         uses: actions/checkout@v2
21 |       - name: Install latest nightly
22 |         uses: actions-rs/toolchain@v1
23 |         with:
24 |           toolchain: nightly
25 |           override: true
26 |           components: rustfmt, clippy
27 |       - name: Run tests in Debug Build
28 |         run: cargo +nightly test --all
29 |       - name: Run tests in Release Build
30 |         run: cargo +nightly test --all --release
31 |       - name: Run benchmarks
32 |         run: cargo +nightly bench --all

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /*/target
3 | Cargo.lock
4 | .vscode

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "async-wormhole"
 3 | version = "0.3.7"
 4 | authors = ["Bernard Kolobara "]
 5 | edition = "2018"
 6 | license = "Apache-2.0/MIT"
 7 | description = "Async calls across non-async functions"
 8 | readme = "Readme.md"
 9 | repository = "https://github.com/bkolobara/async-wormhole/"
10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
12 | 
13 | [dependencies]
14 | switcheroo = { path = "./switcheroo", version = "0.2" }
15 | 
16 | [dev-dependencies]
17 | async-executor = "1.4"
18 | futures = "0.3"
19 | criterion = "0.3"
20 | libc = "0.2"
21 | backtrace = "0.3"
22 | 
23 | [[bench]]
24 | name = "async_benchmark"
25 | harness = false
26 | 
27 | [workspace]
28 | members = [
29 |     ".",
30 |     "switcheroo",
31 | ]

--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # async-wormhole
 2 | 
 3 | [Documentation](https://docs.rs/async-wormhole/latest/async_wormhole/)
 4 | 
 5 | > This library is experimental; I use it to prototype the foundation for [Lunatic](https://lunatic.solutions/).
 6 | >
 7 | > **Currently only works on Rust nightly, as it depends on [switcheroo](https://github.com/bkolobara/async-wormhole/tree/master/switcheroo).**
 8 | 
 9 | async-wormhole allows you to `.await` async calls in non-async functions, like extern "C" or JIT generated code.
10 | It runs on Windows, macOS and Linux (x64 & AArch64).
11 | 
12 | ## Motivation
13 | 
14 | Sometimes, when running inside an async environment you need to call into JIT generated code (e.g. wasm)
15 | and .await from there. Because the JIT code is not available at compile time, the Rust compiler can't
16 | do its "create a state machine" magic. In the end you can't have `.await` statements in non-async
17 | functions.
18 | 
19 | This library creates a special stack for executing the JIT code, so it's possible to suspend it at any
20 | point of the execution. Once you pass it a closure inside `AsyncWormhole::new` you will get back a future
21 | that you can `.await` on. The passed in closure is going to be executed on a new stack.
22 | 
23 | ## Example
24 | 
25 | ```rust
26 | use async_wormhole::{AsyncWormhole, AsyncYielder};
27 | use switcheroo::stack::*;
28 | 
29 | // non-async function
30 | #[allow(improper_ctypes_definitions)]
31 | extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
32 |     // Suspend the runtime until async value is ready.
33 |     // Can contain .await calls.
34 |     yielder.async_suspend(async { 42 })
35 | }
36 | 
37 | fn main() {
38 |     let stack = EightMbStack::new().unwrap();
39 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
40 |         let result = non_async(yielder);
41 |         assert_eq!(result, 42);
42 |         64
43 |     })
44 |     .unwrap();
45 | 
46 |     let outside = futures::executor::block_on(task);
47 |     assert_eq!(outside, 64);
48 | }
49 | ```
50 | 
51 | ## Performance
52 | 
53 | There should be almost no performance overhead to `.await` calls inside the closure passed to
54 | `AsyncWormhole::new` and caught by `async_suspend`.
55 | But instantiating a new AsyncWormhole will require one memory allocation.
56 | And of course you are not going to get [perfectly sized stacks](https://without.boats/blog/futures-and-segmented-stacks/#futures-as-a-perfectly-sized-stack).
57 | 
58 | ## License
59 | 
60 | Licensed under either of
61 | 
62 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
63 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
64 | 
65 | at your option.
66 | 
67 | ### Contribution
68 | 
69 | Unless you explicitly state otherwise, any contribution intentionally
70 | submitted for inclusion in the work by you, as defined in the Apache-2.0
71 | license, shall be dual licensed as above, without any additional terms or
72 | conditions.
73 | 

--------------------------------------------------------------------------------
/benches/async_benchmark.rs:
--------------------------------------------------------------------------------
 1 | use std::cell::Cell;
 2 | use std::ptr;
 3 | 
 4 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
 5 | 
 6 | use async_wormhole::AsyncWormhole;
 7 | use switcheroo::stack::*;
 8 | 
 9 | thread_local!(
10 |     /// Mock TLS
11 |     pub static TLS: Cell<*const usize> = Cell::new(ptr::null())
12 | );
13 | 
14 | fn async_bench(c: &mut Criterion) {
15 |     c.bench_function("async_wormhole creation", |b| {
16 |         b.iter(|| {
17 |             let stack = EightMbStack::new().unwrap();
18 |             AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
19 |                 yielder.async_suspend(async { 42 });
20 |             })
21 |             .unwrap();
22 |         })
23 |     });
24 | 
25 |     c.bench_function("async switch", |b| {
26 |         b.iter_batched(
27 |             || {
28 |                 let stack = EightMbStack::new().unwrap();
29 |                 let async_ = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
30 |                     yielder.async_suspend(async { 42 });
31 |                 })
32 |                 .unwrap();
33 |                 async_
34 |             },
35 |             |mut task| {
36 |                 futures::executor::block_on(&mut task);
37 |                 task
38 |             },
39 |             BatchSize::SmallInput,
40 |         );
41 |     });
42 | 
43 |     c.bench_function("async switch with pre and post poll hooks", |b| {
44 |         b.iter_batched(
45 |             || {
46 |                 let stack = EightMbStack::new().unwrap();
47 |                 let mut async_ = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
48 |                     yielder.async_suspend(async { 42 });
49 |                 })
50 |                 .unwrap();
51 |                 async_.set_pre_post_poll(|| {
52 |                     let _ = 33 + 34;
53 |                 });
54 |                 async_
55 |             },
56 |             |mut task| {
57 |                 futures::executor::block_on(&mut task);
58 |                 task
59 |             },
60 |             BatchSize::SmallInput,
61 |         );
62 |     });
63 | }
64 | 
65 | criterion_group!(benches, async_bench);
66 | criterion_main!(benches);

--------------------------------------------------------------------------------
/examples/async.rs:
--------------------------------------------------------------------------------
 1 | use async_wormhole::{AsyncWormhole, AsyncYielder};
 2 | use switcheroo::stack::*;
 3 | 
 4 | // non-async function
 5 | #[allow(improper_ctypes_definitions)]
 6 | extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
 7 |     // Suspend the runtime until async value is ready
 8 |     yielder.async_suspend(async { 42 })
 9 | }
10 | 
11 | fn main() {
12 |     let stack = EightMbStack::new().unwrap();
13 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
14 |         let result = non_async(yielder);
15 |         assert_eq!(result, 42);
16 |         64
17 |     })
18 |     .unwrap();
19 | 
20 |     let outside = futures::executor::block_on(task);
21 |     assert_eq!(outside, 64);
22 | }

--------------------------------------------------------------------------------
/examples/panic.rs:
--------------------------------------------------------------------------------
 1 | use async_wormhole::AsyncWormhole;
 2 | use backtrace::Backtrace;
 3 | use switcheroo::stack::*;
 4 | 
 5 | fn main() {
 6 |     let stack = EightMbStack::new().unwrap();
 7 |     let task = AsyncWormhole::<_, _, fn()>::new(stack, |_yielder| {
 8 |         let b = Backtrace::new();
 9 |         println!("{:?}", b);
10 |         panic!("Panic inside wormhole")
11 |     })
12 |     .unwrap();
13 | 
14 |     futures::executor::block_on(task);
15 | }

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! async-wormhole allows you to `.await` async calls across non-async functions, like extern "C" or JIT
  2 | //! generated code.
  3 | //!
  4 | //! ## Motivation
  5 | //!
  6 | //! Sometimes, when running inside an async environment you need to call into JIT generated code (e.g. wasm)
  7 | //! and .await from there. Because the JIT code is not available at compile time, the Rust compiler can't
  8 | //! do its "create a state machine" magic. In the end you can't have `.await` statements in non-async
  9 | //! functions.
 10 | //!
 11 | //! This library creates a special stack for executing the JIT code, so it's possible to suspend it at any
 12 | //! point of the execution. Once you pass it a closure inside [AsyncWormhole::new](struct.AsyncWormhole.html#method.new)
 13 | //! you will get back a future that you can `.await` on. The passed in closure is going to be executed on a
 14 | //! new stack.
 15 | //!
 16 | //! ## Example
 17 | //!
 18 | //! ```rust
 19 | //! use async_wormhole::{AsyncWormhole, AsyncYielder};
 20 | //! use switcheroo::stack::*;
 21 | //!
 22 | //! // non-async function
 23 | //! #[allow(improper_ctypes_definitions)]
 24 | //! extern "C" fn non_async(mut yielder: AsyncYielder<u32>) -> u32 {
 25 | //!     // Suspend the runtime until async value is ready.
 26 | //!     // Can contain .await calls.
 27 | //!     yielder.async_suspend(async { 42 })
 28 | //! }
 29 | //!
 30 | //! fn main() {
 31 | //!     let stack = EightMbStack::new().unwrap();
 32 | //!     let task = AsyncWormhole::<_, _, fn()>::new(stack, |yielder| {
 33 | //!         let result = non_async(yielder);
 34 | //!         assert_eq!(result, 42);
 35 | //!         64
 36 | //!     })
 37 | //!     .unwrap();
 38 | //!
 39 | //!     let outside = futures::executor::block_on(task);
 40 | //!     assert_eq!(outside, 64);
 41 | //! }
 42 | //! ```
 43 | 
 44 | use switcheroo::Generator;
 45 | use switcheroo::Yielder;
 46 | 
 47 | use std::cell::Cell;
 48 | use std::future::Future;
 49 | use std::io::Error;
 50 | use std::pin::Pin;
 51 | use std::task::{Context, Poll, Waker};
 52 | 
 53 | pub use switcheroo::stack;
 54 | 
 55 | /// AsyncWormhole represents a Future that uses a generator with a separate stack to execute a closure.
 56 | ///
 57 | /// It has the capability to .await on other Futures in the closure using the received
 58 | /// [AsyncYielder](struct.AsyncYielder.html). Once all Futures have been awaited on, AsyncWormhole will resolve
 59 | /// to the return value of the provided closure.
 60 | ///
 61 | /// For dealing with thread local storage,
 62 | /// [AsyncWormhole::set_pre_post_poll](struct.AsyncWormhole.html#method.set_pre_post_poll) is provided.
 63 | ///
 64 | /// Every time an executor polls AsyncWormhole, the `pre_post_poll` function will be called, and every time
 65 | /// AsyncWormhole returns `Poll::Pending`, `pre_post_poll` will be called again. Between these two calls we
 66 | /// have a guarantee that the executor will not be able to move the execution to another thread, and we
 67 | /// can use this guarantee to our advantage in specific scenarios.
 68 | pub struct AsyncWormhole<'a, Stack, Output, P>
 69 | where
 70 |     Stack: stack::Stack + Send,
 71 |     P: FnMut() + Send,
 72 | {
 73 |     generator: Option<Cell<Generator<'a, Waker, Option<Output>, Stack>>>,
 74 |     pre_post_poll: Option<P>,
 75 | }
 76 | 
 77 | impl<'a, Stack, Output, P> AsyncWormhole<'a, Stack, Output, P>
 78 | where
 79 |     Stack: stack::Stack + Send,
 80 |     P: FnMut() + Send,
 81 | {
 82 |     /// Returns a new AsyncWormhole, using the passed `stack` to execute the closure `f` on.
 83 |     /// The closure will not be executed right away, only if you pass AsyncWormhole to an
 84 |     /// async executor (.await on it).
 85 |     pub fn new<F>(stack: Stack, f: F) -> Result<Self, Error>
 86 |     where
 87 |         F: FnOnce(AsyncYielder<Output>) -> Output + 'a + Send,
 88 |     {
 89 |         let generator = Generator::new(stack, |yielder, waker| {
 90 |             let async_yielder = AsyncYielder::new(yielder, waker);
 91 |             let finished = Some(f(async_yielder));
 92 |             yielder.suspend(finished);
 93 |         });
 94 | 
 95 |         Ok(Self {
 96 |             generator: Some(Cell::new(generator)),
 97 |             pre_post_poll: None,
 98 |         })
 99 |     }
100 | 
101 |     /// Every time the executor polls `AsyncWormhole` we may end up on another thread. Here we can set a function
102 |     /// that swaps some thread local storage and a context that can travel with `AsyncWormhole` between threads.
103 |     pub fn set_pre_post_poll(&mut self, f: P) {
104 |         self.pre_post_poll = Some(f);
105 |     }
106 | 
107 |     /// Get the stack from the internal generator.
108 |     pub fn stack(mut self) -> Stack {
109 |         let generator = self.generator.take().unwrap().into_inner();
110 |         // If the generator didn't finish yet, the stack is going to be unwound on drop().
111 |         // Fire a last pre_post_poll before this happens.
112 |         if generator.started() && !generator.finished() {
113 |             if let Some(pre_post_poll) = &mut self.pre_post_poll {
114 |                 pre_post_poll();
115 |             }
116 |         }
117 |         generator.stack()
118 |     }
119 | }
120 | 
121 | impl<'a, Stack, Output, P> Future for AsyncWormhole<'a, Stack, Output, P>
122 | where
123 |     Stack: stack::Stack + Unpin + Send,
124 |     P: FnMut() + Unpin + Send,
125 | {
126 |     type Output = Output;
127 | 
128 |     fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
129 |         // If pre_post_poll is provided execute it before entering the separate stack.
130 |         if let Some(pre_post_poll) = &mut self.pre_post_poll {
131 |             pre_post_poll()
132 |         }
133 | 
134 |         match self
135 |             .generator
136 |             .as_mut()
137 |             .unwrap()
138 |             .get_mut()
139 |             .resume(cx.waker().clone())
140 |         {
141 |             // If we poll the future after it completed it will always return Poll::Pending.
142 |             // But polling a completed future is undefined behaviour anyway.
143 |             None | Some(None) => {
144 |                 // If pre_post_poll is provided execute it before returning Poll::Pending.
145 |                 if let Some(pre_post_poll) = &mut self.pre_post_poll {
146 |                     pre_post_poll()
147 |                 }
148 |                 Poll::Pending
149 |             }
150 |             Some(Some(out)) => {
151 |                 // Poll one last time to finish the generator.
152 |                 self.generator
153 |                     .as_mut()
154 |                     .unwrap()
155 |                     .get_mut()
156 |                     .resume(cx.waker().clone());
157 |                 Poll::Ready(out)
158 |             }
159 |         }
160 |     }
161 | }
162 | 
163 | impl<'a, Stack, Output, P> Drop for AsyncWormhole<'a, Stack, Output, P>
164 | where
165 |     Stack: stack::Stack + Send,
166 |     P: FnMut() + Send,
167 | {
168 |     fn drop(&mut self) {
169 |         // Dropping a generator can cause an unwind and execute code inside of the separate context.
170 |         // In this regard it's similar to a `poll` call and we need to fire the pre and post poll hooks.
171 |         // Note that we **don't** do a last `post_poll` call once the generator is dropped.
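        // (An added note: the hooks only need to fire when the closure has started but not
        // finished, because only then does dropping the generator unwind frames on the other stack.)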
172 |         if let Some(pre_post_poll) = &mut self.pre_post_poll {
173 |             if let Some(generator) = self.generator.as_mut() {
174 |                 if generator.get_mut().started() && !generator.get_mut().finished() {
175 |                     pre_post_poll()
176 |                 }
177 |             }
178 |         }
179 |     }
180 | }
181 | 
182 | #[derive(Clone)]
183 | pub struct AsyncYielder<'a, Output> {
184 |     yielder: &'a Yielder<Waker, Option<Output>>,
185 |     waker: Waker,
186 | }
187 | 
188 | impl<'a, Output> AsyncYielder<'a, Output> {
189 |     pub(crate) fn new(yielder: &'a Yielder<Waker, Option<Output>>, waker: Waker) -> Self {
190 |         Self { yielder, waker }
191 |     }
192 | 
193 |     /// Takes an `impl Future` and awaits it, returning the value from it once ready.
194 |     pub fn async_suspend<Fut, R>(&mut self, mut future: Fut) -> R
195 |     where
196 |         Fut: Future<Output = R>,
197 |     {
198 |         let mut future = unsafe { Pin::new_unchecked(&mut future) };
199 |         loop {
200 |             let mut cx = Context::from_waker(&mut self.waker);
201 |             self.waker = match future.as_mut().poll(&mut cx) {
202 |                 Poll::Pending => self.yielder.suspend(None),
203 |                 Poll::Ready(result) => return result,
204 |             };
205 |         }
206 |     }
207 | }

--------------------------------------------------------------------------------
/switcheroo/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "switcheroo"
 3 | version = "0.2.9"
 4 | authors = ["Bernard Kolobara "]
 5 | edition = "2018"
 6 | license = "Apache-2.0/MIT"
 7 | description = "Lightweight userland context switches"
 8 | readme = "Readme.md"
 9 | repository = "https://github.com/bkolobara/async-wormhole/tree/master/switcheroo"
10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
12 | 
13 | [target.'cfg(unix)'.dependencies]
14 | libc = "0.2"
15 | 
16 | [target.'cfg(windows)'.dependencies]
17 | winapi = { version = "0.3", features = ["winbase", "memoryapi", "errhandlingapi"] }
18 | 
19 | [dev-dependencies]
20 | criterion = "0.3"
21 | 
22 | [[bench]]
23 | name = "switcheroo_benchmark"
24 | harness = false

--------------------------------------------------------------------------------
/switcheroo/Readme.md:
--------------------------------------------------------------------------------
 1 | # Switcheroo
 2 | 
 3 | [Documentation](https://docs.rs/switcheroo/latest/switcheroo/)
 4 | 
 5 | > This library is heavily inspired by https://github.com/edef1c/libfringe.
 6 | 
 7 | > **Currently only works on Rust nightly.**
 8 | 
 9 | Switcheroo provides lightweight context switches in Rust. It runs on Windows, macOS and Linux (x64 & AArch64).
10 | 
11 | ## Example
12 | 
13 | ```rust
14 | use switcheroo::stack::*;
15 | use switcheroo::Generator;
16 | 
17 | fn main() {
18 |     let stack = EightMbStack::new().unwrap();
19 |     let mut add_one = Generator::new(stack, |yielder, mut input| {
20 |         loop {
21 |             if input == 0 {
22 |                 break;
23 |             }
24 |             input = yielder.suspend(input + 1);
25 |         }
26 |     });
27 | 
28 |     assert_eq!(add_one.resume(2), Some(3));
29 |     assert_eq!(add_one.resume(127), Some(128));
30 |     assert_eq!(add_one.resume(0), None);
31 |     assert_eq!(add_one.resume(0), None);
32 | }
33 | ```
34 | 
35 | ## Performance
36 | 
37 | On my Macbook Pro 15" (Late 2013) each context switch is comparable to a function call (sub-nanosecond).
38 | 
39 | ## Developer Experience
40 | 
41 | Switcheroo **tries** hard not to let the context switching disturb default Rust behaviour on panics and unwinds.
42 | The displayed backtrace should stretch across the context switch boundary.
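
A minimal sketch of what this means in practice (an illustrative example, not from the original docs; it only uses the public API shown above). A panic raised on the generator's stack resumes on the caller's side, where it can be caught as usual:

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

use switcheroo::stack::*;
use switcheroo::{Generator, Yielder};

fn main() {
    let stack = EightMbStack::new().unwrap();
    let mut gen = Generator::new(stack, |_yielder: &Yielder<(), ()>, _input| {
        panic!("panic on the generator stack");
    });
    // The panic crosses the context switch boundary and unwinds here.
    let result = catch_unwind(AssertUnwindSafe(|| gen.resume(())));
    assert!(result.is_err());
}
```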
43 | 
44 | When dropping a non-empty stack, it will be unwound to free any resources allocated on it.
45 | 
46 | ## License
47 | 
48 | Licensed under either of
49 | 
50 | - Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
51 | - MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
52 | 
53 | at your option.
54 | 
55 | ### Contribution
56 | 
57 | Unless you explicitly state otherwise, any contribution intentionally
58 | submitted for inclusion in the work by you, as defined in the Apache-2.0
59 | license, shall be dual licensed as above, without any additional terms or
60 | conditions.

--------------------------------------------------------------------------------
/switcheroo/benches/switcheroo_benchmark.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 2 | 
 3 | use switcheroo::stack::*;
 4 | use switcheroo::Generator;
 5 | 
 6 | fn switcheroo(c: &mut Criterion) {
 7 |     // Bench allocation
 8 |     c.bench_function("create 8 MB stack", |b| b.iter(|| EightMbStack::new()));
 9 | 
10 |     c.bench_function("switch stacks", |b| {
11 |         let stack = EightMbStack::new().unwrap();
12 |         let mut gen = Generator::new(stack, |yielder, input| {
13 |             black_box(yielder.suspend(input + 1));
14 |         });
15 |         b.iter(|| black_box(gen.resume(2)))
16 |     });
17 | }
18 | 
19 | criterion_group!(benches, switcheroo);
20 | criterion_main!(benches);

--------------------------------------------------------------------------------
/switcheroo/examples/generator_with_drop.rs:
--------------------------------------------------------------------------------
 1 | use switcheroo::Generator;
 2 | use switcheroo::{stack::*, Yielder};
 3 | 
 4 | struct DropMarker {}
 5 | 
 6 | impl Drop for DropMarker {
 7 |     fn drop(&mut self) {
 8 |         println!("Dropped");
 9 |     }
10 | }
11 | 
12 | fn main() {
13 |     let stack = EightMbStack::new().unwrap();
14 |     let mut add_one = Generator::new(stack, |yielder: &Yielder<i32, i32>, mut input| {
15 |         let _marker = DropMarker {};
16 |         input = yielder.suspend(input + 1);
17 |         input = yielder.suspend(input + 1);
18 |         input = yielder.suspend(input + 1);
19 |         yielder.suspend(input + 1);
20 |     });
21 | 
22 |     assert_eq!(add_one.resume(2), Some(3));
23 |     assert_eq!(add_one.resume(2), Some(3));
24 |     assert_eq!(add_one.resume(127), Some(128));
25 |     // assert_eq!(add_one.resume(0), Some(1));
26 |     assert_eq!(add_one.finished(), false);
27 | }

--------------------------------------------------------------------------------
/switcheroo/src/arch/mod.rs:
--------------------------------------------------------------------------------
 1 | // All architectures expose a similar API. Here I just want to take some time explaining the general
 2 | // idea behind all of them.
 3 | //
 4 | // At the core of the implementation there are 3 functions:
 5 | // * `init(stack: &Stack, f: unsafe extern "C" fn(usize, *mut usize)) -> *mut usize`
 6 | // * `swap_and_link_stacks(arg: usize, new_sp: *mut usize, sp: *const usize) -> (usize, *mut usize)`
 7 | // * `swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize)`
 8 | //
 9 | // ### init
10 | // `init` takes a **stack** and a **pointer to a function**. It will prepare the stack so it is ready
11 | // to be switched to. Once we switch to it, the function we set up here will be called.
12 | //
13 | // Unix and Windows operating systems require different stack setups.
Here is an illustration on how 14 | // the stacks look after the call to `init`: 15 | // ``` 16 | // + + 17 | // | ....... | 18 | // | | 19 | // |Deallocation stack| 20 | // +------------------+ 21 | // |Stack limit | 22 | // +------------------+ 23 | // |Stack base | + + 24 | // +------------------+ | | 25 | // +----+Stack frame ptr | | | 26 | // | +------------------+ | ......... | 27 | // | |Trampoline | | | 28 | // | +------------------+ +----+Stack frame ptr | 29 | // +---->Caller frame | | +------------------+ 30 | // +------------------+ | |Trampoline 2 ptr | 31 | // |Function ptr | | +------------------+ 32 | // +------------------+ +---->Caller frame | 33 | // +------------------+ 34 | // |Trampoline 1 ptr | 35 | // +------------------+ 36 | // |Function ptr | 37 | // +------------------+ 38 | // |Alignment | 39 | // +------------------+ 40 | // 41 | // Windows Unix 42 | // ``` 43 | // Windows needs to preserve some extra information across context switches, like the stack base, top 44 | // and deallocation values. If they are not present Windows will not know how to grow the stack. 45 | // The [Boost.Context](https://www.boost.org/doc/libs/1_61_0/libs/context/doc/html/context/overview.html) 46 | // library also preserves some other information, like the current 47 | // [Fiber](https://docs.microsoft.com/en-us/windows/win32/procthread/fibers) data, but I don't expect 48 | // anyone to use switcheroo and Windows Fibers in the same app. 49 | // 50 | // The **Caller frame** value will be filled in by the `swap_and_link_stacks` function to link the 2 51 | // stacks from different contexts. At this point of time we can't know from where we are jumping to 52 | // the stack. 53 | // 54 | // ### swap_and_link_stacks 55 | // This function is really similar to `swap`, but it's expected to be the first one called when jumping 56 | // to a new stack. It will write the **Caller frame** data inside the new stack, basically linking them 57 | // together. Once this data exists on the new stack we don't need to call it anymore and can switch 58 | // stacks with just the `swap` function. 59 | // 60 | // The swap functions will: 61 | // 1. Preserve the frame pointer and instruction pointer of the current context. 62 | // On Windows, deallocation stack, stack limit and base stack are also preserved. 63 | // 2. Change the stack pointer to the new stack. 64 | // 3. Pop the frame pointer and instruction pointer from the new stack. 65 | // 4. Jump to the instruction. 66 | // 67 | // Notice that the instruction pointer points to a cryptic **Trampoline 2** function and not to the 68 | // passed in **Function**. Trampoline 1 and 2 contain some extra assembler information so that it's 69 | // possible to re-create a backtrace across contexts if we panic inside the new context. 
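//
// A rough usage sketch of how the three functions compose (this is not real code from
// this module; it mirrors what `Generator::new` and `Generator::resume` in src/lib.rs do):
//
//     // Prepare the new stack so that jumping to it calls `wrapper`.
//     let stack_ptr = init(&stack, wrapper);
//     // First jump: also writes the **Caller frame** into the new stack, linking them.
//     let (value, sp) = swap_and_link_stacks(arg, stack_ptr, stack.bottom());
//     // Every subsequent switch, in either direction, just uses `swap`.
//     let (value, sp) = swap(next_arg, sp);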
70 | 71 | #[cfg(all(target_family = "unix", target_arch = "x86_64"))] 72 | mod unix_x64; 73 | #[cfg(all(target_family = "unix", target_arch = "x86_64"))] 74 | pub use self::unix_x64::*; 75 | 76 | #[cfg(all(target_family = "unix", target_arch = "aarch64"))] 77 | mod unix_aarch64; 78 | #[cfg(all(target_family = "unix", target_arch = "aarch64"))] 79 | pub use self::unix_aarch64::*; 80 | 81 | #[cfg(all(target_family = "windows", target_arch = "x86_64"))] 82 | mod windows_x64; 83 | #[cfg(all(target_family = "windows", target_arch = "x86_64"))] 84 | pub use self::windows_x64::*; 85 | -------------------------------------------------------------------------------- /switcheroo/src/arch/unix_aarch64.rs: -------------------------------------------------------------------------------- 1 | use crate::stack; 2 | use core::arch::asm; 3 | 4 | pub unsafe fn init( 5 | stack: &S, 6 | f: unsafe extern "C" fn(usize, *mut usize), 7 | ) -> *mut usize { 8 | unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize { 9 | sp = sp.offset(-1); 10 | *sp = val; 11 | sp 12 | } 13 | 14 | let mut sp = stack.bottom(); 15 | 16 | // Save the (generator_wrapper) function on the stack. 17 | sp = push(sp, f as usize); 18 | sp = push(sp, 0xdeaddeaddead0cfa); 19 | 20 | #[naked] 21 | unsafe extern "C" fn trampoline() { 22 | asm!( 23 | // Stops unwinding/backtracing at this function. 24 | ".cfi_undefined lr", 25 | "ldr x2, [sp, #8]", 26 | "blr x2", 27 | options(noreturn) 28 | ) 29 | } 30 | 31 | // Save frame pointer 32 | let frame = sp; 33 | sp = push(sp, trampoline as usize); 34 | sp = push(sp, frame as usize); 35 | 36 | // x18 & x 19 37 | sp = push(sp, 0); sp = push(sp, 0); 38 | 39 | sp 40 | } 41 | 42 | #[inline(always)] 43 | pub unsafe fn swap_and_link_stacks( 44 | arg: usize, 45 | new_sp: *mut usize, 46 | sp: *const usize, 47 | ) -> (usize, *mut usize) { 48 | let ret_val: usize; 49 | let ret_sp: *mut usize; 50 | 51 | asm!( 52 | "adr lr, 1337f", 53 | "stp x29, x30, [sp, #-16]!", 54 | "stp x18, x19, [sp, #-16]!", 55 | "mov x1, sp", 56 | "str x1, [x3, #-32]", 57 | "mov sp, x2", 58 | "ldp x18, x19, [sp], #16", 59 | "ldp x29, x30, [sp], #16", 60 | "br x30", 61 | "1337:", 62 | 63 | inout("x3") sp => _, 64 | inout("x2") new_sp => _, 65 | inout("x0") arg => ret_val, 66 | out("x1") ret_sp, 67 | 68 | out("x4") _, out("x5") _, out("x6") _, out("x7") _, 69 | out("x8") _, out("x9") _, out("x10") _, out("x11") _, 70 | out("x12") _, out("x13") _, out("x14") _, out("x15") _, 71 | out("x16") _, out("x17") _, 72 | out("x20") _, out("x21") _, out("x22") _, out("x23") _, 73 | out("x24") _, out("x25") _, out("x26") _, out("x27") _, 74 | out("x28") _, out("lr") _, 75 | 76 | out("v0") _, out("v1") _, out("v2") _, out("v3") _, 77 | out("v4") _, out("v5") _, out("v6") _, out("v7") _, 78 | out("v8") _, out("v9") _, out("v10") _, out("v11") _, 79 | out("v12") _, out("v13") _, out("v14") _, out("v15") _, 80 | out("v16") _, out("v17") _, out("v18") _, out("v19") _, 81 | out("v20") _, out("v21") _, out("v22") _, out("v23") _, 82 | out("v24") _, out("v25") _, out("v26") _, out("v27") _, 83 | out("v28") _, out("v29") _, out("v30") _, out("v31") _, 84 | ); 85 | 86 | (ret_val, ret_sp) 87 | } 88 | 89 | #[inline(always)] 90 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 91 | let ret_val: usize; 92 | let ret_sp: *mut usize; 93 | 94 | asm!( 95 | "adr lr, 1337f", 96 | "stp x29, x30, [sp, #-16]!", 97 | "stp x18, x19, [sp, #-16]!", 98 | "mov x1, sp", 99 | "mov sp, x2", 100 | "ldp x18, x19, [sp], #16", 101 | "ldp x29, x30, [sp], #16", 
102 | "br x30", 103 | "1337:", 104 | 105 | inout("x2") new_sp => _, 106 | inout("x0") arg => ret_val, 107 | out("x1") ret_sp, out("x3") _, 108 | 109 | out("x4") _, out("x5") _, out("x6") _, out("x7") _, 110 | out("x8") _, out("x9") _, out("x10") _, out("x11") _, 111 | out("x12") _, out("x13") _, out("x14") _, out("x15") _, 112 | out("x16") _, out("x17") _, 113 | out("x20") _, out("x21") _, out("x22") _, out("x23") _, 114 | out("x24") _, out("x25") _, out("x26") _, out("x27") _, 115 | out("x28") _, out("lr") _, 116 | 117 | out("v0") _, out("v1") _, out("v2") _, out("v3") _, 118 | out("v4") _, out("v5") _, out("v6") _, out("v7") _, 119 | out("v8") _, out("v9") _, out("v10") _, out("v11") _, 120 | out("v12") _, out("v13") _, out("v14") _, out("v15") _, 121 | out("v16") _, out("v17") _, out("v18") _, out("v19") _, 122 | out("v20") _, out("v21") _, out("v22") _, out("v23") _, 123 | out("v24") _, out("v25") _, out("v26") _, out("v27") _, 124 | out("v28") _, out("v29") _, out("v30") _, out("v31") _, 125 | ); 126 | 127 | (ret_val, ret_sp) 128 | } 129 | -------------------------------------------------------------------------------- /switcheroo/src/arch/unix_x64.rs: -------------------------------------------------------------------------------- 1 | use crate::stack; 2 | use core::arch::asm; 3 | 4 | pub unsafe fn init( 5 | stack: &S, 6 | f: unsafe extern "C" fn(usize, *mut usize), 7 | ) -> *mut usize { 8 | unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize { 9 | sp = sp.offset(-1); 10 | *sp = val; 11 | sp 12 | } 13 | 14 | let mut sp = stack.bottom(); 15 | 16 | // Save the (generator_wrapper) function on the stack. 17 | sp = push(sp, f as usize); 18 | sp = push(sp, 0xdeaddeaddead0cfa); 19 | 20 | #[naked] 21 | unsafe extern "C" fn trampoline() { 22 | asm!( 23 | // Stops unwinding/backtracing at this function. 24 | ".cfi_undefined rip", 25 | "call [rsp + 8]", 26 | options(noreturn) 27 | ) 28 | } 29 | 30 | // Save frame pointer 31 | let frame = sp; 32 | sp = push(sp, trampoline as usize); // call instruction 33 | sp = push(sp, frame as usize); 34 | 35 | // Set rbx starting value to 0 36 | sp = push(sp, 0); 37 | 38 | sp 39 | } 40 | 41 | #[inline(always)] 42 | pub unsafe fn swap_and_link_stacks( 43 | arg: usize, 44 | new_sp: *mut usize, 45 | sp: *const usize, 46 | ) -> (usize, *mut usize) { 47 | let ret_val: usize; 48 | let ret_sp: *mut usize; 49 | 50 | asm!( 51 | // Save the continuation spot after we jump back here to be after this asm block. 52 | "lea rax, [rip + 1337f]", 53 | "push rax", 54 | // Save the frame pointer as it can't be marked as an output register. 55 | "push rbp", 56 | // rbx is is used internally by LLVM and can't be marked as an output register. 57 | "push rbx", 58 | // Link stacks by swapping the CFA value 59 | "mov [rcx - 16], rsp", 60 | // Set the current pointer as the 2nd element (rsi) of the function we are jumping to. 61 | "mov rsi, rsp", 62 | // Change the stack pointer to the passed value. 63 | "mov rsp, rdx", 64 | // Restore rbx 65 | "pop rbx", 66 | // Set the frame pointer according to the new stack. 67 | "pop rbp", 68 | // Get the next instruction to jump to. 69 | "pop rax", 70 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 71 | "jmp rax", 72 | "1337:", 73 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 74 | // The compiler will optimise this out and just save the registers it actually knows it must. 
75 | inout("rcx") sp => _, 76 | inout("rdx") new_sp => _, 77 | inout("rdi") arg => ret_val, // 1st argument to called function 78 | out("rsi") ret_sp, // 2nd argument to called function 79 | out("rax") _, 80 | 81 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 82 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 83 | 84 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 85 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 86 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 87 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 88 | ); 89 | 90 | (ret_val, ret_sp) 91 | } 92 | 93 | /// Swap between two stacks. 94 | /// `new_sp` is the stack we are jumping to. This stack needs to have at the top: 95 | /// 1. Stack frame pointer 96 | /// 2. Pointer to the next instruction to execute on the new stack 97 | /// If the pointer points to an `extern "C"` function then the `arg` element is forwarded to it 98 | /// through the `rdi` register. 99 | /// 100 | /// This function also pushes the stack pointer and next instruction to the current stack. 101 | /// When we jump back to it, it will return the content of the new `arg` as ret_val. 102 | /// TODO: Document in more detail the exact flow as this is super confusing. 103 | #[inline(always)] 104 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 105 | let ret_val: usize; 106 | let ret_sp: *mut usize; 107 | 108 | asm!( 109 | // Save the continuation spot after we jump back here to be after this asm block. 110 | "lea rax, [rip + 1337f]", 111 | "push rax", 112 | // Save the frame pointer as it can't be marked as an output register. 113 | "push rbp", 114 | // rbx is is used internally by LLVM and can't be marked as an output register. 115 | "push rbx", 116 | // Set the current pointer as the 2nd element (rsi) of the function we are jumping to. 117 | "mov rsi, rsp", 118 | // Change the stack pointer to the passed value. 119 | "mov rsp, rdx", 120 | // Restore rbx 121 | "pop rbx", 122 | // Set the frame pointer according to the new stack. 123 | "pop rbp", 124 | // Get the next instruction to jump to. 125 | "pop rax", 126 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 127 | "jmp rax", 128 | "1337:", 129 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 130 | // The compiler will optimise this out and just save the registers it actually knows it must. 
131 |         inout("rdx") new_sp => _,
132 |         inout("rdi") arg => ret_val, // 1st argument to called function
133 |         out("rsi") ret_sp, // 2nd argument to called function
134 |         out("rax") _, out("rcx") _,
135 | 
136 |         out("r8") _, out("r9") _, out("r10") _, out("r11") _,
137 |         out("r12") _, out("r13") _, out("r14") _, out("r15") _,
138 | 
139 |         out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _,
140 |         out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _,
141 |         out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _,
142 |         out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _,
143 |     );
144 | 
145 |     (ret_val, ret_sp)
146 | }

--------------------------------------------------------------------------------
/switcheroo/src/arch/windows_x64.rs:
--------------------------------------------------------------------------------
 1 | use crate::stack;
 2 | use core::arch::asm;
 3 | 
 4 | pub unsafe fn init<S: stack::Stack>(
 5 |     stack: &S,
 6 |     f: unsafe extern "C" fn(usize, *mut usize),
 7 | ) -> *mut usize {
 8 |     unsafe fn push(mut sp: *mut usize, val: usize) -> *mut usize {
 9 |         sp = sp.offset(-1);
10 |         *sp = val;
11 |         sp
12 |     }
13 | 
14 |     let mut sp = stack.bottom();
15 | 
16 |     // Save the (generator_wrapper) function on the stack.
17 |     sp = push(sp, f as usize);
18 |     sp = push(sp, 0xdeaddeaddead0cfa);
19 | 
20 |     #[naked]
21 |     #[no_mangle]
22 |     unsafe extern "C" fn trampoline() {
23 |         asm!(
24 |             // These directives will create unwind codes to link the two stacks together during stack traces.
25 |             // The assembly was carefully crafted by a painfully long process of trial and error. For the most
26 |             // part I was guessing how the stack tracing uses the Windows unwind codes and then went ahead and
27 |             // constructed appropriate seh_* directives to generate these unwind codes. The desired outcome can
28 |             // be described in different ways with seh_* directives, but after many tests this was established
29 |             // to be the most reliable one under debug and release builds. The produced unwind codes are:
30 |             //
31 |             // 0x04: UOP_PushNonVol RSP - Restore the RSP by pointing it to the previous stack and increment it
32 |             //       by 8, jumping over the stack slot holding the deallocation stack.
33 |             // 0x03: UOP_AllocSmall 16 - Increment the RSP by 16, jumping over 2 stack slots: stack limit & base.
34 |             // 0x02: UOP_PushNonVol RBX - Restore the RBX register that is used internally by LLVM and can't be
35 |             //       marked as clobbered.
36 |             // 0x01: UOP_PushNonVol RBP - Pop the previous RBP from the stack.
37 |             //
38 |             // Once the unwinder reaches this function the value on the stack is going to be the value of the
39 |             // previous RSP. After it processes the unwind codes it will look like `trampoline` was called from
40 |             // the `swap` function, because the next value on the stack is the IP value pointing back inside
41 |             // `swap`.
42 |             //
43 |             // Unlike on Unix systems, here we only need one trampoline function to achieve the same outcome.
44 |             //
45 |             // NOTE: To get the unwind codes from a Windows executable run:
46 |             // 1. rabin2.exe -P .\target\debug\examples\async.pdb > pdb.txt
47 |             // 2. Search inside the pdb.txt file to locate the `trampoline` function and note the address.
48 |             // 3. llvm-objdump -u target\debug\examples\async.exe > unwind_info.txt
49 |             // 4. Use the address from step 2 to locate the unwind codes of the `trampoline` function.
50 |             //
51 |             // TODO: Create ASCII art showing how exactly the stack looks.
52 | ".seh_proc trampoline", 53 | "nop", 54 | ".seh_pushreg rbp", 55 | "nop", 56 | ".seh_pushreg rbx", 57 | "nop", 58 | ".seh_stackalloc 16", 59 | "nop", 60 | ".seh_pushreg rsp", 61 | ".seh_endprologue", 62 | "call [rsp + 8]", 63 | "nop", 64 | "nop", 65 | ".seh_endproc", 66 | options(noreturn) 67 | ) 68 | } 69 | 70 | // Save frame pointer 71 | let frame = sp; 72 | sp = push(sp, trampoline as usize + 4); // "call [rsp + 8]" instruction 73 | sp = push(sp, frame as usize); 74 | 75 | // Set rbx starting value to 0 76 | sp = push(sp, 0); 77 | 78 | // The next few values are not really documented in Windows and we rely on this Wiki page: 79 | // https://en.wikipedia.org/wiki/Win32_Thread_Information_Block 80 | // and this file from Boost's Context library: 81 | // https://github.com/boostorg/context/blob/develop/src/asm/jump_x86_64_ms_pe_masm.asm 82 | // to preserve all needed information for Windows to be able to automatically extend the stack and 83 | // move the stack guard page. 84 | 85 | // Stack base 86 | sp = push(sp, stack.bottom() as usize); 87 | 88 | // Stack limit, 4 pages under the deallocation stack on Windows. 89 | sp = push(sp, stack.top() as usize); 90 | 91 | // Deallocation stack, where the actual memory address of the stack starts. 92 | // There are a few pages between the limit and here for the exception handler to have enough stack in case 93 | // of a stack overflow exception. 94 | sp = push(sp, stack.deallocation() as usize); 95 | 96 | sp 97 | } 98 | 99 | #[inline(always)] 100 | pub unsafe fn swap_and_link_stacks( 101 | arg: usize, 102 | new_sp: *mut usize, 103 | sp: *const usize, 104 | ) -> (usize, *mut usize) { 105 | let ret_val: usize; 106 | let ret_sp: *mut usize; 107 | 108 | asm!( 109 | // Save the continuation spot after we jump back here to be after this asm block. 110 | "lea rax, [rip + 1337f]", 111 | "push rax", 112 | // Save the frame pointer as it can't be marked as an output register. 113 | "push rbp", 114 | // rbx is is used internally by LLVM and can't be marked as an output register. 115 | "push rbx", 116 | 117 | // Load NT_TIB 118 | "mov r10, gs:[030h]", 119 | // Save stack base 120 | "mov rax, [r10+08h]", 121 | "push rax", 122 | // Save stack limit 123 | "mov rax, [r10+010h]", 124 | "push rax", 125 | // Save deallocation stack 126 | "mov rax, [r10+01478h]", 127 | "push rax", 128 | 129 | // Link stacks 130 | "mov [rdi - 16], rsp", 131 | 132 | // Set the current pointer as the 2nd element (rdx) of the function we are jumping to. 133 | "mov rdx, rsp", 134 | // Change the stack pointer to the passed value. 135 | "mov rsp, rsi", 136 | 137 | // Set deallocation stack 138 | "pop rax", 139 | "mov [r10+01478h], rax", 140 | // Set stack limit 141 | "pop rax", 142 | "mov [r10+010h], rax", 143 | // Set stack base 144 | "pop rax", 145 | "mov [r10+08h], rax", 146 | 147 | // Restore rbx 148 | "pop rbx", 149 | // Set the frame pointer according to the new stack. 150 | "pop rbp", 151 | // Get the next instruction to jump to. 152 | "pop rax", 153 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 154 | "jmp rax", 155 | "1337:", 156 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 157 | // The compiler will optimise this out and just save the registers it actually knows it must. 
158 | inout("rdi") sp => _, 159 | inout("rsi") new_sp => _, 160 | inout("rcx") arg => ret_val, // 1st argument to called function 161 | out("rdx") ret_sp, // 2nd argument to called function 162 | out("rax") _, 163 | 164 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 165 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 166 | 167 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 168 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 169 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 170 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 171 | ); 172 | 173 | (ret_val, ret_sp) 174 | } 175 | 176 | #[inline(always)] 177 | pub unsafe fn swap(arg: usize, new_sp: *mut usize) -> (usize, *mut usize) { 178 | let ret_val: usize; 179 | let ret_sp: *mut usize; 180 | 181 | asm!( 182 | // Save the continuation spot after we jump back here to be after this asm block. 183 | "lea rax, [rip + 1337f]", 184 | "push rax", 185 | // Save the frame pointer as it can't be marked as an output register. 186 | "push rbp", 187 | // rbx is is used internally by LLVM can't be marked as an output register. 188 | "push rbx", 189 | 190 | // Load NT_TIB 191 | "mov r10, gs:[030h]", 192 | // Save stack base 193 | "mov rax, [r10+08h]", 194 | "push rax", 195 | // Save stack limit 196 | "mov rax, [r10+010h]", 197 | "push rax", 198 | // Save deallocation stack 199 | "mov rax, [r10+01478h]", 200 | "push rax", 201 | 202 | // Set the current pointer as the 2nd element (rdx) of the function we are jumping to. 203 | "mov rdx, rsp", 204 | // Change the stack pointer to the passed value. 205 | "mov rsp, rsi", 206 | 207 | // Set deallocation stack 208 | "pop rax", 209 | "mov [r10+01478h], rax", 210 | // Set stack limit 211 | "pop rax", 212 | "mov [r10+010h], rax", 213 | // Set stack base 214 | "pop rax", 215 | "mov [r10+08h], rax", 216 | 217 | // Restore rbx 218 | "pop rbx", 219 | // Set the frame pointer according to the new stack. 220 | "pop rbp", 221 | // Get the next instruction to jump to. 222 | "pop rax", 223 | // Doing a pop & jmp instad of a ret helps us here with brench prediction (3x faster on my machine). 224 | "jmp rax", 225 | "1337:", 226 | // Mark all registers as clobbered as we don't know what the code we are jumping to is going to use. 227 | // The compiler will optimise this out and just save the registers it actually knows it must. 228 | inout("rsi") new_sp => _, 229 | inout("rcx") arg => ret_val, // 1st argument to called function 230 | out("rdx") ret_sp, // 2nd argument to called function 231 | out("rax") _, out("rdi") _, 232 | 233 | out("r8") _, out("r9") _, out("r10") _, out("r11") _, 234 | out("r12") _, out("r13") _, out("r14") _, out("r15") _, 235 | 236 | out("xmm0") _, out("xmm1") _, out("xmm2") _, out("xmm3") _, 237 | out("xmm4") _, out("xmm5") _, out("xmm6") _, out("xmm7") _, 238 | out("xmm8") _, out("xmm9") _, out("xmm10") _, out("xmm11") _, 239 | out("xmm12") _, out("xmm13") _, out("xmm14") _, out("xmm15") _, 240 | ); 241 | 242 | (ret_val, ret_sp) 243 | } 244 | -------------------------------------------------------------------------------- /switcheroo/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(naked_functions)] 2 | 3 | //! Switcheroo provides lightweight context switches in Rust. 4 | //! 5 | //! It consists of two parts: 6 | //! 1. A stack implementation (currently only providing a [fixed 8Mb stack](stack/struct.EightMbStack.html)). 7 | //! 2. 
A [generator](struct.Generator.html) implementation.
 8 | //! ## Example
 9 | //! ```
10 | //! use switcheroo::stack::*;
11 | //! use switcheroo::Generator;
12 | //!
13 | //! fn main() {
14 | //!     let stack = EightMbStack::new().unwrap();
15 | //!     let mut add_one = Generator::new(stack, |yielder, mut input| {
16 | //!         loop {
17 | //!             if input == 0 {
18 | //!                 break;
19 | //!             }
20 | //!             input = yielder.suspend(input + 1);
21 | //!         }
22 | //!     });
23 | //!
24 | //!     assert_eq!(add_one.resume(2), Some(3));
25 | //!     assert_eq!(add_one.resume(127), Some(128));
26 | //!     assert_eq!(add_one.resume(0), None);
27 | //!     assert_eq!(add_one.resume(0), None);
28 | //! }
29 | //! ```
30 | 
31 | mod arch;
32 | pub mod stack;
33 | 
34 | use std::any::Any;
35 | use std::cell::Cell;
36 | use std::marker::PhantomData;
37 | use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
38 | use std::{mem, ptr::NonNull};
39 | 
40 | // Communicates the return of the Generator.
41 | enum GeneratorOutput<Output> {
42 |     // The generator returned a regular value.
43 |     Value(Output),
44 |     // The generator finished and there are no more values to be returned.
45 |     Finished,
46 |     // The generator panicked. This value is passed to `resume_unwind` to continue the unwind
47 |     // across contexts.
48 |     Panic(Box<dyn Any + Send + 'static>), // Err part of std::thread::Result
49 | }
50 | 
51 | /// Generator wraps a closure and allows suspending its execution more than once, returning
52 | /// a value each time.
53 | ///
54 | /// If the closure finishes, each later call to [resume](struct.Generator.html#method.resume)
55 | /// will yield `None`. If the closure panics, the unwind will happen correctly across contexts.
56 | pub struct Generator<'a, Input: 'a, Output: 'a, Stack: stack::Stack> {
57 |     started: bool,
58 |     stack: Option<Stack>,
59 |     stack_ptr: Option<NonNull<usize>>,
60 |     phantom: PhantomData<(&'a (), *mut Input, *const Output)>,
61 | }
62 | 
63 | unsafe impl<'a, Input, Output, Stack> Send for Generator<'a, Input, Output, Stack>
64 | where
65 |     Input: 'a,
66 |     Output: 'a,
67 |     Stack: stack::Stack,
68 | {
69 | }
70 | 
71 | impl<'a, Input, Output, Stack> Generator<'a, Input, Output, Stack>
72 | where
73 |     Input: 'a,
74 |     Output: 'a,
75 |     Stack: stack::Stack,
76 | {
77 |     /// Create a new generator from a stack and closure.
78 |     pub fn new<F>(stack: Stack, f: F) -> Generator<'a, Input, Output, Stack>
79 |     where
80 |         F: FnOnce(&Yielder<Input, Output>, Input) + 'a,
81 |     {
82 |         // This function will be written to the new stack (by `arch::init`) as the initial
83 |         // entry point. During the `arch::swap_and_link_stacks` call it will be called with
84 |         // the correct closure passed as the first argument. This function will never return.
85 |         // Yielding back into it after `yielder.suspend_(GeneratorOutput::Finished)` was
86 |         // called would be undefined behavior.
87 |         unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(
88 |             f_ptr: usize,
89 |             stack_ptr: *mut usize,
90 |         ) where
91 |             Stack: stack::Stack,
92 |             F: FnOnce(&Yielder<Input, Output>, Input),
93 |         {
94 |             let f = std::ptr::read(f_ptr as *const F);
95 |             let (data, stack_ptr) = arch::swap(0, stack_ptr);
96 |             let input = std::ptr::read(data as *const Input);
97 |             let yielder = Yielder::new(stack_ptr);
98 | 
99 |             // It is not safe to unwind across the context switch.
100 |             // The unwind will continue in the original context.
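            // (An added note: `AssertUnwindSafe` is needed because `catch_unwind` normally
            // requires an `UnwindSafe` closure; the caught panic payload is handed straight
            // back to the original context and re-thrown there by `resume`.)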
101 |             match catch_unwind(AssertUnwindSafe(|| {
102 |                 f(&yielder, input);
103 |             })) {
104 |                 Ok(_) => yielder.suspend_(GeneratorOutput::Finished),
105 |                 Err(panic) => yielder.suspend_(GeneratorOutput::Panic(panic)),
106 |             };
107 |         }
108 | 
109 |         // Prepare the stack
110 |         let stack_ptr = unsafe { arch::init(&stack, generator_wrapper::<Input, Output, Stack, F>) };
111 | 
112 |         // f needs to live on after this function, it is part of the new context. This prevents it
113 |         // from being dropped. The drop happens inside of the `generator_wrapper()` function.
114 |         let f = mem::ManuallyDrop::new(f);
115 | 
116 |         // This call will link the stacks together with assembly directives magic, but once the
117 |         // first `arch::swap` inside `generator_wrapper` is reached it will yield back before the
118 |         // execution of the closure `f`.
119 |         // Only the next call to `resume` will start executing the closure.
120 |         let stack_ptr = unsafe {
121 |             arch::swap_and_link_stacks(
122 |                 &f as *const mem::ManuallyDrop<F> as usize,
123 |                 stack_ptr,
124 |                 stack.bottom(),
125 |             )
126 |             .1
127 |         };
128 | 
129 |         Generator {
130 |             started: false,
131 |             stack: Some(stack),
132 |             stack_ptr: Some(NonNull::new(stack_ptr).unwrap()),
133 |             phantom: PhantomData,
134 |         }
135 |     }
136 | 
137 |     /// Resume the generator, yielding the next value.
138 |     #[inline(always)]
139 |     pub fn resume(&mut self, input: Input) -> Option<Output> {
140 |         if self.stack_ptr.is_none() {
141 |             return None;
142 |         };
143 |         let stack_ptr = self.stack_ptr.unwrap();
144 | 
145 |         unsafe {
146 |             let input = mem::ManuallyDrop::new(input);
147 |             // Mark the `Generator` as started
148 |             self.started = true;
149 |             let (data_out, stack_ptr) = arch::swap(
150 |                 &input as *const mem::ManuallyDrop<Input> as usize,
151 |                 stack_ptr.as_ptr(),
152 |             );
153 | 
154 |             let output = std::ptr::read(data_out as *const GeneratorOutput<Output>);
155 |             match output {
156 |                 GeneratorOutput::Value(value) => {
157 |                     self.stack_ptr = Some(NonNull::new(stack_ptr).unwrap());
158 |                     Some(value)
159 |                 }
160 |                 GeneratorOutput::Finished => {
161 |                     self.stack_ptr = None;
162 |                     None
163 |                 }
164 |                 GeneratorOutput::Panic(panic) => {
165 |                     self.stack_ptr = None;
166 |                     resume_unwind(panic);
167 |                 }
168 |             }
169 |         }
170 |     }
171 | 
172 |     /// Returns true if the execution of the passed in closure has started.
173 |     #[inline(always)]
174 |     pub fn started(&self) -> bool {
175 |         self.started
176 |     }
177 | 
178 |     /// Returns true if the generator finished running.
179 |     #[inline(always)]
180 |     pub fn finished(&self) -> bool {
181 |         self.stack_ptr.is_none()
182 |     }
183 | 
184 |     /// Consume the generator and extract the stack.
185 |     pub fn stack(mut self) -> Stack {
186 |         self.stack.take().unwrap()
187 |         // Drop for Generator is executed here while the stack is still alive.
188 |     }
189 | }
190 | 
191 | impl<'a, Input, Output, Stack> Drop for Generator<'a, Input, Output, Stack>
192 | where
193 |     Input: 'a,
194 |     Output: 'a,
195 |     Stack: stack::Stack,
196 | {
197 |     fn drop(&mut self) {
198 |         // If there is still data on the stack unwind it.
199 |         if self.started() && !self.finished() {
200 |             unsafe {
201 |                 let (data, _stack_ptr) = arch::swap(0, self.stack_ptr.unwrap().as_ptr());
202 |                 // We catch the unwind in the other context, but don't resume it here (just drop the panic value).
203 |                 let _panic = std::ptr::read(data as *const GeneratorOutput<Output>);
204 |             };
205 |         }
206 |     }
207 | }
208 | 
209 | /// Yielder is an interface provided to every generator through which it returns a value.
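///
/// A short sketch of the typical flow (an illustrative example added here, not part of
/// the original docs):
///
/// ```
/// use switcheroo::stack::*;
/// use switcheroo::Generator;
///
/// let stack = OneMbStack::new().unwrap();
/// let mut doubler = Generator::new(stack, |yielder, input: i32| {
///     // Each `suspend` hands a value back to `resume` and waits for the next input.
///     yielder.suspend(input * 2);
/// });
/// assert_eq!(doubler.resume(4), Some(8));
/// assert_eq!(doubler.resume(0), None);
/// ```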
210 | pub struct Yielder<Input, Output> {
211 |     stack_ptr: Cell<*mut usize>,
212 |     phantom: PhantomData<(*const Input, *mut Output)>,
213 | }
214 | 
215 | impl<Input, Output> Yielder<Input, Output> {
216 |     fn new(stack_ptr: *mut usize) -> Yielder<Input, Output> {
217 |         Yielder {
218 |             stack_ptr: Cell::new(stack_ptr),
219 |             phantom: PhantomData,
220 |         }
221 |     }
222 | 
223 |     /// Suspends the generator and returns `Some(val)` from the `resume()` invocation that resumed
224 |     /// the generator.
225 |     #[inline(always)]
226 |     pub fn suspend(&self, val: Output) -> Input {
227 |         unsafe { self.suspend_(GeneratorOutput::Value(val)) }
228 |     }
229 | 
230 |     #[inline(always)]
231 |     unsafe fn suspend_(&self, out: GeneratorOutput<Output>) -> Input {
232 |         let out = mem::ManuallyDrop::new(out);
233 |         let (data, stack_ptr) = arch::swap(
234 |             &out as *const mem::ManuallyDrop<GeneratorOutput<Output>> as usize,
235 |             self.stack_ptr.get(),
236 |         );
237 | 
238 |         // Set the return point. This needs to happen before an unwind is triggered.
239 |         self.stack_ptr.set(stack_ptr);
240 | 
241 |         // We use the data pointer to signal an unwind trigger.
242 |         // It should never be 0 otherwise.
243 |         if data == 0 {
244 |             resume_unwind(Box::new(()));
245 |         }
246 | 
247 |         std::ptr::read(data as *const Input)
248 |     }
249 | }

--------------------------------------------------------------------------------
/switcheroo/src/stack/eight_mb.rs:
--------------------------------------------------------------------------------
 1 | use std::io::Error;
 2 | use std::mem::size_of;
 3 | use std::ptr;
 4 | 
 5 | #[cfg(target_family = "unix")]
 6 | use libc::{mmap, MAP_ANON, MAP_FAILED, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, PROT_WRITE};
 7 | 
 8 | #[cfg(target_family = "windows")]
 9 | use winapi::ctypes::c_void;
10 | #[cfg(target_family = "windows")]
11 | use winapi::um::memoryapi::{VirtualAlloc, VirtualFree, VirtualProtect};
12 | #[cfg(target_family = "windows")]
13 | use winapi::um::winnt::{
14 |     MEM_COMMIT, MEM_RELEASE, MEM_RESERVE, PAGE_GUARD, PAGE_NOACCESS, PAGE_READWRITE,
15 | };
16 | 
17 | use super::Stack;
18 | 
19 | /// An 8 Mb stack.
20 | ///
21 | /// On Unix platforms this will simply reserve 8 Mb of memory to be used as a stack (without a
22 | /// guard page). Mmap will be called with the MAP_NORESERVE flag to allow us to overcommit on stack
23 | /// allocations.
24 | ///
25 | /// On Windows it will reserve 8 Mb of memory + 4 pages on top for the exception handler. Only the
26 | /// bottom of the stack will be marked as committed, while the rest will be reserved. This allows us
27 | /// to overcommit on stack allocations. The memory is specifically set up with guard pages in a way
28 | /// that Windows expects it to be, so that the OS can automatically grow and commit memory.
29 | ///
30 | /// Even though 8 Mb may sound like a lot, on all modern operating systems only pages that have
31 | /// something written to them consume physical memory; the rest is cheap virtual memory.
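///
/// A small usage sketch (an illustrative example added here, not from the original docs):
///
/// ```
/// use switcheroo::stack::*;
///
/// let stack = EightMbStack::new().unwrap();
/// // Stacks grow down, so the bottom pointer sits above the top pointer.
/// assert!(stack.bottom() > stack.top());
/// ```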
32 | pub struct EightMbStack(*mut usize); 33 | 34 | unsafe impl Send for EightMbStack {} 35 | 36 | const EIGHT_MB: usize = 8 * 1024 * 1024; 37 | #[cfg(target_family = "windows")] 38 | const EXCEPTION_ZONE: usize = 4 * 4096; 39 | 40 | impl Stack for EightMbStack { 41 | #[cfg(target_family = "unix")] 42 | fn new() -> Result { 43 | let ptr = unsafe { 44 | mmap( 45 | ptr::null_mut(), 46 | EIGHT_MB, 47 | PROT_READ | PROT_WRITE, 48 | MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, 49 | -1, 50 | 0, 51 | ) 52 | }; 53 | if ptr == MAP_FAILED { 54 | Err(Error::last_os_error()) 55 | } else { 56 | Ok(Self(ptr as *mut usize)) 57 | } 58 | } 59 | 60 | #[cfg(target_family = "unix")] 61 | fn bottom(&self) -> *mut usize { 62 | unsafe { self.0.add(EIGHT_MB / size_of::()) } 63 | } 64 | #[cfg(target_family = "unix")] 65 | fn top(&self) -> *mut usize { 66 | self.0 67 | } 68 | #[cfg(target_family = "unix")] 69 | fn deallocation(&self) -> *mut usize { 70 | panic!("Not used on unix"); 71 | } 72 | 73 | // Windows 74 | #[cfg(target_family = "windows")] 75 | fn new() -> Result { 76 | unsafe { 77 | // Add extra 16 Kb on top of the stack to be used by the exception handler in case of a stack overflow. 78 | // Cast pointer to `usize`, because calculating offsets with `c_void` is impossible. Sometimes it has a 79 | // size of 0, sometimes it decides to be 1 byte. 80 | let ptr = VirtualAlloc( 81 | ptr::null_mut(), 82 | EIGHT_MB + EXCEPTION_ZONE, 83 | MEM_RESERVE, 84 | PAGE_NOACCESS, 85 | ) as *mut usize; 86 | if ptr.is_null() { 87 | return Err(Error::last_os_error()); 88 | } 89 | // Commit 3 bottom pages (1 read/write and 2 guard pages) 90 | let bottom_2 = VirtualAlloc( 91 | ptr.add((EIGHT_MB + EXCEPTION_ZONE - 3 * 4096) / size_of::()) as *mut c_void, 92 | 3 * 4096, 93 | MEM_COMMIT, 94 | PAGE_GUARD | PAGE_READWRITE, 95 | ); 96 | if bottom_2.is_null() { 97 | return Err(Error::last_os_error()); 98 | } 99 | 100 | let old_protect: u32 = 0; 101 | let bottom_1 = VirtualProtect( 102 | ptr.add((EIGHT_MB + EXCEPTION_ZONE - 1 * 4096) / size_of::()) as *mut c_void, 103 | 1 * 4096, 104 | PAGE_READWRITE, 105 | &old_protect as *const u32 as *mut u32, 106 | ); 107 | if bottom_1 == 0 { 108 | return Err(Error::last_os_error()); 109 | } 110 | 111 | Ok(Self(ptr as *mut usize)) 112 | } 113 | } 114 | 115 | #[cfg(target_family = "windows")] 116 | fn bottom(&self) -> *mut usize { 117 | unsafe { self.0.add((EIGHT_MB + EXCEPTION_ZONE) / size_of::()) } 118 | } 119 | #[cfg(target_family = "windows")] 120 | fn top(&self) -> *mut usize { 121 | unsafe { self.0.add(EXCEPTION_ZONE / size_of::()) } 122 | } 123 | #[cfg(target_family = "windows")] 124 | fn deallocation(&self) -> *mut usize { 125 | self.0 126 | } 127 | } 128 | 129 | #[cfg(target_family = "unix")] 130 | impl Drop for EightMbStack { 131 | fn drop(&mut self) { 132 | let result = unsafe { libc::munmap(self.0 as *mut libc::c_void, EIGHT_MB) }; 133 | debug_assert_eq!(result, 0); 134 | } 135 | } 136 | 137 | #[cfg(target_family = "windows")] 138 | impl Drop for EightMbStack { 139 | fn drop(&mut self) { 140 | let result = unsafe { VirtualFree(self.0 as *mut winapi::ctypes::c_void, 0, MEM_RELEASE) }; 141 | debug_assert_ne!(result, 0); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /switcheroo/src/stack/mod.rs: -------------------------------------------------------------------------------- 1 | //! Different stack implementations (currently only contains a 8 Mb stack). 
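//!
//! A short sketch of allocating the stacks exported from this module (an illustrative
//! example added here; both `EightMbStack` and `OneMbStack` are re-exported below):
//!
//! ```
//! use switcheroo::stack::*;
//!
//! let big = EightMbStack::new().unwrap();
//! let small = OneMbStack::new().unwrap();
//! ```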

mod eight_mb;
mod one_mb;
pub use eight_mb::EightMbStack;
pub use one_mb::OneMbStack;

/// An implementation of this trait will be accepted by a [generator](struct.Generator.html) as a
/// valid stack. Most of the functions provided here are straightforward, except for
/// [deallocation](trait.Stack.html#tymethod.deallocation), which is a Windows-only construct.
///
/// Windows reserves a few pages above the stack top, so that if a stack overflow exception is
/// triggered the handler still has enough stack left to process it. The name comes from the fact
/// that it points to the topmost address of the memory area designated to the stack and is used
/// as the pointer when freeing/deallocating the stack.
pub trait Stack: Sized + Send {
    /// Returns a new stack.
    fn new() -> Result<Self, std::io::Error>;

    /// Returns a pointer to the bottom of the stack.
    fn bottom(&self) -> *mut usize;

    /// Returns a pointer to the top of the stack.
    fn top(&self) -> *mut usize;

    /// Returns a pointer to the deallocation stack (a Windows construct).
    fn deallocation(&self) -> *mut usize;
}
--------------------------------------------------------------------------------
/switcheroo/src/stack/one_mb.rs:
--------------------------------------------------------------------------------
use std::io::Error;
use std::mem::size_of;
use std::ptr;

#[cfg(target_family = "unix")]
use libc::{mmap, MAP_ANON, MAP_FAILED, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, PROT_WRITE};

#[cfg(target_family = "windows")]
use winapi::ctypes::c_void;
#[cfg(target_family = "windows")]
use winapi::um::memoryapi::{VirtualAlloc, VirtualFree, VirtualProtect};
#[cfg(target_family = "windows")]
use winapi::um::winnt::{
    MEM_COMMIT, MEM_RELEASE, MEM_RESERVE, PAGE_GUARD, PAGE_NOACCESS, PAGE_READWRITE,
};

use super::Stack;

/// A 1 Mb stack (1 Mb + 4 Kb).
///
/// On Unix platforms this will simply reserve 1 Mb + 4 Kb of memory to be used as a stack (without
/// a guard page). Mmap will be called with the MAP_NORESERVE flag to allow us to overcommit on
/// stack allocations.
///
/// On Windows it will reserve 1 Mb + 4 Kb of memory + 4 pages on top for the exception handler.
/// Only the bottom of the stack will be marked as committed, while the rest will be reserved. This
/// allows us to overcommit on stack allocations. The memory is specifically set up with guard
/// pages in the way that Windows expects it to be, so that the OS can automatically grow and
/// commit memory.
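///
/// # Example
///
/// A minimal, illustrative sketch; the API is identical to `EightMbStack`:
///
/// ```
/// use switcheroo::stack::{OneMbStack, Stack};
///
/// let stack = OneMbStack::new().expect("could not reserve stack memory");
/// // The stack grows downwards, so `bottom()` is the highest usable address.
/// assert!(stack.bottom() > stack.top());
/// ```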
pub struct OneMbStack(*mut usize);

unsafe impl Send for OneMbStack {}

const ONE_MB: usize = 1024 * 1024 + 4096;
#[cfg(target_family = "windows")]
const EXCEPTION_ZONE: usize = 4 * 4096;

impl Stack for OneMbStack {
    #[cfg(target_family = "unix")]
    fn new() -> Result<Self, Error> {
        let ptr = unsafe {
            mmap(
                ptr::null_mut(),
                ONE_MB,
                PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
                -1,
                0,
            )
        };
        if ptr == MAP_FAILED {
            Err(Error::last_os_error())
        } else {
            Ok(Self(ptr as *mut usize))
        }
    }

    #[cfg(target_family = "unix")]
    fn bottom(&self) -> *mut usize {
        unsafe { self.0.add(ONE_MB / size_of::<usize>()) }
    }
    #[cfg(target_family = "unix")]
    fn top(&self) -> *mut usize {
        self.0
    }
    #[cfg(target_family = "unix")]
    fn deallocation(&self) -> *mut usize {
        panic!("Not used on unix");
    }

    // Windows
    #[cfg(target_family = "windows")]
    fn new() -> Result<Self, Error> {
        unsafe {
            // Add an extra 16 Kb on top of the stack to be used by the exception handler in case
            // of a stack overflow. Cast the pointer to `usize`, because calculating offsets with
            // `c_void` is impossible. Sometimes it has a size of 0, sometimes it decides to be 1 byte.
            let ptr = VirtualAlloc(
                ptr::null_mut(),
                ONE_MB + EXCEPTION_ZONE,
                MEM_RESERVE,
                PAGE_NOACCESS,
            ) as *mut usize;
            if ptr.is_null() {
                return Err(Error::last_os_error());
            }
            // Commit the 3 bottom pages (1 read/write and 2 guard pages)
            let bottom_2 = VirtualAlloc(
                ptr.add((ONE_MB + EXCEPTION_ZONE - 3 * 4096) / size_of::<usize>()) as *mut c_void,
                3 * 4096,
                MEM_COMMIT,
                PAGE_GUARD | PAGE_READWRITE,
            );
            if bottom_2.is_null() {
                return Err(Error::last_os_error());
            }

            let mut old_protect: u32 = 0;
            let bottom_1 = VirtualProtect(
                ptr.add((ONE_MB + EXCEPTION_ZONE - 4096) / size_of::<usize>()) as *mut c_void,
                4096,
                PAGE_READWRITE,
                &mut old_protect as *mut u32,
            );
            if bottom_1 == 0 {
                return Err(Error::last_os_error());
            }

            Ok(Self(ptr as *mut usize))
        }
    }

    #[cfg(target_family = "windows")]
    fn bottom(&self) -> *mut usize {
        unsafe { self.0.add((ONE_MB + EXCEPTION_ZONE) / size_of::<usize>()) }
    }
    #[cfg(target_family = "windows")]
    fn top(&self) -> *mut usize {
        unsafe { self.0.add(EXCEPTION_ZONE / size_of::<usize>()) }
    }
    #[cfg(target_family = "windows")]
    fn deallocation(&self) -> *mut usize {
        self.0
    }
}

#[cfg(target_family = "unix")]
impl Drop for OneMbStack {
    fn drop(&mut self) {
        let result = unsafe { libc::munmap(self.0 as *mut libc::c_void, ONE_MB) };
        debug_assert_eq!(result, 0);
    }
}

#[cfg(target_family = "windows")]
impl Drop for OneMbStack {
    fn drop(&mut self) {
        let result = unsafe { VirtualFree(self.0 as *mut winapi::ctypes::c_void, 0, MEM_RELEASE) };
        debug_assert_ne!(result, 0);
    }
}
--------------------------------------------------------------------------------
/switcheroo/tests/stack_test.rs:
--------------------------------------------------------------------------------
use std::io::Error;

use switcheroo::stack::*;

#[test]
fn create_8_mb_stack() -> Result<(), Error> {
    EightMbStack::new()?;
    Ok(())
}

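// An illustrative companion sketch, mirroring the 8 Mb smoke test above
// for the 1 Mb stack exported by the same module.
#[test]
fn create_1_mb_stack() -> Result<(), Error> {
    OneMbStack::new()?;
    Ok(())
}
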
#[test]
fn create_300k_8_mb_stacks() {
    // Uses around 4 Gb of committed memory
    let mut stacks = vec![];
    for _i in 0..300_000 {
        let stack = EightMbStack::new();
        assert!(stack.is_ok());
        stacks.push(stack);
    }
}
--------------------------------------------------------------------------------
/switcheroo/tests/switch_test.rs:
--------------------------------------------------------------------------------
use switcheroo::stack::*;
use switcheroo::Generator;

#[test]
fn switch_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |yielder, mut input| {
        println!("Sometimes println doesn't touch all pages on windows");
        loop {
            if input == 0 {
                break;
            }
            input = yielder.suspend(input + 1);
        }
    });
    assert_eq!(add_one.resume(2), Some(3));
    assert_eq!(add_one.resume(127), Some(128));
    assert_eq!(add_one.resume(-1), Some(0));
    assert_eq!(add_one.resume(0), None);
    assert_eq!(add_one.resume(0), None);
    assert_eq!(add_one.resume(0), None);
}

#[test]
fn extend_small_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut blow_stack = Generator::new(stack, |yielder, input| {
        rec(input);
        yielder.suspend(Some(0));
    });
    // This will use 7 Mb of stack, more than the first 4 Kb of committed memory on Windows
    blow_stack.resume(7_000);
}

// Uses 1 Kb of stack per iteration
fn rec(n: u64) -> u8 {
    let x: [u8; 1024] = [1; 1024];
    if n < 1 {
        x[0]
    } else {
        rec(n - 1)
    }
}

#[test]
#[should_panic]
fn panic_on_different_stack() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |_yielder, mut _input| {
        panic!("Oops");
    });
    let _: u32 = add_one.resume(0).unwrap();
}

#[test]
fn drop_stack_with_unwind() {
    let stack = EightMbStack::new().unwrap();
    let mut add_one = Generator::new(stack, |yielder, mut _input| {
        let _local_variable = Box::new(0);
        yielder.suspend(());
        yielder.suspend(());
        yielder.suspend(());
    });
    let _: () = add_one.resume(()).unwrap();
}
--------------------------------------------------------------------------------
/tests/async_test.rs:
--------------------------------------------------------------------------------
use async_executor::LocalExecutor;
use async_wormhole::AsyncWormhole;
use backtrace::Backtrace;
use switcheroo::stack::*;

#[test]
fn async_yield() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        println!("The println function blows up the stack more than 4 Kb.");
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        42
    })
    .unwrap();
    let output = futures::executor::block_on(task);
    assert_eq!(output, 42);
}

#[test]
#[should_panic]
fn async_yield_panics() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        panic!();
    })
    .unwrap();
    futures::executor::block_on(task);
}

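// An illustrative sketch (assumed, hand-rolled yield-once future): the future handed to
// `async_suspend` can itself be pending before completing, exercising a genuine
// suspension and wake-up of the wormhole task.
#[test]
fn async_yield_pending_once() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async {
            let mut ready = false;
            futures::future::poll_fn(move |cx| {
                if ready {
                    std::task::Poll::Ready(7)
                } else {
                    // Pend once, but schedule an immediate wake-up so the
                    // executor polls us again.
                    ready = true;
                    cx.waker().wake_by_ref();
                    std::task::Poll::Pending
                }
            })
            .await
        });
        assert_eq!(x, 7);
        x
    })
    .unwrap();
    assert_eq!(futures::executor::block_on(task), 7);
}
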
#[test]
fn async_yield_drop_without_poll() {
    let stack = EightMbStack::new().unwrap();
    AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        let x = yielder.async_suspend(async { 5 });
        assert_eq!(x, 5);
        let y = yielder.async_suspend(async { true });
        assert_eq!(y, true);
        42
    })
    .unwrap();
}

#[test]
fn async_yield_drop_with_one_poll() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |mut yielder| {
        // `pending!` returns `Poll::Pending` without scheduling a wake-up,
        // so the task is left suspended here.
        yielder.async_suspend(async { futures::pending!() });
        println!("Never gets here");
    })
    .unwrap();

    let ex = LocalExecutor::new();
    ex.spawn(task).detach();
    // Poll the task exactly once, then drop it while it is still suspended.
    ex.try_tick();
}

#[test]
fn backtrace_test() {
    let stack = EightMbStack::new().unwrap();
    let task = AsyncWormhole::<_, _, fn()>::new(stack, |_yielder| {
        let _ = Backtrace::new_unresolved();
    })
    .unwrap();

    futures::executor::block_on(task);
}
--------------------------------------------------------------------------------