├── .github └── workflows │ └── ci.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── ci ├── Cargo.toml └── src │ ├── ci.rs │ ├── commands │ ├── clippy.rs │ ├── compile.rs │ ├── compile_check.rs │ ├── doc.rs │ ├── doc_check.rs │ ├── doc_test.rs │ ├── format.rs │ ├── lints.rs │ ├── loom.rs │ ├── loom_check.rs │ ├── loom_clippy.rs │ ├── loom_test.rs │ └── mod.rs │ ├── main.rs │ └── prepare.rs ├── src ├── job.rs ├── latch.rs ├── lib.rs ├── queue.rs ├── scope.rs ├── thread_pool.rs └── util.rs └── tests ├── general.rs ├── loom.rs └── tests.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Cargo Build & Test 2 | 3 | on: 4 | merge_group: 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 30 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/cache@v4 17 | with: 18 | path: | 19 | ~/.cargo/bin/ 20 | ~/.cargo/registry/index/ 21 | ~/.cargo/registry/cache/ 22 | ~/.cargo/git/db/ 23 | target/ 24 | key: ${{ runner.os }}-build-${{ hashFiles('**/Cargo.toml') }} 25 | - uses: dtolnay/rust-toolchain@stable 26 | - name: Build & run tests 27 | run: cargo run -p ci -- compile 28 | env: 29 | CARGO_INCREMENTAL: 0 30 | RUSTFLAGS: "-C debuginfo=0 -D warnings" 31 | lints: 32 | runs-on: ubuntu-latest 33 | timeout-minutes: 30 34 | steps: 35 | - uses: actions/checkout@v4 36 | - uses: actions/cache@v4 37 | with: 38 | path: | 39 | ~/.cargo/bin/ 40 | ~/.cargo/registry/index/ 41 | ~/.cargo/registry/cache/ 42 | ~/.cargo/git/db/ 43 | target/ 44 | key: ${{ runner.os }}-lints-${{ hashFiles('**/Cargo.toml') }} 45 | - uses: dtolnay/rust-toolchain@stable 46 | with: 47 | components: rustfmt, clippy 48 | - name: Lints 49 | run: cargo run -p ci -- lints 50 | 51 | docs: 52 | runs-on: ubuntu-latest 53 | timeout-minutes: 30 54 | steps: 55 | - uses: actions/checkout@v4 56 | - uses: actions/cache@v4 57 | with: 58 | path: | 59 
| ~/.cargo/bin/ 60 | ~/.cargo/registry/index/ 61 | ~/.cargo/registry/cache/ 62 | ~/.cargo/git/db/ 63 | target/ 64 | key: ${{ runner.os }}-docs-${{ hashFiles('**/Cargo.toml') }} 65 | - uses: dtolnay/rust-toolchain@stable 66 | - name: Build and check doc 67 | run: cargo run -p ci -- doc 68 | env: 69 | CARGO_INCREMENTAL: 0 70 | RUSTFLAGS: "-C debuginfo=0" 71 | 72 | test-loom: 73 | runs-on: ubuntu-latest 74 | timeout-minutes: 30 75 | steps: 76 | - uses: actions/checkout@v4 77 | - uses: actions/cache@v4 78 | with: 79 | path: | 80 | ~/.cargo/bin/ 81 | ~/.cargo/registry/index/ 82 | ~/.cargo/registry/cache/ 83 | ~/.cargo/git/db/ 84 | target/ 85 | key: ${{ runner.os }}-test-loom-${{ hashFiles('**/Cargo.toml') }} 86 | - uses: dtolnay/rust-toolchain@stable 87 | - name: Run loom tests 88 | run: cargo run -p ci -- loom 89 | env: 90 | CARGO_INCREMENTAL: 0 91 | RUSTFLAGS: "-C debuginfo=0" 92 | 93 | typos: 94 | runs-on: ubuntu-latest 95 | timeout-minutes: 30 96 | steps: 97 | - uses: actions/checkout@v4 98 | - name: Check for typos 99 | uses: crate-ci/typos@v1.28.4 100 | - name: Typos info 101 | if: failure() 102 | run: | 103 | echo 'To fix typos, please run `typos -w`' 104 | echo 'To check for a diff, run `typos`' 105 | echo 'You can find typos here: https://crates.io/crates/typos' 106 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "forte" 3 | version = "1.0.0-alpha.1" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | description = "A low-overhead thread-pool with support for non-static async closures" 7 | repository = "https://github.com/NthTensor/Forte" 8 | 9 | [workspace] 10 | resolver = "2" 11 | members = [ 12 | "ci" 13 | ] 14 | 15 | [dependencies] 16 | async-task = "4.7.1" 17 | crossbeam-utils = "0.8.21" 18 | crossbeam-queue = "0.3.12" 19 | tracing = "0.1.41" 20 | tracing-subscriber = "0.3.19" 21 | 22 | 
[lints.clippy] 23 | doc_markdown = "warn" 24 | manual_let_else = "warn" 25 | match_same_arms = "warn" 26 | redundant_closure_for_method_calls = "warn" 27 | redundant_else = "warn" 28 | semicolon_if_nothing_returned = "warn" 29 | undocumented_unsafe_blocks = "warn" 30 | unwrap_or_default = "warn" 31 | 32 | ptr_as_ptr = "warn" 33 | ptr_cast_constness = "warn" 34 | ref_as_ptr = "warn" 35 | 36 | std_instead_of_core = "warn" 37 | std_instead_of_alloc = "warn" 38 | alloc_instead_of_core = "warn" 39 | 40 | [lints.rust] 41 | missing_docs = "warn" 42 | unsafe_op_in_unsafe_fn = "warn" 43 | unused_qualifications = "warn" 44 | 45 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(loom)'] } 46 | 47 | # Add loom as a dependency for tests 48 | [target.'cfg(loom)'.dependencies] 49 | loom = "0.7" 50 | 51 | [[test]] 52 | name = "integration" 53 | path = "tests/tests.rs" 54 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 The Rust Project Developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Forte 2 | 3 | An async-compatible thread-pool aiming for "speed through simplicity". 4 | 5 | Forte is a parallel & async work scheduler designed to accommodate very large workloads with many short-lived tasks. It replicates the `rayon_core` api but with native support for futures and async tasks. 6 | Its design was prompted by the needs of the bevy game engine, but should be applicable to any problem that involves running both synchronous and asynchronous work concurrently. 7 | 8 | The thread-pool provided by this crate does not employ work-stealing. 9 | Forte instead uses "Heartbeat Scheduling", an alternative load-balancing technique that (theoretically) provides provably small overheads and good utilization. 10 | The end effect is that work is only parallelized every so often, allowing more work to be done sequentially on each thread and amortizing the synchronization overhead. 11 | 12 | # Acknowledgments 13 | 14 | Large portions of the code are direct ports from various versions of `rayon_core`, with minor simplifications and improvements. 15 | We also relied upon `chili` and `spice` for reference while writing the heartbeat scheduling. 16 | Support for futures is based on an approach sketched out by members of the `rayon` community to whom we are deeply indebted. 17 | 18 | # License 19 | 20 | Forte is distributed under the terms of both the MIT license and the Apache License (Version 2.0). 21 | See LICENSE-APACHE and LICENSE-MIT for details. 
22 | Opening a pull request is assumed to signal agreement with these licensing terms. 23 | -------------------------------------------------------------------------------- /ci/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ci" 3 | edition = "2021" 4 | description = "Tool that enables running CI checks locally." 5 | publish = false 6 | license = "MIT OR Apache-2.0" 7 | 8 | [dependencies] 9 | argh = "0.1" 10 | xshell = "0.2" 11 | bitflags = "2.3" 12 | 13 | [lints.clippy] 14 | doc_markdown = "warn" 15 | manual_let_else = "warn" 16 | match_same_arms = "warn" 17 | redundant_closure_for_method_calls = "warn" 18 | redundant_else = "warn" 19 | semicolon_if_nothing_returned = "warn" 20 | undocumented_unsafe_blocks = "warn" 21 | unwrap_or_default = "warn" 22 | 23 | ptr_as_ptr = "warn" 24 | ptr_cast_constness = "warn" 25 | ref_as_ptr = "warn" 26 | 27 | std_instead_of_core = "warn" 28 | std_instead_of_alloc = "warn" 29 | alloc_instead_of_core = "warn" 30 | 31 | [lints.rust] 32 | missing_docs = "warn" 33 | unsafe_code = "deny" 34 | unsafe_op_in_unsafe_fn = "warn" 35 | unused_qualifications = "warn" 36 | -------------------------------------------------------------------------------- /ci/src/ci.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | commands, 3 | prepare::{Flag, Prepare, PreparedCommand}, 4 | }; 5 | use argh::FromArgs; 6 | 7 | /// The CI command line tool for Forte. 8 | #[derive(FromArgs)] 9 | pub struct CI { 10 | #[argh(subcommand)] 11 | command: Option, 12 | 13 | /// continue running commands even if one fails. 14 | #[argh(switch)] 15 | keep_going: bool, 16 | } 17 | 18 | impl CI { 19 | /// Runs the specified commands or all commands if none are specified. 20 | /// 21 | /// When run locally, results may differ from actual CI runs triggered by `.github/workflows/ci.yml`. 
22 | /// This is usually related to differing toolchains and configuration. 23 | pub fn run(self) { 24 | let sh = xshell::Shell::new().unwrap(); 25 | 26 | let prepared_commands = self.prepare(&sh); 27 | 28 | let mut failures = vec![]; 29 | 30 | for command in prepared_commands { 31 | // If the CI test is to be executed in a subdirectory, we move there before running the command. 32 | // This will automatically move back to the original directory once dropped. 33 | let _subdir_hook = command.subdir.map(|path| sh.push_dir(path)); 34 | 35 | // Execute each command, checking if it returned an error. 36 | if command.command.envs(command.env_vars).run().is_err() { 37 | let name = command.name; 38 | let message = command.failure_message; 39 | 40 | if self.keep_going { 41 | // We use bullet points here because there can be more than one error. 42 | failures.push(format!("- {name}: {message}")); 43 | } else { 44 | failures.push(format!("{name}: {message}")); 45 | break; 46 | } 47 | } 48 | } 49 | 50 | // Log errors at the very end. 
51 | if !failures.is_empty() { 52 | let failures = failures.join("\n"); 53 | 54 | panic!( 55 | "One or more CI commands failed:\n\ 56 | {failures}" 57 | ); 58 | } 59 | } 60 | 61 | fn prepare<'a>(&self, sh: &'a xshell::Shell) -> Vec> { 62 | let mut flags = Flag::empty(); 63 | 64 | if self.keep_going { 65 | flags |= Flag::KEEP_GOING; 66 | } 67 | 68 | match &self.command { 69 | Some(command) => command.prepare(sh, flags), 70 | None => { 71 | // Note that we are running the subcommands directly rather than using any aliases 72 | let mut cmds = vec![]; 73 | cmds.append(&mut commands::FormatCommand::default().prepare(sh, flags)); 74 | cmds.append(&mut commands::ClippyCommand::default().prepare(sh, flags)); 75 | cmds.append(&mut commands::LintsCommand::default().prepare(sh, flags)); 76 | cmds.append(&mut commands::CompileCheckCommand::default().prepare(sh, flags)); 77 | cmds.append(&mut commands::DocCheckCommand::default().prepare(sh, flags)); 78 | cmds.append(&mut commands::DocTestCommand::default().prepare(sh, flags)); 79 | cmds.append(&mut commands::LoomCheckCommand::default().prepare(sh, flags)); 80 | cmds.append(&mut commands::LoomClippyCommand::default().prepare(sh, flags)); 81 | cmds.append(&mut commands::LoomTestCommand::default().prepare(sh, flags)); 82 | cmds 83 | } 84 | } 85 | } 86 | } 87 | 88 | /// The subcommands that can be run by the CI script. 
89 | #[derive(FromArgs)] 90 | #[argh(subcommand)] 91 | enum Commands { 92 | // Compile commands 93 | Compile(commands::CompileCommand), 94 | CompileCheck(commands::CompileCheckCommand), 95 | // Documentation commands 96 | Doc(commands::DocCommand), 97 | DocCheck(commands::DocCheckCommand), 98 | DocTest(commands::DocTestCommand), 99 | // Lint commands 100 | Lints(commands::LintsCommand), 101 | Clippy(commands::ClippyCommand), 102 | Format(commands::FormatCommand), 103 | // Loom commands 104 | Loom(commands::LoomCommand), 105 | LoomCheck(commands::LoomCheckCommand), 106 | LoomClippy(commands::LoomClippyCommand), 107 | LoomTest(commands::LoomTestCommand), 108 | } 109 | 110 | impl Prepare for Commands { 111 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec> { 112 | match self { 113 | // Compile commands 114 | Commands::Compile(subcommand) => subcommand.prepare(sh, flags), 115 | Commands::CompileCheck(subcommand) => subcommand.prepare(sh, flags), 116 | // Documentation commands 117 | Commands::Doc(subcommand) => subcommand.prepare(sh, flags), 118 | Commands::DocCheck(subcommand) => subcommand.prepare(sh, flags), 119 | Commands::DocTest(subcommand) => subcommand.prepare(sh, flags), 120 | // Lint commands 121 | Commands::Lints(subcommand) => subcommand.prepare(sh, flags), 122 | Commands::Clippy(subcommand) => subcommand.prepare(sh, flags), 123 | Commands::Format(subcommand) => subcommand.prepare(sh, flags), 124 | // Loom commands 125 | Commands::Loom(subcommand) => subcommand.prepare(sh, flags), 126 | Commands::LoomCheck(subcommand) => subcommand.prepare(sh, flags), 127 | Commands::LoomClippy(subcommand) => subcommand.prepare(sh, flags), 128 | Commands::LoomTest(subcommand) => subcommand.prepare(sh, flags), 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /ci/src/commands/clippy.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, 
PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Check for clippy warnings and errors. 6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "clippy")] 8 | pub struct ClippyCommand {} 9 | 10 | impl Prepare for ClippyCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec<PreparedCommand<'a>> { 12 | vec![PreparedCommand::new::<Self>( 13 | cmd!( 14 | sh, 15 | "cargo clippy --workspace --all-targets --all-features -- -Dwarnings" 16 | ), 17 | "Please fix clippy errors in output above.", 18 | )] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /ci/src/commands/compile.rs: -------------------------------------------------------------------------------- 1 | use crate::{commands::CompileCheckCommand, Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | 4 | /// Alias for running the `compile-check` subcommand. 5 | #[derive(FromArgs, Default)] 6 | #[argh(subcommand, name = "compile")] 7 | pub struct CompileCommand {} 8 | 9 | impl Prepare for CompileCommand { 10 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec<PreparedCommand<'a>> { 11 | let mut commands = vec![]; 12 | commands.append(&mut CompileCheckCommand::default().prepare(sh, flags)); 13 | commands 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /ci/src/commands/compile_check.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Checks that the project compiles. 
6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "compile-check")] 8 | pub struct CompileCheckCommand {} 9 | 10 | impl Prepare for CompileCheckCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | vec![PreparedCommand::new::( 13 | cmd!(sh, "cargo check --workspace"), 14 | "Please fix compiler errors in output above.", 15 | )] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /ci/src/commands/doc.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | commands::{DocCheckCommand, DocTestCommand}, 3 | Flag, Prepare, PreparedCommand, 4 | }; 5 | use argh::FromArgs; 6 | 7 | /// Alias for running the `doc-test` and `doc-check` subcommands. 8 | #[derive(FromArgs, Default)] 9 | #[argh(subcommand, name = "doc")] 10 | pub struct DocCommand {} 11 | 12 | impl Prepare for DocCommand { 13 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec> { 14 | let mut commands = vec![]; 15 | commands.append(&mut DocTestCommand::default().prepare(sh, flags)); 16 | commands.append(&mut DocCheckCommand::default().prepare(sh, flags)); 17 | commands 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ci/src/commands/doc_check.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Checks that all docs compile. 
6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "doc-check")] 8 | pub struct DocCheckCommand {} 9 | 10 | impl Prepare for DocCheckCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | vec![PreparedCommand::new::( 13 | cmd!( 14 | sh, 15 | "cargo doc --workspace --all-features --no-deps --document-private-items --keep-going" 16 | ), 17 | "Please fix doc warnings in output above.", 18 | ) 19 | .with_env_var("RUSTDOCFLAGS", "-D warnings")] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /ci/src/commands/doc_test.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Runs all doc tests. 6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "doc-test")] 8 | pub struct DocTestCommand {} 9 | 10 | impl Prepare for DocTestCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec> { 12 | let no_fail_fast = flags 13 | .contains(Flag::KEEP_GOING) 14 | .then_some("--no-fail-fast") 15 | .unwrap_or_default(); 16 | 17 | vec![PreparedCommand::new::( 18 | cmd!(sh, "cargo test --workspace --doc {no_fail_fast}"), 19 | "Please fix failing doc tests in output above.", 20 | )] 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /ci/src/commands/format.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Check code formatting. 
6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "format")] 8 | pub struct FormatCommand {} 9 | 10 | impl Prepare for FormatCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | vec![PreparedCommand::new::( 13 | cmd!(sh, "cargo fmt --all -- --check"), 14 | "Please run 'cargo fmt --all' to format your code.", 15 | )] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /ci/src/commands/lints.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | commands::{ClippyCommand, FormatCommand}, 3 | Flag, Prepare, PreparedCommand, 4 | }; 5 | use argh::FromArgs; 6 | 7 | /// Alias for running the `format` and `clippy` subcommands. 8 | #[derive(FromArgs, Default)] 9 | #[argh(subcommand, name = "lints")] 10 | pub struct LintsCommand {} 11 | 12 | impl Prepare for LintsCommand { 13 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec> { 14 | let mut commands = vec![]; 15 | commands.append(&mut FormatCommand::default().prepare(sh, flags)); 16 | commands.append(&mut ClippyCommand::default().prepare(sh, flags)); 17 | commands 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ci/src/commands/loom.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | commands::{LoomCheckCommand, LoomClippyCommand, LoomTestCommand}, 3 | Flag, Prepare, PreparedCommand, 4 | }; 5 | use argh::FromArgs; 6 | 7 | /// Alias for running the `loom-check`, `loom-clippy` and `loom-test` subcommands. 
8 | #[derive(FromArgs, Default)] 9 | #[argh(subcommand, name = "loom")] 10 | pub struct LoomCommand {} 11 | 12 | impl Prepare for LoomCommand { 13 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec> { 14 | let mut commands = vec![]; 15 | commands.append(&mut LoomCheckCommand::default().prepare(sh, flags)); 16 | commands.append(&mut LoomClippyCommand::default().prepare(sh, flags)); 17 | commands.append(&mut LoomTestCommand::default().prepare(sh, flags)); 18 | commands 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /ci/src/commands/loom_check.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Checks that the loom test suite compiles. 6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "loom-check")] 8 | pub struct LoomCheckCommand {} 9 | 10 | impl Prepare for LoomCheckCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | let command = PreparedCommand::new::( 13 | cmd!(sh, "cargo check --test loom"), 14 | "Please fix compiler errors in output above.", 15 | ) 16 | .with_env_var("RUSTFLAGS", "--cfg loom"); 17 | vec![command] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ci/src/commands/loom_clippy.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Checks for clippy warnings and errors in the loom test suite. 
6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "loom-clippy")] 8 | pub struct LoomClippyCommand {} 9 | 10 | impl Prepare for LoomClippyCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | let command = PreparedCommand::new::( 13 | cmd!(sh, "cargo clippy --test loom -- -Dwarnings"), 14 | "Please fix clippy errors in output above.", 15 | ) 16 | .with_env_var("RUSTFLAGS", "--cfg loom"); 17 | vec![command] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ci/src/commands/loom_test.rs: -------------------------------------------------------------------------------- 1 | use crate::{Flag, Prepare, PreparedCommand}; 2 | use argh::FromArgs; 3 | use xshell::cmd; 4 | 5 | /// Runs the loom concurrency test suite. 6 | #[derive(FromArgs, Default)] 7 | #[argh(subcommand, name = "loom-test")] 8 | pub struct LoomTestCommand {} 9 | 10 | impl Prepare for LoomTestCommand { 11 | fn prepare<'a>(&self, sh: &'a xshell::Shell, _flags: Flag) -> Vec> { 12 | let command = PreparedCommand::new::( 13 | cmd!(sh, "cargo test --test loom --release"), 14 | "Please fix compiler errors in output above.", 15 | ) 16 | .with_env_var("RUSTFLAGS", "--cfg loom") 17 | .with_env_var("LOOM_MAX_PREEMPTIONS", "3"); 18 | vec![command] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /ci/src/commands/mod.rs: -------------------------------------------------------------------------------- 1 | // Compile commands 2 | mod compile; 3 | mod compile_check; 4 | 5 | pub use compile::*; 6 | pub use compile_check::*; 7 | 8 | // Documentation commands 9 | mod doc; 10 | mod doc_check; 11 | mod doc_test; 12 | 13 | pub use doc::*; 14 | pub use doc_check::*; 15 | pub use doc_test::*; 16 | 17 | // Lint commands 18 | mod clippy; 19 | mod format; 20 | mod lints; 21 | 22 | pub use clippy::*; 23 | pub use format::*; 24 | pub use lints::*; 25 | 26 | // Loom test suite commands 27 | 
mod loom; 28 | mod loom_check; 29 | mod loom_clippy; 30 | mod loom_test; 31 | 32 | pub use loom::*; 33 | pub use loom_check::*; 34 | pub use loom_clippy::*; 35 | pub use loom_test::*; 36 | -------------------------------------------------------------------------------- /ci/src/main.rs: -------------------------------------------------------------------------------- 1 | //! CI script used for Forte. 2 | //! 3 | //! Copied from the bevy CI tool. 4 | 5 | mod ci; 6 | mod commands; 7 | mod prepare; 8 | 9 | pub use self::{ci::*, prepare::*}; 10 | 11 | fn main() { 12 | argh::from_env::().run(); 13 | } 14 | -------------------------------------------------------------------------------- /ci/src/prepare.rs: -------------------------------------------------------------------------------- 1 | use bitflags::bitflags; 2 | 3 | /// Trait for preparing a subcommand to be run. 4 | pub trait Prepare { 5 | /// A method that returns a list of [`PreparedCommand`]s to be run for a given shell and flags. 6 | /// 7 | /// # Example 8 | /// 9 | /// ``` 10 | /// # use crate::{Flag, Prepare, PreparedCommand}; 11 | /// # use argh::FromArgs; 12 | /// # use xshell::Shell; 13 | /// # 14 | /// #[derive(FromArgs)] 15 | /// #[argh(subcommand, name = "check")] 16 | /// struct CheckCommand {} 17 | /// 18 | /// impl Prepare for CheckCommand { 19 | /// fn prepare<'a>(&self, sh: &'a Shell, flags: Flag) -> Vec> { 20 | /// vec![PreparedCommand::new::( 21 | /// cmd!(sh, "cargo check --workspace"), 22 | /// "Please fix linter errors", 23 | /// )] 24 | /// } 25 | /// } 26 | /// ``` 27 | fn prepare<'a>(&self, sh: &'a xshell::Shell, flags: Flag) -> Vec>; 28 | } 29 | 30 | bitflags! { 31 | /// Flags that modify how commands are run. 32 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] 33 | pub struct Flag: u32 { 34 | /// Forces certain checks to continue running even if they hit an error. 
35 | const KEEP_GOING = 1 << 0; 36 | } 37 | } 38 | 39 | /// A command with associated metadata, created from a command that implements [`Prepare`]. 40 | #[derive(Debug)] 41 | pub struct PreparedCommand<'a> { 42 | /// The name of the command. 43 | pub name: &'static str, 44 | 45 | /// The command to execute 46 | pub command: xshell::Cmd<'a>, 47 | 48 | /// The message to display if the test command fails 49 | pub failure_message: &'static str, 50 | 51 | /// The subdirectory path to run the test command within 52 | pub subdir: Option<&'static str>, 53 | 54 | /// Environment variables that need to be set before the test runs 55 | pub env_vars: Vec<(&'static str, &'static str)>, 56 | } 57 | 58 | impl<'a> PreparedCommand<'a> { 59 | /// Creates a new [`PreparedCommand`] from a [`Cmd`] and a failure message. 60 | /// 61 | /// The other fields of [`PreparedCommand`] are filled in with their default values. 62 | /// 63 | /// For more information about creating a [`Cmd`], please see the [`cmd!`](xshell::cmd) macro. 64 | /// 65 | /// [`Cmd`]: xshell::Cmd 66 | pub fn new( 67 | command: xshell::Cmd<'a>, 68 | failure_message: &'static str, 69 | ) -> Self { 70 | Self { 71 | command, 72 | name: T::COMMAND.name, 73 | failure_message, 74 | subdir: None, 75 | env_vars: vec![], 76 | } 77 | } 78 | 79 | /// A builder that overwrites the current sub-directory with a new value. 80 | pub fn with_subdir(mut self, subdir: &'static str) -> Self { 81 | self.subdir = Some(subdir); 82 | self 83 | } 84 | 85 | /// A builder that adds a new environmental variable to the list. 86 | pub fn with_env_var(mut self, key: &'static str, value: &'static str) -> Self { 87 | self.env_vars.push((key, value)); 88 | self 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/job.rs: -------------------------------------------------------------------------------- 1 | //! This module defines a executable unit of work called a `Job`. Jobs are what 2 | //! 
get scheduled on the thread-pool. After the are allocated, a reference 3 | //! (specifically a `JobRef`) is queued, passed to a thread, end executed. 4 | //! 5 | //! This module defines two core job types: `StackJob` and `HeapJob`. The former 6 | //! is more efficient, but can only be used when the work won't outlive the 7 | //! current stack. `HeapJob` requires an allocation, but can outlive the current 8 | //! stack. 9 | //! 10 | //! When using a job, one must be extremely careful to ensure that: 11 | //! (a) The job does not outlive anything it closes over. 12 | //! (b) The job remains valid until it is executed for the last time. 13 | //! (c) Each job reference is executed exactly once. 14 | 15 | use alloc::boxed::Box; 16 | 17 | use crate::primitives::*; 18 | 19 | // ----------------------------------------------------------------------------- 20 | // Job 21 | 22 | /// A job is a unit of work that may be executed by a worker thread. 23 | pub trait Job { 24 | /// Calling this function runs the job. 25 | /// 26 | /// # Safety 27 | /// 28 | /// This may be called from a different thread than the one which scheduled 29 | /// the job, so the implementer must ensure the appropriate traits are met, 30 | /// whether `Send`, `Sync`, or both. 31 | /// 32 | /// The caller must ensure that the pointer is valid and points to an 33 | /// instance of the correct type. They must also ensure this is called 34 | /// exactly once for each job. 35 | unsafe fn execute(this: *const ()); 36 | } 37 | 38 | // ----------------------------------------------------------------------------- 39 | // JobRef 40 | 41 | /// Effectively a Job trait object. It can be treated as such, even though 42 | /// sometimes a `JobRef` will not point to a type that implements `Job`. 43 | pub struct JobRef { 44 | /// A raw pointer to data that can be executed with the `execute_fn`. This 45 | /// will usually point to either a `StackJob` or a `HeapJob`. 
46 | pointer: *const (), 47 | /// A function pointer that can execute the job stored at `pointer`. 48 | execute_fn: unsafe fn(*const ()), 49 | } 50 | 51 | impl JobRef { 52 | /// Creates a new `JobRef` from a `Job`. 53 | /// 54 | /// # Safety 55 | /// 56 | /// Caller must ensure `job` will remain valid until the job is executed, 57 | /// and that the job is executed to completion exactly once. 58 | pub unsafe fn new(job: *const J) -> JobRef 59 | where 60 | J: Job, 61 | { 62 | JobRef { 63 | pointer: job.cast(), 64 | execute_fn: ::execute, 65 | } 66 | } 67 | 68 | /// Creates a new `JobRef` from raw pointers. 69 | /// 70 | /// # Safety 71 | /// 72 | /// Caller must ensure the data at the pointer will remain valid until the 73 | /// job is executed, and that the job is executed to completion exactly 74 | /// once. Additionally the caller must ensure that `execute_fn` can be 75 | /// called on `pointer`. 76 | pub unsafe fn new_raw(pointer: *const (), execute_fn: unsafe fn(*const ())) -> JobRef { 77 | JobRef { 78 | pointer, 79 | execute_fn, 80 | } 81 | } 82 | 83 | /// Returns an opaque handle that can be saved and compared, without making 84 | /// `JobRef` itself `Copy + Eq`. 85 | #[inline] 86 | pub fn id(&self) -> impl Eq { 87 | (self.pointer, self.execute_fn) 88 | } 89 | 90 | /// Executes a `JobRef`. 91 | #[inline] 92 | pub fn execute(self) { 93 | // SAFETY: The creator of the `JobRef` is responsible for ensuring 94 | // `self.pointer` is valid up until this call and safe to pass to 95 | // `JobRef::execute_fn`. This consumes the `JobRef`, ensuring that if it 96 | // points to a `Job` then it is executed exactly one (given that the 97 | // only a single `JobRef` is created for each job). 98 | unsafe { (self.execute_fn)(self.pointer) } 99 | } 100 | } 101 | 102 | // SAFETY: !Send for raw pointers is not for safety, just as a lint. 103 | unsafe impl Send for JobRef {} 104 | 105 | // SAFETY: !Sync for raw pointers is not for safety, just as a lint. 
106 | unsafe impl Sync for JobRef {} 107 | 108 | // ----------------------------------------------------------------------------- 109 | // Stack allocated job 110 | 111 | /// A job that will be owned by a stack slot. This means that when it executes 112 | /// it need not free any heap data, the cleanup occurs when the stack frame is 113 | /// later popped. 114 | /// 115 | /// This is, from an allocation and freeing point of view, the most efficient 116 | /// type of job. It is used to implement `join` and `on_worker`. 117 | pub struct StackJob 118 | where 119 | F: FnOnce() + Send, 120 | { 121 | job: UnsafeCell>, 122 | } 123 | 124 | impl StackJob 125 | where 126 | F: FnOnce() + Send, 127 | { 128 | /// Creates a new `StackJob` and returns it directly. 129 | pub fn new(job: F) -> StackJob { 130 | StackJob { 131 | job: UnsafeCell::new(Some(job)), 132 | } 133 | } 134 | 135 | /// Executes the job without having to go through the `JobRef`. This has the 136 | /// benefit of saving some dynamic lookup, and allows the compiler to do 137 | /// inline optimization (because the function type is known). 138 | /// 139 | /// This is used in `join` to run the job synchronously after failing to 140 | /// share it. 141 | pub fn run_inline(self) { 142 | let job = self.job.into_inner().unwrap(); 143 | job(); 144 | } 145 | 146 | /// Creates a `JobRef` pointing to this job. 147 | /// 148 | /// # Safety 149 | /// 150 | /// Caller must ensure the `StackJob` remains valid until the `JobRef` is 151 | /// executed. This amounts to ensuring the job is executed before the stack 152 | /// frame is popped. Caller must also ensure that `JobRef::execute` is 153 | /// called exactly once. 154 | pub unsafe fn as_job_ref(&self) -> JobRef { 155 | // SAFETY: The caller ensures this job ref lives for the required 156 | // duration and is executed to completion exactly once. 
157 | unsafe { JobRef::new(self) } 158 | } 159 | } 160 | 161 | impl Job for StackJob 162 | where 163 | F: FnOnce() + Send, 164 | { 165 | /// Executes a `StackJob` from a const pointer. 166 | /// 167 | /// # Safety 168 | /// 169 | /// Caller must ensure that the pointer points to a valid `StackJob`; or, 170 | /// equivalently, that this is called before the stack frame in which the 171 | /// job is allocated is popped. Calling this completes the job, so the 172 | /// caller must ensure this is called exactly once. 173 | unsafe fn execute(this: *const ()) { 174 | // SAFETY: The caller ensures this points to a valid `StackJob`. 175 | let this = unsafe { &*(this.cast::()) }; 176 | let job = this.job.get_mut(); 177 | // SAFETY: This is called at most once and the job is otherwise never 178 | // dereferenced so there can be no other mutable references. 179 | let job_func = unsafe { job.deref().take().unwrap() }; 180 | // Run the job. 181 | job_func(); 182 | } 183 | } 184 | 185 | // ----------------------------------------------------------------------------- 186 | // Heap allocated job 187 | 188 | /// Represents a job stored in the heap. Used to implement `scope` and `spawn`. 189 | pub struct HeapJob 190 | where 191 | F: FnOnce() + Send, 192 | { 193 | job: F, 194 | } 195 | 196 | impl HeapJob 197 | where 198 | F: FnOnce() + Send, 199 | { 200 | /// Allocates a new `HeapJob` on the heap. 201 | pub fn new(job: F) -> Box> { 202 | Box::new(HeapJob { job }) 203 | } 204 | 205 | /// A version of `into_job_ref` for functions with a static lifetime. 206 | /// 207 | /// # Safety 208 | /// 209 | /// The caller must still ensure that `JobRef::execute` is called exactly 210 | /// once. 211 | pub unsafe fn into_static_job_ref(self: Box) -> JobRef 212 | where 213 | F: 'static, 214 | { 215 | // SAFETY: The closure this job points to has static lifetime, so it 216 | // will be valid until `JobRef` is executed, and it cannot close over 217 | // any non-static data. 
The caller ensures it will be called exactly 218 | // once. 219 | unsafe { self.into_job_ref() } 220 | } 221 | 222 | /// Creates a `JobRef` pointing to this job. 223 | /// 224 | /// # Safety 225 | /// 226 | /// Caller must ensure the `Box` remains valid until the `JobRef` 227 | /// is executed. This hides all lifetimes, so the caller must ensure that it 228 | /// doesn't outlive any data it closes over. Additionally, the caller must 229 | /// ensure that `JobRef::execute` is called exactly once. 230 | pub unsafe fn into_job_ref(self: Box) -> JobRef { 231 | // SAFETY: The caller ensures that the `JobRef` will remain valid until 232 | // it is executed, and that it will be executed exactly once. 233 | unsafe { JobRef::new(Box::into_raw(self)) } 234 | } 235 | } 236 | 237 | impl Job for HeapJob 238 | where 239 | F: FnOnce() + Send, 240 | { 241 | /// Executes a `HeapJob` from a const pointer. 242 | /// 243 | /// # Safety 244 | /// 245 | /// Caller must ensure that the pointer points to a valid raw boxed 246 | /// `HeapJob`. Calling this completes the job, so the caller must ensure that 247 | /// this is called exactly once. 248 | unsafe fn execute(this: *const ()) { 249 | // SAFETY: The caller ensures that the pointer is a valid raw boxed heap 250 | // job. The compiler cannot verify that the inner `FnOnce` is actually 251 | // called only once, so we require the caller to verify this as part of 252 | // the safety comment for this function. 253 | unsafe { 254 | let this = Box::from_raw(this as *mut Self); 255 | (this.job)(); 256 | }; 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /src/latch.rs: -------------------------------------------------------------------------------- 1 | //! This module provides a toolbox of scheduling primitives called Latches. 2 | //! 3 | //! In the abstract, a latch is a way to block some logic from progressing on a 4 | //! given thread. All latches begin "closed", blocking some logic. 
When the 5 | //! latch is "opened", the logic progresses. The exact meaning of "closed" and 6 | //! "open" depend on the latch in question. 7 | //! 8 | //! Most latches implement one or both of the two core traits: [`Latch`] and 9 | //! [`Probe`]. 10 | //! 11 | //! [`Latch`] represents the "write-side" of the api, which allows consumers to 12 | //! open arbitrary latches (and thus unblock whatever logic is blocked using the 13 | //! latch). It defines a single function, [`Latch::set`], which (possibly) 14 | //! opens the latch. 15 | //! 16 | //! [`Probe`] represents the "read-side" of the api, which allows consumers to 17 | //! test if is latch is closed and spin (or do something else) while waiting for 18 | //! it to open. It defines a single method, [`Probe::probe`], which returns 19 | //! a boolean to indicate if the latch is open. 20 | //! 21 | //! # Safety 22 | //! 23 | //! Latches are an inherently somewhat unsafe construct, because once a latch is 24 | //! becomes "open", the logic it unblocks often deallocates the lock. Refer to 25 | //! specific safety comments for more information. 26 | 27 | use alloc::{sync::Arc, task::Wake}; 28 | 29 | use crate::primitives::*; 30 | 31 | use crate::thread_pool::{ThreadPool, WorkerThread}; 32 | 33 | // ----------------------------------------------------------------------------- 34 | // Latches and probes 35 | 36 | /// This trait represents the "write-side" of the latch api. It exists to allow 37 | /// consumers to open arbitrary latches (and thus unblock whatever logic is 38 | /// blocked using the latch). 39 | /// 40 | /// Latches may choose not implement this trait if they cannot be opened without 41 | /// additional context. 42 | pub trait Latch { 43 | /// Possibly opens the latch. Calling this method does not have to open the 44 | /// latch, but there should be some situation in which it does. 
45 | /// 46 | /// # Safety 47 | /// 48 | /// Opening a latch triggers other threads to wake up and (in some cases) 49 | /// complete. This may, in turn, cause memory to be deallocated and so 50 | /// forth. 51 | /// 52 | /// This function operates on `*const Self` instead of `&self` to allow it 53 | /// to become dangling during this call. The caller must ensure that the 54 | /// pointer is valid upon entry, and not invalidated during the call by any 55 | /// actions other than `set` itself. 56 | /// 57 | /// The implementer can assume the pointer is valid when passed into the 58 | /// function, but must not assume the pointer is valid after the latch is 59 | /// "opened" (eg. after whatever side-effect can cause logic elsewhere to 60 | /// progress). It's typically better to read all the fields you will need to 61 | /// access *before* a latch is set! 62 | unsafe fn set(this: *const Self); 63 | } 64 | 65 | /// This trait represents the "read-side" of the latch api. It exists to allow 66 | /// consumers to check is a latch is open. 67 | /// 68 | /// Latches may choose not to implement this if they do not support polling (for 69 | /// example if they use a mutex to block a thread). 70 | pub trait Probe { 71 | /// Returns `true` if the latch is open, and `false` if it is closed. 72 | fn probe(&self) -> bool; 73 | } 74 | 75 | // ----------------------------------------------------------------------------- 76 | // Atomic latch 77 | 78 | /// A simple latch implemented using an atomic bool. 79 | pub struct AtomicLatch { 80 | /// The state of the latch, `true` for open and `false` for closed. 81 | state: AtomicBool, 82 | } 83 | 84 | impl AtomicLatch { 85 | /// Creates a new closed latch. 86 | #[inline] 87 | #[cfg(not(loom))] 88 | pub const fn new() -> Self { 89 | Self { 90 | state: AtomicBool::new(false), 91 | } 92 | } 93 | 94 | /// Non-const constructor variant for loom. 
95 | #[inline] 96 | #[cfg(loom)] 97 | pub fn new() -> Self { 98 | Self { 99 | state: AtomicBool::new(false), 100 | } 101 | } 102 | 103 | /// Resets the latch back to closed. 104 | #[inline] 105 | pub fn reset(&self) { 106 | self.state.store(false, Ordering::Release); 107 | } 108 | } 109 | 110 | impl Default for AtomicLatch { 111 | fn default() -> Self { 112 | Self::new() 113 | } 114 | } 115 | 116 | impl Latch for AtomicLatch { 117 | #[inline] 118 | unsafe fn set(this: *const Self) { 119 | // SAFETY: We assume the pointer is valid when passed in. We do not use 120 | // it after the side-effects (in this case an atomic store) so it is 121 | // fine if the pointer becomes dangling. 122 | unsafe { 123 | (*this).state.store(true, Ordering::Release); 124 | } 125 | } 126 | } 127 | 128 | impl Probe for AtomicLatch { 129 | #[inline] 130 | fn probe(&self) -> bool { 131 | self.state.load(Ordering::Acquire) 132 | } 133 | } 134 | 135 | // ----------------------------------------------------------------------------- 136 | // Wake latch 137 | 138 | /// A simple wrapper around an `AtomicLatch` that can wake a worker when set. 139 | pub struct WakeLatch { 140 | /// An internal atomic latch. 141 | atomic_latch: AtomicLatch, 142 | /// The thread pool where the thread lives. 143 | thread_pool: &'static ThreadPool, 144 | /// The index of the worker thread to wake when `set()` is called. 145 | thread_index: usize, 146 | } 147 | 148 | impl WakeLatch { 149 | /// Creates a new closed latch. 150 | #[inline] 151 | pub fn new(worker_thread: &WorkerThread) -> WakeLatch { 152 | WakeLatch { 153 | atomic_latch: AtomicLatch::new(), 154 | thread_pool: worker_thread.thread_pool(), 155 | thread_index: worker_thread.index(), 156 | } 157 | } 158 | 159 | /// Creates a new latch from a thread pool and worker index, rather than 160 | /// from a reference to a worker thread. 
161 | #[inline] 162 | #[cfg(not(loom))] 163 | pub const fn new_raw(thread_index: usize, thread_pool: &'static ThreadPool) -> WakeLatch { 164 | WakeLatch { 165 | atomic_latch: AtomicLatch::new(), 166 | thread_pool, 167 | thread_index, 168 | } 169 | } 170 | 171 | /// Resets the latch back to closed. 172 | #[inline] 173 | pub fn reset(&self) { 174 | self.atomic_latch.reset(); 175 | } 176 | } 177 | 178 | impl Latch for WakeLatch { 179 | #[inline] 180 | unsafe fn set(this: *const Self) { 181 | // SAFETY: The thread pool itself is static, so we need only be 182 | // concerned with the lifetime of the pointer. Since we assume it is 183 | // valid when passed in and do not use it after the side effects, it is 184 | // fine if it becomes dangling. 185 | unsafe { 186 | let thread_pool = (*this).thread_pool; 187 | let thread_index = (*this).thread_index; 188 | Latch::set(&(*this).atomic_latch); 189 | thread_pool.wake_thread(thread_index); 190 | } 191 | } 192 | } 193 | 194 | impl Probe for WakeLatch { 195 | #[inline] 196 | fn probe(&self) -> bool { 197 | self.atomic_latch.probe() 198 | } 199 | } 200 | 201 | // ----------------------------------------------------------------------------- 202 | // Mutex-lock latch 203 | 204 | /// A latch that can be used to block a thread, implemented using a mutex. 205 | pub struct LockLatch { 206 | mutex: Mutex, 207 | cond: Condvar, 208 | } 209 | 210 | impl LockLatch { 211 | /// Creates a new closed latch. 212 | #[inline] 213 | #[cfg(not(loom))] 214 | pub const fn new() -> LockLatch { 215 | LockLatch { 216 | mutex: Mutex::new(false), 217 | cond: Condvar::new(), 218 | } 219 | } 220 | 221 | /// Non-const constructor variant for loom. 222 | #[inline] 223 | #[cfg(loom)] 224 | pub fn new() -> LockLatch { 225 | LockLatch { 226 | mutex: Mutex::new(false), 227 | cond: Condvar::new(), 228 | } 229 | } 230 | 231 | /// Waits for the latch to open by blocking the thread. 
232 | pub fn wait(&self) { 233 | let mut guard = self.mutex.lock().unwrap(); 234 | while !*guard { 235 | guard = self.cond.wait(guard).unwrap(); 236 | } 237 | } 238 | 239 | /// Waits for the latch to open by blocking the thread, then sets it back to closed. 240 | pub fn wait_and_reset(&self) { 241 | let mut guard = self.mutex.lock().unwrap(); 242 | while !*guard { 243 | guard = self.cond.wait(guard).unwrap(); 244 | } 245 | *guard = false; 246 | } 247 | } 248 | 249 | impl Default for LockLatch { 250 | fn default() -> Self { 251 | Self::new() 252 | } 253 | } 254 | 255 | impl Latch for LockLatch { 256 | #[inline] 257 | unsafe fn set(this: *const Self) { 258 | // SAFETY: We assume the pointer is valid when passed in. Side-effects 259 | // are not transmitted until the `notify_all` call at the very end, so 260 | // the pointer remains valid for the entire block. 261 | unsafe { 262 | let mut guard = (*this).mutex.lock().unwrap(); 263 | *guard = true; 264 | (*this).cond.notify_all(); 265 | } 266 | } 267 | } 268 | 269 | // ----------------------------------------------------------------------------- 270 | // Counting latch 271 | 272 | /// A counting latch stores a decrementing counter and only opens when the 273 | /// counter reaches zero. This means that, unlike other latches, multiple calls 274 | /// to `Latch::set` may be required to open the latch. 275 | pub struct CountLatch { 276 | counter: AtomicUsize, 277 | latch: WakeLatch, 278 | } 279 | 280 | impl CountLatch { 281 | /// Creates a new closed latch with the specified count. 282 | #[inline] 283 | pub fn with_count(count: usize, owner: &WorkerThread) -> Self { 284 | Self { 285 | counter: AtomicUsize::new(count), 286 | latch: WakeLatch::new(owner), 287 | } 288 | } 289 | 290 | /// Increments the count. An additional call to `Latch::set()` will be 291 | /// required before the latch opens. 
292 | #[inline] 293 | pub fn increment(&self) { 294 | self.counter.fetch_add(1, Ordering::Relaxed); 295 | } 296 | } 297 | 298 | impl Latch for CountLatch { 299 | #[inline] 300 | unsafe fn set(this: *const Self) { 301 | // SAFETY: We assume the pointer is valid when passed in. Side-effects 302 | // are not transmitted until the `Latch::set` call at the very end, so 303 | // it is fine if the pointer becomes dangling. 304 | unsafe { 305 | if (*this).counter.fetch_sub(1, Ordering::SeqCst) == 1 { 306 | Latch::set(&(*this).latch); 307 | } 308 | } 309 | } 310 | } 311 | 312 | impl Probe for CountLatch { 313 | #[inline] 314 | fn probe(&self) -> bool { 315 | self.latch.probe() 316 | } 317 | } 318 | 319 | // ----------------------------------------------------------------------------- 320 | // Async set-on-wake 321 | 322 | /// An async task waker that sets a latch on wake. 323 | pub struct SetOnWake 324 | where 325 | L: Latch, 326 | { 327 | latch: L, 328 | } 329 | 330 | impl SetOnWake 331 | where 332 | L: Latch, 333 | { 334 | /// Creates a waker from a latch. 335 | pub fn new(latch: L) -> Arc { 336 | Arc::new(Self { latch }) 337 | } 338 | 339 | /// Returns a reference to the inner latch. 340 | pub fn latch(&self) -> &L { 341 | &self.latch 342 | } 343 | } 344 | 345 | impl Wake for SetOnWake 346 | where 347 | L: Latch, 348 | { 349 | fn wake(self: Arc) { 350 | self.wake_by_ref(); 351 | } 352 | 353 | fn wake_by_ref(self: &Arc) { 354 | // SAFETY: The pointer passed to `Latch::set()` is valid and cannot be 355 | // invalidated while the arc is held. 356 | unsafe { Latch::set(&self.latch) }; 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! An async-compatible thread-pool aiming for "speed through simplicity". 2 | //! 3 | //! Forte is a parallel & async work scheduler designed to accommodate very large 4 | //! 
workloads with many short-lived tasks. It replicates the `rayon_core` api 5 | //! but with native support for futures and async tasks. It's design was 6 | //! prompted by the needs of the bevy game engine, but should be applicable to 7 | //! any problem that involves running both synchronous and asynchronous work 8 | //! concurrently. 9 | //! 10 | //! The thread-pool provided by this crate does not employ work-stealing. Forte 11 | //! instead uses "Heartbeat Scheduling", an alternative load-balancing technique 12 | //! that (theoretically) provides provably small overheads and good utilization. 13 | //! The end effect is that work is only parallelized every so often, allowing 14 | //! more work to be done sequentially on each thread and amortizing the 15 | //! synchronization overhead. 16 | //! 17 | //! # Acknowledgments 18 | //! 19 | //! Large portions of the code are direct ports from various versions of 20 | //! `rayon_core`, with minor simplifications and improvements. We also relied 21 | //! upon `chili` and `spice` for reference while writing the heartbeat 22 | //! scheduling. Support for futures is based on an approach sketched out by 23 | //! members of the `rayon` community to whom we are deeply indebted. 24 | 25 | // ----------------------------------------------------------------------------- 26 | // Boilerplate for building without the standard library 27 | 28 | #![no_std] 29 | 30 | extern crate alloc; 31 | extern crate std; 32 | 33 | // ----------------------------------------------------------------------------- 34 | // Modules 35 | 36 | pub mod job; 37 | pub mod latch; 38 | pub mod scope; 39 | pub mod thread_pool; 40 | 41 | mod util; 42 | 43 | // ----------------------------------------------------------------------------- 44 | // Prelude 45 | 46 | pub mod prelude { 47 | //! Reexports some types commonly needed for using Forte. 
48 | 49 | pub use crate::{ 50 | scope::Scope, 51 | thread_pool::{ThreadPool, WorkerThread}, 52 | }; 53 | } 54 | 55 | // ----------------------------------------------------------------------------- 56 | // Mocked APIs 57 | 58 | // This crate uses `loom` for testing, which requires mocking all of the core 59 | // threading primitives (`Mutex` and the like). Unfortunately there are some 60 | // minor differences between the `loom` and `std`. 61 | // 62 | // To make things a bit simpler, we re-export all the important types in the 63 | // `primitives` module. Where necessary we wrap the `std` implementation to make 64 | // it match up with `loom`. 65 | 66 | #[cfg(not(loom))] 67 | mod primitives { 68 | pub use core::cell::Cell; 69 | pub use core::sync::atomic::AtomicBool; 70 | pub use core::sync::atomic::AtomicUsize; 71 | pub use core::sync::atomic::Ordering; 72 | 73 | pub use std::sync::Condvar; 74 | pub use std::sync::Mutex; 75 | pub use std::thread::available_parallelism; 76 | pub use std::thread::spawn as spawn_thread; 77 | 78 | pub use crossbeam_queue::SegQueue as Queue; 79 | 80 | pub struct UnsafeCell { 81 | data: core::cell::UnsafeCell, 82 | } 83 | 84 | impl UnsafeCell { 85 | #[inline(always)] 86 | pub const fn new(data: T) -> Self { 87 | UnsafeCell { 88 | data: core::cell::UnsafeCell::new(data), 89 | } 90 | } 91 | 92 | #[inline(always)] 93 | pub fn into_inner(self) -> T { 94 | self.data.into_inner() 95 | } 96 | 97 | #[inline(always)] 98 | pub fn get_mut(&self) -> MutPtr { 99 | MutPtr { 100 | ptr: self.data.get(), 101 | } 102 | } 103 | } 104 | 105 | pub struct MutPtr { 106 | ptr: *mut T, 107 | } 108 | 109 | #[allow(clippy::mut_from_ref)] 110 | impl MutPtr { 111 | /// Dereferences the pointer. 112 | /// 113 | /// # Safety 114 | /// 115 | /// This is equivalent to dereferencing a *mut T pointer, so all the 116 | /// same safety considerations apply here. 
117 | /// 118 | /// Because the `MutPtr` type can only be created by calling 119 | /// `UnsafeCell::get_mut` on a valid `UnsafeCell`, we know the pointer 120 | /// will never be null. 121 | #[inline(always)] 122 | pub unsafe fn deref(&self) -> &mut T { 123 | // SAFETY: The safety requirements of this pointer dereference are 124 | // identical to those of the function. 125 | unsafe { &mut *self.ptr } 126 | } 127 | } 128 | 129 | pub trait WithMut { 130 | fn with_mut(&mut self, f: impl FnOnce(&mut usize) -> R) -> R; 131 | } 132 | 133 | impl WithMut for AtomicUsize { 134 | #[inline(always)] 135 | fn with_mut(&mut self, f: impl FnOnce(&mut usize) -> R) -> R { 136 | f(self.get_mut()) 137 | } 138 | } 139 | } 140 | 141 | #[cfg(loom)] 142 | mod primitives { 143 | pub use loom::cell::Cell; 144 | pub use loom::cell::UnsafeCell; 145 | pub use loom::sync::atomic::AtomicBool; 146 | pub use loom::sync::atomic::AtomicUsize; 147 | pub use loom::sync::atomic::Ordering; 148 | pub use loom::sync::Condvar; 149 | pub use loom::sync::Mutex; 150 | pub use loom::thread::spawn as spawn_thread; 151 | 152 | pub use std::thread::available_parallelism; 153 | 154 | use alloc::vec::Vec; 155 | 156 | pub struct Queue { 157 | inner: Mutex>, 158 | } 159 | 160 | impl Queue { 161 | pub fn new() -> Queue { 162 | Queue { 163 | inner: Mutex::new(Vec::new()), 164 | } 165 | } 166 | 167 | pub fn push(&self, val: T) { 168 | let mut vec = self.inner.lock().unwrap(); 169 | vec.push(val); 170 | } 171 | 172 | pub fn pop(&self) -> Option { 173 | let mut vec = self.inner.lock().unwrap(); 174 | vec.pop() 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/queue.rs: -------------------------------------------------------------------------------- 1 | //! 
A fast concurrent queue

use alloc::boxed::Box;
use core::sync::atomic::{AtomicPtr, AtomicUsize};

use crossbeam_utils::CachePadded;

// This uses the same per-architecture alignment table as `CachePadded`.
//
// NOTE(review): the first attribute below originally read
// `#[cfg(target_arch = "s390x", repr(align(256)))]`, which is not valid cfg
// syntax (`repr(...)` is not a cfg predicate; this looks like residue from a
// `cfg_attr` in crossbeam's source). A plain predicate matches the rest of
// the table.
#[cfg(target_arch = "s390x")]
const CACHE_LINE_SIZE: usize = 256;
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "powerpc64",
))]
const CACHE_LINE_SIZE: usize = 128;
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "powerpc64",
    target_arch = "arm",
    target_arch = "mips",
    target_arch = "mips32r6",
    target_arch = "mips64",
    target_arch = "mips64r6",
    target_arch = "sparc",
    target_arch = "hexagon",
    target_arch = "m68k",
    target_arch = "s390x",
)))]
const CACHE_LINE_SIZE: usize = 64;
#[cfg(any(
    target_arch = "arm",
    target_arch = "mips",
    target_arch = "mips32r6",
    target_arch = "mips64",
    target_arch = "mips64r6",
    target_arch = "sparc",
    target_arch = "hexagon",
))]
const CACHE_LINE_SIZE: usize = 32;
#[cfg(target_arch = "m68k")]
const CACHE_LINE_SIZE: usize = 16;

/// A bounded MPMC ring-buffer queue (work in progress: the `impl` block below
/// is still empty and this module is not yet declared in `lib.rs`).
///
/// NOTE(review): the generic parameters and field types of this struct were
/// garbled in extraction. The reconstruction below follows the field comments
/// ("buffer of pointers to T", "states ... same length as buffer") — confirm
/// against the original source before relying on it.
pub struct Queue<T> {
    // The head of the queue
    head: CachePadded<AtomicUsize>,
    // The tail of the queue
    tail: CachePadded<AtomicUsize>,
    // The buffer of pointers to T
    buffer: Box<[CachePadded<AtomicPtr<T>>]>,
    // States for the buffer items, same length as buffer
    states: Box<[CachePadded<AtomicUsize>]>,
}

// SAFETY: The queue only hands out owned values of `T`, so it is Send/Sync
// whenever `T` itself can be sent between threads.
unsafe impl<T: Send> Send for Queue<T> {}
unsafe impl<T: Send> Sync for Queue<T> {}

impl<T> Queue<T> {}
-------------------------------------------------------------------------------- /src/scope.rs: --------------------------------------------------------------------------------
//! This module defines a utility for spawning lifetime-scoped jobs.
2 | 3 | use alloc::boxed::Box; 4 | use core::{future::Future, marker::PhantomData, ptr::NonNull}; 5 | 6 | use async_task::{Runnable, Task}; 7 | 8 | use crate::{ 9 | job::{HeapJob, JobRef}, 10 | latch::{CountLatch, Latch}, 11 | thread_pool::{ThreadPool, WorkerThread}, 12 | util::CallOnDrop, 13 | }; 14 | 15 | // ----------------------------------------------------------------------------- 16 | // Scope 17 | 18 | /// A scope which can spawn a number of non-static jobs and async tasks. See 19 | /// [`ThreadPool::scope`] for more information. 20 | pub struct Scope<'scope> { 21 | /// The thread pool the scope is for. 22 | thread_pool: &'static ThreadPool, 23 | /// A counting latch that opens when all jobs spawned in this scope are complete. 24 | job_completed_latch: CountLatch, 25 | /// A marker that makes the scope behave as if it contained a vector of 26 | /// closures to execute, all of which outlive `'scope`. We pretend they are 27 | /// `Send + Sync` even though they're not actually required to be `Sync`. 28 | /// It's still safe to let the `Scope` implement `Sync` because the closures 29 | /// are only *moved* across threads to be executed. 30 | #[allow(clippy::type_complexity)] 31 | marker: PhantomData) + Send + Sync + 'scope>>, 32 | } 33 | 34 | impl<'scope> Scope<'scope> { 35 | /// Creates a new scope owned by the given worker thread. For a safe 36 | /// equivalent, use [`ThreadPool::scope`]. 37 | /// 38 | /// Two important lifetimes effect scope: the external lifetime of the scope 39 | /// object itself (which we will call `'ext`) and the internal lifetime 40 | /// `'scope`. 41 | /// 42 | /// # Safety 43 | /// 44 | /// The caller must ensure that the scope is completed with a call to 45 | /// [`Scope::complete`], passing in a reference the same owning worker 46 | /// thread both times. 47 | /// 48 | /// If the scope is not completed, jobs spawned onto this scope may outlive 49 | /// the data they close over. 
50 | pub unsafe fn new(owner: &WorkerThread) -> Scope<'scope> { 51 | Scope { 52 | thread_pool: owner.thread_pool(), 53 | job_completed_latch: CountLatch::with_count(1, owner), 54 | marker: PhantomData, 55 | } 56 | } 57 | 58 | /// Spawns a job into the scope. This job will execute sometime before the 59 | /// scope completes. The job is specified as a closure, and this closure 60 | /// receives its own reference to the scope `self` as argument. This can be 61 | /// used to inject new jobs into `self`. 62 | /// 63 | /// # Returns 64 | /// 65 | /// Nothing. The spawned closures cannot pass back values to the caller 66 | /// directly, though they can write to local variables on the stack (if 67 | /// those variables outlive the scope) or communicate through shared 68 | /// channels. 69 | /// 70 | /// If you need to return a value, spawn a `Future` instead with 71 | /// [`Scope::spawn_future`]. 72 | /// 73 | /// # See also 74 | /// 75 | /// The [`ThreadPool::scope`] function has more extensive documentation about 76 | /// task spawning. 77 | pub fn spawn(&self, f: F) 78 | where 79 | F: FnOnce(&Scope<'scope>) + Send + 'scope, 80 | { 81 | // We increment the scope counter; this will prevent the scope from 82 | // ending until after a corresponding `Latch::set` call. 83 | self.job_completed_latch.increment(); 84 | 85 | // Create a job to execute the spawned function in the scope. 86 | let scope_ptr = ScopePtr(self); 87 | let job = HeapJob::new(move || { 88 | // SAFETY: Because we called `increment` and the owner is required 89 | // to call `complete`, this scope will remain valid at-least until 90 | // `Latch::set` is called. 91 | unsafe { 92 | let scope = scope_ptr.as_ref(); 93 | f(scope); 94 | Latch::set(&self.job_completed_latch); 95 | } 96 | }); 97 | 98 | // SAFETY: The heap job does not outlive `'scope`. 
This is ensured 99 | // because the owner of this scope is required to call 100 | // `Scope::complete`, and that function keeps the scope alive until the 101 | // latch is opened. The latch will not open until after this job is 102 | // executed, because the call to `increment` above is matched by the 103 | // call to `Latch::set` after execution. 104 | let job_ref = unsafe { job.into_job_ref() }; 105 | 106 | // Send the job to a queue to be executed. 107 | self.thread_pool.inject_or_push(job_ref); 108 | } 109 | 110 | /// Spawns a future onto the scope. This future will be asynchronously 111 | /// polled to completion some time before the scope completes. 112 | /// 113 | /// # Returns 114 | /// 115 | /// This returns a task, which represents a handle to the async computation 116 | /// and is itself a future that can be awaited to receive the output of the 117 | /// future. There's four ways to interact with a task: 118 | /// 119 | /// 1. Await the task. This will eventually produce the output of the 120 | /// provided future. The scope will not complete until the output is 121 | /// returned to the awaiting logic. 122 | /// 123 | /// 2. Drop the task. This will stop execution of the future and potentially 124 | /// allow the scope to complete immediately. 125 | /// 126 | /// 3. Cancel the task. This has the same effect as dropping the task, but 127 | /// waits until the futures stops running (which in the worst-case means 128 | /// waiting for the scope to complete). 129 | /// 130 | /// 4. Detach the task. This will allow the future to continue executing 131 | /// even after the task itself is dropped. The scope will only complete 132 | /// after the future polls to completion. Detaching a task with an 133 | /// infinite loop will prevent the scope from completing, and is not 134 | /// recommended. 
135 | /// 136 | pub fn spawn_future(&self, future: F) -> Task 137 | where 138 | F: Future + Send + 'scope, 139 | T: Send + 'scope, 140 | { 141 | // We increment the scope counter; this will prevent the scope from 142 | // ending until after a corresponding `Latch::set` call. 143 | self.job_completed_latch.increment(); 144 | 145 | // The future is dropped when the task is completed or canceled. In 146 | // either case we have to decrement the scope job count. We inject this 147 | // logic into the future itself at the onset. 148 | // 149 | // A useful consequence of this approach is that the scope (and 150 | // therefore the latch) will remain valid at least until the last future 151 | // is dropped. 152 | let scope_ptr = ScopePtr(self); 153 | let future = async move { 154 | let _guard = CallOnDrop(move || { 155 | // SAFETY: Because we called `increment` and the owner is required 156 | // to call `complete`, this scope will remain valid at-least until 157 | // `Latch::set` is called. 158 | unsafe { 159 | let scope = scope_ptr.as_ref(); 160 | Latch::set(&scope.job_completed_latch); 161 | } 162 | }); 163 | future.await 164 | }; 165 | 166 | // The schedule function will turn the future into a job when woken. 167 | let scope_ptr = ScopePtr(self); 168 | let schedule = move |runnable: Runnable| { 169 | // SAFETY: Because we called `increment` and the owner is required 170 | // to call `complete`, this scope will remain valid at-least until 171 | // `Latch::set` is called when the future is dropped. 172 | // 173 | // The future will not be dropped until after the runnable is 174 | // dropped, so the scope pointer must still be valid. 175 | let scope = unsafe { scope_ptr.as_ref() }; 176 | 177 | // Now we turn the runnable into a job-ref that we can send to a 178 | // worker. 179 | 180 | // SAFETY: We provide a pointer to a non-null runnable, and we turn 181 | // it back into a non-null runnable. The runnable will remain valid 182 | // until the task is run. 
183 | let job_ref = unsafe { 184 | JobRef::new_raw(runnable.into_raw().as_ptr(), |this| { 185 | let this = NonNull::new_unchecked(this.cast_mut()); 186 | let runnable = Runnable::<()>::from_raw(this); 187 | // Poll the task. 188 | runnable.run(); 189 | }) 190 | }; 191 | 192 | // Send this job off to be executed. When this schedule function is 193 | // called on a worker thread this re-schedules it onto the worker's 194 | // local queue, which will generally cause tasks to stick to the 195 | // same thread instead of jumping around randomly. This is also 196 | // faster than injecting into the global queue. 197 | scope.thread_pool.inject_or_push(job_ref); 198 | }; 199 | 200 | // SAFETY: We must ensure that the runnable and the waker do not outlive 201 | // `'scope`. This is ensured because the owner of this scope is 202 | // required to call `Scope::complete`. That function keeps the scope 203 | // alive until the latch is opened, and the latch will not open until 204 | // after the future is dropped, which can happen only after the runnable 205 | // and waker are dropped. 206 | // 207 | // We have to use `spawn_unchecked` here instead of `spawn` because the 208 | // future is non-static. 209 | let (runnable, task) = unsafe { async_task::spawn_unchecked(future, schedule) }; 210 | // Call the schedule function once to create the initial job. 211 | runnable.schedule(); 212 | task 213 | } 214 | 215 | /// Spawns an async closure onto the scope. This future will be 216 | /// asynchronously polled to completion some time before the scope 217 | /// completes. 218 | /// 219 | /// Internally the closure is wrapped into a future and passed along to 220 | /// [`Scope::spawn_future`]. See the docs on that function for more 221 | /// information. 
222 | pub fn spawn_async(&self, f: Fn) -> Task 223 | where 224 | Fn: FnOnce(&Scope<'scope>) -> Fut + Send + 'static, 225 | Fut: Future + Send + 'static, 226 | T: Send + 'static, 227 | { 228 | // Wrap the function into a future using an async block. 229 | let scope_ptr = ScopePtr(self); 230 | let future = async move { 231 | // SAFETY: The scope will be valid at least until this future is 232 | // dropped because of the drop guard in `spawn_future`. 233 | let scope = unsafe { scope_ptr.as_ref() }; 234 | f(scope).await 235 | }; 236 | // We just pass this future to `spawn_future`. 237 | self.spawn_future(future) 238 | } 239 | 240 | /// Consumes the scope and blocks until all jobs spawned on it are complete. 241 | pub fn complete(self, owner: &WorkerThread) { 242 | // SAFETY: The latch is valid until the scope is dropped at the end of 243 | // this function. 244 | unsafe { Latch::set(&self.job_completed_latch) }; 245 | // Run the thread until the jobs are complete, then return. 246 | owner.run_until(&self.job_completed_latch); 247 | } 248 | } 249 | 250 | // ----------------------------------------------------------------------------- 251 | // Scope pointer 252 | 253 | /// Used to capture a scope `&Self` pointer in jobs, without faking a lifetime. 254 | /// 255 | /// Unsafe code is still required to dereference the pointer, but that's fine in 256 | /// scope jobs that are guaranteed to execute before the scope ends. 257 | struct ScopePtr(*const T); 258 | 259 | // SAFETY: !Send for raw pointers is not for safety, just as a lint 260 | unsafe impl Send for ScopePtr {} 261 | 262 | // SAFETY: !Sync for raw pointers is not for safety, just as a lint 263 | unsafe impl Sync for ScopePtr {} 264 | 265 | impl ScopePtr { 266 | // Helper to avoid disjoint captures of `scope_ptr.0` 267 | // 268 | // # Safety 269 | // 270 | // Callers must ensure the scope pointer is still valid. 
    unsafe fn as_ref(&self) -> &T {
        // SAFETY: The caller is required to ensure that the scope pointer is
        // still valid.
        unsafe { &*self.0 }
    }
}
-------------------------------------------------------------------------------- /src/thread_pool.rs: --------------------------------------------------------------------------------
//! This module contains the api and worker logic for the Forte thread pool.

use alloc::{collections::VecDeque, sync::Arc};
use core::{
    cell::OnceCell,
    cmp,
    future::Future,
    num::NonZero,
    pin::Pin,
    ptr::{self, NonNull},
    task::{Context, Poll},
    time::Duration,
};

use async_task::{Runnable, Task};
use crossbeam_utils::CachePadded;

use tracing::{debug, info};

// Threading primitives routed through `crate::primitives`, which swaps in
// `loom` mocks when model-checking (see lib.rs).
use crate::primitives::*;

use crate::{
    job::{HeapJob, JobRef, StackJob},
    latch::{AtomicLatch, Latch, LockLatch, Probe, SetOnWake, WakeLatch},
    scope::*,
    util::{Slot, XorShift64Star},
};

// -----------------------------------------------------------------------------
// Thread pool types

/// This crate puts a hard upper-bound on the maximum size of a threadpool and
/// pre-allocates space for that number of threads.
///
/// I've chosen 32 as a reasonable default for development, as that's generally
/// the maximum number of threads available on flagship consumer hardware.
///
/// Note that this is only the hard upper bound on thread pool size. Thread
/// pools can be dynamically resized at runtime.
pub const MAX_THREADS: usize = 32;

/// The `ThreadPool` object is used to orchestrate and distribute work to a pool
/// of threads, and is generally the main entry point to using `Forte`.
///
/// # Creating Thread Pools
///
/// Thread pools should be static and const constructed.
You don't have to worry
/// about `LazyStatic` or anything else; to create a new thread pool, just call
/// [`ThreadPool::new`].
///
/// ```
/// # #![cfg(not(loom))]
/// # use forte::prelude::*;
/// // Allocate a new thread pool.
/// static COMPUTE: ThreadPool = ThreadPool::new();
///
/// fn main() {
///     // Spawn a task onto the pool.
///     COMPUTE.spawn(|| {
///         println!("Do your work here");
///     });
/// }
/// ```
///
/// This attaches a new thread pool to your program named `COMPUTE`, which you
/// can begin to schedule work on immediately. The thread pool will exist for
/// the entire duration of your program, and will shut down when your program
/// completes.
///
/// # Resizing Thread Pools
///
/// Thread pools are dynamically sized; When your program starts they have size
/// zero (meaning no threads are running), and you will have to add threads by
/// resizing it. The simplest way to resize a pool is via
/// [`ThreadPool::resize_to_available`] which will simply fill all the available
/// space. More granular control is possible through other methods such as
/// [`ThreadPool::grow`], [`ThreadPool::shrink`], or [`ThreadPool::resize_to`].
///
pub struct ThreadPool {
    /// This contains the shared state for each worker thread.
    threads: [CachePadded<ThreadInfo>; MAX_THREADS],
    /// A queue of pending jobs that can be taken by any thread. It uses the
    /// lock-free queue from crossbeam.
    queue: Queue<JobRef>,
    /// The thread pool state is a collection of infrequently modified shared
    /// data. It's bundled together into a cache line so that atomic writes
    /// don't cause unrelated cache-misses.
    state: CachePadded<ThreadPoolState>,
}

/// Core information about the thread pool. This data may be read from
/// frequently and should only be written to infrequently.
struct ThreadPoolState {
    /// Tracks the number of currently running threads, including currently
    /// sleeping threads. This should only be written to when the `is_resizing`
    /// mutex is held. It is not placed within the mutex because it can be
    /// safely read at any time.
    running_threads: AtomicUsize,
    /// A mutex used to guard the resizing critical section.
    is_resizing: Mutex<()>,
    /// Controls for the thread that sends out heartbeat notifications.
    heartbeat_control: ThreadControl,
}

/// Information for a specific worker thread.
struct ThreadInfo {
    /// This is the thread's "heartbeat": an atomic bool which is set
    /// periodically by a coordination thread. The heartbeat is used to
    /// "promote" local jobs to shared jobs.
    heartbeat: AtomicBool,
    /// Each worker may "share" one job, allowing other workers to claim it if
    /// they are busy. This is typically the last (oldest) job on their queue.
    // NOTE(review): the slot's element type was garbled in extraction;
    // `JobRef` matches how shared jobs are produced and consumed — confirm.
    shared_job: Slot<JobRef>,
    /// Information used to control the thread's life cycle.
    control: ThreadControl,
}

// Status bits stored in `ThreadControl::status`.
const FREE: usize = 0;
const RUNNING: usize = 1 << 1;
const SLEEPING: usize = 1 << 2;
const AWOKEN: usize = 1 << 3;

/// This is a generalized control mechanism for a thread, implementing sleeping,
/// wakeups and a termination procedure. This is used by all the worker threads
/// and also the heartbeat-sender thread.
struct ThreadControl {
    /// Bitflags (`FREE`/`RUNNING`/`SLEEPING`/`AWOKEN`) guarded by a mutex so
    /// they can be paired with the condvar below.
    status: Mutex<usize>,
    /// Notified whenever `status` changes.
    status_changed: Condvar,
    /// A latch that terminates the thread when set.
    should_terminate: AtomicLatch,
}

// -----------------------------------------------------------------------------
// Thread pool creation and maintenance

#[allow(clippy::new_without_default)]
impl ThreadPool {
    /// Creates a new thread pool. This function should be used to define a
    /// `static` variable rather than to allocate something on the stack during
    /// runtime.
    #[cfg(not(loom))]
    pub const fn new() -> ThreadPool {
        // We use these constructs to construct new thread pools. Clippy will
        // try to tell us this is bad because the const items will be copied
        // each time they are used. But that's exactly the behavior we want,
        // since these are used to populate an array.

        #[allow(clippy::declare_interior_mutable_const)]
        const THREAD_CONTROL: ThreadControl = ThreadControl {
            status: Mutex::new(FREE),
            status_changed: Condvar::new(),
            should_terminate: AtomicLatch::new(),
        };

        #[allow(clippy::declare_interior_mutable_const)]
        const THREAD_INFO: CachePadded<ThreadInfo> = CachePadded::new(ThreadInfo {
            heartbeat: AtomicBool::new(false),
            shared_job: Slot::empty(),
            control: THREAD_CONTROL,
        });

        ThreadPool {
            threads: [THREAD_INFO; MAX_THREADS],
            queue: Queue::new(),
            state: CachePadded::new(ThreadPoolState {
                running_threads: AtomicUsize::new(0),
                heartbeat_control: THREAD_CONTROL,
                is_resizing: Mutex::new(()),
            }),
        }
    }

    /// Non-const constructor variant for loom.
174 | #[cfg(loom)] 175 | pub fn new() -> ThreadPool { 176 | let threads = [(); MAX_THREADS].map(|_| { 177 | CachePadded::new(ThreadInfo { 178 | heartbeat: AtomicBool::new(false), 179 | shared_job: Slot::empty(), 180 | control: ThreadControl { 181 | status: Mutex::new(FREE), 182 | status_changed: Condvar::new(), 183 | should_terminate: AtomicLatch::new(), 184 | }, 185 | }) 186 | }); 187 | 188 | ThreadPool { 189 | threads, 190 | queue: Queue::new(), 191 | state: CachePadded::new(ThreadPoolState { 192 | running_threads: AtomicUsize::new(0), 193 | heartbeat_control: ThreadControl { 194 | status: Mutex::new(FREE), 195 | status_changed: Condvar::new(), 196 | should_terminate: AtomicLatch::new(), 197 | }, 198 | is_resizing: Mutex::new(()), 199 | }), 200 | } 201 | } 202 | 203 | /// Resizes the thread pool to fill all available space. After this returns, 204 | /// the pool will have at least one worker thread and at most `MAX_THREADS`. 205 | /// Returns the new size of the pool. 206 | /// 207 | /// See [`ThreadPool::resize`] for more information about resizing. 208 | pub fn resize_to_available(&'static self) -> usize { 209 | let available = available_parallelism().map(NonZero::get).unwrap_or(1); 210 | self.resize_to(available) 211 | } 212 | 213 | /// Resizes the pool to the specified number of threads. Returns the new 214 | /// size of the thread pool, which may be smaller than requested. 215 | /// 216 | /// See [`ThreadPool::resize`] for more information about resizing. 217 | pub fn resize_to(&'static self, new_size: usize) -> usize { 218 | self.resize(|_| new_size) 219 | } 220 | 221 | /// Adds the given number of threads to the thread pool. Returns the new 222 | /// size of the pool, which may be smaller than requested. 223 | /// 224 | /// See [`ThreadPool::resize_to`] for more information about resizing. 
225 | pub fn grow(&'static self, added_threads: usize) -> usize { 226 | self.resize(|current_size| current_size + added_threads) 227 | } 228 | 229 | /// Removes the given number of thread from the thread pool. Returns the new 230 | /// size of the pool. 231 | /// 232 | /// See [`ThreadPool::resize_to`] for more information about resizing. 233 | pub fn shrink(&'static self, terminated_threads: usize) -> usize { 234 | self.resize(|current_size| current_size - terminated_threads) 235 | } 236 | 237 | /// Ensures that there is at least one worker thread attached to the thread 238 | /// pool. This is mostly used to avoid deadlocks. This should be called 239 | /// before blocking on a thread pool to ensure the block will eventually be 240 | /// released. Returns the new size of the pool, which will be either the old 241 | /// size or one. 242 | /// 243 | /// See [`ThreadPool::resize_to`] for more information about resizing. 244 | pub fn populate(&'static self) -> usize { 245 | self.resize( 246 | |current_size| { 247 | if current_size == 0 { 248 | 1 249 | } else { 250 | current_size 251 | } 252 | }, 253 | ) 254 | } 255 | 256 | /// Removes all worker threads from the thread pool. This should only be 257 | /// done carefully, as blocking on an empty pool can cause a deadlock. 258 | /// 259 | /// See [`ThreadPool::resize_to`] for more information about resizing. 260 | pub fn depopulate(&'static self) -> usize { 261 | self.resize_to(0) 262 | } 263 | 264 | /// Resizes the pool. 265 | /// 266 | /// When called from within the pool, this does nothing. Pools can only be 267 | /// resized by other threads. 268 | pub fn resize(&'static self, get_size: F) -> usize 269 | where 270 | F: Fn(usize) -> usize, 271 | { 272 | // We cannot shrink the pool from within the pool, so we simply refuse 273 | // the request and return the same size. 
274 | if WorkerThread::with(|worker_thread| worker_thread.is_some()) { 275 | return self.state.running_threads.load(Ordering::Acquire); 276 | } 277 | 278 | debug!("waiting to start thread pool resize"); 279 | 280 | // Resizing is a critical section; only one thread is allowed to resize 281 | // the thread pool at a time. To ensure this exclusivity, we lock a 282 | // boolean mutex. 283 | let _resizing_guard = self.state.is_resizing.lock().unwrap(); 284 | 285 | debug!("starting thread pool resize"); 286 | 287 | // Use the provided callback to determine the new size of the pool, 288 | // clamping it to the max size. We don't have to worry about race 289 | // conditions here because it's a critical section. The entire section 290 | // is effectively atomic. 291 | let current_size = self.state.running_threads.load(Ordering::Acquire); 292 | let new_size = usize::min(get_size(current_size), MAX_THREADS); 293 | 294 | // If the size is unchanged we can return early. 295 | if new_size == current_size { 296 | info!("keeping current size {}", current_size); 297 | return current_size; 298 | } 299 | 300 | info!( 301 | "resizing thread pool from {} to {} thread(s)", 302 | current_size, new_size 303 | ); 304 | 305 | // Otherwise we can store the new size. We still don't have to worry 306 | // about data races between this and the atomic load above because is 307 | // this the only place it's set, and we are in a critical section 308 | // guarded by the `is_resizing` mutex. 309 | // 310 | // At this point, other threads will begin to thread the thread pool as 311 | // having been resized, even though we have not actually created new or 312 | // cleaned up old threads yet. 313 | self.state 314 | .running_threads 315 | .store(new_size, Ordering::Release); 316 | 317 | match new_size.cmp(¤t_size) { 318 | cmp::Ordering::Equal => {} 319 | cmp::Ordering::Greater => { 320 | // Spawn each new thread. 
321 | for i in current_size..new_size { 322 | debug!("spawning thread {}", i); 323 | self.threads[i].control.run(move || { 324 | // SAFETY: The main loop is the first thing called 325 | // on the new thread. 326 | unsafe { main_loop(self, i) } 327 | }); 328 | } 329 | 330 | // Wait for each thread to become ready. 331 | for i in current_size..new_size { 332 | debug!("waiting for thread {} to become ready", i); 333 | self.threads[i].control.await_ready(); 334 | } 335 | 336 | // Spawn the heartbeat thread if it's not running. 337 | if current_size == 0 { 338 | debug!("spawning heartbeat thread"); 339 | self.state 340 | .heartbeat_control 341 | .run(move || heartbeat_loop(self)); 342 | 343 | debug!("waiting for heartbeat thread to become ready"); 344 | self.state.heartbeat_control.await_ready(); 345 | } 346 | } 347 | cmp::Ordering::Less => { 348 | // Ask each thread to terminate. 349 | for i in new_size..current_size { 350 | debug!("halting thread {}", i); 351 | self.threads[i].control.halt(); 352 | } 353 | 354 | // Wait for each thread to terminate. 355 | for i in new_size..current_size { 356 | debug!("waiting for thread {} to terminate", i); 357 | self.threads[i].control.await_termination(); 358 | } 359 | 360 | // Terminate the heartbeat thread if the pool is empty. 361 | if new_size == 0 { 362 | debug!("halting heartbeat thread"); 363 | self.state.heartbeat_control.halt(); 364 | 365 | debug!("waiting for heartbeat thread to terminate"); 366 | self.state.heartbeat_control.await_termination(); 367 | } 368 | } 369 | } 370 | 371 | debug!("completed thread pool resize"); 372 | 373 | // Release the lock and return the new size. 374 | new_size 375 | } 376 | 377 | /// Returns an opaque identifier for this thread pool. 378 | pub fn id(&'static self) -> usize { 379 | // We can rely on `self` not to change since it's a static ref. 
380 | ptr::from_ref(self) as usize 381 | } 382 | 383 | /// When called on a worker thread, this pushes the job directly into the 384 | /// local queue. Otherwise it injects it into the thread pool queue. 385 | pub fn inject_or_push(&'static self, job_ref: JobRef) { 386 | WorkerThread::with(|worker_thread| match worker_thread { 387 | Some(worker_thread) if worker_thread.thread_pool().id() == self.id() => { 388 | debug!("pushing job to local queue"); 389 | worker_thread.push(job_ref); 390 | } 391 | _ => self.inject(job_ref), 392 | }); 393 | } 394 | 395 | /// Injects a job into the thread pool. 396 | pub fn inject(&'static self, job_ref: JobRef) { 397 | debug!("injecting job into thread pool"); 398 | self.queue.push(job_ref); 399 | 400 | // Wake a thread to work on the task. 401 | self.wake_any(1); 402 | } 403 | 404 | /// Pops a job from the thread pool's injector queue. 405 | pub fn pop(&'static self) -> Option { 406 | debug!("popping job from thread pool"); 407 | self.queue.pop() 408 | } 409 | 410 | /// Runs the provided function in one of this thread pool's workers. If 411 | /// called by a worker, the function is immediately run on that worker. 412 | /// Otherwise (if called from a thread belonging to a different thread pool 413 | /// or not belonging to a thread pool) the function is queued on the pool and 414 | /// executed as a job. 415 | /// 416 | /// This function blocks until the function is complete, possibly putting 417 | /// the current thread to sleep. 418 | pub fn in_worker(&'static self, f: F) -> T 419 | where 420 | F: FnOnce(&WorkerThread, bool) -> T + Send, 421 | T: Send, 422 | { 423 | WorkerThread::with(|worker_thread| match worker_thread { 424 | // If we are not in a worker, pack the function into a job and send it 425 | // to the global injector queue. This will block until the job completes. 
426 | None => self.in_worker_cold(f), 427 | Some(worker_thread) => { 428 | if worker_thread.thread_pool.id() != self.id() { 429 | // We are in a worker thread, but not in the same thread pool. 430 | // Package the job into a thread but then do idle work until it 431 | // completes. 432 | self.in_worker_cross(worker_thread, f) 433 | } else { 434 | // We are in a worker thread for the correct thread pool, so we can 435 | // just execute the function directly. 436 | f(worker_thread, false) 437 | } 438 | } 439 | }) 440 | } 441 | 442 | /// Queues the provided closure for execution on a worker and then blocks 443 | /// the thread (with a mutex) until it completes. 444 | /// 445 | /// This is intended for situations where you want to run something in a 446 | /// worker from a non-worker thread. It's used to implement the public 447 | /// `in_worker` function just above. 448 | #[cold] 449 | fn in_worker_cold(&'static self, f: F) -> T 450 | where 451 | F: FnOnce(&WorkerThread, bool) -> T + Send, 452 | T: Send, 453 | { 454 | // Rust's thread locals can actually be fairly costly unless the 455 | // special `const` variant is used. Loom dosn't support this, so we 456 | // have to do this annoying conditional here. 457 | #[cfg(not(loom))] 458 | std::thread_local!(static LOCK_LATCH: LockLatch = const { LockLatch::new() }); 459 | #[cfg(loom)] 460 | loom::thread_local!(static LOCK_LATCH: LockLatch = LockLatch::new() ); 461 | 462 | LOCK_LATCH.with(|latch| { 463 | let mut result = None; 464 | let job = StackJob::new(|| { 465 | WorkerThread::with(|worker_thread| { 466 | // Since this is within a job, and jobs only execute on 467 | // worker threads, this must be non-null. 468 | let worker_thread = worker_thread.unwrap(); 469 | 470 | // Run the user-provided function and write the output directly 471 | // to the result. 472 | result = Some(f(worker_thread, true)); 473 | 474 | // SAFETY: This latch is static, so the pointer is always valid. 
475 | unsafe { Latch::set(latch) }; 476 | }); 477 | }); 478 | 479 | // Inject the job into the thread pool for execution. 480 | 481 | // SAFETY: The job will remain valid until the end of this scope. 482 | // This scope will only end when the latch is set, and the latch 483 | // will only be set when the job executes, so this scope is valid 484 | // until the job executes. 485 | let job_ref = unsafe { job.as_job_ref() }; 486 | self.inject(job_ref); 487 | 488 | // Block the thread until the job completes, then reset the latch. 489 | latch.wait_and_reset(); 490 | 491 | // Return the result 492 | result.unwrap() 493 | }) 494 | } 495 | 496 | /// Queues the provided closure for execution on a different worker, but 497 | /// keeps running tasks for the current worker. 498 | /// 499 | /// The `current_thread` is a worker from a different pool, which is queuing 500 | /// work into this pool, across thread pool boundaries. 501 | fn in_worker_cross(&'static self, current_thread: &WorkerThread, f: F) -> T 502 | where 503 | F: FnOnce(&WorkerThread, bool) -> T + Send, 504 | T: Send, 505 | { 506 | // Create a latch with a reference to the current thread. 507 | let latch = WakeLatch::new(current_thread); 508 | let mut result = None; 509 | let job = StackJob::new(|| { 510 | WorkerThread::with(|worker_thread| { 511 | // Jobs are only executed on worker threads, so this must be 512 | // non-null. 513 | let worker_thread = worker_thread.unwrap(); 514 | 515 | result = Some(f(worker_thread, true)); 516 | 517 | // SAFETY: This latch is valid until this function returns, and it 518 | // does not return until the latch is set. 519 | unsafe { Latch::set(&latch) }; 520 | }); 521 | }); 522 | 523 | // SAFETY: This job is valid for this entire scope. The scope does not 524 | // exit until the function returns, the job does not return until the 525 | // latch is set, and the latch cannot be set until the job runs. 
526 | let job_ref = unsafe { job.as_job_ref() }; 527 | self.inject(job_ref); 528 | 529 | // Run tasks on the current thread until the job completes, possibly 530 | // putting the thread to sleep. 531 | current_thread.run_until(&latch); 532 | 533 | // Return the result. 534 | result.unwrap() 535 | } 536 | 537 | /// Tries to wake a number of threads. Returns the number of threads 538 | /// actually woken. 539 | pub fn wake_any(&'static self, num_to_wake: usize) -> usize { 540 | if num_to_wake > 0 { 541 | // Iterate through the threads, trying to wake each one until we run 542 | // out or have reached our target number. 543 | let mut num_woken = 0; 544 | let num_threads = self.state.running_threads.load(Ordering::Relaxed); 545 | for index in 0..num_threads { 546 | if self.wake_thread(index) { 547 | num_woken += 1; 548 | if num_to_wake == num_woken { 549 | return num_woken; 550 | } 551 | } 552 | } 553 | num_woken 554 | } else { 555 | 0 556 | } 557 | } 558 | 559 | /// Wakes a worker that has gone to sleep. Returns true if the worker was 560 | /// woken up, false if it was already awake. 561 | pub fn wake_thread(&'static self, index: usize) -> bool { 562 | self.threads[index].control.wake() 563 | } 564 | } 565 | 566 | // ----------------------------------------------------------------------------- 567 | // Thread control 568 | 569 | impl ThreadControl { 570 | /// Spawns a thread with the provided closure. It's expected that this 571 | /// thread is given a reference to this `ThreadControl`. 572 | /// 573 | /// The `ThreadControl` api is split into two halves: one half is tended to 574 | /// be called by the "controller" (the thread that calls this) and the other 575 | /// half is intended to be called by the "worker" (the thread this spawns). 576 | fn run(&'static self, f: F) 577 | where 578 | F: FnOnce() + Send + 'static, 579 | { 580 | spawn_thread(f); 581 | } 582 | 583 | /// The controller may call this to wait until the client calls 584 | /// `post_ready_status`. 
585 | fn await_ready(&'static self) { 586 | let mut status = self.status.lock().unwrap(); 587 | while *status & RUNNING == 0 { 588 | status = self.status_changed.wait(status).unwrap(); 589 | } 590 | } 591 | 592 | /// The worker should call this to indicate that it is now entering it's 593 | /// main loop. 594 | fn post_ready_status(&'static self) { 595 | let mut status = self.status.lock().unwrap(); 596 | *status |= RUNNING; 597 | self.status_changed.notify_all(); 598 | } 599 | 600 | /// The controller should call this whenever it wishes to wake the worker. 601 | /// 602 | /// This assumes the worker has set `is_sleeping` to true and is waiting for 603 | /// `awakened`. There is no `sleep` function because sleeping behavior tends 604 | /// to be implementation specific. 605 | fn wake(&'static self) -> bool { 606 | let mut status = self.status.lock().unwrap(); 607 | *status |= AWOKEN; 608 | self.status_changed.notify_all(); 609 | *status & SLEEPING != 0 610 | } 611 | 612 | /// The controller should call this to tell the worker to exit it's main 613 | /// thread. 614 | /// 615 | /// This assumes the worker is looping waiting for `should_terminate` to be 616 | /// set. 617 | fn halt(&'static self) { 618 | // SAFETY: This latch has a static lifetime so is always valid. 619 | unsafe { Latch::set(&self.should_terminate) } 620 | self.wake(); 621 | } 622 | 623 | /// The controller may call this to wait until the client calls 624 | /// `post_termination_status`. 625 | fn await_termination(&'static self) { 626 | let mut status = self.status.lock().unwrap(); 627 | while *status & RUNNING != 0 { 628 | status = self.status_changed.wait(status).unwrap(); 629 | } 630 | } 631 | 632 | /// The worker should call this right before it terminates. 
633 | fn post_termination_status(&'static self) { 634 | let mut status = self.status.lock().unwrap(); 635 | *status = FREE; 636 | self.status_changed.notify_all(); 637 | } 638 | } 639 | 640 | // ----------------------------------------------------------------------------- 641 | // Core API 642 | 643 | impl ThreadPool { 644 | /// Spawns a new closure onto the thread pool. Just like a standard thread, 645 | /// this task is not tied to the current stack frame, and hence it cannot 646 | /// hold any references other than those with 'static lifetime. If you want 647 | /// to spawn a task that references stack data, use the 648 | /// [`ThreadPool::scope()`] function to create a scope. 649 | /// 650 | /// Since tasks spawned with this function cannot hold references into the 651 | /// enclosing stack frame, you almost certainly want to use a move closure 652 | /// as their argument (otherwise, the closure will typically hold references 653 | /// to any variables from the enclosing function that you happen to use). 654 | /// 655 | /// To spawn an async closure or future, use [`ThreadPool::spawn_async`] or 656 | /// [`ThreadPool::spawn_future`]. To spawn a non-static closure, use 657 | /// [`ThreadPool::scope`]. 658 | pub fn spawn(&'static self, f: F) 659 | where 660 | F: FnOnce() + Send + 'static, 661 | { 662 | let job = HeapJob::new(f); 663 | // SAFETY: The thread pool executes each `JobRef` exactly once each time 664 | // it is queued. We queue this exactly once, so it is only executed 665 | // exactly once. 666 | let job_ref = unsafe { job.into_static_job_ref() }; 667 | self.inject_or_push(job_ref); 668 | } 669 | 670 | /// Spawns a future onto the scope. See [`ThreadPool::spawn`] for more 671 | /// information about spawning jobs. Only static futures are supported 672 | /// through this function, but you can use `ThreadPool::scope` to get a scope 673 | /// on which non-static futures and async tasks can be spawned. 
674 | /// 675 | /// # Returns 676 | /// 677 | /// This returns a task, which represents a handle to the async computation 678 | /// and is itself a future that can be awaited to receive the output of the 679 | /// future. There's four ways to interact with a task: 680 | /// 681 | /// 1. Await the task. This will eventually produce the output of the 682 | /// provided future. 683 | /// 684 | /// 2. Drop the task. This will stop execution of the future. 685 | /// 686 | /// 3. Cancel the task. This has the same effect as dropping the task, but 687 | /// waits until the future stops running (which can take a while). 688 | /// 689 | /// 4. Detach the task. This will allow the future to continue executing 690 | /// even after the task itself is dropped. 691 | /// 692 | pub fn spawn_future(&'static self, future: F) -> Task 693 | where 694 | F: Future + Send + 'static, 695 | T: Send + 'static, 696 | { 697 | // The schedule function will turn the future into a job when woken. 698 | let schedule = move |runnable: Runnable| { 699 | // Now we turn the runnable into a job-ref that we can send to a 700 | // worker. 701 | 702 | // SAFETY: We provide a pointer to a non-null runnable, and we turn 703 | // it back into a non-null runnable. The runnable will remain valid 704 | // until the task is run. 705 | let job_ref = unsafe { 706 | JobRef::new_raw(runnable.into_raw().as_ptr(), |this| { 707 | let this = NonNull::new_unchecked(this.cast_mut()); 708 | let runnable = Runnable::<()>::from_raw(this); 709 | // Poll the task. This will drop the future if the task is 710 | // canceled or the future completes. 711 | 712 | runnable.run(); 713 | }) 714 | }; 715 | 716 | // Send this job off to be executed. When this schedule function is 717 | // called on a worker thread this re-schedules it onto the worker's 718 | // local queue, which will generally cause tasks to stick to the 719 | // same thread instead of jumping around randomly. 
This is also 720 | // faster than injecting into the global queue. 721 | self.inject_or_push(job_ref); 722 | }; 723 | 724 | // Creates a task from the future and schedule. 725 | let (runnable, task) = async_task::spawn(future, schedule); 726 | // Call the schedule function once to create the initial job. 727 | runnable.schedule(); 728 | task 729 | } 730 | 731 | /// Like [`ThreadPool::spawn_future`] but accepts an async closure instead of 732 | /// a future. Here again everything must be static (but there is a 733 | /// non-static equivalent on [`Scope`]). 734 | /// 735 | /// Internally this wraps the closure into a new future and passes it along 736 | /// over to `spawn_future`. 737 | pub fn spawn_async(&'static self, f: Fn) -> Task 738 | where 739 | Fn: FnOnce() -> Fut + Send + 'static, 740 | Fut: Future + Send + 'static, 741 | T: Send + 'static, 742 | { 743 | // Wrap the function into a future using an async block. 744 | let future = async move { f().await }; 745 | // We just pass this future to `spawn_future`. 746 | self.spawn_future(future) 747 | } 748 | 749 | /// Polls a future to completion, then returns the outcome. This function 750 | /// will prioritize polling the future as soon as it becomes available, and 751 | /// while the future is not available it will try to do other meaningfully 752 | /// work (if executed on a thread pool) or block (if not executed on a thread 753 | /// pool). 754 | pub fn block_on(&'static self, mut future: F) -> T 755 | where 756 | F: Future + Send, 757 | T: Send, 758 | { 759 | // We pin the future so that we can poll it. 760 | // SAFETY: This future is dropped at the end of this scope and is not 761 | // moved before then, so it is effectively pinned. 762 | let mut future = unsafe { Pin::new_unchecked(&mut future) }; 763 | self.in_worker(|worker_thread, _| { 764 | debug!("running job created by block_on"); 765 | // Create a callback that will wake this thread when the future is 766 | // ready to be polled again. 
767 | let wake = SetOnWake::new(WakeLatch::new(worker_thread)); 768 | let ctx_waker = Arc::clone(&wake).into(); 769 | let mut ctx = Context::from_waker(&ctx_waker); 770 | // Keep polling the future, running work until it is woken up again. 771 | loop { 772 | match future.as_mut().poll(&mut ctx) { 773 | Poll::Ready(res) => return res, 774 | Poll::Pending => { 775 | worker_thread.run_until(wake.latch()); 776 | wake.latch().reset(); 777 | } 778 | } 779 | } 780 | }) 781 | } 782 | 783 | /// Takes two closures and *potentially* runs them in parallel. It returns a 784 | /// pair of the results from those closures. It is conceptually similar to 785 | /// spawning to threads, but it can be significantly faster due to 786 | /// optimizations in the thread pool. 787 | /// 788 | /// When called from outside the thread pool this will block until both 789 | /// closures are executed. When called within the thread pool, the worker 790 | /// thread will attempt to do other work while it's waiting. 791 | pub fn join(&'static self, a: A, b: B) -> (RA, RB) 792 | where 793 | A: FnOnce() -> RA + Send, 794 | B: FnOnce() -> RB + Send, 795 | RA: Send, 796 | RB: Send, 797 | { 798 | self.in_worker(|worker_thread, _| { 799 | // We will execute `a` and create a job to run `b` in parallel. 800 | let mut status_b = None; 801 | // Create a new latch that can wake this thread when it completes. 802 | let latch_b = WakeLatch::new(worker_thread); 803 | // Create a job which runs b, returns the outcome back to the stack, 804 | // and trips the latch. 805 | let job_b = StackJob::new(|| { 806 | status_b = Some(b()); 807 | // SAFETY: This job is valid until the end of the scope and is 808 | // not dropped until this function returns. The function does 809 | // not return until after the latch is set. 810 | unsafe { Latch::set(&latch_b) }; 811 | }); 812 | // SAFETY: This job is valid until the end of this scope, and is not 813 | // dropped until this function returns. 
The function does not return 814 | // until the latch is set, which can only happen when this job is 815 | // executed. 816 | let job_b_ref = unsafe { job_b.as_job_ref() }; 817 | let job_b_ref_id = job_b_ref.id(); 818 | worker_thread.push(job_b_ref); 819 | 820 | // Execute task A. 821 | let status_a = a(); 822 | 823 | // We wait for `job_b` to complete. At this point we don't know if 824 | // `job_b` has been shared or is still somewhere on the local stack, 825 | // so we go hunting through the stack for it. 826 | while !latch_b.probe() { 827 | if let Some(job) = worker_thread.pop() { 828 | if job.id() == job_b_ref_id { 829 | // We found `job_b`, now we have to execute it. First we 830 | // will try to share a job by calling `tick`. Normally 831 | // this is done by `execute` but we have to call it 832 | // manually here. 833 | worker_thread.tick(); 834 | // Since we still are holding the original `job_b` we 835 | // can run it without the indirection from the job-ref, 836 | // allowing the compiler to optimize to closure. 837 | job_b.run_inline(); 838 | // Having run the job we can break, since we know 839 | // `latch_b` should now be set. 840 | break; 841 | } 842 | 843 | // If it wasn't `job_b` we execute the job-ref normally. 844 | worker_thread.execute(job); 845 | } else { 846 | // We executed all our local jobs, so `job_b` must have been 847 | // shared. We wait until it completes. This will put the 848 | // thread to sleep at first, but it may wake up and do more 849 | // work before this returns. 850 | worker_thread.run_until(&latch_b); 851 | } 852 | } 853 | 854 | // Return the outcome of the two closures. 855 | (status_a, status_b.unwrap()) 856 | }) 857 | } 858 | 859 | /// Creates a scope on which new work can be spawned. Spawned jobs may run 860 | /// asynchronously with respect to the closure; they may themselves spawn 861 | /// additional tasks into the scope. 
When the closure returns, it will block 862 | /// until all tasks that have been spawned into `s` complete. 863 | /// 864 | /// This function allows spawning closures, futures and async closures with 865 | /// non-static lifetimes. 866 | pub fn scope<'scope, F, T>(&'static self, f: F) -> T 867 | where 868 | F: FnOnce(&Scope<'scope>) -> T + Send, 869 | T: Send, 870 | { 871 | self.in_worker(|owner_thread, _| { 872 | // SAFETY: The scope is completed before it is dropped. 873 | unsafe { 874 | let scope = Scope::<'scope>::new(owner_thread); 875 | let outcome = f(&scope); 876 | scope.complete(owner_thread); 877 | outcome 878 | } 879 | }) 880 | } 881 | } 882 | 883 | // ----------------------------------------------------------------------------- 884 | // Worker threads 885 | 886 | /// Data for a local worker thread, typically stored in a thread-local static. 887 | pub struct WorkerThread { 888 | queue: UnsafeCell>, 889 | thread_pool: &'static ThreadPool, 890 | index: usize, 891 | rng: XorShift64Star, 892 | } 893 | 894 | // Rust's thread locals can actually be fairly costly unless the special 895 | // `const` variant is used. Loom dosn't support this, so we have to do this 896 | // annoying conditional here. 897 | #[cfg(not(loom))] 898 | std::thread_local! { 899 | static WORKER_THREAD_STATE: CachePadded> = const { CachePadded::new(OnceCell::new()) }; 900 | } 901 | #[cfg(loom)] 902 | loom::thread_local! { 903 | static WORKER_THREAD_STATE: CachePadded> = CachePadded::new(OnceCell::new()); 904 | } 905 | 906 | impl WorkerThread { 907 | /// Returns access to the shared portion of this thread's data. 908 | #[inline] 909 | fn thread_info(&self) -> &ThreadInfo { 910 | &self.thread_pool.threads[self.index] 911 | } 912 | 913 | /// Acquires a reference to the `WorkerThread` for the current thread. 914 | /// This will be `None` if the current thread is not a worker thread. 
915 | pub fn with(f: impl FnOnce(Option<&Self>) -> R) -> R { 916 | WORKER_THREAD_STATE.with(|worker_thread| f(worker_thread.get())) 917 | } 918 | 919 | /// Returns the thread pool to which the worker belongs. 920 | #[inline] 921 | pub fn thread_pool(&self) -> &'static ThreadPool { 922 | self.thread_pool 923 | } 924 | 925 | /// Returns the unique index of the thread within the thread pool. 926 | #[inline] 927 | pub fn index(&self) -> usize { 928 | self.index 929 | } 930 | 931 | /// Pushes a job onto the local queue. This operation is cheap and local, 932 | /// with no atomics or locks. 933 | #[inline] 934 | pub fn push(&self, job: JobRef) { 935 | let queue = self.queue.get_mut(); 936 | // SAFETY: The queue is thread local. Only methods of `WorkerThread` 937 | // dereference the queue and none of them can call eachother. 938 | unsafe { queue.deref().push_front(job) }; 939 | } 940 | 941 | /// Pops a job from the local queue. This operation is cheap and local, with 942 | /// no atomics or locks. 943 | #[inline] 944 | pub fn pop(&self) -> Option { 945 | let queue = self.queue.get_mut(); 946 | // SAFETY: The queue is thread local. Only methods of `WorkerThread` 947 | // dereference the queue and none of them can call eachother. 948 | unsafe { queue.deref().pop_front() } 949 | } 950 | 951 | /// Removes all jobs from the local queue. 952 | #[inline] 953 | pub fn drain(&self) -> impl Iterator { 954 | let queue = self.queue.get_mut(); 955 | // SAFETY: The queue is thread local. Only methods of `WorkerThread` 956 | // dereference the queue and none of them can call eachother. 957 | let queue = unsafe { core::mem::take(queue.deref()) }; 958 | queue.into_iter() 959 | } 960 | 961 | /// Claims a shared job. Claiming jobs is lock free. This will do at most 962 | /// `MAX_THREADS` atomic read-modify-write operations and at at most one 963 | /// actual write. The worker will try to claim it's own shared job first. 
964 | /// Otherwise it will try to claim shared jobs in sequence starting from a 965 | /// random other node. 966 | #[inline] 967 | pub fn claim_shared(&self) -> Option { 968 | // Try to reclaim this worker's shared job first. 969 | if let Some(job) = self.thread_info().shared_job.take() { 970 | return Some(job); 971 | } 972 | 973 | // Otherwise try to claim shared jobs from random other workers. 974 | let threads = self.thread_pool.threads.as_slice(); 975 | let num_threads = self 976 | .thread_pool 977 | .state 978 | .running_threads 979 | .load(Ordering::Relaxed); 980 | 981 | // It's possible this thread is being resized to zero. When this happens 982 | // it's fine to return early. 983 | if num_threads == 0 { 984 | return None; 985 | } 986 | 987 | // Otherwise pick a random point and start looking for work. 988 | let start = self.rng.next_usize(num_threads); 989 | (start..num_threads) 990 | .chain(0..start) 991 | .filter(move |&i| i != self.index()) 992 | .find_map(|i| threads[i].shared_job.take()) 993 | } 994 | 995 | /// Tries to promote the oldest job in the local stack to a shared job. If 996 | /// the local job queue is empty, this does nothing. If the worker thread 997 | /// already has a shared job, this will instead try to wake one of the other 998 | /// thread to claim it. 999 | #[cold] 1000 | fn promote(&self) { 1001 | debug!("attempting promotion"); 1002 | let queue = self.queue.get_mut(); 1003 | // SAFETY: The queue is thread local. Only methods of `WorkerThread` 1004 | // dereference the queue and none of them can call eachother. 1005 | if let Some(job) = unsafe { queue.deref().pop_back() } { 1006 | // If there's work in the queue, pop it and try to share it 1007 | if let Some(job) = self.thread_info().shared_job.put(job) { 1008 | // SAFETY: Again, the queue is thread local, only methods of 1009 | // `WorkerThread` dereference the queue, and none of them can 1010 | // call eachother. 
1011 | unsafe { queue.deref().push_back(job) }; 1012 | } else { 1013 | // Attempt to wake one other thread to claim this shared job. 1014 | self.thread_pool.wake_any(1); 1015 | } 1016 | } 1017 | } 1018 | 1019 | /// Promotes the oldest local job into a shared job which can be claimed and 1020 | /// executed by other workers in the thread pool. 1021 | /// 1022 | /// This function is amortized. Promotion is somewhat expensive, so this 1023 | /// function will only perform a promotion once in a fixed interval of time 1024 | /// (the heartbeat interval). 1025 | /// 1026 | /// Many parts of the core thread pool api call this function automatically, 1027 | /// but it can also be called manually by users. 1028 | #[inline] 1029 | pub fn tick(&self) { 1030 | // Only runs the promotion if we have received the heartbeat signal. This 1031 | // will happen infrequently so the promotion itself is marked cold. 1032 | if self.thread_info().heartbeat.load(Ordering::Acquire) { 1033 | self.thread_info().heartbeat.store(false, Ordering::Release); 1034 | self.promote(); 1035 | } 1036 | } 1037 | 1038 | /// Executes a job in the main loop. 1039 | /// 1040 | /// This call calls `tick`. Every so often, when the heartbeat signal is received, it will 1041 | /// try to promote a local job to a shared job. 1042 | #[inline] 1043 | pub fn execute(&self, job: JobRef) { 1044 | // Possibly promote a local job. 1045 | self.tick(); 1046 | // Run the job. 1047 | debug!("executing work"); 1048 | job.execute(); 1049 | debug!("work completed"); 1050 | } 1051 | 1052 | /// Runs until the provided latch is set. This will put the thread to sleep 1053 | /// if no work can be found and the latch is still unset. 1054 | #[inline] 1055 | pub fn run_until(&self, latch: &L) { 1056 | if !latch.probe() { 1057 | self.run_until_cold(latch); 1058 | } 1059 | } 1060 | 1061 | /// Runs until the provided latch is set. This will put the thread to sleep 1062 | /// if no work can be found and the latch is still unset. 
Setting the latch 1063 | /// will wake the thread. 1064 | #[cold] 1065 | fn run_until_cold(&self, latch: &L) { 1066 | while !latch.probe() { 1067 | debug!("looking for work"); 1068 | // Try to find work, either on the local queue, the shared jobs 1069 | // vector, or the injector queue. 1070 | if let Some(job) = self.find_work() { 1071 | // SAFETY: No reference is held to the thread's job queue within 1072 | // the main loop, and since it is thread-local there can be no 1073 | // references anywhere. 1074 | self.execute(job); 1075 | continue; 1076 | } 1077 | 1078 | debug!("no work found, going to sleep"); 1079 | 1080 | let control = &self.thread_info().control; 1081 | let mut status = control.status.lock().unwrap(); 1082 | 1083 | while *status & AWOKEN == 0 { 1084 | debug!("sleeping"); 1085 | *status |= SLEEPING; 1086 | status = control.status_changed.wait(status).unwrap(); 1087 | debug!("awoken"); 1088 | } 1089 | 1090 | debug!("woke up"); 1091 | *status &= !(AWOKEN | SLEEPING); 1092 | } 1093 | } 1094 | 1095 | /// Looks for jobs for this worker to work on. It first pulls from the local 1096 | /// queue, then the shared jobs, then the global injector queue. 1097 | /// 1098 | /// It can be as fast as a local queue pop, or as slow as a contested lock. 1099 | #[inline] 1100 | pub fn find_work(&self) -> Option { 1101 | // First we try to pop a job off the local stack. This is an entirely 1102 | // synchronous and local operation, with no atomics or locks. 1103 | // 1104 | // When there are no local jobs, we will try to claim one of the shared 1105 | // jobs. This is more expensive and can result in contested locking. 1106 | // 1107 | // If there are no local jobs and no shared jobs, we will try to pop 1108 | // work off the thread pool's injector queue. This is atomic but may 1109 | // cause us to spin very briefly. 
1110 | self.pop() 1111 | .or_else(|| self.claim_shared()) 1112 | .or_else(|| self.thread_pool().pop()) 1113 | } 1114 | } 1115 | 1116 | // ----------------------------------------------------------------------------- 1117 | // Main worker loop 1118 | 1119 | /// This is the main loop for a worker thread. It's in charge of executing jobs. 1120 | /// Operating on the principle that you should finish what you start before 1121 | /// starting something new, workers will first execute their queue, then execute 1122 | /// shared jobs, then pull new jobs from the injector. 1123 | /// 1124 | /// # Safety 1125 | /// 1126 | /// This must not be called after `set_current` has been called. As a 1127 | /// consequence, this function cannot be called twice on the same thread. 1128 | unsafe fn main_loop(thread_pool: &'static ThreadPool, index: usize) { 1129 | debug!("worker thread started"); 1130 | 1131 | // Store a reference to this thread's control data. 1132 | let control = &thread_pool.threads[index].control; 1133 | 1134 | WORKER_THREAD_STATE.with(|worker_thread| { 1135 | // Register the worker on the thread. 1136 | let worker_thread = worker_thread.get_or_init(|| WorkerThread { 1137 | index, 1138 | thread_pool, 1139 | queue: UnsafeCell::new(VecDeque::with_capacity(32)), 1140 | rng: XorShift64Star::new(index as u64 + 1), 1141 | }); 1142 | 1143 | // Inform other threads that we are starting the main worker loop. 1144 | control.post_ready_status(); 1145 | 1146 | debug!("worker thread ready"); 1147 | 1148 | // Inform other threads that we are starting the main worker loop. 1149 | control.post_ready_status(); 1150 | 1151 | debug!("worker thread running"); 1152 | 1153 | // Run the worker thread until the thread is asked to terminate. 1154 | worker_thread.run_until(&control.should_terminate); 1155 | 1156 | debug!("worker thread halting"); 1157 | 1158 | // Offload any remaining local work into the global queue. 
1159 | for job in worker_thread.drain() { 1160 | thread_pool.inject(job); 1161 | } 1162 | 1163 | // If we had a shared job, push that to the global queue after all the local queue is pushed. 1164 | if let Some(job) = worker_thread.thread_info().shared_job.take() { 1165 | thread_pool.inject(job); 1166 | } 1167 | }); 1168 | 1169 | debug!("worker thread terminating"); 1170 | 1171 | // Inform other threads that we are terminating. 1172 | control.post_termination_status(); 1173 | 1174 | debug!("worker thread terminated"); 1175 | } 1176 | 1177 | // ----------------------------------------------------------------------------- 1178 | // Heartbeat sender loop 1179 | 1180 | /// This is the main loop for the heartbeat thread. It's in charge of 1181 | /// periodically sending a "heartbeat" signal to each worker. By default, each 1182 | /// worker receives a heartbeat about once every 100 μs. 1183 | /// 1184 | /// Workers use the heartbeat signal to amortize the cost of promoting local 1185 | /// jobs to shared jobs (which allows other works to claim them) and to reduce 1186 | /// lock contention. 1187 | fn heartbeat_loop(thread_pool: &'static ThreadPool) { 1188 | let control = &thread_pool.state.heartbeat_control; 1189 | 1190 | control.post_ready_status(); 1191 | 1192 | // Loop as long as the thread pool is running. 1193 | let mut i = 0; 1194 | while !control.should_terminate.probe() { 1195 | // Load the current number of running threads. 1196 | let num_threads = thread_pool.state.running_threads.load(Ordering::Relaxed); 1197 | 1198 | // If there are no threads, automatically shut down. 1199 | if num_threads == 0 { 1200 | break; 1201 | } 1202 | 1203 | // It's possible for the pool to be resized out from under us and end up 1204 | // already above the number of threads. When that happens, we jump back 1205 | // to zero. 1206 | if i >= num_threads { 1207 | i = 0; 1208 | continue; 1209 | } 1210 | 1211 | // Otherwise we will emit a heartbeat for the selected thread. 
1212 | thread_pool.threads[i] 1213 | .heartbeat 1214 | .store(true, Ordering::Relaxed); 1215 | 1216 | // Increment the thread index for the next iteration. 1217 | i += 1; 1218 | 1219 | // Use a 100 microsecond heartbeat interval. Eventually this will be 1220 | // configurable. 1221 | let interval = Duration::from_micros(100); 1222 | 1223 | // We want to space out the heartbeat to each thread, so we divide the 1224 | // interval by the current number of threads. When the thread pool is 1225 | // not resized, this will mean we will stagger the heartbeats out evenly 1226 | // and each thread will get a heartbeat on the given frequency. 1227 | let _interval = interval / num_threads as u32; 1228 | 1229 | // When not running on loom, we put the thread to sleep until we are 1230 | // woken or need to send another heartbeat signal. 1231 | let mut status = control.status.lock().unwrap(); 1232 | while *status & AWOKEN == 0 { 1233 | let timeout; 1234 | (status, timeout) = control 1235 | .status_changed 1236 | .wait_timeout(status, _interval) 1237 | .unwrap(); 1238 | if timeout.timed_out() { 1239 | break; 1240 | } 1241 | } 1242 | *status &= !(AWOKEN | SLEEPING); 1243 | } 1244 | 1245 | control.post_termination_status(); 1246 | } 1247 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use core::mem::{needs_drop, replace, MaybeUninit}; 2 | 3 | use crate::primitives::*; 4 | 5 | // ----------------------------------------------------------------------------- 6 | // Call on drop guard 7 | 8 | // A guard that calls the specified closure when it is dropped. This is used 9 | // internally to run logic when a `Future` is canceled or completed. 
10 | pub struct CallOnDrop(pub F); 11 | 12 | impl Drop for CallOnDrop { 13 | fn drop(&mut self) { 14 | (self.0)(); 15 | } 16 | } 17 | 18 | // ----------------------------------------------------------------------------- 19 | // Slot 20 | 21 | /// A slot is a simple atomic store. Like `Option`, slots are either empty or 22 | /// contain a single value. But unlike `Option`, slots are opaque: the only way 23 | /// to tell if a slot contains a value is to remove it. 24 | /// 25 | /// Slot supports only two operations: 26 | /// + `put` inserts a value into an empty slot (and fails when occupied). 27 | /// + `take` removes a value from an occupied slot (and files when empty). 28 | /// 29 | /// Both these operations are lock-free. The failing path costs only an atomic 30 | /// read. The success path costs an atomic read and three quick writes (which 31 | /// should only ever cause one cache miss on other threads). 32 | /// 33 | /// Neither `put` nor `take` will spin. 34 | pub struct Slot { 35 | slot: UnsafeCell>, 36 | flag: AtomicUsize, 37 | } 38 | 39 | // A flag state indicating the slot is empty. This allows `put` but not `take`. 40 | const NONE: usize = 0; 41 | 42 | // A flag state indicating either a `put` or a `take` is in progress. 43 | const LOCK: usize = 1; 44 | 45 | // A flag state indicating the slot is occupied. This allows `take` but not `put`. 46 | const SOME: usize = 2; 47 | 48 | impl Slot { 49 | /// Creates an empty slot. 50 | #[cfg(not(loom))] 51 | pub const fn empty() -> Slot { 52 | Slot { 53 | slot: UnsafeCell::new(MaybeUninit::uninit()), 54 | flag: AtomicUsize::new(NONE), 55 | } 56 | } 57 | 58 | /// Non-const constructor variant for loom. 59 | #[cfg(loom)] 60 | pub fn empty() -> Slot { 61 | Slot { 62 | slot: UnsafeCell::new(MaybeUninit::uninit()), 63 | flag: AtomicUsize::new(NONE), 64 | } 65 | } 66 | 67 | /// Tries to put a new value in the slot. If the slot is already occupied 68 | /// the new value is returned. 
Returning `None` indicates a successful insertion. 69 | pub fn put(&self, value: T) -> Option { 70 | match self 71 | .flag 72 | .compare_exchange(NONE, LOCK, Ordering::Acquire, Ordering::Relaxed) 73 | { 74 | Err(_) => Some(value), 75 | Ok(_) => { 76 | let slot = self.slot.get_mut(); 77 | // SAFETY: When the flag was `NONE` the value must be 78 | // uninitialized. Since the slot is locked for the duration we 79 | // know no other threads can access the cell. 80 | unsafe { slot.deref().write(value) }; 81 | self.flag.store(SOME, Ordering::Release); 82 | None 83 | } 84 | } 85 | } 86 | 87 | /// Takes the value from the slot. Returns none if the slot is empty. 88 | pub fn take(&self) -> Option { 89 | match self 90 | .flag 91 | .compare_exchange(SOME, LOCK, Ordering::Acquire, Ordering::Relaxed) 92 | { 93 | Err(_) => None, 94 | Ok(_) => { 95 | let slot = self.slot.get_mut(); 96 | // SAFETY: When the flag was `SOME` the value must be 97 | // initialized. Since the slot is locked the duration, we know no 98 | // other threads can access the cell. 99 | let value = unsafe { replace(slot.deref(), MaybeUninit::uninit()).assume_init() }; 100 | self.flag.store(NONE, Ordering::Release); 101 | Some(value) 102 | } 103 | } 104 | } 105 | } 106 | 107 | impl Drop for Slot { 108 | fn drop(&mut self) { 109 | // If `T` doesn't need to be dropped then neither does `Slot`. 110 | if needs_drop::() { 111 | let Slot { flag, slot } = self; 112 | flag.with_mut(|flag| { 113 | if *flag == SOME { 114 | let slot = slot.get_mut(); 115 | // SAFETY: Since the flag was `SOME` we know the slot is 116 | // occupied and should be dropped. 117 | unsafe { slot.deref().as_mut_ptr().drop_in_place() }; 118 | } 119 | }); 120 | } 121 | } 122 | } 123 | 124 | /// SAFETY: A `Slot` contains `T` so is `Send` iff `T` is send. 125 | unsafe impl Send for Slot where T: Send {} 126 | 127 | /// SAFETY: A `&Slot` lets you get a `T` via `Slot::take()`. 
If `Slot` is 128 | /// `Sync` this could cause `T` to be sent to another thread. So `Slot` is 129 | /// `Sync` iff `T` is `Send`. 130 | unsafe impl Sync for Slot where T: Send {} 131 | 132 | // ----------------------------------------------------------------------------- 133 | // Xorshift fast prng (taken from rayon) 134 | 135 | /// [xorshift*] is a fast pseudo-random number generator which will 136 | /// even tolerate weak seeding, as long as it's not zero. 137 | /// 138 | /// [xorshift*]: https://en.wikipedia.org/wiki/Xorshift#xorshift* 139 | pub struct XorShift64Star { 140 | state: Cell, 141 | } 142 | 143 | impl XorShift64Star { 144 | /// Initializes the prng with a seed. Provided seed must be nonzero. 145 | pub fn new(seed: u64) -> Self { 146 | XorShift64Star { 147 | state: Cell::new(seed), 148 | } 149 | } 150 | 151 | /// Returns a pseudorandom number. 152 | pub fn next(&self) -> u64 { 153 | let mut x = self.state.get(); 154 | debug_assert_ne!(x, 0); 155 | x ^= x >> 12; 156 | x ^= x << 25; 157 | x ^= x >> 27; 158 | self.state.set(x); 159 | x.wrapping_mul(0x2545_f491_4f6c_dd1d) 160 | } 161 | 162 | /// Return a pseudorandom number from `0..n`. 163 | pub fn next_usize(&self, n: usize) -> usize { 164 | (self.next() % n as u64) as usize 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /tests/general.rs: -------------------------------------------------------------------------------- 1 | //! General integration tests 2 | 3 | #![cfg(not(loom))] 4 | -------------------------------------------------------------------------------- /tests/loom.rs: -------------------------------------------------------------------------------- 1 | //! Tests using the `loom` testing framework. 
2 | 3 | #![cfg(loom)] 4 | //#![allow(unused_must_use)] 5 | #![allow(clippy::useless_vec)] 6 | 7 | use core::hint::black_box; 8 | 9 | use async_task::Task; 10 | use loom::model::Builder; 11 | use loom::sync::atomic::{AtomicUsize, Ordering}; 12 | use loom::sync::{Condvar, Mutex}; 13 | 14 | use tracing::{info, Level}; 15 | use tracing_subscriber::fmt::Subscriber; 16 | 17 | use forte::prelude::*; 18 | 19 | // ----------------------------------------------------------------------------- 20 | // Infrastructure 21 | 22 | fn model(f: F) 23 | where 24 | F: Fn() + Send + Sync + 'static, 25 | { 26 | let subscriber = Subscriber::builder() 27 | .with_max_level(Level::ERROR) 28 | .with_test_writer() 29 | .without_time() 30 | .finish(); 31 | 32 | tracing::subscriber::with_default(subscriber, || { 33 | let mut model = Builder::new(); 34 | model.log = true; 35 | model.check(f); 36 | }); 37 | } 38 | 39 | /// Provides access to a thread pool which can be treated as static for the 40 | /// purposes of testing. 41 | fn with_thread_pool(f: F) 42 | where 43 | F: Fn(&'static ThreadPool) + 'static, 44 | { 45 | info!("### SETTING UP TEST"); 46 | 47 | // Create a new thread pool. 48 | let thread_pool = Box::new(ThreadPool::new()); 49 | let ptr = Box::into_raw(thread_pool); 50 | 51 | // SAFETY: We want to create have a reference to the thread pool which can 52 | // be treated as `'static` by the callback `f`. We will assume that `f` has 53 | // no side-effects except for those created by calls to the thread pool. 54 | // This problem comes down to ensuring that `thread_pool` lives for the 55 | // duration of `f` and also outlives anything spawned onto the pool by `f`. 56 | // 57 | // The first condition is easily satisfied: the thread pool is not dropped 58 | // until the end of the scope. 
For the latter condition, the call to 59 | // `wait_until_inactive()` blocks the thread until all work spawned onto the 60 | // pool completes, and `resize_to(0)` blocks until all of the pool's threads 61 | // terminate. 62 | // 63 | // For all intents and purposes, so long as `f` has no other side-effects, 64 | // `thread_pool` can be treated as if it has a `'static` lifetime within 65 | // `f`. 66 | unsafe { 67 | let thread_pool = &*ptr; 68 | info!("### POPULATING POOL"); 69 | thread_pool.populate(); 70 | info!("### STARTING TEST"); 71 | f(thread_pool); 72 | info!("### SHUTTING DOWN POOL"); 73 | thread_pool.resize_to(0); 74 | // This assert ensures that all spawned jobs are run. 75 | assert!(thread_pool.pop().is_none()); 76 | }; 77 | 78 | // SAFETY: This was created by `Box::into_raw`. 79 | let thread_pool = unsafe { Box::from_raw(&mut *ptr) }; 80 | drop(thread_pool); 81 | 82 | info!("### TEST COMPLETE"); 83 | } 84 | 85 | // ----------------------------------------------------------------------------- 86 | // Workload tracking 87 | 88 | struct Workload { 89 | counter: AtomicUsize, 90 | is_done: Mutex, 91 | completed: Condvar, 92 | } 93 | 94 | impl Workload { 95 | fn new(count: usize) -> Workload { 96 | Workload { 97 | counter: AtomicUsize::new(count), 98 | is_done: Mutex::new(false), 99 | completed: Condvar::new(), 100 | } 101 | } 102 | 103 | fn execute(&self) { 104 | if 1 == self.counter.fetch_sub(1, Ordering::Relaxed) { 105 | let mut is_done = self 106 | .is_done 107 | .lock() 108 | .expect("failed to acquire workload lock"); 109 | *is_done = true; 110 | self.completed.notify_all(); 111 | } 112 | } 113 | 114 | fn wait_until_complete(&self) { 115 | let mut is_done = self 116 | .is_done 117 | .lock() 118 | .expect("failed to acquire workload lock"); 119 | while !*is_done { 120 | is_done = self 121 | .completed 122 | .wait(is_done) 123 | .expect("failed to reacquire workload lock"); 124 | } 125 | } 126 | } 127 | 128 | // 
----------------------------------------------------------------------------- 129 | // Pool resizing 130 | 131 | /// Tests for concurrency issues within the `with_thread_pool` helper function. 132 | /// This spins up a thread pool with a single thread, then spins it back down. 133 | #[test] 134 | pub fn empty() { 135 | model(|| { 136 | with_thread_pool(|_threads| {}); 137 | }); 138 | } 139 | 140 | /// Tests for concurrency issues when increasing the size of the pool. 141 | #[test] 142 | pub fn resize_grow() { 143 | model(|| { 144 | with_thread_pool(|threads| { 145 | threads.grow(1); 146 | }); 147 | }); 148 | } 149 | 150 | /// Tests for concurrency issues when shrinking the size of the pool. 151 | #[test] 152 | pub fn resize_shrink() { 153 | model(|| { 154 | with_thread_pool(|threads| { 155 | threads.shrink(1); 156 | }); 157 | }); 158 | } 159 | 160 | // ----------------------------------------------------------------------------- 161 | // Core API 162 | 163 | /// Tests for concurrency issues when spawning a static closure. 164 | #[test] 165 | pub fn spawn_closure() { 166 | model(|| { 167 | with_thread_pool(|threads| { 168 | let workload: &Workload = Box::leak(Box::new(Workload::new(1))); 169 | threads.spawn(|| { 170 | workload.execute(); 171 | }); 172 | workload.wait_until_complete(); 173 | }); 174 | }); 175 | } 176 | 177 | /// Tests for concurrency issues when spawning a static future. 178 | #[test] 179 | pub fn spawn_future() { 180 | model(|| { 181 | with_thread_pool(|threads| { 182 | let workload: &Workload = Box::leak(Box::new(Workload::new(1))); 183 | let task = threads.spawn_future(async { 184 | workload.execute(); 185 | }); 186 | task.detach(); 187 | workload.wait_until_complete(); 188 | }); 189 | }); 190 | } 191 | 192 | /// Tests for concurrency issues in join operations. 
193 | #[test] 194 | pub fn join() { 195 | model(|| { 196 | with_thread_pool(|threads| { 197 | threads.join(|| black_box(()), || black_box(())); 198 | }); 199 | }); 200 | } 201 | 202 | /// Tests for concurrency issues when blocking on a future. 203 | #[test] 204 | pub fn block_on() { 205 | model(|| { 206 | with_thread_pool(|threads| { 207 | threads.block_on(async { 208 | black_box(()); 209 | }); 210 | }); 211 | }); 212 | } 213 | 214 | /// Tests for concurrency issues when spawning a future and then blocking on the 215 | /// resulting task. 216 | #[test] 217 | pub fn spawn_and_block() { 218 | model(|| { 219 | with_thread_pool(|threads| { 220 | let task = threads.spawn_future(async { 221 | black_box(()); 222 | }); 223 | threads.block_on(task); 224 | }); 225 | }); 226 | } 227 | 228 | // ----------------------------------------------------------------------------- 229 | // Scoped API 230 | 231 | /// Test for concurrency issues when creating a scope. 232 | #[test] 233 | pub fn scope_empty() { 234 | model(|| { 235 | with_thread_pool(|threads| { 236 | threads.scope(|_| {}); 237 | }); 238 | }); 239 | } 240 | 241 | /// Tests for concurrency issues when returning a value from a scope. 242 | #[test] 243 | fn scope_result() { 244 | model(|| { 245 | with_thread_pool(|threads| { 246 | let x = threads.scope(|_| 22); 247 | assert_eq!(x, 22); 248 | }); 249 | }); 250 | } 251 | 252 | /// Tests for concurrency issues when spawning a scoped closure. 253 | #[test] 254 | pub fn scope_spawn() { 255 | model(|| { 256 | with_thread_pool(|threads| { 257 | let vec = vec![1, 2, 3]; 258 | threads.scope(|scope| { 259 | scope.spawn(|_| { 260 | black_box(vec.len()); 261 | }); 262 | }); 263 | }); 264 | }); 265 | } 266 | 267 | /// Tests for concurrency issues when spawning multiple scoped closures. 
#[test]
pub fn scope_two() {
    model(|| {
        with_thread_pool(|pool| {
            let counter = &AtomicUsize::new(0);
            pool.scope(|scope| {
                scope.spawn(|_| {
                    counter.fetch_add(1, Ordering::SeqCst);
                });
                scope.spawn(|_| {
                    counter.fetch_add(10, Ordering::SeqCst);
                });
            });
            // Both spawns must have run exactly once: 1 + 10.
            assert_eq!(counter.load(Ordering::SeqCst), 11);
        });
    });
}

/// Tests for concurrency issues when spawning a scoped future, and blocking on
/// it.
#[test]
pub fn scope_future() {
    model(|| {
        with_thread_pool(|pool| {
            let values = vec![1, 2, 3];
            // The task handle escapes the scope; the borrow of `values` is
            // kept alive by the scope until the spawned future completes.
            let mut task: Option<Task<usize>> = None;
            pool.scope(|scope| {
                task = Some(scope.spawn_future(async { black_box(values.len()) }));
            });
            let task = task.expect("task should be initialized after scoped spawn");
            assert_eq!(pool.block_on(task), values.len());
        });
    });
}