├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches └── overhead.rs └── src ├── job.rs └── lib.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | check: 11 | name: Check 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: dtolnay/rust-toolchain@1.81.0 16 | - uses: Swatinem/rust-cache@v2 17 | - run: cargo check --all 18 | 19 | test: 20 | name: Test 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - uses: dtolnay/rust-toolchain@1.81.0 25 | - uses: Swatinem/rust-cache@v2 26 | - run: cargo test --all 27 | 28 | fmt: 29 | name: Rustfmt 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v4 33 | - uses: dtolnay/rust-toolchain@1.81.0 34 | with: 35 | components: rustfmt 36 | - run: cargo fmt --all -- --check 37 | 38 | clippy: 39 | name: Clippy 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v4 43 | - uses: dtolnay/rust-toolchain@1.81.0 44 | with: 45 | components: clippy 46 | - uses: Swatinem/rust-cache@v2 47 | - run: cargo clippy --all 48 | 49 | miri: 50 | name: Miri 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: dtolnay/rust-toolchain@nightly 55 | with: 56 | components: miri 57 | - uses: Swatinem/rust-cache@v2 58 | - uses: taiki-e/install-action@nextest 59 | - run: cargo +nightly miri setup 60 | - run: cargo +nightly miri nextest run -j8 -E 'not (test(join_very_long))' 61 | - run: MIRIFLAGS=-Zmiri-many-seeds cargo +nightly miri test --lib -- join_wait 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "anstyle" 7 | version = "1.0.8" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" 10 | 11 | [[package]] 12 | name = "bitflags" 13 | version = "2.6.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 16 | 17 | [[package]] 18 | name = "cfg-if" 19 | version = "1.0.0" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 22 | 23 | [[package]] 24 | name = "chili" 25 | version = "0.2.1" 26 | dependencies = [ 27 | "divan", 28 | "rayon", 29 | ] 30 | 31 | [[package]] 32 | name = "clap" 33 | version = "4.5.16" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" 36 | dependencies = [ 37 | "clap_builder", 38 | ] 39 | 40 | [[package]] 41 | name = "clap_builder" 42 | version = "4.5.15" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" 45 | dependencies = [ 46 | "anstyle", 47 | "clap_lex", 48 | "terminal_size", 49 | ] 50 | 51 | [[package]] 52 | name = "clap_lex" 53 | version = "0.7.2" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" 56 | 57 | [[package]] 58 | name = "condtype" 59 | version = "1.3.0" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" 62 | 63 | [[package]] 64 | name = "crossbeam-deque" 65 | version = "0.8.5" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 68 | dependencies = [ 69 | "crossbeam-epoch", 70 | "crossbeam-utils", 71 | ] 72 | 73 | [[package]] 74 | name = "crossbeam-epoch" 75 | version = "0.9.18" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 78 | dependencies = [ 79 | "crossbeam-utils", 80 | ] 81 | 82 | [[package]] 83 | name = "crossbeam-utils" 84 | version = "0.8.20" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 87 | 88 | [[package]] 89 | name = "divan" 90 | version = "0.1.14" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "a0d567df2c9c2870a43f3f2bd65aaeb18dbce1c18f217c3e564b4fbaeb3ee56c" 93 | dependencies = [ 94 | "cfg-if", 95 | "clap", 96 | "condtype", 97 | "divan-macros", 98 | "libc", 99 | "regex-lite", 100 | ] 101 | 102 | [[package]] 103 | name = "divan-macros" 104 | version = "0.1.14" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "27540baf49be0d484d8f0130d7d8da3011c32a44d4fc873368154f1510e574a2" 107 | dependencies = [ 108 | "proc-macro2", 109 | "quote", 110 | "syn", 111 | ] 112 | 113 | [[package]] 114 | name = "either" 115 | version = "1.13.0" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 118 | 119 | [[package]] 120 | name = "errno" 121 | version = "0.3.9" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 124 | dependencies = [ 125 | "libc", 126 | "windows-sys 0.52.0", 127 | ] 128 | 129 | [[package]] 130 | name = "libc" 131 | version = "0.2.158" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" 134 | 135 | [[package]] 136 | name = "linux-raw-sys" 137 | version = "0.4.14" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 140 | 141 | [[package]] 142 | name = "proc-macro2" 143 | version = "1.0.86" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" 146 | dependencies = [ 147 | "unicode-ident", 148 | ] 149 | 150 | [[package]] 151 | name = "quote" 152 | version = "1.0.36" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 155 | dependencies = [ 156 | "proc-macro2", 157 | ] 158 | 159 | [[package]] 160 | name = "rayon" 161 | version = "1.10.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 164 | dependencies = [ 165 | "either", 166 | "rayon-core", 167 | ] 168 | 169 | [[package]] 170 | name = "rayon-core" 171 | version = "1.12.1" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 174 | dependencies = [ 175 | "crossbeam-deque", 176 | "crossbeam-utils", 177 | ] 178 | 179 | [[package]] 180 | name = "regex-lite" 181 | version = "0.1.6" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" 184 | 185 | [[package]] 186 | name = "rustix" 187 | version = "0.38.34" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 190 | dependencies = [ 191 | "bitflags", 192 | "errno", 193 | "libc", 194 | "linux-raw-sys", 195 | "windows-sys 0.52.0", 196 | ] 197 | 198 | [[package]] 199 | name = "syn" 200 | version = "2.0.75" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" 203 | dependencies = [ 204 | "proc-macro2", 205 | "quote", 206 | "unicode-ident", 207 | ] 208 | 209 | [[package]] 210 | name = "terminal_size" 211 | version = "0.3.0" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" 214 | dependencies = [ 215 | "rustix", 216 | "windows-sys 0.48.0", 217 | ] 218 | 219 | [[package]] 220 | name = "unicode-ident" 221 | version = "1.0.12" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 224 | 225 | [[package]] 226 | name = "windows-sys" 227 | version = "0.48.0" 228 | source = "registry+https://github.com/rust-lang/crates.io-index" 229 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 230 | dependencies = [ 231 | "windows-targets 0.48.5", 232 | ] 233 | 234 | [[package]] 235 | name = "windows-sys" 236 | version = "0.52.0" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 239 | dependencies = [ 240 | "windows-targets 0.52.6", 241 | ] 242 | 243 | [[package]] 244 | name = "windows-targets" 245 | version = "0.48.5" 246 | source = "registry+https://github.com/rust-lang/crates.io-index" 247 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 248 | dependencies = [ 249 | "windows_aarch64_gnullvm 0.48.5", 250 | "windows_aarch64_msvc 0.48.5", 251 | "windows_i686_gnu 0.48.5", 252 | "windows_i686_msvc 0.48.5", 253 | "windows_x86_64_gnu 0.48.5", 254 | "windows_x86_64_gnullvm 0.48.5", 255 | "windows_x86_64_msvc 0.48.5", 256 | ] 257 | 258 | [[package]] 259 | name = "windows-targets" 260 | version = "0.52.6" 261 | source = "registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 263 | dependencies = [ 264 | "windows_aarch64_gnullvm 0.52.6", 265 | "windows_aarch64_msvc 0.52.6", 266 | "windows_i686_gnu 0.52.6", 267 | "windows_i686_gnullvm", 268 | "windows_i686_msvc 0.52.6", 269 | "windows_x86_64_gnu 0.52.6", 270 | "windows_x86_64_gnullvm 0.52.6", 271 | "windows_x86_64_msvc 0.52.6", 272 | ] 273 | 274 | [[package]] 275 | name = "windows_aarch64_gnullvm" 276 | version = "0.48.5" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 279 | 280 | [[package]] 281 | name = "windows_aarch64_gnullvm" 282 | version = "0.52.6" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 285 | 286 | [[package]] 287 | name = "windows_aarch64_msvc" 288 | version = "0.48.5" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 291 | 292 | [[package]] 293 | name = "windows_aarch64_msvc" 294 | version = "0.52.6" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 297 | 298 | [[package]] 299 | name = "windows_i686_gnu" 300 | version = "0.48.5" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 303 | 304 | [[package]] 305 | name = "windows_i686_gnu" 306 | version = "0.52.6" 307 | source = "registry+https://github.com/rust-lang/crates.io-index" 308 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 309 | 310 | [[package]] 311 | name = "windows_i686_gnullvm" 312 | version = "0.52.6" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 315 | 316 | [[package]] 317 | name = "windows_i686_msvc" 318 | version = "0.48.5" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 321 | 322 | [[package]] 323 | name = "windows_i686_msvc" 324 | version = "0.52.6" 325 | source = "registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 327 | 328 | [[package]] 329 | name = "windows_x86_64_gnu" 330 | version = "0.48.5" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 333 | 334 | [[package]] 335 | name = "windows_x86_64_gnu" 336 | version = "0.52.6" 337 | source = "registry+https://github.com/rust-lang/crates.io-index" 338 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 339 | 340 | [[package]] 341 | name = "windows_x86_64_gnullvm" 342 | version = "0.48.5" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 345 | 346 | [[package]] 347 | name = "windows_x86_64_gnullvm" 348 | version = "0.52.6" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 351 | 352 | [[package]] 353 | name = "windows_x86_64_msvc" 354 | version = "0.48.5" 355 | source = "registry+https://github.com/rust-lang/crates.io-index" 356 | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 357 | 358 | [[package]] 359 | name = "windows_x86_64_msvc" 360 | version = "0.52.6" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 363 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "chili" 3 | description = "low-overhead parallelization library" 4 | version = "0.2.1" 5 | edition = "2021" 6 | authors = ["Dragoș Tiselice "] 7 | repository = "https://github.com/dragostis/chili" 8 | documentation = "https://docs.rs/chili" 9 | keywords = ["join", "concurrency", "parallel", "spice"] 10 | categories = ["concurrency"] 11 | license = "MIT OR Apache-2.0" 12 | readme = "README.md" 13 | 14 | [dev-dependencies] 15 | divan = "0.1.14" 16 | rayon = "1.10.0" 17 | 18 | [[bench]] 19 | name = "overhead" 20 | harness = false 21 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chili 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/chili.svg)](https://crates.io/crates/chili) 4 | [![Docs](https://docs.rs/chili/badge.svg)](https://docs.rs/chili) 5 | 6 | ## Rust port of [Spice], a low-overhead parallelization library 7 | 8 | Very low-overhead parallelization primitive, almost identical to 9 | [`rayon::join`]. At any fork point during computation, it *may* run the two 10 | passed closures in parallel. 11 | 12 | It works best in cases where there are many small computations and where it is 13 | expensive to estimate how many are left on the current branch in order to stop trying to share work across threads. 14 | 15 | ## Example 16 | 17 | The following example sums up all nodes in a binary tree in parallel. 18 | 19 | ```rust 20 | fn sum(node: &Node, scope: &mut Scope<'_>) -> u64 { 21 | let (left, right) = scope.join( 22 | |s| node.left.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 23 | |s| node.right.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 24 | ); 25 | 26 | node.val + left + right 27 | } 28 | ``` 29 | 30 | This is the ideal example since per-node computation is very cheap and the 31 | nodes don't keep track of how many descendants are left. 32 | 33 | ## Benchmarks 34 | 35 | The following benchmarks measure the time it takes to sum up all the values in 36 | a balanced binary tree with varying number of nodes. 37 | 38 | ### AMD Ryzen 7 4800HS (8 cores) 39 | 40 | While the improvement over the baseline in the 134M nodes case is close to the 41 | theoretical maximum, it's worth noting that the actual time per node is 0.8ns 42 | vs. a theoretical 1.8 / 8 = 0.2ns, if we're to compare against the 1K nodes 43 | case. 44 | 45 | | Number of nodes | Baseline | Rayon | chili | Baseline / chili | 46 | |----------------:|---------:|---------:|---------:|:----------------:| 47 | | 1023 | 1.8 µs | 51.1 µs | 3.4 µs | **x0.53** | 48 | | 16777215 | 94.4 ms | 58.1 ms | 13.6 ms | **x6.94** | 49 | | 134217727 | 797.5 ms | 497.2 ms | 101.8 ms | **x7.83** | 50 | 51 | ### Apple M1 (8 cores) 52 | 53 | | Number of nodes | Baseline | Rayon | chili | Baseline / chili | 54 | |----------------:|---------:|---------:|---------:|:----------------:| 55 | | 1023 | 1.6 µs | 29.2 µs | 3.5 µs | **x0.46** | 56 | | 16777215 | 39.4 ms | 40.5 ms | 11.2 ms | **x3.51** | 57 | | 67108863 | 156.5 ms | 167.1 ms | 44.3 ms | **x3.53** | 58 | 59 | ### chili overhead on AMD Ryzen 7 4800HS (8 cores) 60 | 61 | The overhead in the 1K nodes case remains approximately constant with respect 62 | to the number of threads. 63 | 64 | | Number of nodes | Baseline | 1 thread | 2 threads | 4 threads | 8 threads | 65 | |----------------:|---------:|---------:|----------:|----------:|----------:| 66 | | 1023 | 1.8 ns | 3.5 ns | 3.5 ns | 3.5 ns | 3.5 ns | 67 | 68 | [Spice]: https://github.com/judofyr/spice 69 | [`rayon::join`]: https://docs.rs/rayon/latest/rayon/fn.join.html -------------------------------------------------------------------------------- /benches/overhead.rs: -------------------------------------------------------------------------------- 1 | use chili::Scope; 2 | use divan::Bencher; 3 | 4 | struct Node { 5 | val: u64, 6 | left: Option>, 7 | right: Option>, 8 | } 9 | 10 | impl Node { 11 | pub fn tree(layers: usize) -> Self { 12 | Self { 13 | val: 1, 14 | left: (layers != 1).then(|| Box::new(Self::tree(layers - 1))), 15 | right: (layers != 1).then(|| Box::new(Self::tree(layers - 1))), 16 | } 17 | } 18 | } 19 | 20 | const LAYERS: &[usize] = &[10, 24]; 21 | fn nodes() -> impl Iterator { 22 | LAYERS.iter().map(|&l| (l, (1 << l) - 1)) 23 | } 24 | 25 | #[divan::bench(args = nodes())] 26 | fn no_overhead(bencher: Bencher, nodes: (usize, usize)) { 27 | fn join_no_overhead(scope: &mut Scope<'_>, a: A, b: B) -> (RA, RB) 28 | where 29 | A: FnOnce(&mut Scope<'_>) -> RA + Send, 30 | B: FnOnce(&mut Scope<'_>) -> RB + Send, 31 | RA: Send, 32 | RB: Send, 33 | { 34 | (a(scope), b(scope)) 35 | } 36 | 37 | #[inline] 38 | fn sum(node: &Node, scope: &mut Scope<'_>) -> u64 { 39 | let (left, right) = join_no_overhead( 40 | scope, 41 | |s| node.left.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 42 | |s| node.right.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 43 | ); 44 | 45 | node.val + left + right 46 | } 47 | 48 | let tree = Node::tree(nodes.0); 49 | let mut scope = Scope::global(); 50 | 51 | bencher.bench_local(move || { 52 | assert_eq!(sum(&tree, &mut scope), nodes.1 as u64); 53 | }); 54 | } 55 | 56 | #[divan::bench(args = nodes())] 57 | fn chili_overhead(bencher: Bencher, nodes: (usize, usize)) { 58 | fn sum(node: &Node, scope: &mut Scope<'_>) -> u64 { 59 | let (left, right) = scope.join( 60 | |s| node.left.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 61 | |s| node.right.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 62 | ); 63 | 64 | node.val + left + right 65 | } 66 | 67 | let tree = Node::tree(nodes.0); 68 | let mut scope = Scope::global(); 69 | 70 | bencher.bench_local(move || { 71 | assert_eq!(sum(&tree, &mut scope), nodes.1 as u64); 72 | }); 73 | } 74 | 75 | #[divan::bench(args = nodes())] 76 | fn rayon_overhead(bencher: Bencher, nodes: (usize, usize)) { 77 | fn sum(node: &Node) -> u64 { 78 | let (left, right) = rayon::join( 79 | || node.left.as_deref().map(sum).unwrap_or_default(), 80 | || node.right.as_deref().map(sum).unwrap_or_default(), 81 | ); 82 | 83 | node.val + left + right 84 | } 85 | 86 | let tree = Node::tree(nodes.0); 87 | 88 | bencher.bench_local(move || { 89 | assert_eq!(sum(&tree), nodes.1 as u64); 90 | }); 91 | } 92 | 93 | fn main() { 94 | divan::main(); 95 | } 96 | -------------------------------------------------------------------------------- /src/job.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | use std::{ 3 | cell::{Cell, UnsafeCell}, 4 | collections::VecDeque, 5 | mem::{self, ManuallyDrop}, 6 | panic::{self, AssertUnwindSafe}, 7 | ptr::NonNull, 8 | sync::{ 9 | atomic::{AtomicU8, Ordering}, 10 | Arc, 11 | }, 12 | thread::{self, Thread}, 13 | }; 14 | 15 | use crate::Scope; 16 | 17 | enum State { 18 | Pending, 19 | Waiting, 20 | Ready, 21 | } 22 | 23 | #[derive(Debug)] 24 | #[repr(C)] 25 | struct Channel { 26 | state: AtomicU8, 27 | /// Can only be written only by the `Receiver` and read by the `Sender` if 28 | /// `state` is `State::Waiting`. 29 | waiting_thread: UnsafeCell>, 30 | /// Can only be written only by the `Sender` and read by the `Receiver` if 31 | /// `state` is `State::Ready`. 32 | val: UnsafeCell>>>, 33 | } 34 | 35 | impl Default for Channel { 36 | fn default() -> Self { 37 | Self { 38 | state: AtomicU8::new(State::Pending as u8), 39 | waiting_thread: UnsafeCell::new(None), 40 | val: UnsafeCell::new(None), 41 | } 42 | } 43 | } 44 | 45 | #[derive(Debug)] 46 | pub struct Receiver(Arc>); 47 | 48 | impl Receiver { 49 | pub fn is_empty(&self) -> bool { 50 | self.0.state.load(Ordering::Acquire) != State::Ready as u8 51 | } 52 | 53 | pub fn recv(self) -> thread::Result { 54 | // SAFETY: 55 | // Only this thread can write to `waiting_thread` and none can read it 56 | // yet. 57 | unsafe { *self.0.waiting_thread.get() = Some(thread::current()) }; 58 | 59 | if self 60 | .0 61 | .state 62 | .compare_exchange( 63 | State::Pending as u8, 64 | State::Waiting as u8, 65 | Ordering::AcqRel, 66 | Ordering::Acquire, 67 | ) 68 | .is_ok() 69 | { 70 | thread::park(); 71 | } 72 | 73 | // SAFETY: 74 | // To arrive here, either `state` is `State::Ready` or the above 75 | // `compare_exchange` succeeded, the thread was parked and then 76 | // unparked by the `Sender` *after* the `state` was set to 77 | // `State::Ready`. 78 | // 79 | // In either case, this thread now has unique access to `val`. 80 | unsafe { (*self.0.val.get()).take().map(|b| *b).unwrap() } 81 | } 82 | } 83 | 84 | #[derive(Debug)] 85 | struct Sender(Arc>); 86 | 87 | impl Sender { 88 | pub fn send(self, val: thread::Result) { 89 | // SAFETY: 90 | // Only this thread can write to `val` and none can read it 91 | // yet. 92 | unsafe { 93 | *self.0.val.get() = Some(Box::new(val)); 94 | } 95 | 96 | if self.0.state.swap(State::Ready as u8, Ordering::AcqRel) == State::Waiting as u8 { 97 | // SAFETY: 98 | // A `Receiver` already wrote its thread to `waiting_thread` 99 | // *before* setting the `state` to `State::Waiting`. 100 | if let Some(thread) = unsafe { (*self.0.waiting_thread.get()).take() } { 101 | thread.unpark(); 102 | } 103 | } 104 | } 105 | } 106 | 107 | fn channel() -> (Sender, Receiver) { 108 | let channel = Arc::new(Channel::default()); 109 | 110 | (Sender(channel.clone()), Receiver(channel)) 111 | } 112 | 113 | pub struct JobStack { 114 | /// All code paths should call either `Job::execute` or `Self::unwrap` to 115 | /// avoid a potential memory leak. 116 | f: UnsafeCell>, 117 | } 118 | 119 | impl JobStack { 120 | pub fn new(f: F) -> Self { 121 | Self { 122 | f: UnsafeCell::new(ManuallyDrop::new(f)), 123 | } 124 | } 125 | 126 | /// SAFETY: 127 | /// It should only be called once. 128 | pub unsafe fn take_once(&self) -> F { 129 | // SAFETY: 130 | // No `Job` has has been executed, therefore `self.f` has not yet been 131 | // `take`n. 132 | unsafe { ManuallyDrop::take(&mut *self.f.get()) } 133 | } 134 | } 135 | 136 | /// `Job` is only sent, not shared between threads. 137 | /// 138 | /// When popped from the `JobQueue`, it gets copied before sending across 139 | /// thread boundaries. 140 | #[repr(C)] 141 | pub struct Job { 142 | stack: NonNull, 143 | harness: unsafe fn(&mut Scope<'_>, NonNull, Sender), 144 | receiver: Cell>>, 145 | } 146 | 147 | impl Job { 148 | pub fn new(stack: &JobStack) -> Self 149 | where 150 | F: FnOnce(&mut Scope<'_>) -> T + Send, 151 | T: Send, 152 | { 153 | /// SAFETY: 154 | /// It should only be called while the `stack` is still alive. 155 | unsafe fn harness(scope: &mut Scope<'_>, stack: NonNull, sender: Sender) 156 | where 157 | F: FnOnce(&mut Scope<'_>) -> T + Send, 158 | T: Send, 159 | { 160 | // SAFETY: 161 | // The `stack` is still alive as per `JobShared::execute`'s contract. 162 | let stack: &JobStack = unsafe { stack.cast().as_ref() }; 163 | // SAFETY: 164 | // This is the first call to `take_once` since `Job::execute` 165 | // (the only place where this harness is called) is called only 166 | // after the job has been popped. 167 | let f = unsafe { stack.take_once() }; 168 | // SAFETY: 169 | // `Sender` can be safely transmuted to `Sender` since the 170 | // `Channel`'s size is the same as `Channel` because the only 171 | // field referencing `T` has constant size (`Box`), and the order 172 | // of its fields is preserved given that it is `repr(C)`. 173 | let sender: Sender = unsafe { mem::transmute(sender) }; 174 | 175 | sender.send(panic::catch_unwind(AssertUnwindSafe(|| f(scope)))); 176 | } 177 | 178 | Self { 179 | stack: NonNull::from(stack).cast(), 180 | harness: harness::, 181 | receiver: Cell::new(None), 182 | } 183 | } 184 | 185 | pub fn take_receiver(&self) -> Option> { 186 | self.receiver.take() 187 | } 188 | } 189 | 190 | impl fmt::Debug for Job { 191 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 192 | let receiver = self.receiver.take(); 193 | 194 | let result = f 195 | .debug_struct("Job") 196 | .field("stack", &self.stack) 197 | .field("harness", &self.harness) 198 | .field("sender", &receiver) 199 | .finish(); 200 | 201 | self.receiver.set(receiver); 202 | 203 | result 204 | } 205 | } 206 | 207 | #[derive(Debug)] 208 | pub struct JobShared { 209 | stack: NonNull, 210 | harness: unsafe fn(&mut Scope<'_>, NonNull, Sender), 211 | sender: Sender, 212 | } 213 | 214 | impl JobShared { 215 | /// SAFETY: 216 | /// It should only be called while the `JobStack` it was created with is 217 | /// still alive and after being popped from a `JobQueue`. 218 | pub unsafe fn execute(self, scope: &mut Scope<'_>) { 219 | // SAFETY: 220 | // The `stack` is still alive as per `JobShared::execute`'s contract. 221 | unsafe { 222 | (self.harness)(scope, self.stack, self.sender); 223 | } 224 | } 225 | } 226 | 227 | // SAFETY: 228 | // The job's `stack` will only be accessed exclusively from the thread 229 | // `JobShared::execute`ing the job which also consumes it. 230 | // 231 | // The job's `sender` will be accessed either from one thread to check if 232 | // `Receiver::is_empty` or from the executing thread to `JobShared::execute` 233 | // which calls `Sender::send` which can be only called once. 234 | unsafe impl Send for JobShared {} 235 | 236 | #[derive(Debug, Default)] 237 | pub struct JobQueue(VecDeque>); 238 | 239 | impl JobQueue { 240 | pub fn len(&self) -> usize { 241 | self.0.len() 242 | } 243 | 244 | /// SAFETY: 245 | /// Any `Job` pushed onto the queue should alive at least until it gets 246 | /// popped. 247 | pub unsafe fn push_back(&mut self, job: &Job) { 248 | self.0.push_back(NonNull::from(&*job).cast()); 249 | } 250 | 251 | pub fn pop_back(&mut self) { 252 | self.0.pop_back(); 253 | } 254 | 255 | pub fn pop_front(&mut self) -> Option { 256 | // SAFETY: 257 | // `Job` is still alive as per contract in `push_back`. 258 | // 259 | // The previously pushed `Job` is safe to cast to `Job` since the 260 | // only field that depends on `T` is of type 261 | // `Cell>>>` which has constant size, while 262 | // being `repr(C)` guarantees identical field order. 263 | let job = unsafe { self.0.pop_front()?.as_ref() }; 264 | 265 | let (sender, receiver) = channel(); 266 | 267 | job.receiver.set(Some(receiver)); 268 | 269 | Some(JobShared { 270 | stack: job.stack, 271 | harness: job.harness, 272 | sender, 273 | }) 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | #![deny(unsafe_op_in_unsafe_fn)] 3 | #![deny(clippy::undocumented_unsafe_blocks)] 4 | 5 | //! # chili. Rust port of [Spice], a low-overhead parallelization library 6 | //! 7 | //! A crate for very low-overhead fork-join workloads that can potentially be 8 | //! run in parallel. 9 | //! 10 | //! It works best in cases where there are many small computations and where it 11 | //! is expensive to estimate how many are left on the current branch in order 12 | //! to stop trying to share work across threads. 13 | //! 14 | //! [Spice]: https://github.com/judofyr/spice 15 | //! 16 | //! # Examples 17 | //! 18 | //! ``` 19 | //! # use chili::Scope; 20 | //! struct Node { 21 | //! val: u64, 22 | //! left: Option>, 23 | //! right: Option>, 24 | //! } 25 | //! 26 | //! impl Node { 27 | //! pub fn tree(layers: usize) -> Self { 28 | //! Self { 29 | //! val: 1, 30 | //! left: (layers != 1).then(|| Box::new(Self::tree(layers - 1))), 31 | //! right: (layers != 1).then(|| Box::new(Self::tree(layers - 1))), 32 | //! } 33 | //! } 34 | //! } 35 | //! 36 | //! fn sum(node: &Node, scope: &mut Scope<'_>) -> u64 { 37 | //! let (left, right) = scope.join( 38 | //! |s| node.left.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 39 | //! |s| node.right.as_deref().map(|n| sum(n, s)).unwrap_or_default(), 40 | //! ); 41 | //! 42 | //! node.val + left + right 43 | //! } 44 | //! 45 | //! let tree = Node::tree(10); 46 | //! 47 | //! assert_eq!(sum(&tree, &mut Scope::global()), 1023); 48 | //! ``` 49 | 50 | use std::{ 51 | collections::{btree_map::Entry, BTreeMap, HashMap}, 52 | num::NonZero, 53 | ops::{Deref, DerefMut}, 54 | panic, 55 | sync::{ 56 | atomic::{AtomicBool, Ordering}, 57 | Arc, Barrier, Condvar, Mutex, OnceLock, Weak, 58 | }, 59 | thread::{self, JoinHandle}, 60 | time::{Duration, Instant}, 61 | }; 62 | 63 | mod job; 64 | 65 | use job::{Job, JobQueue, JobShared, JobStack, Receiver}; 66 | 67 | #[derive(Debug)] 68 | struct Heartbeat { 69 | is_set: Weak, 70 | last_heartbeat: Instant, 71 | } 72 | 73 | #[derive(Debug, Default)] 74 | struct LockContext { 75 | time: u64, 76 | is_stopping: bool, 77 | shared_jobs: BTreeMap, 78 | heartbeats: HashMap, 79 | heartbeat_index: u64, 80 | } 81 | 82 | impl LockContext { 83 | pub fn new_heartbeat(&mut self) -> Arc { 84 | let is_set = Arc::new(AtomicBool::new(true)); 85 | let heartbeat = Heartbeat { 86 | is_set: Arc::downgrade(&is_set), 87 | last_heartbeat: Instant::now(), 88 | }; 89 | 90 | let i = self.heartbeat_index; 91 | self.heartbeats.insert(i, heartbeat); 92 | 93 | self.heartbeat_index = i.checked_add(1).unwrap(); 94 | 95 | is_set 96 | } 97 | 98 | pub fn pop_earliest_shared_job(&mut self) -> Option { 99 | self.shared_jobs 100 | .pop_first() 101 | .map(|(_, (_, shared_job))| shared_job) 102 | } 103 | } 104 | 105 | #[derive(Debug)] 106 | struct Context { 107 | lock: Mutex, 108 | job_is_ready: Condvar, 109 | scope_created_from_thread_pool: Condvar, 110 | } 111 | 112 | fn execute_worker(context: Arc, barrier: Arc) -> Option<()> { 113 | let mut first_run = true; 114 | 115 | let mut job_queue = JobQueue::default(); 116 | let mut scope = Scope::new_from_worker(context.clone(), &mut job_queue); 117 | 118 | loop { 119 | let job = { 120 | let mut lock = context.lock.lock().unwrap(); 121 | lock.pop_earliest_shared_job() 122 | }; 123 | 124 | if let Some(job) = job { 125 | // SAFETY: 126 | // Any `Job` that was shared between threads is waited upon before 127 | // the `JobStack` exits scope. 128 | unsafe { 129 | job.execute(&mut scope); 130 | } 131 | } 132 | 133 | if first_run { 134 | first_run = false; 135 | barrier.wait(); 136 | }; 137 | 138 | let lock = context.lock.lock().ok()?; 139 | if lock.is_stopping || context.job_is_ready.wait(lock).is_err() { 140 | break; 141 | } 142 | } 143 | 144 | Some(()) 145 | } 146 | 147 | fn execute_heartbeat( 148 | context: Arc, 149 | heartbeat_interval: Duration, 150 | num_workers: usize, 151 | ) -> Option<()> { 152 | loop { 153 | let interval_between_workers = { 154 | let mut lock = context 155 | .scope_created_from_thread_pool 156 | .wait_while(context.lock.lock().ok()?, |l| { 157 | l.heartbeats.len() == num_workers && !l.is_stopping 158 | }) 159 | .ok()?; 160 | 161 | if lock.is_stopping { 162 | break; 163 | } 164 | 165 | let now = Instant::now(); 166 | lock.heartbeats.retain(|_, h| { 167 | h.is_set 168 | .upgrade() 169 | .inspect(|is_set| { 170 | if now.duration_since(h.last_heartbeat) >= heartbeat_interval { 171 | is_set.store(true, Ordering::Relaxed); 172 | h.last_heartbeat = now; 173 | } 174 | }) 175 | .is_some() 176 | }); 177 | 178 | heartbeat_interval.checked_div(lock.heartbeats.len() as u32) 179 | }; 180 | 181 | // If there are no heartbeats (`lock.heartbeats.len()` is 0), skip 182 | // immediately to the next iteration of the loop to trigger the wait. 183 | if let Some(interval_between_workers) = interval_between_workers { 184 | thread::sleep(interval_between_workers); 185 | } 186 | } 187 | 188 | Some(()) 189 | } 190 | 191 | #[derive(Debug)] 192 | enum ThreadJobQueue<'s> { 193 | Worker(&'s mut JobQueue), 194 | Current(JobQueue), 195 | } 196 | 197 | impl Deref for ThreadJobQueue<'_> { 198 | type Target = JobQueue; 199 | 200 | fn deref(&self) -> &Self::Target { 201 | match self { 202 | Self::Worker(queue) => queue, 203 | Self::Current(queue) => queue, 204 | } 205 | } 206 | } 207 | 208 | impl DerefMut for ThreadJobQueue<'_> { 209 | fn deref_mut(&mut self) -> &mut Self::Target { 210 | match self { 211 | Self::Worker(queue) => queue, 212 | Self::Current(queue) => queue, 213 | } 214 | } 215 | } 216 | 217 | /// A `Scope`d object that you can run fork-join workloads on. 218 | /// 219 | /// # Examples 220 | /// 221 | /// ``` 222 | /// # use chili::ThreadPool; 223 | /// let mut tp = ThreadPool::new(); 224 | /// let mut s = tp.scope(); 225 | /// 226 | /// let mut vals = [0; 2]; 227 | /// let (left, right) = vals.split_at_mut(1); 228 | /// 229 | /// s.join(|_| left[0] = 1, |_| right[0] = 1); 230 | /// 231 | /// assert_eq!(vals, [1; 2]); 232 | /// ``` 233 | #[derive(Debug)] 234 | pub struct Scope<'s> { 235 | context: Arc, 236 | job_queue: ThreadJobQueue<'s>, 237 | heartbeat: Arc, 238 | join_count: u8, 239 | } 240 | 241 | impl<'s> Scope<'s> { 242 | /// Returns the global scope. 243 | /// 244 | /// # Examples 245 | /// 246 | /// ``` 247 | /// # use chili::Scope; 248 | /// let _s = Scope::global(); 249 | /// ``` 250 | pub fn global() -> Scope<'static> { 251 | ThreadPool::global().scope() 252 | } 253 | 254 | fn new_from_thread_pool(thread_pool: &'s ThreadPool) -> Self { 255 | let heartbeat = thread_pool.context.lock.lock().unwrap().new_heartbeat(); 256 | thread_pool 257 | .context 258 | .scope_created_from_thread_pool 259 | .notify_one(); 260 | 261 | Self { 262 | context: thread_pool.context.clone(), 263 | job_queue: ThreadJobQueue::Current(JobQueue::default()), 264 | heartbeat, 265 | join_count: 0, 266 | } 267 | } 268 | 269 | fn new_from_worker(context: Arc, job_queue: &'s mut JobQueue) -> Self { 270 | let heartbeat = context.lock.lock().unwrap().new_heartbeat(); 271 | 272 | Self { 273 | context, 274 | job_queue: ThreadJobQueue::Worker(job_queue), 275 | heartbeat, 276 | join_count: 0, 277 | } 278 | } 279 | 280 | fn heartbeat_id(&self) -> usize { 281 | Arc::as_ptr(&self.heartbeat) as usize 282 | } 283 | 284 | fn wait_for_sent_job(&mut self, receiver: Receiver) -> Option> { 285 | if self 286 | .context 287 | .lock 288 | .lock() 289 | .unwrap() 290 | .shared_jobs 291 | .remove(&self.heartbeat_id()) 292 | .is_some() 293 | { 294 | return None; 295 | } 296 | 297 | while receiver.is_empty() { 298 | let job = { 299 | let mut lock = self.context.lock.lock().unwrap(); 300 | lock.pop_earliest_shared_job() 301 | }; 302 | 303 | if let Some(job) = job { 304 | // SAFETY: 305 | // Any `Job` that was shared between threads is waited upon 306 | // before the `JobStack` exits scope. 307 | unsafe { 308 | job.execute(self); 309 | } 310 | } else { 311 | break; 312 | } 313 | } 314 | 315 | Some(receiver.recv()) 316 | } 317 | 318 | #[cold] 319 | fn heartbeat(&mut self) { 320 | let mut lock = self.context.lock.lock().unwrap(); 321 | 322 | let time = lock.time; 323 | if let Entry::Vacant(e) = lock.shared_jobs.entry(self.heartbeat_id()) { 324 | if let Some(job) = self.job_queue.pop_front() { 325 | e.insert((time, job)); 326 | 327 | lock.time += 1; 328 | self.context.job_is_ready.notify_one(); 329 | } 330 | } 331 | 332 | self.heartbeat.store(false, Ordering::Relaxed); 333 | } 334 | 335 | fn join_seq(&mut self, a: A, b: B) -> (RA, RB) 336 | where 337 | A: FnOnce(&mut Scope<'_>) -> RA + Send, 338 | B: FnOnce(&mut Scope<'_>) -> RB + Send, 339 | RA: Send, 340 | RB: Send, 341 | { 342 | let rb = b(self); 343 | let ra = a(self); 344 | 345 | (ra, rb) 346 | } 347 | 348 | fn join_heartbeat(&mut self, a: A, b: B) -> (RA, RB) 349 | where 350 | A: FnOnce(&mut Scope<'_>) -> RA + Send, 351 | B: FnOnce(&mut Scope<'_>) -> RB + Send, 352 | RA: Send, 353 | RB: Send, 354 | { 355 | let stack = JobStack::new(a); 356 | let job = Job::new(&stack); 357 | 358 | // SAFETY: 359 | // `job` is alive until the end of this scope. 360 | unsafe { self.job_queue.push_back(&job) }; 361 | 362 | if self.heartbeat.load(Ordering::Relaxed) { 363 | self.heartbeat(); 364 | } 365 | 366 | let rb = b(self); 367 | 368 | if let Some(receiver) = job.take_receiver() { 369 | let ra = match self.wait_for_sent_job(receiver) { 370 | Some(Ok(val)) => val, 371 | Some(Err(e)) => panic::resume_unwind(e), 372 | // SAFETY: 373 | // Since the `job` didn't have the chance to be actually 374 | // sent across threads, it cannot take the closure out of the 375 | // `JobStack` anymore. `JobStack::take_once` is thus called 376 | // only once. 377 | None => unsafe { (stack.take_once())(self) }, 378 | }; 379 | 380 | (ra, rb) 381 | } else { 382 | self.job_queue.pop_back(); 383 | 384 | // SAFETY: 385 | // Since the `job` was popped from the back of the queue, it cannot 386 | // take the closure out of the `JobStack` anymore. 387 | // `JobStack::take_once` is thus called only once. 388 | (unsafe { (stack.take_once())(self) }, rb) 389 | } 390 | } 391 | 392 | /// Runs `a` and `b` potentially in parallel on separate threads and 393 | /// returns the results. 394 | /// 395 | /// This variant skips checking for a heartbeat every 16 calls for improved 396 | /// performance. 397 | /// 398 | /// # Examples 399 | /// 400 | /// ``` 401 | /// # use chili::Scope; 402 | /// let mut vals = [0; 2]; 403 | /// let (left, right) = vals.split_at_mut(1); 404 | /// 405 | /// Scope::global().join(|_| left[0] = 1, |_| right[0] = 1); 406 | /// 407 | /// assert_eq!(vals, [1; 2]); 408 | /// ``` 409 | pub fn join(&mut self, a: A, b: B) -> (RA, RB) 410 | where 411 | A: FnOnce(&mut Scope<'_>) -> RA + Send, 412 | B: FnOnce(&mut Scope<'_>) -> RB + Send, 413 | RA: Send, 414 | RB: Send, 415 | { 416 | self.join_with_heartbeat_every::<64, _, _, _, _>(a, b) 417 | } 418 | 419 | /// Runs `a` and `b` potentially in parallel on separate threads and 420 | /// returns the results. 421 | /// 422 | /// This variant skips checking for a heartbeat every `TIMES - 1` calls for 423 | /// improved performance. 424 | /// 425 | /// # Examples 426 | /// 427 | /// ``` 428 | /// # use chili::Scope; 429 | /// 430 | /// let mut vals = [0; 2]; 431 | /// let (left, right) = vals.split_at_mut(1); 432 | /// 433 | /// // Skip checking 7/8 calls to join_with_heartbeat_every. 434 | /// Scope::global().join_with_heartbeat_every::<8, _, _, _, _>(|_| left[0] = 1, |_| right[0] = 1); 435 | /// 436 | /// assert_eq!(vals, [1; 2]); 437 | /// ``` 438 | pub fn join_with_heartbeat_every( 439 | &mut self, 440 | a: A, 441 | b: B, 442 | ) -> (RA, RB) 443 | where 444 | A: FnOnce(&mut Scope<'_>) -> RA + Send, 445 | B: FnOnce(&mut Scope<'_>) -> RB + Send, 446 | RA: Send, 447 | RB: Send, 448 | { 449 | self.join_count = self.join_count.wrapping_add(1) % TIMES; 450 | 451 | if self.join_count == 0 || self.job_queue.len() < 3 { 452 | self.join_heartbeat(a, b) 453 | } else { 454 | self.join_seq(a, b) 455 | } 456 | } 457 | } 458 | 459 | /// `ThreadPool` configuration. 460 | #[derive(Debug)] 461 | pub struct Config { 462 | /// The number of threads or `None` to use 463 | /// `std::thread::available_parallelism`. 464 | pub thread_count: Option>, 465 | /// The interval between heartbeats on any particular thread. 466 | pub heartbeat_interval: Duration, 467 | } 468 | 469 | impl Default for Config { 470 | fn default() -> Self { 471 | Self { 472 | thread_count: None, 473 | heartbeat_interval: Duration::from_micros(100), 474 | } 475 | } 476 | } 477 | 478 | static GLOBAL_THREAD_POOL: OnceLock = OnceLock::new(); 479 | 480 | /// A thread pool for running fork-join workloads. 481 | #[derive(Debug)] 482 | pub struct ThreadPool { 483 | context: Arc, 484 | worker_handles: Vec>, 485 | heartbeat_handle: Option>, 486 | } 487 | 488 | impl ThreadPool { 489 | /// Crates a new thread pool with default `Config`. 490 | /// 491 | /// # Examples 492 | /// 493 | /// ``` 494 | /// # use chili::ThreadPool; 495 | /// let _tp = ThreadPool::new(); 496 | /// ``` 497 | pub fn new() -> Self { 498 | Self::with_config(Config::default()) 499 | } 500 | 501 | /// Creates a new thread pool with `config`. 502 | /// 503 | /// # Examples 504 | /// 505 | /// ``` 506 | /// # use std::{num::NonZero, time::Duration}; 507 | /// # use chili::{Config, ThreadPool}; 508 | /// let _tp = ThreadPool::with_config(Config { 509 | /// thread_count: Some(NonZero::new(1).unwrap()), 510 | /// heartbeat_interval: Duration::from_micros(50), 511 | /// }); 512 | /// ``` 513 | pub fn with_config(config: Config) -> Self { 514 | let thread_count = config 515 | .thread_count 516 | .or_else(|| thread::available_parallelism().ok()) 517 | .map(|thread_count| thread_count.get() - 1) 518 | .unwrap_or_default(); 519 | let worker_barrier = Arc::new(Barrier::new(thread_count + 1)); 520 | 521 | let context = Arc::new(Context { 522 | lock: Mutex::new(LockContext::default()), 523 | job_is_ready: Condvar::new(), 524 | scope_created_from_thread_pool: Condvar::new(), 525 | }); 526 | 527 | let worker_handles = (0..thread_count) 528 | .map(|_| { 529 | let context = context.clone(); 530 | let barrier = worker_barrier.clone(); 531 | thread::spawn(move || { 532 | execute_worker(context, barrier); 533 | }) 534 | }) 535 | .collect(); 536 | 537 | worker_barrier.wait(); 538 | 539 | Self { 540 | context: context.clone(), 541 | worker_handles, 542 | heartbeat_handle: Some(thread::spawn(move || { 543 | execute_heartbeat(context, config.heartbeat_interval, thread_count); 544 | })), 545 | } 546 | } 547 | 548 | /// Sets the global thread pool to this one. 549 | /// 550 | /// The global thread pool can only be set once. Any subsequent call will 551 | /// return the thread pool back. 552 | /// 553 | /// # Examples 554 | /// 555 | /// ``` 556 | /// # use std::{num::NonZero, time::Duration}; 557 | /// # use chili::{Config, ThreadPool}; 558 | /// ThreadPool::with_config(Config { 559 | /// thread_count: Some(NonZero::new(1).unwrap()), 560 | /// heartbeat_interval: Duration::from_micros(50), 561 | /// }) 562 | /// .set_global() 563 | /// .unwrap(); 564 | /// ``` 565 | pub fn set_global(self) -> Result<(), Self> { 566 | GLOBAL_THREAD_POOL.set(self) 567 | } 568 | 569 | /// Returns the global thread pool. 570 | /// 571 | /// # Examples 572 | /// 573 | /// ``` 574 | /// # use chili::ThreadPool; 575 | /// let mut s = ThreadPool::global().scope(); 576 | /// 577 | /// let mut vals = [0; 2]; 578 | /// let (left, right) = vals.split_at_mut(1); 579 | /// 580 | /// s.join(|_| left[0] = 1, |_| right[0] = 1); 581 | /// 582 | /// assert_eq!(vals, [1; 2]); 583 | /// ``` 584 | pub fn global() -> &'static ThreadPool { 585 | GLOBAL_THREAD_POOL.get_or_init(ThreadPool::new) 586 | } 587 | 588 | /// Returns a `Scope`d object that you can run fork-join workloads on. 589 | /// 590 | /// # Examples 591 | /// 592 | /// ``` 593 | /// # use chili::ThreadPool; 594 | /// let mut tp = ThreadPool::new(); 595 | /// let mut s = tp.scope(); 596 | /// 597 | /// let mut vals = [0; 2]; 598 | /// let (left, right) = vals.split_at_mut(1); 599 | /// 600 | /// s.join(|_| left[0] = 1, |_| right[0] = 1); 601 | /// 602 | /// assert_eq!(vals, [1; 2]); 603 | /// ``` 604 | pub fn scope(&self) -> Scope<'_> { 605 | Scope::new_from_thread_pool(self) 606 | } 607 | } 608 | 609 | impl Default for ThreadPool { 610 | fn default() -> Self { 611 | Self::new() 612 | } 613 | } 614 | 615 | impl Drop for ThreadPool { 616 | fn drop(&mut self) { 617 | self.context 618 | .lock 619 | .lock() 620 | .expect("locking failed") 621 | .is_stopping = true; 622 | self.context.job_is_ready.notify_all(); 623 | self.context.scope_created_from_thread_pool.notify_one(); 624 | 625 | for handle in self.worker_handles.drain(..) { 626 | handle.join().unwrap(); 627 | } 628 | 629 | if let Some(handle) = self.heartbeat_handle.take() { 630 | handle.join().unwrap(); 631 | } 632 | } 633 | } 634 | 635 | #[cfg(test)] 636 | mod tests { 637 | use std::sync::atomic::AtomicU8; 638 | 639 | use super::*; 640 | 641 | use thread::ThreadId; 642 | 643 | #[test] 644 | fn thread_pool_stops() { 645 | let _tp = ThreadPool::new(); 646 | } 647 | 648 | #[test] 649 | fn thread_pool_with_one_thread() { 650 | let _tp = ThreadPool::with_config(Config { 651 | thread_count: Some(NonZero::new(1).unwrap()), 652 | ..Default::default() 653 | }); 654 | } 655 | 656 | #[test] 657 | fn join_basic() { 658 | let threat_pool = ThreadPool::new(); 659 | let mut scope = threat_pool.scope(); 660 | 661 | let mut a = 0; 662 | let mut b = 0; 663 | scope.join(|_| a += 1, |_| b += 1); 664 | 665 | assert_eq!(a, 1); 666 | assert_eq!(b, 1); 667 | } 668 | 669 | #[test] 670 | fn join_long() { 671 | let threat_pool = ThreadPool::new(); 672 | 673 | fn increment(s: &mut Scope, slice: &mut [u32]) { 674 | match slice.len() { 675 | 0 => (), 676 | 1 => slice[0] += 1, 677 | _ => { 678 | let (head, tail) = slice.split_at_mut(1); 679 | 680 | s.join(|_| head[0] += 1, |s| increment(s, tail)); 681 | } 682 | } 683 | } 684 | 685 | let mut vals = [0; 1_024]; 686 | 687 | increment(&mut threat_pool.scope(), &mut vals); 688 | 689 | assert_eq!(vals, [1; 1_024]); 690 | } 691 | 692 | #[test] 693 | fn join_very_long() { 694 | let threat_pool = ThreadPool::new(); 695 | 696 | fn increment(s: &mut Scope, slice: &mut [u32]) { 697 | match slice.len() { 698 | 0 => (), 699 | 1 => slice[0] += 1, 700 | _ => { 701 | let mid = slice.len() / 2; 702 | let (left, right) = slice.split_at_mut(mid); 703 | 704 | s.join(|s| increment(s, left), |s| increment(s, right)); 705 | } 706 | } 707 | } 708 | 709 | let mut vals = vec![0; 1_024 * 1_024]; 710 | 711 | increment(&mut threat_pool.scope(), &mut vals); 712 | 713 | assert_eq!(vals, vec![1; 1_024 * 1_024]); 714 | } 715 | 716 | #[test] 717 | fn join_wait() { 718 | let threat_pool = ThreadPool::with_config(Config { 719 | thread_count: Some(NonZero::new(2).unwrap()), 720 | heartbeat_interval: Duration::from_micros(1), 721 | ..Default::default() 722 | }); 723 | 724 | fn increment(s: &mut Scope, slice: &mut [u32]) { 725 | match slice.len() { 726 | 0 => (), 727 | 1 => slice[0] += 1, 728 | _ => { 729 | let (head, tail) = slice.split_at_mut(1); 730 | 731 | s.join_with_heartbeat_every::<1, _, _, _, _>( 732 | |_| { 733 | thread::sleep(Duration::from_micros(10)); 734 | head[0] += 1; 735 | }, 736 | |s| increment(s, tail), 737 | ); 738 | } 739 | } 740 | } 741 | 742 | let mut vals = [0; 10]; 743 | 744 | increment(&mut threat_pool.scope(), &mut vals); 745 | 746 | assert_eq!(vals, [1; 10]); 747 | } 748 | 749 | #[test] 750 | #[should_panic(expected = "panicked across threads")] 751 | fn join_panic() { 752 | let threat_pool = ThreadPool::with_config(Config { 753 | thread_count: Some(NonZero::new(2).unwrap()), 754 | heartbeat_interval: Duration::from_micros(1), 755 | }); 756 | 757 | if let Some(thread_count) = thread::available_parallelism().ok().map(NonZero::get) { 758 | if thread_count == 1 { 759 | // Pass test artificially when only one thread is available. 760 | panic!("panicked across threads"); 761 | } 762 | } 763 | 764 | fn increment(s: &mut Scope, slice: &mut [u32], id: ThreadId) -> bool { 765 | let mut threads_crossed = AtomicBool::new(false); 766 | 767 | match slice.len() { 768 | 0 => (), 769 | 1 => slice[0] += 1, 770 | _ => { 771 | let (head, tail) = slice.split_at_mut(1); 772 | 773 | s.join_with_heartbeat_every::<1, _, _, _, _>( 774 | |_| { 775 | thread::sleep(Duration::from_micros(100)); 776 | 777 | if thread::current().id() != id { 778 | threads_crossed.store(true, Ordering::Relaxed); 779 | panic!("panicked across threads"); 780 | } 781 | 782 | head[0] += 1; 783 | }, 784 | |s| increment(s, tail, id), 785 | ); 786 | } 787 | } 788 | 789 | *threads_crossed.get_mut() 790 | } 791 | 792 | let mut vals = [0; 10]; 793 | 794 | let threads_crossed = 795 | increment(&mut threat_pool.scope(), &mut vals, thread::current().id()); 796 | 797 | // Since there was no panic up to this point, this means that the 798 | // thread boundary has not been crossed. 799 | // 800 | // Check that the work was done and pass the test artificially. 801 | if !threads_crossed { 802 | assert_eq!(vals, [1; 10]); 803 | panic!("panicked across threads"); 804 | } 805 | } 806 | 807 | #[test] 808 | fn concurrent_scopes() { 809 | const NUM_THREADS: u8 = 128; 810 | let threat_pool = ThreadPool::with_config(Config { 811 | thread_count: Some(NonZero::new(4).unwrap()), 812 | ..Default::default() 813 | }); 814 | 815 | let a = AtomicU8::new(0); 816 | let b = AtomicU8::new(0); 817 | 818 | thread::scope(|s| { 819 | for _ in 0..NUM_THREADS { 820 | s.spawn(|| { 821 | let mut scope = threat_pool.scope(); 822 | scope.join( 823 | |_| a.fetch_add(1, Ordering::Relaxed), 824 | |_| b.fetch_add(1, Ordering::Relaxed), 825 | ); 826 | }); 827 | } 828 | }); 829 | 830 | assert_eq!(a.load(Ordering::Relaxed), NUM_THREADS); 831 | assert_eq!(b.load(Ordering::Relaxed), NUM_THREADS); 832 | } 833 | } 834 | --------------------------------------------------------------------------------