├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── async-brigade ├── Cargo.toml ├── rss-per-task.sh └── src │ └── main.rs ├── async-creation ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── async-mem-brigade ├── Cargo.toml └── src │ └── main.rs ├── one-thread-brigade ├── Cargo.toml └── src │ └── main.rs ├── thread-brigade ├── Cargo.toml ├── rss-per-thread.sh └── src │ └── main.rs ├── thread-creation ├── Cargo.toml └── src │ └── main.rs └── utils ├── Cargo.toml └── src ├── lib.rs ├── stats.rs └── useful_duration.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/*.rs.bk 3 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "async-brigade" 7 | version = "0.1.0" 8 | dependencies = [ 9 | "docopt", 10 | "libc", 11 | "serde", 12 | "tokio", 13 | "utils", 14 | ] 15 | 16 | [[package]] 17 | name = "async-channel" 18 | version = "1.6.1" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "2114d64672151c0c5eaa5e131ec84a74f06e1e559830dabba01ca30605d66319" 21 | dependencies = [ 22 | "concurrent-queue", 23 | "event-listener", 24 | "futures-core", 25 | ] 26 | 27 | [[package]] 28 | name = "async-creation" 29 | version = "0.1.0" 30 | dependencies = [ 31 | "async-std", 32 | "docopt", 33 | "serde", 34 | "utils", 35 | ] 36 | 37 | [[package]] 38 | name = "async-executor" 39 | version = "1.4.1" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "871f9bb5e0a22eeb7e8cf16641feb87c9dc67032ccf8ff49e772eb9941d3a965" 42 | dependencies = [ 43 | "async-task", 44 | "concurrent-queue", 45 | "fastrand", 46 | "futures-lite", 47 | "once_cell", 48 | "slab", 49 | ] 50 | 51 | [[package]] 
52 | name = "async-global-executor" 53 | version = "2.1.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "fd8b508d585e01084059b60f06ade4cb7415cd2e4084b71dd1cb44e7d3fb9880" 56 | dependencies = [ 57 | "async-channel", 58 | "async-executor", 59 | "async-io", 60 | "async-lock", 61 | "blocking", 62 | "futures-lite", 63 | "once_cell", 64 | ] 65 | 66 | [[package]] 67 | name = "async-io" 68 | version = "1.7.0" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "e5e18f61464ae81cde0a23e713ae8fd299580c54d697a35820cfd0625b8b0e07" 71 | dependencies = [ 72 | "concurrent-queue", 73 | "futures-lite", 74 | "libc", 75 | "log", 76 | "once_cell", 77 | "parking", 78 | "polling", 79 | "slab", 80 | "socket2", 81 | "waker-fn", 82 | "winapi", 83 | ] 84 | 85 | [[package]] 86 | name = "async-lock" 87 | version = "2.5.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "e97a171d191782fba31bb902b14ad94e24a68145032b7eedf871ab0bc0d077b6" 90 | dependencies = [ 91 | "event-listener", 92 | ] 93 | 94 | [[package]] 95 | name = "async-mem-brigade" 96 | version = "0.1.0" 97 | dependencies = [ 98 | "libc", 99 | "tokio", 100 | "utils", 101 | ] 102 | 103 | [[package]] 104 | name = "async-std" 105 | version = "1.12.0" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" 108 | dependencies = [ 109 | "async-channel", 110 | "async-global-executor", 111 | "async-io", 112 | "async-lock", 113 | "crossbeam-utils", 114 | "futures-channel", 115 | "futures-core", 116 | "futures-io", 117 | "futures-lite", 118 | "gloo-timers", 119 | "kv-log-macro", 120 | "log", 121 | "memchr", 122 | "once_cell", 123 | "pin-project-lite", 124 | "pin-utils", 125 | "slab", 126 | "wasm-bindgen-futures", 127 | ] 128 | 129 | [[package]] 130 | name = "async-task" 131 | version = "4.2.0" 132 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "30696a84d817107fc028e049980e09d5e140e8da8f1caeb17e8e950658a3cea9" 134 | 135 | [[package]] 136 | name = "atomic-waker" 137 | version = "1.0.0" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "065374052e7df7ee4047b1160cca5e1467a12351a40b3da123c870ba0b8eda2a" 140 | 141 | [[package]] 142 | name = "autocfg" 143 | version = "1.1.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 146 | 147 | [[package]] 148 | name = "bitflags" 149 | version = "1.3.2" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 152 | 153 | [[package]] 154 | name = "blocking" 155 | version = "1.2.0" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "c6ccb65d468978a086b69884437ded69a90faab3bbe6e67f242173ea728acccc" 158 | dependencies = [ 159 | "async-channel", 160 | "async-task", 161 | "atomic-waker", 162 | "fastrand", 163 | "futures-lite", 164 | "once_cell", 165 | ] 166 | 167 | [[package]] 168 | name = "bumpalo" 169 | version = "3.10.0" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" 172 | 173 | [[package]] 174 | name = "bytes" 175 | version = "1.1.0" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 178 | 179 | [[package]] 180 | name = "cache-padded" 181 | version = "1.2.0" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" 184 | 185 | [[package]] 186 | name = "cc" 187 | version = "1.0.73" 188 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" 190 | 191 | [[package]] 192 | name = "cfg-if" 193 | version = "1.0.0" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 196 | 197 | [[package]] 198 | name = "concurrent-queue" 199 | version = "1.2.2" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "30ed07550be01594c6026cff2a1d7fe9c8f683caa798e12b68694ac9e88286a3" 202 | dependencies = [ 203 | "cache-padded", 204 | ] 205 | 206 | [[package]] 207 | name = "crossbeam-utils" 208 | version = "0.8.9" 209 | source = "registry+https://github.com/rust-lang/crates.io-index" 210 | checksum = "8ff1f980957787286a554052d03c7aee98d99cc32e09f6d45f0a814133c87978" 211 | dependencies = [ 212 | "cfg-if", 213 | "once_cell", 214 | ] 215 | 216 | [[package]] 217 | name = "ctor" 218 | version = "0.1.22" 219 | source = "registry+https://github.com/rust-lang/crates.io-index" 220 | checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c" 221 | dependencies = [ 222 | "quote", 223 | "syn", 224 | ] 225 | 226 | [[package]] 227 | name = "docopt" 228 | version = "1.1.1" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "7f3f119846c823f9eafcf953a8f6ffb6ed69bf6240883261a7f13b634579a51f" 231 | dependencies = [ 232 | "lazy_static", 233 | "regex", 234 | "serde", 235 | "strsim", 236 | ] 237 | 238 | [[package]] 239 | name = "event-listener" 240 | version = "2.5.2" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71" 243 | 244 | [[package]] 245 | name = "fastrand" 246 | version = "1.7.0" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = 
"c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" 249 | dependencies = [ 250 | "instant", 251 | ] 252 | 253 | [[package]] 254 | name = "futures-channel" 255 | version = "0.3.21" 256 | source = "registry+https://github.com/rust-lang/crates.io-index" 257 | checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" 258 | dependencies = [ 259 | "futures-core", 260 | ] 261 | 262 | [[package]] 263 | name = "futures-core" 264 | version = "0.3.21" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" 267 | 268 | [[package]] 269 | name = "futures-io" 270 | version = "0.3.21" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" 273 | 274 | [[package]] 275 | name = "futures-lite" 276 | version = "1.12.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" 279 | dependencies = [ 280 | "fastrand", 281 | "futures-core", 282 | "futures-io", 283 | "memchr", 284 | "parking", 285 | "pin-project-lite", 286 | "waker-fn", 287 | ] 288 | 289 | [[package]] 290 | name = "gloo-timers" 291 | version = "0.2.4" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "5fb7d06c1c8cc2a29bee7ec961009a0b2caa0793ee4900c2ffb348734ba1c8f9" 294 | dependencies = [ 295 | "futures-channel", 296 | "futures-core", 297 | "js-sys", 298 | "wasm-bindgen", 299 | ] 300 | 301 | [[package]] 302 | name = "hermit-abi" 303 | version = "0.1.19" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 306 | dependencies = [ 307 | "libc", 308 | ] 309 | 310 | [[package]] 311 | name = "instant" 312 | version = "0.1.12" 313 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 315 | dependencies = [ 316 | "cfg-if", 317 | ] 318 | 319 | [[package]] 320 | name = "js-sys" 321 | version = "0.3.58" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" 324 | dependencies = [ 325 | "wasm-bindgen", 326 | ] 327 | 328 | [[package]] 329 | name = "kv-log-macro" 330 | version = "1.0.7" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" 333 | dependencies = [ 334 | "log", 335 | ] 336 | 337 | [[package]] 338 | name = "lazy_static" 339 | version = "1.4.0" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 342 | 343 | [[package]] 344 | name = "libc" 345 | version = "0.2.126" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 348 | 349 | [[package]] 350 | name = "lock_api" 351 | version = "0.4.7" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" 354 | dependencies = [ 355 | "autocfg", 356 | "scopeguard", 357 | ] 358 | 359 | [[package]] 360 | name = "log" 361 | version = "0.4.17" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 364 | dependencies = [ 365 | "cfg-if", 366 | "value-bag", 367 | ] 368 | 369 | [[package]] 370 | name = "memchr" 371 | version = "2.5.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = 
"2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 374 | 375 | [[package]] 376 | name = "mio" 377 | version = "0.8.4" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" 380 | dependencies = [ 381 | "libc", 382 | "log", 383 | "wasi", 384 | "windows-sys", 385 | ] 386 | 387 | [[package]] 388 | name = "num_cpus" 389 | version = "1.13.1" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 392 | dependencies = [ 393 | "hermit-abi", 394 | "libc", 395 | ] 396 | 397 | [[package]] 398 | name = "once_cell" 399 | version = "1.12.0" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" 402 | 403 | [[package]] 404 | name = "one-thread-brigade" 405 | version = "0.1.0" 406 | dependencies = [ 407 | "libc", 408 | "utils", 409 | ] 410 | 411 | [[package]] 412 | name = "parking" 413 | version = "2.0.0" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" 416 | 417 | [[package]] 418 | name = "parking_lot" 419 | version = "0.12.1" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 422 | dependencies = [ 423 | "lock_api", 424 | "parking_lot_core", 425 | ] 426 | 427 | [[package]] 428 | name = "parking_lot_core" 429 | version = "0.9.3" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" 432 | dependencies = [ 433 | "cfg-if", 434 | "libc", 435 | "redox_syscall", 436 | "smallvec", 437 | "windows-sys", 438 | ] 439 | 440 | [[package]] 441 | name = "pin-project-lite" 442 | 
version = "0.2.9" 443 | source = "registry+https://github.com/rust-lang/crates.io-index" 444 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 445 | 446 | [[package]] 447 | name = "pin-utils" 448 | version = "0.1.0" 449 | source = "registry+https://github.com/rust-lang/crates.io-index" 450 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 451 | 452 | [[package]] 453 | name = "polling" 454 | version = "2.2.0" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = "685404d509889fade3e86fe3a5803bca2ec09b0c0778d5ada6ec8bf7a8de5259" 457 | dependencies = [ 458 | "cfg-if", 459 | "libc", 460 | "log", 461 | "wepoll-ffi", 462 | "winapi", 463 | ] 464 | 465 | [[package]] 466 | name = "proc-macro2" 467 | version = "1.0.40" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 470 | dependencies = [ 471 | "unicode-ident", 472 | ] 473 | 474 | [[package]] 475 | name = "quote" 476 | version = "1.0.20" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 479 | dependencies = [ 480 | "proc-macro2", 481 | ] 482 | 483 | [[package]] 484 | name = "redox_syscall" 485 | version = "0.2.13" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 488 | dependencies = [ 489 | "bitflags", 490 | ] 491 | 492 | [[package]] 493 | name = "regex" 494 | version = "1.5.6" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" 497 | dependencies = [ 498 | "regex-syntax", 499 | ] 500 | 501 | [[package]] 502 | name = "regex-syntax" 503 | version = "0.6.26" 504 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" 506 | 507 | [[package]] 508 | name = "scopeguard" 509 | version = "1.1.0" 510 | source = "registry+https://github.com/rust-lang/crates.io-index" 511 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 512 | 513 | [[package]] 514 | name = "serde" 515 | version = "1.0.137" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" 518 | dependencies = [ 519 | "serde_derive", 520 | ] 521 | 522 | [[package]] 523 | name = "serde_derive" 524 | version = "1.0.137" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" 527 | dependencies = [ 528 | "proc-macro2", 529 | "quote", 530 | "syn", 531 | ] 532 | 533 | [[package]] 534 | name = "signal-hook-registry" 535 | version = "1.4.0" 536 | source = "registry+https://github.com/rust-lang/crates.io-index" 537 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 538 | dependencies = [ 539 | "libc", 540 | ] 541 | 542 | [[package]] 543 | name = "slab" 544 | version = "0.4.6" 545 | source = "registry+https://github.com/rust-lang/crates.io-index" 546 | checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" 547 | 548 | [[package]] 549 | name = "smallvec" 550 | version = "1.8.0" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 553 | 554 | [[package]] 555 | name = "socket2" 556 | version = "0.4.4" 557 | source = "registry+https://github.com/rust-lang/crates.io-index" 558 | checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" 559 | dependencies = [ 560 | "libc", 561 | "winapi", 562 | ] 563 | 564 | 
[[package]] 565 | name = "strsim" 566 | version = "0.10.0" 567 | source = "registry+https://github.com/rust-lang/crates.io-index" 568 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 569 | 570 | [[package]] 571 | name = "syn" 572 | version = "1.0.98" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 575 | dependencies = [ 576 | "proc-macro2", 577 | "quote", 578 | "unicode-ident", 579 | ] 580 | 581 | [[package]] 582 | name = "thread-brigade" 583 | version = "0.1.0" 584 | dependencies = [ 585 | "docopt", 586 | "libc", 587 | "serde", 588 | "utils", 589 | ] 590 | 591 | [[package]] 592 | name = "thread-creation" 593 | version = "0.1.0" 594 | dependencies = [ 595 | "docopt", 596 | "serde", 597 | "utils", 598 | ] 599 | 600 | [[package]] 601 | name = "tokio" 602 | version = "1.19.2" 603 | source = "registry+https://github.com/rust-lang/crates.io-index" 604 | checksum = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" 605 | dependencies = [ 606 | "bytes", 607 | "libc", 608 | "memchr", 609 | "mio", 610 | "num_cpus", 611 | "once_cell", 612 | "parking_lot", 613 | "pin-project-lite", 614 | "signal-hook-registry", 615 | "socket2", 616 | "tokio-macros", 617 | "winapi", 618 | ] 619 | 620 | [[package]] 621 | name = "tokio-macros" 622 | version = "1.8.0" 623 | source = "registry+https://github.com/rust-lang/crates.io-index" 624 | checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" 625 | dependencies = [ 626 | "proc-macro2", 627 | "quote", 628 | "syn", 629 | ] 630 | 631 | [[package]] 632 | name = "unicode-ident" 633 | version = "1.0.1" 634 | source = "registry+https://github.com/rust-lang/crates.io-index" 635 | checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 636 | 637 | [[package]] 638 | name = "utils" 639 | version = "0.1.0" 640 | 641 | [[package]] 642 | name = "value-bag" 643 
| version = "1.0.0-alpha.9" 644 | source = "registry+https://github.com/rust-lang/crates.io-index" 645 | checksum = "2209b78d1249f7e6f3293657c9779fe31ced465df091bbd433a1cf88e916ec55" 646 | dependencies = [ 647 | "ctor", 648 | "version_check", 649 | ] 650 | 651 | [[package]] 652 | name = "version_check" 653 | version = "0.9.4" 654 | source = "registry+https://github.com/rust-lang/crates.io-index" 655 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 656 | 657 | [[package]] 658 | name = "waker-fn" 659 | version = "1.1.0" 660 | source = "registry+https://github.com/rust-lang/crates.io-index" 661 | checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" 662 | 663 | [[package]] 664 | name = "wasi" 665 | version = "0.11.0+wasi-snapshot-preview1" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 668 | 669 | [[package]] 670 | name = "wasm-bindgen" 671 | version = "0.2.81" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" 674 | dependencies = [ 675 | "cfg-if", 676 | "wasm-bindgen-macro", 677 | ] 678 | 679 | [[package]] 680 | name = "wasm-bindgen-backend" 681 | version = "0.2.81" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" 684 | dependencies = [ 685 | "bumpalo", 686 | "lazy_static", 687 | "log", 688 | "proc-macro2", 689 | "quote", 690 | "syn", 691 | "wasm-bindgen-shared", 692 | ] 693 | 694 | [[package]] 695 | name = "wasm-bindgen-futures" 696 | version = "0.4.31" 697 | source = "registry+https://github.com/rust-lang/crates.io-index" 698 | checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" 699 | dependencies = [ 700 | "cfg-if", 701 | "js-sys", 702 | "wasm-bindgen", 703 | "web-sys", 
704 | ] 705 | 706 | [[package]] 707 | name = "wasm-bindgen-macro" 708 | version = "0.2.81" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" 711 | dependencies = [ 712 | "quote", 713 | "wasm-bindgen-macro-support", 714 | ] 715 | 716 | [[package]] 717 | name = "wasm-bindgen-macro-support" 718 | version = "0.2.81" 719 | source = "registry+https://github.com/rust-lang/crates.io-index" 720 | checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" 721 | dependencies = [ 722 | "proc-macro2", 723 | "quote", 724 | "syn", 725 | "wasm-bindgen-backend", 726 | "wasm-bindgen-shared", 727 | ] 728 | 729 | [[package]] 730 | name = "wasm-bindgen-shared" 731 | version = "0.2.81" 732 | source = "registry+https://github.com/rust-lang/crates.io-index" 733 | checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" 734 | 735 | [[package]] 736 | name = "web-sys" 737 | version = "0.3.58" 738 | source = "registry+https://github.com/rust-lang/crates.io-index" 739 | checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" 740 | dependencies = [ 741 | "js-sys", 742 | "wasm-bindgen", 743 | ] 744 | 745 | [[package]] 746 | name = "wepoll-ffi" 747 | version = "0.1.2" 748 | source = "registry+https://github.com/rust-lang/crates.io-index" 749 | checksum = "d743fdedc5c64377b5fc2bc036b01c7fd642205a0d96356034ae3404d49eb7fb" 750 | dependencies = [ 751 | "cc", 752 | ] 753 | 754 | [[package]] 755 | name = "winapi" 756 | version = "0.3.9" 757 | source = "registry+https://github.com/rust-lang/crates.io-index" 758 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 759 | dependencies = [ 760 | "winapi-i686-pc-windows-gnu", 761 | "winapi-x86_64-pc-windows-gnu", 762 | ] 763 | 764 | [[package]] 765 | name = "winapi-i686-pc-windows-gnu" 766 | version = "0.4.0" 767 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 769 | 770 | [[package]] 771 | name = "winapi-x86_64-pc-windows-gnu" 772 | version = "0.4.0" 773 | source = "registry+https://github.com/rust-lang/crates.io-index" 774 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 775 | 776 | [[package]] 777 | name = "windows-sys" 778 | version = "0.36.1" 779 | source = "registry+https://github.com/rust-lang/crates.io-index" 780 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 781 | dependencies = [ 782 | "windows_aarch64_msvc", 783 | "windows_i686_gnu", 784 | "windows_i686_msvc", 785 | "windows_x86_64_gnu", 786 | "windows_x86_64_msvc", 787 | ] 788 | 789 | [[package]] 790 | name = "windows_aarch64_msvc" 791 | version = "0.36.1" 792 | source = "registry+https://github.com/rust-lang/crates.io-index" 793 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 794 | 795 | [[package]] 796 | name = "windows_i686_gnu" 797 | version = "0.36.1" 798 | source = "registry+https://github.com/rust-lang/crates.io-index" 799 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 800 | 801 | [[package]] 802 | name = "windows_i686_msvc" 803 | version = "0.36.1" 804 | source = "registry+https://github.com/rust-lang/crates.io-index" 805 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 806 | 807 | [[package]] 808 | name = "windows_x86_64_gnu" 809 | version = "0.36.1" 810 | source = "registry+https://github.com/rust-lang/crates.io-index" 811 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 812 | 813 | [[package]] 814 | name = "windows_x86_64_msvc" 815 | version = "0.36.1" 816 | source = "registry+https://github.com/rust-lang/crates.io-index" 817 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 818 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "async-brigade", 4 | "async-mem-brigade", 5 | "one-thread-brigade", 6 | "async-creation", 7 | "thread-brigade", 8 | "thread-creation", 9 | "utils", 10 | ] 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Comparison of Rust async and Linux thread context switch time and memory use 2 | 3 | These are a few programs that try to measure context switch time and task memory 4 | use in various ways. In summary: 5 | 6 | - A context switch takes around 0.2µs between async tasks, versus 1.7µs 7 | between kernel threads. But this advantage goes away if the context switch 8 | is due to I/O readiness: both converge to 1.7µs. The async advantage also 9 | goes away in our microbenchmark if the program is pinned to a single core. 10 | So inter-core communication is something to watch out for. 11 | 12 | - Creating a new task takes ~0.3µs for an async task, versus ~17µs for a new 13 | kernel thread. 14 | 15 | - Memory consumption per task (i.e. for a task that doesn't do much) starts at 16 | around a few hundred bytes for an async task, versus around 20KiB (9.5KiB 17 | user, 10KiB kernel) for a kernel thread. This is a minimum: more demanding 18 | tasks will naturally use more. 19 | 20 | - It's no problem to create 250,000 async tasks, but I was only able to get my 21 | laptop to run 80,000 threads (4 core, two way HT, 32GiB), even after raising 22 | every limit I could find. So I don't know what's imposing this limit. See 23 | "Running tests with large numbers of threads", below. 24 | 25 | These are probably not the limiting factors in your application, but it's nice 26 | to know that the headroom is there. 
27 | 28 | ## Measuring thread context switch time 29 | 30 | The programs `thread-brigade` and `async-brigade` each create 500 tasks 31 | connected by pipes (like a “bucket brigade”) and measure how long it takes to 32 | propagate a single byte from the first to the last. One is implemented with 33 | threads, and the other is implemented with the Tokio crate's async I/O. 34 | 35 | $ cd async-brigade/ 36 | $ /bin/time cargo run --release 37 | Finished release [optimized] target(s) in 0.02s 38 | Running `/home/jimb/rust/context-switch/target/release/async-brigade` 39 | 500 tasks, 10000 iterations: 40 | mean 1.795ms per iteration, stddev 82.016µs (3.589µs per task per iter) 41 | 9.83user 8.33system 0:18.19elapsed 99%CPU (0avgtext+0avgdata 17144maxresident)k 42 | 0inputs+0outputs (0major+2283minor)pagefaults 0swaps 43 | $ 44 | 45 | $ cd ../thread-brigade 46 | $ /bin/time cargo run --release 47 | Finished release [optimized] target(s) in 0.02s 48 | Running `/home/jimb/rust/context-switch/target/release/thread-brigade` 49 | 500 tasks, 10000 iterations: 50 | mean 2.657ms per iteration, stddev 231.822µs (5.313µs per task per iter) 51 | 9.14user 27.88system 0:26.91elapsed 137%CPU (0avgtext+0avgdata 16784maxresident)k 52 | 0inputs+0outputs (0major+3381minor)pagefaults 0swaps 53 | $ 54 | 55 | In these runs, I'm seeing 18.19s / 26.91s ≅ 0.68 or a 30% speedup from going 56 | async. However, if I pin the threaded version to a single core, the speed 57 | advantage of async disappears: 58 | 59 | $ taskset --cpu-list 1 /bin/time cargo run --release 60 | Finished release [optimized] target(s) in 0.02s 61 | Running `/home/jimb/rust/context-switch/target/release/thread-brigade` 62 | 500 tasks, 10000 iterations: 63 | mean 1.709ms per iteration, stddev 102.926µs (3.417µs per task per iter) 64 | 4.81user 12.50system 0:17.37elapsed 99%CPU (0avgtext+0avgdata 16744maxresident)k 65 | 0inputs+0outputs (0major+3610minor)pagefaults 0swaps 66 | $ 67 | 68 | I don't know why. 
69 | 70 | It would be interesting to see whether/how the number of tasks in the brigade 71 | affects these numbers. 72 | 73 | Per-thread resident memory use in `thread-brigade` is about 9.5KiB, whereas 74 | per-async-task memory use in `async-brigade` is around 0.4KiB, a factor of ~20. 75 | See 'Measuring memory use', below. 76 | 77 | There are differences in the system calls performed by the two versions: 78 | 79 | - In `thread-brigade`, each task does a single `recvfrom` and a `write` per 80 | iteration, taking 5.5µs. 81 | 82 | - In `async-brigade`, each task does one `recvfrom` and one `write`, neither of 83 | which blocks, and then one more `recvfrom`, which returns `EAGAIN` and suspends 84 | the task. Then control returns to the executor. The reactor thread calls 85 | `epoll` to see which pipes are readable, and tells the executor which task to 86 | run next. All this takes 3.6µs. 87 | 88 | - In `one-thread-brigade`, we build the pipes but just have a single thread loop 89 | through them all and do the reads and writes. This gives us a baseline cost 90 | for the I/O operations themselves, which we can subtract off from the times in 91 | the other two programs, in hopes that the remainder reflects the cost of the 92 | context switches alone. 93 | 94 | The `async-brigade` performance isn't affected much if we switch from Tokio's 95 | default multi-thread executor to a single-threaded executor, so it's not 96 | spending much time in kernel context switches. `thread-brigade` does a kernel 97 | context switch from each task to the next. I think this means that context 98 | switches are more expensive than a `recvfrom` and `epoll` system call. 99 | 100 | If we run the test with 50000 tasks (and reduce the number of iterations to 101 | 100), the speedup doesn't change much, but `thread-brigade` requires a 466MiB 102 | resident set, whereas `async-brigade` runs in around 21MiB.
That's 10KiB of 103 | memory being actively touched by each task, versus 0.4KiB, about a twentieth. 104 | This isn't just the effect of pessimistically-sized thread stacks: we're looking 105 | at the resident set size, which shouldn't include pages allocated to the stack 106 | that the thread never actually touches. So the way Rust right-sizes futures 107 | seems really effective. 108 | 109 | This microbenchmark doesn't do much, but a real application would add to each 110 | task's working set, and that difference might become less significant. But I was 111 | able to run async-brigade with 250,000 tasks; I wasn't able to get my laptop 112 | to run 250,000 threads at all. 113 | 114 | The other programs are minor variations, or make other measurements: 115 | 116 | - `async-mem-brigade` uses `tokio::sync::mpsc` channels to send `usize` values 117 | from one async task to another. This performs the same number of 118 | task-to-task switches, but avoids the overhead of the pipe I/O. It seems 119 | that Tokio's channels do use futexes on Linux to signal readiness. 120 | 121 | - `one-thread-brigade` attempts to measure the cost of the pipe I/O alone, by 122 | creating all the pipes but having a single thread do all the reading and 123 | writing to propagate the byte from the first to the last. 124 | 125 | - `thread-creation` and `async-creation` attempt to measure the time 126 | required to create a thread / async task. 127 | 128 | ## Measuring memory use 129 | 130 | The scripts `thread-brigade/rss-per-thread.sh` and 131 | `async-brigade/rss-per-task.sh` run their respective brigade microbenchmarks 132 | with varying numbers of tasks, and measure the virtual and resident memory 133 | consumption at each count. You can then do a linear regression to see the memory 134 | use of a single task. Note that `async-brigade/rss-per-task.sh` runs 10x as many 135 | tasks, to keep the noise down.
136 | 137 | As mentioned above, in my measurements, each thread costs around 9.5KiB, and 138 | each async task costs around 0.4KiB, so the async version uses about 1/20th as 139 | much memory as the threaded version. 140 | 141 | To run these scripts, you'll need to have the Linux `pmap` utility installed; this 142 | gives an accurate measurement of resident set size. On Fedora, this is included 143 | in the `procps-ng` package. (Pull requests for info about other major 144 | distributions are welcome.) 145 | 146 | ## Running tests with large numbers of threads 147 | 148 | It's interesting to play with the number of tasks to see how that affects the 149 | relative speed of the async and threaded bucket brigades. But in order to test 150 | large numbers of threads, you may need to remove some of your system's 151 | guardrails. 152 | 153 | On Linux: 154 | 155 | - You will run out of file descriptors. Each task needs two file descriptors, 156 | one for the reading end of the upstream pipe, and one for the writing end of 157 | the downstream pipe. The process also needs a few file descriptors for 158 | miscellaneous purposes. For 50000 tasks, say: 159 | 160 | $ ulimit -n 100010 161 | 162 | - You may run out of process ID numbers. Each thread needs its own pid. So, 163 | perhaps something like: 164 | 165 | $ sudo sysctl kernel.pid_max=4194304 166 | 167 | This is overkill, but why worry about this? (The number above is the default 168 | in Fedora 33, 4 × 1024 × 1024; apparently systemd was worried about pid 169 | rollover.) 170 | 171 | - You will run out of memory map areas. Each thread has its own stack, with an 172 | unmapped guard page at the low end to catch stack overflows. There seem to 173 | be other constraints as well. In practice, this seems to work for 50000 174 | tasks: 175 | 176 | $ sudo sysctl vm.max_map_count=200000 177 | 178 | - Process ID numbers can also be limited by the `pids` cgroup controller. 
179 | 180 | A cgroup is a collection of processes on which you can impose system 181 | resource limits as a group. Every process belongs to exactly one cgroup. 182 | When one process creates another, the new process is placed in the same 183 | cgroup as its parent. 184 | 185 | Cgroups are arranged in a tree, where limits set on a cgroup apply to that 186 | group and all its descendants. Only leaf cgroups actually contain 187 | processes/threads. The cgroups in the hierarchy have names that look like 188 | filesystem paths; the root cgroup is named `/`. 189 | 190 | You can see which cgroup your shell belongs to like this: 191 | 192 | $ cat /proc/$$/cgroup 193 | 0::/user.slice/user-1000.slice/gargle/howl.scope 194 | 195 | This indicates that my shell is in a cgroup named 196 | `/user.slice/user-1000.slice/gargle/howl.scope`. The names can get quite 197 | long, so this example is simplified. 198 | 199 | On Fedora, at least, the cgroup hierarchy is reflected in the ordinary 200 | filesystem as a directory tree under `/sys/fs/cgroup`, so my shell's 201 | cgroup appears as a directory here: 202 | 203 | $ ls /sys/fs/cgroup/user.slice/user-1000.slice/gargle/howl.scope 204 | cgroup.controllers cpu.stat memory.pressure 205 | cgroup.events io.pressure memory.stat 206 | cgroup.freeze memory.current memory.swap.current 207 | cgroup.max.depth memory.events memory.swap.events 208 | cgroup.max.descendants memory.events.local memory.swap.high 209 | cgroup.procs memory.high memory.swap.max 210 | cgroup.stat memory.low pids.current 211 | cgroup.subtree_control memory.max pids.events 212 | cgroup.threads memory.min pids.max 213 | cgroup.type memory.numa_stat 214 | cpu.pressure memory.oom.group 215 | $ 216 | 217 | You can inspect and manipulate cgroups by looking at these files. Some 218 | represent different resources that can be limited, while others relate to 219 | the cgroup hierarchy itself. 
220 | 221 | In particular, the file `pids.max` shows the limit this cgroup imposes on my 222 | shell: 223 | 224 | $ cat /sys/fs/cgroup/user.slice/user-1000.slice/gargle/howl.scope/pids.max 225 | max 226 | $ 227 | 228 | A limit of `max` means that there's no limit. But limits set on parent 229 | cgroups also apply to their descendants, so we need to check our ancestor 230 | groups: 231 | 232 | $ cat /sys/fs/cgroup/user.slice/user-1000.slice/gargle/pids.max 233 | 10813 234 | $ cat /sys/fs/cgroup/user.slice/user-1000.slice/pids.max 235 | 84184 236 | $ cat /sys/fs/cgroup/user.slice/pids.max 237 | max 238 | $ cat /sys/fs/cgroup/pids.max 239 | cat: /sys/fs/cgroup/pids.max: No such file or directory 240 | $ 241 | 242 | Apparently there's a limit of 10813 pids imposed by my shell's cgroup's 243 | parent, and a higher limit of 84184 pids set for me as a user. (On Fedora, 244 | these limits are established by systemd configuration files.) To raise those 245 | limits, we can simply write another value to these files, as root: 246 | 247 | $ sudo sh -c 'echo 100000 > /sys/fs/cgroup/user.slice/user-1000.slice/pids.max' 248 | $ sudo sh -c 'echo max > /sys/fs/cgroup/user.slice/user-1000.slice/gargle/pids.max' 249 | 250 | The cgroup machinery seems to vary not only from one Linux distribution to 251 | the next, but even from one version to another. So while I hope this is 252 | helpful, you may need to consult other documentation. `man 7 cgroups` is a 253 | good place to start, but beware, it makes my explanation here look short. 254 | 255 | - The kernel parameter `kernel.threads-max` is a system-wide limit on the 256 | number of threads. You probably won't run into this. 
257 | 258 | $ sysctl kernel.threads-max 259 | kernel.threads-max = 255208 260 | $ 261 | 262 | - There is a limit on the number of processes that can run under a given real 263 | user ID: 264 | 265 | $ ulimit -u 266 | 127604 267 | $ 268 | 269 | At the system call level, this is the `getrlimit(2)` system call's 270 | `RLIMIT_NPROC` resource. This, too, you're unlikely to run into. 271 | 272 | - The default thread stack size is 8MiB: 273 | 274 | $ ulimit -s 275 | 8192 276 | $ 277 | 278 | You might expect this to limit a 32GiB (x86_64) machine to 4096 threads, but 279 | the kernel only allocates physical memory to a stack as the thread touches 280 | its pages, so the initial memory consumption of a thread in user space is 281 | actually only around 8KiB. At this size, 32GiB could accommodate 4Mi 282 | threads. Again, this is unlikely to be the limiting factor. 283 | 284 | Although it doesn't matter, the `thread-brigade` program in this repository 285 | requests a 1MiB stack for each thread, which is plenty for our purposes. 286 | 287 | With these changes made, I was able to run `thread-brigade` with 80000 tasks. I 288 | tried to run more, but even after raising every limit I could identify, I still 289 | got errors. So I don't know what imposes this limit. 290 | 291 | ## Does any of this matter? 292 | 293 | In GitHub issue #1, @spacejam raised a good point: 294 | 295 | > overall, there are a lot of things here that really fade into insignificance 296 | > when you consider the simple effort required to deserialize JSON or handle 297 | > TLS. People often see that there's some theoretical benefit of async and then 298 | > they accept far less ergonomic coding styles and the additional bug classes 299 | > that only happen on async due to accidental blocking etc... despite the fact 300 | > that when you consider a real-world deployed application, those "benefits" 301 | > become indistinguishable from noise. 
However, due to the additional bug 302 | > classes and worse ergonomics, there is now less energy for actually optimizing 303 | > the business logic, which is where all of the cycles and resource use are 304 | > anyway, so in-practice async implementations tend to be buggier and slower. 305 | 306 | Below is my reply to them, lightly edited: 307 | 308 | > I have a few responses to this. 309 | > 310 | > First of all, the reason I carried out the experiments in this repo in the 311 | > first place was that I basically agreed with all of your points here. I think 312 | > async is wildly oversold as "faster" without any real investigation into why 313 | > that would be. It is hard to pin down exactly how the alleged advantages would 314 | > arise. The same I/O operations have to be carried out either way (or worse); 315 | > kernel context switches have been heavily optimized over the years (although 316 | > the Spectre mitigations made them worse); and the whole story of the creation 317 | > of NPTL was about it beating IBM's competing M-on-N thread implementation 318 | > (which I see as analogous to async task systems) in the very microbenchmarks 319 | > in which the M-on-N thread library was expected to have an advantage. 320 | > 321 | > However, in conversations that I sought out with people with experience 322 | > implementing high-volume servers, both with threads and with async designs, my 323 | > async skepticism met a lot of pushback. They consistently reported struggling 324 | > with threaded designs and not being able to get performance under control until 325 | > they went async. Big caveat: they were not using Rust - these were older designs 326 | > in C++ and even C. But it jibes well with the other successful designs you see 327 | > out there, like nginx and Elixir (which is used by WhatsApp, among others), 328 | > which are all essentially async. 
329 | > 330 | > So the purpose of these experiments was to see if I could isolate some of the 331 | > sources of async's apparent advantages. It came down to memory consumption, 332 | > creation time, and context switch time each having best-case 333 | > order-of-magnitude advantages. Taken together, those advantages are beyond the 334 | > point that I'm willing to call negligible. How often the best case actually 335 | > arises is unclear, but one can argue that that, at least, is under the 336 | > programmer's control, so the ceiling on how far implementation effort can get 337 | > you is higher, in an async design. 338 | > 339 | > Ultimately, as far as this repo is concerned, you need to decide whether you 340 | > trust your readers to understand both the value and the limitations of 341 | > microbenchmarks. If you assume your readers are in Twitter mode---they're just 342 | > going to glance at the headlines and come away with a binary, "async good, two 343 | > legs bad" kind of conclusion---then maybe it's better not to publish 344 | > microbenchmarks at all, because they're misleading. Reality is more sensitive to 345 | > details. But I think the benefit of offering these microbenchmarks and the 346 | > README's analysis to careful readers might(?) outweigh the harm done by the 347 | > noise from careless readers, because I think the careful readers are more likely 348 | > to use the material in a way that has lasting impact. The wind changes; the 349 | > forest does not. 350 | > 351 | > The 2nd edition of Programming Rust (due out in June 2021) has a chapter on 352 | > async that ends with a discussion of the rationale for async programming. It 353 | > tries to dismiss some of the commonly heard bogus arguments, and present the 354 | > advantages that async does have with the appropriate qualifications. It 355 | > mentions tooling disadvantages. 
Generally, the chapter describes Rust's async 356 | > implementation in a decent amount of detail, because we want our readers to be 357 | > able to anticipate how it will perform and where it might help; the summary 358 | > attempts to make clear what all that machinery can and cannot accomplish. 359 | 360 | The only thing I'd add is that the measurements reported here for asynchronous 361 | performance were taken of an implementation that uses `epoll`-style system 362 | calls. The newer `io_uring`-style APIs seem radically different, and I'm curious 363 | to see whether these might change the story here. 364 | -------------------------------------------------------------------------------- /async-brigade/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "async-brigade" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2021" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | docopt = "1" 11 | libc = "0.2" 12 | serde = { version = "1", features = ["derive"] } 13 | tokio = { version = "1.19", features = [ "full" ] } 14 | utils = { path = "../utils" } 15 | -------------------------------------------------------------------------------- /async-brigade/rss-per-task.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | if ! [ -f Cargo.toml ]; then 6 | echo "Run in top-level directory of async-brigade package." >&2 7 | exit 1 8 | fi 9 | 10 | cargo build --release 11 | 12 | echo -e "num threads\tvirtual KiB\tresident KiB" 13 | for ((n=1000; n <= 10000; n += 500)); do 14 | ../target/release/async-brigade --quiet --iters 10 --threads $n --command 'pmap -x {pid}' \ 15 | | awk -v num_threads=$n '/^total/ { print num_threads "\t" $3 "\t" $4 }' 16 | # | awk -v num_threads=$n ' 17 | # /Active . 
Total Size/ { print num_threads "\t" $8 } 18 | # ' 19 | done 20 | -------------------------------------------------------------------------------- /async-brigade/src/main.rs: -------------------------------------------------------------------------------- 1 | use docopt::Docopt; 2 | use serde::Deserialize; 3 | use std::process::Command; 4 | use std::time::Instant; 5 | use tokio::net::UnixStream; 6 | use tokio::io::{AsyncReadExt, AsyncWriteExt}; 7 | use utils::{Stats, UsefulDuration}; 8 | 9 | const USAGE: &str = " 10 | Microbenchmark of context switch overhead. 11 | 12 | Create a chain of Rust asynchronous tasks connected together by pipes, each one 13 | repeatedly reading a single byte from its upstream pipe and writing it to its 14 | downstream pipe. One 'iteration' of the benchmark drops a byte in one end, and 15 | measures the time required for it to come out the other end. 16 | 17 | If `--command COMMAND` is given, then the program runs `COMMAND` before exiting. 18 | This gives an opportunity to measure the program's memory use. If `COMMAND` 19 | contains the string `{pid}`, each occurrence is replaced with this program's 20 | process ID. 21 | 22 | Usage: 23 | async-brigade [--threads N] [--iters N] [--warmups N] [--command COMMAND] [--quiet] 24 | 25 | Options: 26 | --threads Number of async tasks (note: not OS threads). [default: 500] 27 | --iters Number of iterations to perform. [default: 10000] 28 | --warmups Number of warmup iterations to perform before benchmarking. 29 | [default: 100] 30 | --command Command to run before exiting. 31 | --quiet Don't print time measurements. 
32 | "; 33 | 34 | #[derive(Debug, Deserialize)] 35 | struct Args { 36 | flag_threads: usize, 37 | flag_iters: usize, 38 | flag_warmups: usize, 39 | flag_command: Option<String>, 40 | flag_quiet: bool, 41 | } 42 | 43 | struct Pipe { 44 | read: UnixStream, 45 | write: UnixStream, 46 | } 47 | 48 | fn pipe() -> Result<Pipe, std::io::Error> { 49 | let (read, write) = UnixStream::pair()?; 50 | Ok(Pipe { read, write }) 51 | } 52 | 53 | #[tokio::main] 54 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 55 | let args: Args = Docopt::new(USAGE) 56 | .and_then(|d| d.deserialize()) 57 | .unwrap_or_else(|e| e.exit()); 58 | 59 | if !args.flag_quiet { 60 | eprintln!("{} tasks, {} iterations:", args.flag_threads, args.flag_iters); 61 | } 62 | 63 | let Pipe { read: mut upstream_read, write: mut first_write} = pipe()?; 64 | for _i in 0..args.flag_threads { 65 | let next_pipe = pipe()?; 66 | let mut downstream_write = next_pipe.write; 67 | tokio::spawn(async move { 68 | let mut buf = [0_u8; 1]; 69 | 70 | // Establish 'async' block's return type. Yeah. 71 | if false { 72 | return Ok::<(), std::io::Error>(()); 73 | } 74 | 75 | loop { 76 | assert_eq!(upstream_read.read_exact(&mut buf).await?, 1); 77 | downstream_write.write_all(&buf).await?; 78 | } 79 | }); 80 | upstream_read = next_pipe.read; 81 | } 82 | 83 | let mut buf = [0_u8; 1]; 84 | 85 | // Warm up. 
86 | for _i in 0..args.flag_warmups { 87 | first_write.write_all(b"*").await?; 88 | upstream_read.read_exact(&mut buf).await?; 89 | } 90 | 91 | let mut stats = Stats::new(); 92 | for _i in 0..args.flag_iters { 93 | let start = Instant::now(); 94 | first_write.write_all(b"*").await?; 95 | upstream_read.read_exact(&mut buf).await?; 96 | let end = Instant::now(); 97 | 98 | stats.push(UsefulDuration::from(end - start).into()); 99 | } 100 | 101 | if !args.flag_quiet { 102 | eprintln!("mean {} per iteration, stddev {} ({} per task per iter)", 103 | UsefulDuration::from(stats.mean()), 104 | UsefulDuration::from(stats.population_stddev()), 105 | UsefulDuration::from(stats.mean() / args.flag_threads as f64)); 106 | } 107 | 108 | if let Some(command) = args.flag_command { 109 | let command = command.replace("{pid}", &std::process::id().to_string()); 110 | let status = Command::new("sh") 111 | .arg("-c") 112 | .arg(command) 113 | .status()?; 114 | if !status.success() { 115 | Err(format!("child exited with status: {}", status))?; 116 | } 117 | } 118 | 119 | Ok(()) 120 | } 121 | -------------------------------------------------------------------------------- /async-creation/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "async-creation" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | async-std = "1.6.0-beta.2" 9 | docopt = "1" 10 | serde = { version = "1", features = ["derive"] } 11 | utils = { path = "../utils" } 12 | -------------------------------------------------------------------------------- /async-creation/README.md: -------------------------------------------------------------------------------- 1 | # async-creation: Measure cost of async task creation 2 | 3 | This microbenchmark tries to measure how long it takes to spawn an asynchronous 4 | task. It spawns a given number of asynchronous tasks. 
It measures how long it takes 5 | for the spawning process to spawn all the tasks, and how long it takes a spawned 6 | task to begin execution. 7 | -------------------------------------------------------------------------------- /async-creation/src/main.rs: -------------------------------------------------------------------------------- 1 | use async_std::task; 2 | use docopt::Docopt; 3 | use serde::Deserialize; 4 | use std::time::Instant; 5 | use utils::{Stats, UsefulDuration}; 6 | 7 | const USAGE: &'static str = " 8 | Microbenchmark of task creation overhead. 9 | 10 | Spawn a given number of asynchronous tasks. Measure how long it takes for the 11 | spawning process to spawn all the tasks, and how long it takes a spawned task to 12 | begin execution. 13 | 14 | Usage: 15 | task-creation [--tasks N] [--iters N] [--warmups N] 16 | 17 | Options: 18 | --tasks Number of tasks. [default: 10000] 19 | --iters Number of iterations to perform. [default: 100] 20 | --warmups Number of warmup iterations to perform before benchmarking. 21 | [default: 10] 22 | "; 23 | 24 | #[derive(Debug, Deserialize)] 25 | struct Args { 26 | flag_tasks: usize, 27 | flag_iters: usize, 28 | flag_warmups: usize, 29 | } 30 | 31 | fn main() { 32 | let args: Args = Docopt::new(USAGE) 33 | .and_then(|d| d.deserialize()) 34 | .unwrap_or_else(|e| e.exit()); 35 | 36 | struct StartedTask { 37 | start_time: Instant, 38 | handle: task::JoinHandle<Instant>, 39 | } 40 | 41 | struct FinishedTask { 42 | start_time: Instant, 43 | end_time: Instant, 44 | } 45 | 46 | let mut started = Vec::with_capacity(args.flag_tasks); 47 | let mut finished = Vec::with_capacity(args.flag_tasks); 48 | 49 | eprintln!("{} tasks, {} warmups, {} iterations:", args.flag_tasks, args.flag_warmups, args.flag_iters); 50 | 51 | // Do a few warmup passes. 
52 | for _warmup in 0..args.flag_warmups { 53 | started.clear(); 54 | finished.clear(); 55 | 56 | for _ in 0..args.flag_tasks { 57 | let start_time = Instant::now(); 58 | let handle = task::spawn(async move { Instant::now() }); 59 | started.push(StartedTask { start_time, handle }); 60 | } 61 | 62 | finished.extend(started.drain(..) 63 | .map(|StartedTask { start_time, handle }| { 64 | let end_time = task::block_on(handle); 65 | FinishedTask { start_time, end_time } 66 | })); 67 | } 68 | 69 | // Do the real passes. 70 | let mut creation_times = Stats::new(); 71 | let mut started_times = Stats::new(); 72 | for _rep in 0..args.flag_iters { 73 | started.clear(); 74 | finished.clear(); 75 | 76 | let start_creation = Instant::now(); 77 | for _ in 0..args.flag_tasks { 78 | let start_time = Instant::now(); 79 | let handle = task::spawn(async move { Instant::now() }); 80 | started.push(StartedTask { start_time, handle }); 81 | } 82 | let end_creation = Instant::now(); 83 | creation_times.push(UsefulDuration::from(end_creation - start_creation).into()); 84 | 85 | finished.extend(started.drain(..) 
86 | .map(|StartedTask { start_time, handle }| { 87 | let end_time = task::block_on(handle); 88 | FinishedTask { start_time, end_time } 89 | })); 90 | 91 | started_times.extend(finished.iter() 92 | .map(|FinishedTask { start_time, end_time }| { 93 | UsefulDuration::from(*end_time - *start_time).into() 94 | })); 95 | } 96 | 97 | eprintln!("create a task: mean {} per iter, stddev {} ({} per task)", 98 | UsefulDuration::from(creation_times.mean()), 99 | UsefulDuration::from(creation_times.population_stddev()), 100 | UsefulDuration::from(creation_times.mean() / args.flag_tasks as f64)); 101 | eprintln!("creation to body: mean {}, stddev {}", 102 | UsefulDuration::from(started_times.mean()), 103 | UsefulDuration::from(started_times.population_stddev())); 104 | } 105 | -------------------------------------------------------------------------------- /async-mem-brigade/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "async-mem-brigade" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2021" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | tokio = { version = "1.19", features = [ "full" ] } 11 | libc = "0.2" 12 | utils = { path = "../utils" } 13 | -------------------------------------------------------------------------------- /async-mem-brigade/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use tokio::sync::mpsc; 3 | use utils::{Stats, UsefulDuration}; 4 | 5 | struct Pipe { 6 | read: mpsc::Receiver<usize>, 7 | write: mpsc::Sender<usize>, 8 | } 9 | 10 | fn pipe() -> Result<Pipe, std::io::Error> { 11 | let (write, read) = mpsc::channel(1); 12 | Ok(Pipe { read, write }) 13 | } 14 | 15 | #[tokio::main(flavor = "current_thread")] 16 | async fn main() -> Result<(), Box<dyn std::error::Error>> { 17 | const NUM_TASKS: usize = 500; 18 | const NUM_WARMUP_REPS: usize = 5; 19 | const NUM_REPS: 
usize = 10000; 20 | 21 | let Pipe { read: mut upstream_read, write: first_write} = pipe()?; 22 | for _i in 0..NUM_TASKS { 23 | let next_pipe = pipe()?; 24 | let downstream_write = next_pipe.write; 25 | tokio::spawn(async move { 26 | // Establish 'async' block's return type. Yeah. 27 | if false { 28 | return Ok::<(), mpsc::error::SendError<usize>>(()); 29 | } 30 | 31 | loop { 32 | let n = upstream_read.recv().await.unwrap(); 33 | downstream_write.send(n + 1).await?; 34 | } 35 | }); 36 | upstream_read = next_pipe.read; 37 | } 38 | 39 | // Warm up. 40 | for _i in 0..NUM_WARMUP_REPS { 41 | first_write.send(0).await?; 42 | assert_eq!(upstream_read.recv().await, Some(NUM_TASKS)); 43 | } 44 | 45 | let mut stats = Stats::new(); 46 | for _i in 0..NUM_REPS { 47 | let start = Instant::now(); 48 | first_write.send(0).await?; 49 | assert_eq!(upstream_read.recv().await, Some(NUM_TASKS)); 50 | let end = Instant::now(); 51 | 52 | stats.push(UsefulDuration::from(end - start).into()); 53 | } 54 | 55 | println!("{} iterations, {} tasks, mean {} per iteration, stddev {} ({} per task per iter)", 56 | NUM_REPS, NUM_TASKS, 57 | UsefulDuration::from(stats.mean()), 58 | UsefulDuration::from(stats.population_stddev()), 59 | UsefulDuration::from(stats.mean() / NUM_TASKS as f64)); 60 | 61 | // Otherwise, Tokio blocks waiting for other tasks to finish. I don't want 62 | // to risk introducing noise by adding shutdown logic to them, so just exit 63 | // the entire process. 
64 | std::process::exit(0); 65 | } 66 | -------------------------------------------------------------------------------- /one-thread-brigade/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "one-thread-brigade" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | libc = "0.2" 11 | utils = { path = "../utils" } 12 | -------------------------------------------------------------------------------- /one-thread-brigade/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | use std::os::unix::net::UnixStream; 3 | use std::io::prelude::*; 4 | use utils::{Stats, UsefulDuration}; 5 | 6 | struct Pipe { 7 | read: UnixStream, 8 | write: UnixStream, 9 | } 10 | 11 | fn pipe() -> Result<Pipe, std::io::Error> { 12 | let (read, write) = UnixStream::pair()?; 13 | Ok(Pipe { read, write }) 14 | } 15 | 16 | fn main() -> Result<(), Box<dyn std::error::Error>> { 17 | const NUM_TASKS: usize = 500; 18 | const NUM_WARMUP_REPS: usize = 5; 19 | const NUM_REPS: usize = 10000; 20 | 21 | let Pipe { read: mut upstream_read, write: mut first_write} = pipe()?; 22 | let mut pipes = Vec::new(); 23 | for _i in 0..NUM_TASKS { 24 | let next_pipe = pipe()?; 25 | pipes.push(Pipe { 26 | read: upstream_read, 27 | write: next_pipe.write, 28 | }); 29 | 30 | upstream_read = next_pipe.read; 31 | } 32 | 33 | fn brigade(pipes: &mut Vec<Pipe>) -> Result<(), std::io::Error> { 34 | let mut buf = [0_u8; 1]; 35 | for pipe in pipes { 36 | pipe.read.read_exact(&mut buf)?; 37 | pipe.write.write_all(&buf)?; 38 | } 39 | Ok(()) 40 | } 41 | 42 | let mut buf = [0_u8; 1]; 43 | 44 | // Warm up. 
45 | for _i in 0..NUM_WARMUP_REPS { 46 | first_write.write_all(b"*")?; 47 | brigade(&mut pipes)?; 48 | upstream_read.read(&mut buf)?; 49 | } 50 | 51 | let mut stats = Stats::new(); 52 | for _i in 0..NUM_REPS { 53 | let start = Instant::now(); 54 | first_write.write_all(b"*")?; 55 | brigade(&mut pipes)?; 56 | upstream_read.read(&mut buf)?; 57 | let end = Instant::now(); 58 | 59 | stats.push(UsefulDuration::from(end - start).into()); 60 | } 61 | 62 | println!("{} iterations, {} tasks, mean {} per iteration, stddev {} ({} per task per iter)", 63 | NUM_REPS, NUM_TASKS, 64 | UsefulDuration::from(stats.mean()), 65 | UsefulDuration::from(stats.population_stddev()), 66 | UsefulDuration::from(stats.mean() / NUM_TASKS as f64)); 67 | 68 | Ok(()) 69 | } 70 | -------------------------------------------------------------------------------- /thread-brigade/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "thread-brigade" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | docopt = "1" 11 | libc = "0.2" 12 | serde = { version = "1", features = ["derive"] } 13 | utils = { path = "../utils" } 14 | -------------------------------------------------------------------------------- /thread-brigade/rss-per-thread.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | if ! [ -f Cargo.toml ]; then 6 | echo "Run in top-level directory of thread-brigade package." 
>&2 7 | exit 1 8 | fi 9 | 10 | cargo build --release 11 | 12 | echo -e "num threads\tvirtual KiB\tresident KiB" 13 | for ((n=100; n <= 1000; n += 50)); do 14 | ../target/release/thread-brigade --quiet --iters 1000 --threads $n --command 'pmap -x {pid}' \ 15 | | awk -v num_threads=$n '/^total/ { print num_threads "\t" $3 "\t" $4 }' 16 | # | awk -v num_threads=$n ' 17 | # /Active . Total Size/ { print num_threads "\t" $8 } 18 | # ' 19 | done 20 | -------------------------------------------------------------------------------- /thread-brigade/src/main.rs: -------------------------------------------------------------------------------- 1 | use docopt::Docopt; 2 | use serde::Deserialize; 3 | use std::io::prelude::*; 4 | use std::os::unix::net::UnixStream; 5 | use std::process::Command; 6 | use std::time::Instant; 7 | use utils::{Stats, UsefulDuration}; 8 | 9 | const USAGE: &'static str = " 10 | Microbenchmark of context switch overhead. 11 | 12 | Create a chain of threads connected together by pipes, each one repeatedly 13 | reading a single byte from its upstream pipe and writing it to its downstream 14 | pipe. One 'iteration' of the benchmark drops a byte in one end, and measures the 15 | time required for it to come out the other end. 16 | 17 | If `--command COMMAND` is given, then the program runs `COMMAND` before exiting. 18 | This gives an opportunity to measure the program's memory use. If `COMMAND` 19 | contains the string `{pid}`, each occurrence is replaced with this program's 20 | process ID. 21 | 22 | Usage: 23 | thread-brigade [--threads N] [--iters N] [--warmups N] [--command COMMAND] [--quiet] 24 | 25 | Options: 26 | --threads Number of threads. [default: 500] 27 | --iters Number of iterations to perform. [default: 10000] 28 | --warmups Number of warmup iterations to perform before benchmarking. 29 | [default: 100] 30 | --command Command to run before exiting. 31 | --quiet Don't print time measurements. 
32 | "; 33 | 34 | #[derive(Debug, Deserialize)] 35 | struct Args { 36 | flag_threads: usize, 37 | flag_iters: usize, 38 | flag_warmups: usize, 39 | flag_command: Option<String>, 40 | flag_quiet: bool, 41 | } 42 | 43 | struct Pipe { 44 | read: UnixStream, 45 | write: UnixStream, 46 | } 47 | 48 | fn pipe() -> Result<Pipe, std::io::Error> { 49 | let (read, write) = UnixStream::pair()?; 50 | Ok(Pipe { read, write }) 51 | } 52 | 53 | fn main() -> Result<(), Box<dyn std::error::Error>> { 54 | let args: Args = Docopt::new(USAGE) 55 | .and_then(|d| d.deserialize()) 56 | .unwrap_or_else(|e| e.exit()); 57 | 58 | if !args.flag_quiet { 59 | eprintln!("{} tasks, {} iterations:", args.flag_threads, args.flag_iters); 60 | } 61 | 62 | let Pipe { read: mut upstream_read, write: mut first_write} = pipe()?; 63 | for _i in 0..args.flag_threads { 64 | let next_pipe = pipe()?; 65 | let mut downstream_write = next_pipe.write; 66 | std::thread::Builder::new() 67 | .stack_size(1024 * 1024) 68 | .spawn(move || -> Result<(), std::io::Error> { 69 | let mut buf = [0_u8; 1]; 70 | 71 | loop { 72 | upstream_read.read_exact(&mut buf)?; 73 | downstream_write.write_all(&buf)?; 74 | } 75 | })?; 76 | upstream_read = next_pipe.read; 77 | } 78 | 79 | let mut buf = [0_u8; 1]; 80 | 81 | // Warm up. 
82 | for _i in 0..args.flag_warmups { 83 | first_write.write_all(b"*")?; 84 | upstream_read.read(&mut buf)?; 85 | } 86 | 87 | let mut stats = Stats::new(); 88 | for _i in 0..args.flag_iters { 89 | let start = Instant::now(); 90 | first_write.write_all(b"*")?; 91 | upstream_read.read(&mut buf)?; 92 | let end = Instant::now(); 93 | 94 | stats.push(UsefulDuration::from(end - start).into()); 95 | } 96 | 97 | if !args.flag_quiet { 98 | eprintln!("mean {} per iteration, stddev {} ({} per task per iter)", 99 | UsefulDuration::from(stats.mean()), 100 | UsefulDuration::from(stats.population_stddev()), 101 | UsefulDuration::from(stats.mean() / args.flag_threads as f64)); 102 | } 103 | 104 | if let Some(command) = args.flag_command { 105 | let command = command.replace("{pid}", &std::process::id().to_string()); 106 | let status = Command::new("sh") 107 | .arg("-c") 108 | .arg(command) 109 | .status()?; 110 | if !status.success() { 111 | Err(format!("child exited with status: {}", status))?; 112 | } 113 | } 114 | 115 | Ok(()) 116 | } 117 | -------------------------------------------------------------------------------- /thread-creation/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "thread-creation" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | docopt = "1" 11 | serde = { version = "1", features = ["derive"] } 12 | utils = { path = "../utils" } 13 | -------------------------------------------------------------------------------- /thread-creation/src/main.rs: -------------------------------------------------------------------------------- 1 | use docopt::Docopt; 2 | use serde::Deserialize; 3 | use std::thread; 4 | use std::time::Instant; 5 | use utils::{Stats, UsefulDuration}; 6 | 7 | const USAGE: &'static str = " 8 | Microbenchmark of task creation 
overhead. 9 | 10 | Spawn a given number of asynchronous tasks. Measure how long it takes for the 11 | spawning process to spawn all the tasks, and how long it takes a spawned task to 12 | begin execution. 13 | 14 | Usage: 15 | task-creation [--tasks N] [--iters N] [--warmups N] 16 | 17 | Options: 18 | --tasks Number of tasks. [default: 1000] 19 | --iters Number of iterations to perform. [default: 100] 20 | --warmups Number of warmup iterations to perform before benchmarking. 21 | [default: 10] 22 | "; 23 | 24 | #[derive(Debug, Deserialize)] 25 | struct Args { 26 | flag_tasks: usize, 27 | flag_iters: usize, 28 | flag_warmups: usize, 29 | } 30 | 31 | fn main() { 32 | let args: Args = Docopt::new(USAGE) 33 | .and_then(|d| d.deserialize()) 34 | .unwrap_or_else(|e| e.exit()); 35 | 36 | struct StartedTask { 37 | start_time: Instant, 38 | handle: thread::JoinHandle, 39 | } 40 | 41 | struct FinishedTask { 42 | start_time: Instant, 43 | end_time: Instant, 44 | } 45 | 46 | let mut started = Vec::with_capacity(args.flag_tasks); 47 | let mut finished = Vec::with_capacity(args.flag_tasks); 48 | 49 | eprintln!("{} tasks, {} warmups, {} iterations:", args.flag_tasks, args.flag_warmups, args.flag_iters); 50 | 51 | // Do a few warmup passes. 52 | for _warmup in 0..args.flag_warmups { 53 | started.clear(); 54 | finished.clear(); 55 | 56 | for _ in 0..args.flag_tasks { 57 | let start_time = Instant::now(); 58 | let handle = thread::spawn(move || { Instant::now() }); 59 | started.push(StartedTask { start_time, handle }); 60 | } 61 | 62 | finished.extend(started.drain(..) 63 | .map(|StartedTask { start_time, handle }| { 64 | let end_time = handle.join().unwrap(); 65 | FinishedTask { start_time, end_time } 66 | })); 67 | } 68 | 69 | // Do the real passes. 
70 | let mut creation_times = Stats::new(); 71 | let mut started_times = Stats::new(); 72 | for _rep in 0..args.flag_iters { 73 | started.clear(); 74 | finished.clear(); 75 | 76 | let start_creation = Instant::now(); 77 | for _ in 0..args.flag_tasks { 78 | let start_time = Instant::now(); 79 | let handle = thread::spawn(move || { Instant::now() }); 80 | started.push(StartedTask { start_time, handle }); 81 | } 82 | let end_creation = Instant::now(); 83 | creation_times.push(UsefulDuration::from(end_creation - start_creation).into()); 84 | 85 | finished.extend(started.drain(..) 86 | .map(|StartedTask { start_time, handle }| { 87 | let end_time = handle.join().unwrap(); 88 | FinishedTask { start_time, end_time } 89 | })); 90 | 91 | started_times.extend(finished.iter() 92 | .map(|FinishedTask { start_time, end_time }| { 93 | UsefulDuration::from(*end_time - *start_time).into() 94 | })); 95 | } 96 | 97 | eprintln!("create a task: mean {} per iter, stddev {} ({} per task)", 98 | UsefulDuration::from(creation_times.mean()), 99 | UsefulDuration::from(creation_times.population_stddev()), 100 | UsefulDuration::from(creation_times.mean() / args.flag_tasks as f64)); 101 | eprintln!("creation to body: mean {}, stddev {}", 102 | UsefulDuration::from(started_times.mean()), 103 | UsefulDuration::from(started_times.population_stddev())); 104 | } 105 | -------------------------------------------------------------------------------- /utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "utils" 3 | version = "0.1.0" 4 | authors = ["Jim Blandy "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | -------------------------------------------------------------------------------- /utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod stats; 2 | mod useful_duration; 3 
use std::fmt;
use std::time::Duration;

// ---- utils/src/stats.rs ----

/// Running accumulator for the mean and population standard deviation of a
/// stream of `f64` samples.
///
/// Only the count, the sum, and the sum of squares are stored, so memory use
/// is constant regardless of how many samples are pushed.
#[derive(Debug, Default)]
pub struct Stats {
    /// Number of samples seen.
    x0s: usize,
    /// Sum of the samples.
    x1s: f64,
    /// Sum of the squared samples.
    x2s: f64,
}

impl Stats {
    /// Create an empty accumulator.
    pub fn new() -> Stats {
        Default::default()
    }

    /// Record one sample.
    pub fn push(&mut self, x: f64) {
        self.x0s += 1;
        self.x1s += x;
        self.x2s += x * x;
    }

    /// Arithmetic mean of the samples; NaN when no samples were pushed.
    pub fn mean(&self) -> f64 {
        self.x1s / self.x0s as f64
    }

    /// Population standard deviation, computed as `sqrt(n·Σx² − (Σx)²) / n`.
    ///
    /// Returns NaN when no samples were pushed or when a sample was NaN.
    pub fn population_stddev(&self) -> f64 {
        let n = self.x0s as f64;
        let radicand = n * self.x2s - self.x1s * self.x1s;
        // Mathematically the radicand is never negative, but for (nearly)
        // identical samples rounding can push it slightly below zero, which
        // would make `sqrt` return NaN. An explicit comparison is used instead
        // of `f64::max` so that a genuine NaN still propagates.
        let radicand = if radicand < 0.0 { 0.0 } else { radicand };
        radicand.sqrt() / n
    }
}

impl Extend<f64> for Stats {
    /// Push every sample yielded by `iter`.
    fn extend<T: IntoIterator<Item = f64>>(&mut self, iter: T) {
        iter.into_iter().for_each(|x| self.push(x));
    }
}

impl std::iter::FromIterator<f64> for Stats {
    /// Build an accumulator from a sequence of samples.
    fn from_iter<T>(iter: T) -> Stats
    where
        T: IntoIterator<Item = f64>,
    {
        let mut s = Stats::new();
        s.extend(iter);
        s
    }
}

#[test]
fn pop_stddev() {
    let mut stats = Stats::new();

    stats.extend([2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0].iter().copied());
    assert_eq!(stats.mean(), 5.0);
    assert_eq!(stats.population_stddev(), 2.0);
}

// ---- utils/src/useful_duration.rs ----

/// A `Duration` wrapper that converts to and from seconds as `f64` and
/// displays itself with a unit chosen by magnitude.
#[derive(Copy, Clone, Debug)]
pub struct UsefulDuration(Duration);

impl From<Duration> for UsefulDuration {
    fn from(d: Duration) -> Self {
        UsefulDuration(d)
    }
}

impl From<f64> for UsefulDuration {
    /// Interpret `secs` as a number of seconds.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as `Duration::from_secs_f64`: when
    /// `secs` is negative, not finite, or too large for a `Duration`.
    fn from(secs: f64) -> Self {
        UsefulDuration(Duration::from_secs_f64(secs))
    }
}

impl From<UsefulDuration> for f64 {
    /// The duration in seconds.
    fn from(d: UsefulDuration) -> Self {
        d.0.as_secs_f64()
    }
}

impl fmt::Display for UsefulDuration {
    /// Format with three decimals in ns, µs, ms, or s. Each unit is used up to
    /// 1.5 of the next larger one (so 1.2µs prints as `1200.000ns`); an exact
    /// zero prints as `0s`.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let d = f64::from(*self);
        if d == 0.0 {
            write!(fmt, "0s")
        } else if d < 1.5e-6 {
            write!(fmt, "{:.3}ns", d * 1e9)
        } else if d < 1.5e-3 {
            write!(fmt, "{:.3}µs", d * 1e6)
        } else if d < 1.5 {
            write!(fmt, "{:.3}ms", d * 1e3)
        } else {
            write!(fmt, "{:.3}s", d)
        }
    }
}