├── .config └── nextest.toml ├── .gitattributes ├── .gitconfig ├── .github └── copilot-instructions.md ├── .gitignore ├── .vscode ├── settings.json └── tasks.json ├── Cargo.toml ├── DEVELOPMENT.md ├── LICENSE ├── README.md ├── RELEASING.md ├── clippy.toml ├── crates ├── benchmark_utils │ ├── Cargo.toml │ └── src │ │ ├── lib.rs │ │ └── threadpool.rs ├── benchmarks │ ├── Cargo.toml │ ├── benches │ │ ├── effects_of_memory.rs │ │ ├── effects_of_memory_windows.rs │ │ └── variable_access.rs │ └── src │ │ └── lib.rs ├── cpulist │ ├── Cargo.toml │ ├── README.md │ ├── examples │ │ ├── cpulist_basic.rs │ │ └── cpulist_stride.rs │ └── src │ │ ├── emit.rs │ │ ├── error.rs │ │ ├── lib.rs │ │ └── parse.rs ├── folo_ffi │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ └── native_buffer.rs ├── folo_utils │ ├── Cargo.toml │ ├── README.md │ └── src │ │ └── lib.rs ├── linked │ ├── Cargo.toml │ ├── README.md │ ├── benches │ │ ├── instance_per_thread.rs │ │ ├── instance_per_thread_sync.rs │ │ ├── instances.rs │ │ ├── static_thread_local_arc.rs │ │ └── static_thread_local_rc.rs │ ├── doc │ │ ├── instance_per_thread.mermaid │ │ ├── instance_per_thread_sync.mermaid │ │ └── linked.mermaid │ ├── examples │ │ ├── linked_basic.rs │ │ ├── linked_box.rs │ │ ├── linked_family.rs │ │ ├── linked_std_box.rs │ │ ├── linked_thread_local_arc.rs │ │ └── linked_thread_local_rc.rs │ ├── src │ │ ├── __private.rs │ │ ├── box.rs │ │ ├── constants.rs │ │ ├── family.rs │ │ ├── instance_per_thread.rs │ │ ├── instance_per_thread_sync.rs │ │ ├── lib.rs │ │ ├── macros.rs │ │ ├── object.rs │ │ ├── static_instance_per_thread.rs │ │ ├── static_instance_per_thread_sync.rs │ │ ├── static_instances.rs │ │ └── thread_id_hash.rs │ └── tests │ │ ├── linked_object.rs │ │ └── smoke.rs ├── linked_macros │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── linked_macros_impl │ ├── Cargo.toml │ └── src │ │ ├── lib.rs │ │ ├── linked_object.rs │ │ └── syn_helpers.rs ├── many_cpus │ ├── Cargo.toml │ ├── README.md │ ├── 
benches │ │ ├── hardware_info.rs │ │ ├── hardware_tracker.rs │ │ ├── pal_windows.rs │ │ └── processor_set_builder.rs │ ├── docs │ │ └── snippets │ │ │ ├── changes_at_runtime.md │ │ │ └── external_constraints.md │ ├── examples │ │ ├── get_all_processors.rs │ │ ├── get_default_processors.rs │ │ ├── obey_job_affinity_windows.rs │ │ ├── obey_job_resource_quota_windows.rs │ │ ├── observe_processor.rs │ │ ├── spawn_on_all_processors.rs │ │ ├── spawn_on_any_processors.rs │ │ ├── spawn_on_inherited_processors.rs │ │ └── spawn_on_selected_processors.rs │ ├── src │ │ ├── clients.rs │ │ ├── clients │ │ │ ├── hw_tracker_client.rs │ │ │ └── hw_tracker_facade.rs │ │ ├── hardware_info.rs │ │ ├── hardware_tracker.rs │ │ ├── lib.rs │ │ ├── pal.rs │ │ ├── pal │ │ │ ├── abstractions.rs │ │ │ ├── abstractions │ │ │ │ ├── platform.rs │ │ │ │ └── processor.rs │ │ │ ├── facade.rs │ │ │ ├── facade │ │ │ │ ├── platform.rs │ │ │ │ └── processor.rs │ │ │ ├── linux.rs │ │ │ ├── linux │ │ │ │ ├── bindings.rs │ │ │ │ ├── bindings │ │ │ │ │ ├── abstractions.rs │ │ │ │ │ ├── facade.rs │ │ │ │ │ └── real.rs │ │ │ │ ├── filesystem.rs │ │ │ │ ├── filesystem │ │ │ │ │ ├── abstractions.rs │ │ │ │ │ ├── facade.rs │ │ │ │ │ └── real.rs │ │ │ │ ├── platform.rs │ │ │ │ └── processor.rs │ │ │ ├── mocks.rs │ │ │ ├── windows.rs │ │ │ └── windows │ │ │ │ ├── bindings.rs │ │ │ │ ├── bindings │ │ │ │ ├── abstractions.rs │ │ │ │ ├── facade.rs │ │ │ │ └── real.rs │ │ │ │ ├── group_mask.rs │ │ │ │ ├── platform.rs │ │ │ │ └── processor.rs │ │ ├── primitive_types.rs │ │ ├── processor.rs │ │ ├── processor_set.rs │ │ ├── processor_set_builder.rs │ │ └── resource_quota.rs │ └── tests │ │ └── job_limits_windows.rs ├── many_cpus_benchmarking │ ├── Cargo.toml │ ├── README.md │ ├── benches │ │ └── many_cpus_harness_demo.rs │ ├── images │ │ └── work_distribution_comparison.png │ └── src │ │ ├── cache.rs │ │ ├── lib.rs │ │ ├── payload.rs │ │ ├── run.rs │ │ └── work_distribution.rs ├── region_cached │ ├── Cargo.toml │ ├── 
README.md │ ├── benches │ │ └── region_cached.rs │ ├── doc │ │ └── region_cached.mermaid │ ├── examples │ │ ├── region_cached_1gb.rs │ │ ├── region_cached_log_filtering.rs │ │ ├── region_cached_log_filtering_no_statics.rs │ │ └── region_cached_web.rs │ └── src │ │ ├── __private.rs │ │ ├── clients.rs │ │ ├── clients │ │ ├── hw_info_client.rs │ │ ├── hw_info_facade.rs │ │ ├── hw_tracker_client.rs │ │ └── hw_tracker_facade.rs │ │ ├── lib.rs │ │ ├── macros.rs │ │ ├── region_cached.rs │ │ └── region_cached_ext.rs ├── region_local │ ├── Cargo.toml │ ├── README.md │ ├── benches │ │ └── region_local.rs │ ├── doc │ │ └── region_local.mermaid │ ├── examples │ │ ├── region_local_1gb.rs │ │ └── region_local_web.rs │ └── src │ │ ├── __private.rs │ │ ├── clients.rs │ │ ├── clients │ │ ├── hw_info_client.rs │ │ ├── hw_info_facade.rs │ │ ├── hw_tracker_client.rs │ │ └── hw_tracker_facade.rs │ │ ├── lib.rs │ │ ├── macros.rs │ │ ├── region_local.rs │ │ └── region_local_ext.rs └── testing │ ├── Cargo.toml │ ├── examples │ └── spin_cpu_windows.rs │ └── src │ ├── lib.rs │ ├── windows.rs │ └── windows │ └── job.rs ├── just_basics.just ├── just_quality.just ├── just_quality_mutants.just ├── just_release.just ├── just_setup.just ├── just_testing.just ├── justfile ├── release-plz.toml └── rust-toolchain.toml /.config/nextest.toml: -------------------------------------------------------------------------------- 1 | [profile.default] 2 | # The leak detector is just bad and calls everything a leaky test because it relies on timing 3 | # coincidences to do its job. It is not an accurate nor valuable feature and we would rather 4 | # disable it but there is no off button so let's just set a high timeout that avoids it complaining. 
5 | leak-timeout = "10s" 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Our regular development environment is dual-OS - the same working directory is accessed 2 | # from both Windows and Linux. This means they need to agree on the line endings! All text 3 | # is therefore using Linux line endings (LF). 4 | * text eol=lf 5 | 6 | # Binary files often seen in our documentation. 7 | *.png filter=lfs diff=lfs merge=lfs -text 8 | *.jpg filter=lfs diff=lfs merge=lfs -text 9 | -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [core] 2 | # We do not use CRLF but if someone (or some automation) happens 3 | # to try to commit a CRLF file, we convert it to LF for storage in the Git index. 4 | autocrlf = input 5 | 6 | # Scream if files do not match what .gitattributes requires. 7 | safecrlf = true 8 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | # Standard commands 2 | 3 | We use the Just command runner for many common commands - look inside *.just files to see the 4 | list of available commands. Some relevant ones are: 5 | 6 | * `just build` - build the entire workspace 7 | * `just package=many_cpus build` - build a single package (most commands accept a `package` parameter) 8 | * `just test` - test the entire workspace 9 | 10 | Avoid running `just bench`, as the benchmarks take a lot of time and `just test` will anyway run 11 | a single benchmark iteration to validate they are still working. 12 | 13 | We generally prefer using Just commands over raw Cargo commands if there is a suitable Just command 14 | defined in one of the *.just files. 
15 | 16 | Do not execute `just release` - this is a critical tool reserved for human use. 17 | 18 | # Validating changes 19 | 20 | Use `just test` to verify that the code compiles and tests pass. 21 | 22 | Use `just clippy` to verify that all linter rules pass. We operate under a "zero warnings allowed" 23 | requirement - fix all warnings that Clippy generates. 24 | 25 | Use `just format` to apply auto-formatting to code files, ensuring consistent code style. 26 | 27 | # Multiplatform codebase 28 | 29 | This is a multiplatform codebase. In some crates you will find folders named `linux` and `windows`, 30 | which contain platform-specific code. When modifying files of one platform, you are also expected 31 | to make the equivalent modifications in the other. 32 | 33 | By default, we are operating on Windows. However, you can also invoke commands on Linux using the 34 | syntax `wsl -e bash -l -c "command"`. For example, to test on both Windows and Linux, execute: 35 | 36 | 1. `just test` 37 | 2. `wsl -e bash -l -c "just test"` 38 | 39 | You are expected to validate all changes on both operating systems. 40 | 41 | # Facades and abstractions 42 | 43 | Some crates like `many_cpus` use a platform abstraction layer (PAL), where an abstraction like 44 | `trait Platform` defined in `crates/many_cpus/src/pal/abstractions/**` has multiple different 45 | implementations: 46 | 47 | 1. A Windows implementation (`crates/many_cpus/src/pal/windows/**`) 48 | 2. A Linux implementation (`crates/many_cpus/src/pal/linux/**`) 49 | 3. A mock implementation (`crates/many_cpus/src/pal/mocks.rs`) 50 | 51 | Logic code will consume this abstraction via facade types, which can either call into the real 52 | implementation of the build target platform (Windows or Linux) or the mock implementation (only 53 | when building in test mode). 
The facades are defined in `crates/many_cpus/src/pal/facade/**` and 54 | only exist to be minimal pass-through layers to allow swapping in the mock implementation in tests. 55 | 56 | When modifying the API of the PAL, you are expected to make the API changes in the 57 | abstraction, facade and implementation types at the same time, as the API surface must match. 58 | 59 | The same pattern may also be used elsewhere (e.g. inside the PAL implementations as a second layer 60 | of abstraction, or in other crates). 61 | 62 | # Filesystem structure 63 | 64 | We prefer many smaller files over few large files, typically only packing implementation details 65 | and unit tests into the same file but keeping separate API-visible types in separate files (even 66 | if only API-visible inside the same crate). 67 | 68 | We prefer to keep the public API relatively flat - even if we create separate Rust modules for 69 | types, we re-export them all at the parent, so while we have modules like 70 | `crates/many_cpus/src/hardware_tracker.rs` the type itself is exported at the crate root as 71 | `many_cpus::HardwareTracker` instead of at the module as `many_cpus::hardware_tracker::HardwareTracker`. 72 | 73 | # Scripting 74 | 75 | You can assume PowerShell is available. Prefer PowerShell over Bash. 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | mutants.out 2 | mutants.out.old 3 | 4 | # Generated by Cargo 5 | # will have compiled files and executables 6 | debug/ 7 | target/ 8 | 9 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 10 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 11 | Cargo.lock 12 | 13 | # These are backup files generated by rustfmt 14 | **/*.rs.bk 15 | 16 | # MSVC Windows builds of rustc generate these, which store debugging information 17 | *.pdb 18 | 19 | # RustRover 20 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 21 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 22 | # and can be added to the global gitignore or merged into this file. For a more nuclear 23 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
24 | #.idea/ -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "affinitization", 4 | "affinitized", 5 | "affinitizing", 6 | "cpulist", 7 | "cpus", 8 | "foldhash", 9 | "Folo", 10 | "heapless", 11 | "JOBOBJECT", 12 | "metas", 13 | "miri", 14 | "nanos", 15 | "nextest", 16 | "nonoverlapping", 17 | "pointee", 18 | "taskset", 19 | "withf" 20 | ], 21 | "rust-analyzer.cargo.cfgs": [ 22 | "debug_assertions" 23 | ] 24 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "type": "process", 6 | "command": "just", 7 | "args": [ 8 | "build", 9 | ], 10 | "problemMatcher": [ 11 | "$rustc" 12 | ], 13 | "group": { 14 | "kind": "build", 15 | "isDefault": true 16 | }, 17 | "label": "just: build" 18 | }, 19 | { 20 | "type": "process", 21 | "command": "just", 22 | "args": [ 23 | "test", 24 | ], 25 | "problemMatcher": [ 26 | "$rustc" 27 | ], 28 | "group": { 29 | "kind": "test", 30 | "isDefault": true 31 | }, 32 | "label": "just: test" 33 | }, 34 | { 35 | "type": "process", 36 | "command": "just", 37 | "args": [ 38 | "test-docs", 39 | ], 40 | "problemMatcher": [ 41 | "$rustc" 42 | ], 43 | "group": { 44 | "kind": "test" 45 | }, 46 | "label": "just: test-docs" 47 | }, 48 | { 49 | "type": "process", 50 | "command": "just", 51 | "args": [ 52 | "docs" 53 | ], 54 | "problemMatcher": [ 55 | "$rustc" 56 | ], 57 | "group": "none", 58 | "label": "just: docs" 59 | }, 60 | { 61 | "type": "process", 62 | "command": "just", 63 | "args": [ 64 | "bench" 65 | ], 66 | "problemMatcher": [ 67 | "$rustc" 68 | ], 69 | "group": "none", 70 | "label": "just: bench" 71 | }, 72 | ] 73 | } 
-------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # The basics 2 | 3 | This is a multiplatform project supporting both Windows and Linux. Development of the Linux 4 | functionality takes place in a Windows Subsystem for Linux (WSL) virtual machine. 5 | 6 | See `rust-toolchain.toml` for the required stable Rust toolchain version. The `nightly` toolchain 7 | is also required for some development tooling. 8 | 9 | # Development environment setup (Windows) 10 | 11 | Prerequisites: 12 | 13 | * Windows 11 14 | * Visual Studio 2022 with workload "Desktop development with C++" 15 | * Visual Studio Code with extensions: 16 | * C/C++ 17 | * rust-analyzer 18 | * vscode-just 19 | * WSL 20 | * PowerShell 7 21 | * Rust development tools for version listed in `rust-toolchain.toml` 22 | * `rustup toolchain install nightly` 23 | * `cargo install just` 24 | * (Only if publishing releases) GitHub CLI + `gh auth login` 25 | 26 | Setup: 27 | 28 | 1. Clone the repo to a directory of your choosing. 29 | 1. Open a terminal in the repo root. 30 | 1. Execute `git config --local include.path ./.gitconfig` to attach the repo-specific Git configuration. 31 | 1. Execute `just install-tools` to install development tools. 32 | 33 | Validation: 34 | 35 | 1. Open repo directory in Visual Studio Code. 36 | 1. Execute from task palette (F1): 37 | * `Tasks: Run Build Task` 38 | * `Tasks: Run Test Task` 39 | 1. Execute `just validate-local` in terminal. 
40 | 41 | # Development environment setup (Linux) 42 | 43 | Prerequisites: 44 | 45 | * Ubuntu 24 installed in WSL 46 | * `sudo apt install -y git git-lfs build-essential cmake gcc make curl` 47 | * [PowerShell 7](https://learn.microsoft.com/en-us/powershell/scripting/install/install-ubuntu?view=powershell-7.5) 48 | * Rust development tools for version listed in `rust-toolchain.toml` 49 | * `rustup toolchain install nightly` 50 | * `cargo install just` 51 | * If first time Git setup, execute `git config --global credential.helper "/mnt/c/Program\ Files/Git/mingw64/bin/git-credential-manager.exe"` to set up the authentication flow 52 | 53 | Setup: 54 | 55 | 1. Navigate to repo shared with Windows host (under `/mnt/c/`). Do not create a separate clone of the repo for Linux. 56 | 1. Execute `just install-tools` to install development tools. 57 | 1. Open Visual Studio Code via `code .` 58 | 1. If first time setup, install required Visual Studio Code extensions: 59 | * C/C++ 60 | * rust-analyzer 61 | 62 | Validation: 63 | 64 | 1. Execute from task palette (F1): 65 | * `Tasks: Run Build Task` 66 | * `Tasks: Run Test Task` 67 | 1. Execute `just validate-local` in terminal. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024+ Folo authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Folo 2 | 3 | Mechanisms for high-performance hardware-aware programming in Rust. 4 | 5 | The design tenets this project aims to satisfy are the following: 6 | 7 | * In services, keep the processing of each request on a single processor to ensure both that data 8 | is locally cached for fast access and to avoid polluting caches of many processors with data of 9 | a single request. 
10 | * Be aware of [memory region boundaries](https://www.kernel.org/doc/html/v4.18/vm/numa.html) 11 | when scheduling work and placing data. Avoid moving data across these boundaries because it can 12 | be very slow. 13 | * Use single-threaded logic without synchronization - even atomics and "lock-free" synchronization 14 | primitives are expensive compared to single-threaded logic. Whenever feasible, use `!Send` types 15 | to avoid accidental multithreading. Maintain separate mutable data sets per thread or memory 16 | region instead of maintaining global data sets. 17 | * Use asynchronous logic in the app, in library code and when communicating with the operating 18 | system, ensuring that a thread is never blocked from doing useful work. 19 | 20 | # Contents 21 | 22 | This is an umbrella project that covers multiple largely independent crates: 23 | 24 | | Crate | Description | 25 | |---------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------| 26 | | [`linked`](crates/linked/README.md) + siblings | Create families of linked objects that can collaborate across threads while being internally single-threaded | 27 | | [`many_cpus`](crates/many_cpus/README.md) | Efficiently schedule work and inspect the hardware environment on many-processor systems | 28 | | [`many_cpus_benchmarking`](crates/many_cpus_benchmarking/README.md) | Criterion benchmark harness to easily compare different processor configurations | 29 | | [`region_cached`](crates/region_cached/README.md) | Add a layer of cache between L3 and main memory | 30 | | [`region_local`](crates/region_local/README.md) | Isolate variable storage per memory region, similar to `thread_local_rc!` | 31 | 32 | Some auxiliary crates are also published because the primary crates above require their 33 | functionality. 
They only indirectly contribute to the Folo mission, so they are listed separately: 34 | 35 | | Crate | Description | 36 | |---------------------------------------------|----------------------------------------------------------------------------------------------------------| 37 | | [`cpulist`](crates/cpulist/README.md) | Utilities for parsing and emitting Linux cpulist strings | 38 | | [`folo_ffi`](crates/folo_ffi/README.md) | Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface | 39 | | [`folo_utils`](crates/folo_utils/README.md) | Utilities for internal use in Folo crates; no stable API surface | 40 | 41 | There are also some development-only crates in this repo, which are not published: 42 | 43 | | Crate | Description | 44 | |---------------------------------------------|------------------------------------------------------------------------------------| 45 | | [`benchmark_utils`](crates/benchmark_utils) | Common benchmarking logic used across the crates in this project | 46 | | [`benchmarks`](crates/benchmarks) | Random pile of benchmarks to explore relevant scenarios and guide Folo development | 47 | | [`testing`](crates/testing) | Private helpers for testing and examples in Folo crates | 48 | 49 | # Development environment setup 50 | 51 | See [DEVELOPMENT.md](DEVELOPMENT.md). -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # Guide to releasing a new version 2 | 3 | 1. Validate everything via `just validate` on Windows (will automatically invoke Linux validation). 4 | 1. Execute `just prepare-release` on `main` branch to increment version numbers and update changelogs. 5 | * Verify pending changes manually and adjust as necessary. 6 | * Commit as "chore: prepare for release" when satisfied with the changes. 7 | * `git push` 8 | 1. 
Execute `just release` to upload new packages to `crates.io`. 9 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | # This file contains fine-tuning settings that cannot be specified in cargo.toml (which only supports "error level"). 2 | 3 | # Absolute paths of length 3 can be useful to emphasize where a particular symbol is coming from, 4 | # e.g. by using "std::sync::mutex" versus "tokio::sync::mutex". Anything beyond 3 segments seems 5 | # excessively verbose, so we limit it to 3 - import or alias to shorten longer symbol paths. 6 | absolute-paths-max-segments = 3 -------------------------------------------------------------------------------- /crates/benchmark_utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "benchmark_utils" 3 | description = "Common benchmarking logic used across the crates in this project" 4 | publish = false 5 | version = "0.0.1-never" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [dependencies] 17 | folo_utils = { workspace = true } 18 | many_cpus = { workspace = true } 19 | 20 | oneshot = { workspace = true } 21 | 22 | [dev-dependencies] 23 | mutants = { workspace = true } 24 | 25 | [lints] 26 | workspace = true 27 | -------------------------------------------------------------------------------- /crates/benchmarks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "benchmarks" 3 | description = "Random pile of benchmarks to explore relevant scenarios and guide Folo development" 4 | publish = false 5 | version = "0.0.1-never" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 
| repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [dev-dependencies] 17 | linked = { workspace = true } 18 | many_cpus = { workspace = true } 19 | many_cpus_benchmarking = { workspace = true } 20 | 21 | criterion = { workspace = true } 22 | fake_headers = { workspace = true } 23 | frozen-collections = { workspace = true } 24 | http = { workspace = true } 25 | scc = { workspace = true } 26 | 27 | [target.'cfg(windows)'.dev-dependencies] 28 | windows = { workspace = true, features = ["Win32_System_Memory"] } 29 | 30 | [[bench]] 31 | name = "variable_access" 32 | harness = false 33 | 34 | [[bench]] 35 | name = "effects_of_memory" 36 | harness = false 37 | 38 | [[bench]] 39 | name = "effects_of_memory_windows" 40 | harness = false 41 | 42 | [lints] 43 | workspace = true 44 | -------------------------------------------------------------------------------- /crates/benchmarks/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate contains nothing, it exists just as a container for benchmarks. 
2 | -------------------------------------------------------------------------------- /crates/cpulist/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cpulist" 3 | description = "Parse and emit the Linux 'cpulist' data format used to list processors, memory regions and similar entities" 4 | publish = true 5 | version = "0.2.0" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | folo_utils = { workspace = true } 18 | include-doc = { workspace = true } 19 | itertools = { workspace = true } 20 | thiserror = { workspace = true } 21 | 22 | [dev-dependencies] 23 | 24 | [lints] 25 | workspace = true 26 | -------------------------------------------------------------------------------- /crates/cpulist/README.md: -------------------------------------------------------------------------------- 1 | Utilities for parsing and emitting strings in the `cpulist` format often used by Linux 2 | utilities that work with processor IDs, memory region IDs and similar numeric hardware 3 | identifiers. 4 | 5 | Example cpulist string: `0,1,2-4,5-9:2,6-10:2` 6 | 7 | More details in the [crate documentation](https://docs.rs/cpulist/). 8 | 9 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 10 | high-performance hardware-aware programming in Rust. -------------------------------------------------------------------------------- /crates/cpulist/examples/cpulist_basic.rs: -------------------------------------------------------------------------------- 1 | //! Parsing a cpulist string and emitting it back to the terminal. 
2 | 3 | fn main() { 4 | let selected_processors = cpulist::parse("0-9,32-35,40").unwrap(); 5 | assert_eq!( 6 | selected_processors, 7 | vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 32, 33, 34, 35, 40] 8 | ); 9 | 10 | println!("Selected processors: {selected_processors:?}"); 11 | println!("As cpulist: {}", cpulist::emit(selected_processors)); 12 | } 13 | -------------------------------------------------------------------------------- /crates/cpulist/examples/cpulist_stride.rs: -------------------------------------------------------------------------------- 1 | //! The stride operator can be used to divide ranges into any number of individual series. 2 | 3 | fn main() { 4 | let evens = cpulist::parse("0-16:2").unwrap(); 5 | let odds = cpulist::parse("1-16:2").unwrap(); 6 | 7 | let all = cpulist::emit(odds.iter().chain(evens.iter()).copied()); 8 | 9 | println!("Evens: {evens:?}"); 10 | println!("Odds: {odds:?}"); 11 | 12 | println!("All as cpulist: {all}"); 13 | } 14 | -------------------------------------------------------------------------------- /crates/cpulist/src/error.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | /// Errors that can occur when processing cpulist strings. 4 | #[derive(Debug, Error)] 5 | #[non_exhaustive] 6 | pub enum Error { 7 | /// The caller provided a supposed cpulist string but it did not match the expected format. 8 | #[error("invalid cpulist syntax: '{invalid_value}' is invalid: {problem}")] 9 | InvalidSyntax { 10 | /// The specific value that was invalid. This may either be the entire cpulist string 11 | /// or a specific part of it, depending on the problem. 12 | invalid_value: String, 13 | 14 | /// A human-readable description of the problem. 15 | problem: String, 16 | }, 17 | } 18 | 19 | /// A specialized `Result` type for cpulist operations, returning the crate's 20 | /// [`Error`] type as the error value. 
21 | pub(crate) type Result = std::result::Result; 22 | -------------------------------------------------------------------------------- /crates/cpulist/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for parsing and emitting strings in the `cpulist` format often used by Linux 2 | //! utilities that work with processor IDs, memory region IDs and similar numeric hardware 3 | //! identifiers. 4 | //! 5 | //! Example cpulist string: `0-9,32-35,40` 6 | //! 7 | //! This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 8 | //! high-performance hardware-aware programming in Rust. 9 | //! 10 | //! # Format 11 | //! 12 | //! The value is a comma-separated list of zero or more integers or integer ranges, where each item 13 | //! is either: 14 | //! 15 | //! * a single integer (e.g. `1`) 16 | //! * a range of integers (e.g. `2-4`) 17 | //! * a range of integers with a stride (step size) operator (e.g. `5-9:2` which is equivalent to `5,7,9`) 18 | //! 19 | //! Whitespace or extra characters are not allowed anywhere in the string. 20 | //! 21 | //! The identifiers in the list are of size `u32`. 22 | //! 23 | //! # Example 24 | //! 25 | //! Basic conversion from/to strings: 26 | //! 27 | //! ``` 28 | #![doc = source_file!("examples/cpulist_basic.rs")] 29 | //! ``` 30 | //! 31 | //! The stride operator is also supported for parsing: 32 | //! 33 | //! ``` 34 | #![doc = source_file!("examples/cpulist_stride.rs")] 35 | //! 
``` 36 | 37 | use include_doc::source_file; 38 | 39 | mod emit; 40 | mod error; 41 | mod parse; 42 | 43 | pub use emit::*; 44 | pub use error::*; 45 | pub use parse::*; 46 | 47 | pub(crate) type Item = u32; 48 | -------------------------------------------------------------------------------- /crates/folo_ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "folo_ffi" 3 | description = "Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface" 4 | publish = true 5 | version = "0.1.2" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [features] 17 | default = [] 18 | 19 | [dependencies] 20 | 21 | [dev-dependencies] 22 | mutants = { workspace = true } 23 | 24 | [lints] 25 | workspace = true 26 | -------------------------------------------------------------------------------- /crates/folo_ffi/README.md: -------------------------------------------------------------------------------- 1 | Utilities for working with FFI calls. This exists to serve the internal FFI needs of Folo crates. 2 | Accordingly, the crate has no stable API surface. 3 | 4 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 5 | high-performance hardware-aware programming in Rust. -------------------------------------------------------------------------------- /crates/folo_ffi/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface. 
2 | 3 | mod native_buffer; 4 | 5 | pub use native_buffer::*; 6 | -------------------------------------------------------------------------------- /crates/folo_utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "folo_utils" 3 | description = "Utilities for internal use in Folo crates; no stable API surface" 4 | publish = true 5 | version = "0.1.0" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [features] 17 | default = [] 18 | 19 | [dependencies] 20 | 21 | [dev-dependencies] 22 | mutants = { workspace = true } 23 | 24 | [lints] 25 | workspace = true 26 | -------------------------------------------------------------------------------- /crates/folo_utils/README.md: -------------------------------------------------------------------------------- 1 | Utilities for internal use in Folo crates; no stable API surface. 2 | This exists to serve the internal needs of Folo crates. 3 | Accordingly, the crate has no stable API surface. 4 | 5 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 6 | high-performance hardware-aware programming in Rust. -------------------------------------------------------------------------------- /crates/folo_utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for internal use in Folo crates; no stable API surface 2 | 3 | /// A macro to create a `NonZero` constant from a literal value. 4 | #[macro_export] 5 | macro_rules! 
nz { 6 | ($x:literal) => { 7 | const { ::std::num::NonZero::new($x).expect("literal must have non-zero value") } 8 | }; 9 | } 10 | -------------------------------------------------------------------------------- /crates/linked/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "linked" 3 | description = "Create families of linked objects that can collaborate across threads while being internally single-threaded" 4 | publish = true 5 | version = "0.2.0" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | hash_hasher = { workspace = true } 18 | linked_macros = { workspace = true } 19 | paste = { workspace = true } 20 | simple-mermaid = { workspace = true } 21 | 22 | [dev-dependencies] 23 | benchmark_utils = { workspace = true } 24 | criterion = { workspace = true } 25 | many_cpus = { workspace = true } 26 | mutants = { workspace = true } 27 | seq-macro = { workspace = true } 28 | 29 | [[bench]] 30 | name = "instances" 31 | harness = false 32 | 33 | [[bench]] 34 | name = "instance_per_thread" 35 | harness = false 36 | 37 | [[bench]] 38 | name = "instance_per_thread_sync" 39 | harness = false 40 | 41 | [[bench]] 42 | name = "static_thread_local_arc" 43 | harness = false 44 | 45 | [[bench]] 46 | name = "static_thread_local_rc" 47 | harness = false 48 | 49 | [lints] 50 | workspace = true 51 | -------------------------------------------------------------------------------- /crates/linked/README.md: -------------------------------------------------------------------------------- 1 | Mechanisms for creating families of linked objects that can collaborate across threads, 2 | with each instance only used from a single thread. 
3 | 4 | The problem this crate solves is that while writing highly efficient lock-free thread-local 5 | code can yield great performance, it comes with serious drawbacks in terms of usability and 6 | developer experience. 7 | 8 | This crate bridges the gap by providing patterns and mechanisms that facilitate thread-local 9 | behavior while presenting a simple and reasonably ergonomic API to user code: 10 | 11 | * Internally, a linked object can take advantage of lock-free thread-isolated logic for **high 12 | performance and efficiency** because it operates as a multithreaded family of thread-isolated 13 | objects, each of which implements local behavior on a single thread. 14 | * Externally, the linked object family can look and act very much like a single Rust object and 15 | can hide the fact that there is collaboration happening on multiple threads, 16 | providing **a reasonably simple API with minimal extra complexity** for both the author 17 | and the user of a type. 18 | 19 | The patterns and mechanisms provided by this crate are designed to make it easy to create linked 20 | object families and to provide primitives that allow these object families to be used without 21 | the user code having to understand how the objects are wired up inside or keeping track of which 22 | instance is meant to be used on which thread. 23 | 24 | More details in the [crate documentation](https://docs.rs/linked/). 25 | 26 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 27 | high-performance hardware-aware programming in Rust. -------------------------------------------------------------------------------- /crates/linked/benches/instances.rs: -------------------------------------------------------------------------------- 1 | //! Basic operations on the `instances!` macro and underlying type. 
2 | 3 | #![allow( 4 | missing_docs, 5 | reason = "No need for API documentation in benchmark code" 6 | )] 7 | 8 | use std::{ 9 | hint::black_box, 10 | sync::{Arc, atomic::AtomicUsize}, 11 | }; 12 | 13 | use benchmark_utils::{ThreadPool, bench_on_threadpool}; 14 | use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; 15 | use seq_macro::seq; 16 | 17 | criterion_group!(benches, entrypoint); 18 | criterion_main!(benches); 19 | 20 | #[expect( 21 | dead_code, 22 | reason = "We do not care about using all the fields but we want to pay the price of initializing them" 23 | )] 24 | #[linked::object] 25 | struct TestSubject { 26 | local_state: AtomicUsize, 27 | shared_state: Arc, 28 | } 29 | 30 | impl TestSubject { 31 | fn new() -> Self { 32 | let shared_state = Arc::new(AtomicUsize::new(0)); 33 | 34 | linked::new!(Self { 35 | local_state: AtomicUsize::new(0), 36 | shared_state: Arc::clone(&shared_state), 37 | }) 38 | } 39 | } 40 | 41 | linked::instances!(static TARGET: TestSubject = TestSubject::new()); 42 | 43 | fn entrypoint(c: &mut Criterion) { 44 | let thread_pool = ThreadPool::default(); 45 | 46 | let mut g = c.benchmark_group("instances::get"); 47 | 48 | g.bench_function("single-threaded", |b| { 49 | b.iter(|| black_box(Arc::weak_count(&TARGET.get().shared_state))); 50 | }); 51 | 52 | g.bench_function("multi-threaded", |b| { 53 | b.iter_custom(|iters| { 54 | bench_on_threadpool( 55 | &thread_pool, 56 | iters, 57 | || (), 58 | |()| { 59 | black_box(Arc::weak_count(&TARGET.get().shared_state)); 60 | }, 61 | ) 62 | }); 63 | }); 64 | 65 | g.finish(); 66 | 67 | let mut g = c.benchmark_group("instances::get_1000"); 68 | 69 | g.bench_function("single-threaded", |b| { 70 | b.iter_batched_ref( 71 | LinkedVariableClearGuard::default, 72 | |_| { 73 | seq!(N in 0..1000 { 74 | black_box(Arc::weak_count(&TARGET_MANY_~N.get().shared_state)); 75 | }); 76 | }, 77 | BatchSize::SmallInput, 78 | ); 79 | }); 80 | 81 | g.bench_function("multi-threaded", |b| { 82 | 
b.iter_custom(|iters| { 83 | let duration = bench_on_threadpool( 84 | &thread_pool, 85 | iters, 86 | || (), 87 | |()| { 88 | seq!(N in 0..1000 { 89 | black_box(Arc::weak_count(&TARGET_MANY_~N.get().shared_state)); 90 | }); 91 | }, 92 | ); 93 | 94 | // The other threads were all temporary and have already gone away, so all we care about 95 | // is destroying the remains in the global registry, which is fine from this thread. 96 | linked::__private_clear_linked_variables(); 97 | 98 | duration 99 | }); 100 | }); 101 | 102 | g.finish(); 103 | } 104 | 105 | // We manually expand the macro here just because macro-in-macro goes crazy and fails to operate. 106 | seq!(N in 0..1000 { 107 | #[expect(non_camel_case_types, reason = "manually replicating uglified macro internals for benchmark")] 108 | struct __lookup_key_~N; 109 | 110 | const TARGET_MANY_~N : ::linked::StaticInstances = 111 | ::linked::StaticInstances::new( 112 | ::std::any::TypeId::of::<__lookup_key_~N>, 113 | TestSubject::new 114 | ); 115 | }); 116 | 117 | /// Clears all data stored in the shared variable system when created and dropped. Just for testing. 118 | #[derive(Debug)] 119 | struct LinkedVariableClearGuard {} 120 | 121 | impl Default for LinkedVariableClearGuard { 122 | fn default() -> Self { 123 | ::linked::__private_clear_linked_variables(); 124 | Self {} 125 | } 126 | } 127 | 128 | impl Drop for LinkedVariableClearGuard { 129 | fn drop(&mut self) { 130 | ::linked::__private_clear_linked_variables(); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /crates/linked/doc/instance_per_thread.mermaid: -------------------------------------------------------------------------------- 1 | graph TD 2 | subgraph Thread1[Thread 1] 3 | Task1a[Local task] -->|"::acquire()"| Local1a[Ref] 4 | Task1b[Local task] -->|"::acquire()"| Local1b[Ref] 5 | 6 | Local1a --> SharedOwnership((Shared
ownership)) 7 | Local1b --> SharedOwnership 8 | 9 | SharedOwnership --> Instance1[Linked object instance] 10 | end 11 | 12 | subgraph Thread2[Thread 2] 13 | Task2a[Local task] -->|"::acquire()"| Local2a[Ref] 14 | Task2b[Local task] -->|"::acquire()"| Local2b[Ref] 15 | 16 | Local2a --> SharedOwnership2((Shared
ownership)) 17 | Local2b --> SharedOwnership2 18 | 19 | SharedOwnership2 --> Instance2[Linked object instance] 20 | end 21 | 22 | Instance1 --> SharedState[Family state] 23 | Instance2 --> SharedState -------------------------------------------------------------------------------- /crates/linked/doc/instance_per_thread_sync.mermaid: -------------------------------------------------------------------------------- 1 | graph TD 2 | subgraph Thread1[Thread 1] 3 | Task1a[Local task] -->|"::acquire()"| Local1a[RefSync] 4 | Task1b[Local task] -->|"::acquire()"| Local1b[RefSync] 5 | 6 | Local1a --> SharedOwnership((Shared
ownership)) 7 | Local1b --> SharedOwnership 8 | 9 | SharedOwnership --> Instance1[Linked object instance] 10 | end 11 | 12 | subgraph Thread2[Thread 2] 13 | Task2a[Local task] -->|"::acquire()"| Local2a[RefSync] 14 | Task2b[Local task] -->|"::acquire()"| Local2b[RefSync] 15 | 16 | Local2a --> SharedOwnership2((Shared
ownership)) 17 | Local2b --> SharedOwnership2 18 | 19 | SharedOwnership2 --> Instance2[Linked object instance] 20 | end 21 | 22 | Instance1 --> SharedState[Family state] 23 | Instance2 --> SharedState -------------------------------------------------------------------------------- /crates/linked/doc/linked.mermaid: -------------------------------------------------------------------------------- 1 | graph TD 2 | subgraph Thread1[Thread 1] 3 | Task1[Local task] -->|thread-agnostic API surface| Instance1[Linked object instance] 4 | Instance1 -->|lock-free| Local1[Local state] 5 | end 6 | 7 | subgraph Thread2[Thread 2] 8 | Task2[Local task] -->|thread-agnostic API surface| Instance2[Linked object instance] 9 | Instance2 -->|lock-free| Local2[Local state] 10 | end 11 | 12 | SS[Family state] 13 | 14 | Instance1 ---> SS 15 | Instance2 ---> SS 16 | -------------------------------------------------------------------------------- /crates/linked/examples/linked_basic.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! Demonstrates basic usage of the linked object pattern. 5 | 6 | #![allow(clippy::new_without_default, reason = "not relevant for example")] 7 | 8 | use std::thread; 9 | 10 | mod counters { 11 | use std::sync::Arc; 12 | use std::sync::atomic::{AtomicUsize, Ordering}; 13 | 14 | /// An event counter that keeps both a local count and a global count across all linked instances. 15 | #[linked::object] // Activates the linked object pattern on this type. 16 | pub(crate) struct EventCounter { 17 | // Each instance has its own local count. 18 | local_count: usize, 19 | 20 | // Each instance also increments a global count shared between all instances. 21 | global_count: Arc, 22 | } 23 | 24 | impl EventCounter { 25 | pub(crate) fn new() -> Self { 26 | // The global count is shared between all instances by cloning this Arc into each one. 
27 | let global_count = Arc::new(AtomicUsize::new(0)); 28 | 29 | // Instead of just creating a new instance, we must use the `linked::new!` macro. 30 | // The body of the macro must be a `Self` struct-expression. This 31 | // struct-expression will be reused to create each linked instance. It may capture any 32 | // necessary variables as long as they are thread-safe (`Send` + `Sync` + `'static`). 33 | linked::new!(Self { 34 | local_count: 0, 35 | global_count: Arc::clone(&global_count), 36 | }) 37 | } 38 | 39 | pub(crate) fn increment(&mut self) { 40 | self.local_count = self.local_count.saturating_add(1); 41 | self.global_count.fetch_add(1, Ordering::Relaxed); 42 | } 43 | 44 | pub(crate) fn local_count(&self) -> usize { 45 | self.local_count 46 | } 47 | 48 | pub(crate) fn global_count(&self) -> usize { 49 | self.global_count.load(Ordering::Relaxed) 50 | } 51 | } 52 | } 53 | 54 | use counters::EventCounter; 55 | 56 | // A static variable provides linked instances of the event counter on any thread. 57 | // The `linked::instances!` macro gives all necessary superpowers to this static variable. 58 | // This is the simplest way to create instances that are linked across threads. 
59 | linked::instances!(static RECORDS_PROCESSED: EventCounter = EventCounter::new()); 60 | 61 | fn main() { 62 | const THREAD_COUNT: usize = 4; 63 | const RECORDS_PER_THREAD: usize = 1_000; 64 | 65 | let mut threads = Vec::with_capacity(THREAD_COUNT); 66 | 67 | for _ in 0..THREAD_COUNT { 68 | threads.push(thread::spawn(move || { 69 | let mut counter = RECORDS_PROCESSED.get(); 70 | 71 | for _ in 0..RECORDS_PER_THREAD { 72 | counter.increment(); 73 | } 74 | 75 | println!( 76 | "Thread completed work; local count: {}, global count: {}", 77 | counter.local_count(), 78 | counter.global_count() 79 | ); 80 | })); 81 | } 82 | 83 | for thread in threads { 84 | thread.join().unwrap(); 85 | } 86 | 87 | let final_count = RECORDS_PROCESSED.get().global_count(); 88 | 89 | println!("All threads completed work; final global count: {final_count}"); 90 | } 91 | -------------------------------------------------------------------------------- /crates/linked/examples/linked_box.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first. 5 | //! 6 | //! Demonstrates how to apply the linked object pattern to types exposed via abstractions (traits). 7 | //! This aims to preserve all the functionality of the linked objects pattern while allowing you 8 | //! to expose the instances themselves as `dyn SomeTrait` instead of the concrete type. 9 | //! 10 | //! This is enabled by `linked::Box` which works like `std::boxed::Box` but with the 11 | //! necessary extra machinery for linked objects. 12 | //! 13 | //! Under this model, **all** instances of a type T must be created as `linked::Box`, 14 | //! starting right from the constructor. If you want to have some instances exist as `T` and only 15 | //! some as `dyn SomeTrait`, refer to the example `linked_std_box.rs`. 
16 | 17 | use std::thread; 18 | 19 | mod counters { 20 | use std::sync::Arc; 21 | use std::sync::atomic::{AtomicUsize, Ordering}; 22 | 23 | pub(crate) trait Counter { 24 | fn increment(&mut self); 25 | fn local_count(&self) -> usize; 26 | fn global_count(&self) -> usize; 27 | } 28 | 29 | // Note the difference from `linked_basic.rs`: there is no `#[linked::object]` attribute 30 | // This is because the `linked::Box` wrapper we use provides the necessary machinery. 31 | pub(crate) struct EventCounter { 32 | local_count: usize, 33 | global_count: Arc, 34 | } 35 | 36 | impl EventCounter { 37 | // The desired pattern is to suffix the constructor with "as_" to indicate that 38 | // it returns the result as a trait object instead of the concrete type. 39 | pub(crate) fn new_as_counter() -> linked::Box { 40 | let global_count = Arc::new(AtomicUsize::new(0)); 41 | 42 | // Instead of `linked::new!` as we did in `linked_basic.rs`, we use `linked::new_box!`. 43 | // The first argument is the trait object that our linked object will be used through. 44 | // The second argument is the instance template as a `Self` struct-expression. This 45 | // struct-expression will be reused to create each linked instance. It may capture any 46 | // necessary variables as long as they are thread-safe (`Send`+`Sync`+`'static`). 
47 | linked::new_box!( 48 | dyn Counter, 49 | Self { 50 | local_count: 0, 51 | global_count: Arc::clone(&global_count), 52 | } 53 | ) 54 | } 55 | } 56 | 57 | impl Counter for EventCounter { 58 | fn increment(&mut self) { 59 | self.local_count = self.local_count.saturating_add(1); 60 | self.global_count.fetch_add(1, Ordering::Relaxed); 61 | } 62 | 63 | fn local_count(&self) -> usize { 64 | self.local_count 65 | } 66 | 67 | fn global_count(&self) -> usize { 68 | self.global_count.load(Ordering::Relaxed) 69 | } 70 | } 71 | } 72 | 73 | use counters::{Counter, EventCounter}; 74 | 75 | linked::instances!(static RECORDS_PROCESSED: linked::Box = EventCounter::new_as_counter()); 76 | 77 | fn main() { 78 | const THREAD_COUNT: usize = 4; 79 | const RECORDS_PER_THREAD: usize = 1_000; 80 | 81 | let mut threads = Vec::with_capacity(THREAD_COUNT); 82 | 83 | for _ in 0..THREAD_COUNT { 84 | threads.push(thread::spawn(move || { 85 | let mut counter = RECORDS_PROCESSED.get(); 86 | 87 | for _ in 0..RECORDS_PER_THREAD { 88 | counter.increment(); 89 | } 90 | 91 | println!( 92 | "Thread completed work; local count: {}, global count: {}", 93 | counter.local_count(), 94 | counter.global_count() 95 | ); 96 | })); 97 | } 98 | 99 | for thread in threads { 100 | thread.join().unwrap(); 101 | } 102 | 103 | let final_count = RECORDS_PROCESSED.get().global_count(); 104 | 105 | println!("All threads completed work; final global count: {final_count}"); 106 | } 107 | -------------------------------------------------------------------------------- /crates/linked/examples/linked_family.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first. 5 | //! 6 | //! Demonstrates how to use linked objects across threads by manually establishing the linked 7 | //! 
object family relationships via passing a reference to the family across threads and manually 8 | //! creating instances from the family. This is useful because sometimes it might not be convenient 9 | //! for you to define a static variable or use one of the standard instance-per-thread mechanisms. 10 | //! 11 | //! This example creates linked instances directly from the linked object family. This is 12 | //! the most flexible approach but also requires the most code from you. 13 | 14 | #![allow(clippy::new_without_default, reason = "Not relevant for example")] 15 | 16 | use std::thread; 17 | 18 | // This trait allows you to access the family of a linked object. 19 | use linked::Object; 20 | 21 | // Everything in the "counters" module is the same as in `linked_basic.rs`. 22 | // The difference is all in main() below. 23 | mod counters { 24 | use std::sync::Arc; 25 | use std::sync::atomic::{AtomicUsize, Ordering}; 26 | 27 | #[linked::object] 28 | pub(crate) struct EventCounter { 29 | local_count: usize, 30 | global_count: Arc, 31 | } 32 | 33 | impl EventCounter { 34 | pub(crate) fn new() -> Self { 35 | let global_count = Arc::new(AtomicUsize::new(0)); 36 | 37 | linked::new!(Self { 38 | local_count: 0, 39 | global_count: Arc::clone(&global_count), 40 | }) 41 | } 42 | 43 | pub(crate) fn increment(&mut self) { 44 | self.local_count = self.local_count.saturating_add(1); 45 | self.global_count.fetch_add(1, Ordering::Relaxed); 46 | } 47 | 48 | pub(crate) fn local_count(&self) -> usize { 49 | self.local_count 50 | } 51 | 52 | pub(crate) fn global_count(&self) -> usize { 53 | self.global_count.load(Ordering::Relaxed) 54 | } 55 | } 56 | } 57 | 58 | use counters::EventCounter; 59 | 60 | fn main() { 61 | const THREAD_COUNT: usize = 4; 62 | const RECORDS_PER_THREAD: usize = 1_000; 63 | 64 | let mut threads = Vec::with_capacity(THREAD_COUNT); 65 | 66 | // We create the counter as a local variable here. 
Linked objects are 67 | // regular structs and are not limited to static variables in any way. 68 | let counter = EventCounter::new(); 69 | 70 | // Every linked object belongs to a family, which you can access like this. 71 | // The family reference this returns is always thread-safe, even if the linked 72 | // object instances themselves are not. This allows you to pass it between threads. 73 | let counter_family = counter.family(); 74 | 75 | for _ in 0..THREAD_COUNT { 76 | threads.push(thread::spawn({ 77 | // We create a new clone of the family reference for each thread we spawn. 78 | let counter_family = counter_family.clone(); 79 | 80 | move || { 81 | // The family reference can be converted to a new instance on demand. 82 | let mut counter: EventCounter = counter_family.into(); 83 | 84 | for _ in 0..RECORDS_PER_THREAD { 85 | counter.increment(); 86 | } 87 | 88 | println!( 89 | "Thread completed work; local count: {}, global count: {}", 90 | counter.local_count(), 91 | counter.global_count() 92 | ); 93 | } 94 | })); 95 | } 96 | 97 | for thread in threads { 98 | thread.join().unwrap(); 99 | } 100 | 101 | let final_count = counter.global_count(); 102 | 103 | println!("All threads completed work; final global count: {final_count}"); 104 | } 105 | -------------------------------------------------------------------------------- /crates/linked/examples/linked_std_box.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first. 5 | //! 6 | //! Demonstrates how to expose linked objects via abstractions (traits) on demand while 7 | //! still using the linked objects via the concrete type itself. 8 | //! 9 | //! In this form, there exist two categories of instances for a type T: 10 | //! 11 | //! 1. The regular instances of type T, which are ordinary linked objects. 
12 | //! 2. Instances of `std::boxed::Box` where `T: Xyz`. These remain linked internally but 13 | //! cannot be used to create additional linked instances (there is no `.clone()` and 14 | //! no `.family()` on these objects). 15 | //! 16 | //! If you want to be able to create additional linked instances of `dyn Xyz` from an existing 17 | //! instance of `dyn Xyz`, you must create **all** instances (starting from the constructor) as 18 | //! `linked::Box` instead of `std::boxed::Box`. See `linked_box.rs` for an example. 19 | 20 | #![allow(clippy::new_without_default, reason = "Not relevant for example")] 21 | 22 | use std::thread; 23 | 24 | mod counters { 25 | use std::sync::Arc; 26 | use std::sync::atomic::{AtomicUsize, Ordering}; 27 | 28 | /// A trait that defines functions for reporting the results of some counting that happened. 29 | pub(crate) trait CountResult { 30 | fn local_count(&self) -> usize; 31 | fn global_count(&self) -> usize; 32 | } 33 | 34 | // Note how this is a regular linked object type, just like in `linked_basic.rs`. 
35 | #[linked::object] 36 | pub(crate) struct EventCounter { 37 | local_count: usize, 38 | global_count: Arc, 39 | } 40 | 41 | impl EventCounter { 42 | pub(crate) fn new() -> Self { 43 | let global_count = Arc::new(AtomicUsize::new(0)); 44 | 45 | linked::new!(Self { 46 | local_count: 0, 47 | global_count: Arc::clone(&global_count), 48 | }) 49 | } 50 | 51 | pub(crate) fn increment(&mut self) { 52 | self.local_count = self.local_count.saturating_add(1); 53 | self.global_count.fetch_add(1, Ordering::Relaxed); 54 | } 55 | } 56 | 57 | impl CountResult for EventCounter { 58 | fn local_count(&self) -> usize { 59 | self.local_count 60 | } 61 | 62 | fn global_count(&self) -> usize { 63 | self.global_count.load(Ordering::Relaxed) 64 | } 65 | } 66 | } 67 | 68 | use counters::{CountResult, EventCounter}; 69 | 70 | linked::instances!(static RECORDS_PROCESSED: EventCounter = EventCounter::new()); 71 | 72 | // Here we have some code that takes ownership of abstract count results. In this simple example 73 | // there is of course no real "need" for us to use an abstraction but let's pretend we have a 74 | // reason to do so. 75 | #[expect(clippy::needless_pass_by_value, reason = "adding realism to example")] 76 | fn finalize_counter_processing(result: Box) { 77 | println!( 78 | "Counter finished counting: local count: {}, global count: {}", 79 | result.local_count(), 80 | result.global_count() 81 | ); 82 | } 83 | 84 | fn main() { 85 | const THREAD_COUNT: usize = 4; 86 | const RECORDS_PER_THREAD: usize = 1_000; 87 | 88 | let mut threads = Vec::with_capacity(THREAD_COUNT); 89 | 90 | for _ in 0..THREAD_COUNT { 91 | threads.push(thread::spawn(move || { 92 | let mut counter = RECORDS_PROCESSED.get(); 93 | 94 | for _ in 0..RECORDS_PER_THREAD { 95 | counter.increment(); 96 | } 97 | 98 | // You can take a regular instance of a linked object and stuff it into a Box any time. 
99 | // Note, however, that you cannot use this instance anymore to create additional linked 100 | // instances because now it lacks the `.clone()` and `.family()` required for that. 101 | let boxed_count_result = Box::new(counter); 102 | finalize_counter_processing(boxed_count_result); 103 | })); 104 | } 105 | 106 | for thread in threads { 107 | thread.join().unwrap(); 108 | } 109 | 110 | let final_count = RECORDS_PROCESSED.get().global_count(); 111 | 112 | println!("All threads completed work; final global count: {final_count}"); 113 | } 114 | -------------------------------------------------------------------------------- /crates/linked/examples/linked_thread_local_rc.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first. 5 | //! 6 | //! Demonstrates how to share thread-local instances of linked objects, so all callers on a single 7 | //! thread access the same instance of the linked object. 8 | 9 | #![allow(clippy::new_without_default, reason = "Not relevant for example")] 10 | 11 | use std::thread; 12 | 13 | mod counters { 14 | use std::cell::Cell; 15 | use std::sync::Arc; 16 | use std::sync::atomic::{AtomicUsize, Ordering}; 17 | 18 | #[linked::object] 19 | pub(crate) struct EventCounter { 20 | // This now acts as a thread-local count because we only access a single instance of the 21 | // linked object on every thread. 22 | // 23 | // Multiple callers on a single thread using the same instance means they cannot use `&mut` 24 | // references, so we cannot have any function in our `impl` block that takes `&mut self`! 25 | // That requires interior mutability to be used for any local state changes, which is why 26 | // we use a Cell here to facilitate incrementing the local count. 
27 | local_count: Cell, 28 | 29 | global_count: Arc, 30 | } 31 | 32 | impl EventCounter { 33 | pub(crate) fn new() -> Self { 34 | let global_count = Arc::new(AtomicUsize::new(0)); 35 | 36 | linked::new!(Self { 37 | local_count: Cell::new(0), 38 | global_count: Arc::clone(&global_count), 39 | }) 40 | } 41 | 42 | // Note how this is `&self` instead of `&mut self` - we cannot use `&mut self` or have any 43 | // variables typed `mut EventCounter` or `&mut EventCounter` if we are reusing the same 44 | // instance for all operations aligned to a single thread. 45 | pub(crate) fn increment(&self) { 46 | self.local_count 47 | .set(self.local_count.get().saturating_add(1)); 48 | self.global_count.fetch_add(1, Ordering::Relaxed); 49 | } 50 | 51 | pub(crate) fn local_count(&self) -> usize { 52 | self.local_count.get() 53 | } 54 | 55 | pub(crate) fn global_count(&self) -> usize { 56 | self.global_count.load(Ordering::Relaxed) 57 | } 58 | } 59 | } 60 | 61 | use counters::EventCounter; 62 | 63 | linked::thread_local_rc!(static RECORDS_PROCESSED: EventCounter = EventCounter::new()); 64 | 65 | fn main() { 66 | const THREAD_COUNT: usize = 4; 67 | const INCREMENT_ITERATIONS: usize = 1_000; 68 | 69 | let mut threads = Vec::with_capacity(THREAD_COUNT); 70 | 71 | for _ in 0..THREAD_COUNT { 72 | threads.push(thread::spawn(move || { 73 | // This is the simplest approach, directly referencing the current thread's instance. 74 | RECORDS_PROCESSED.with(|x| x.increment()); 75 | 76 | // If needed, you can also obtain a long-lived reference to the current thread's 77 | // instance. Obtaining a long-lived reference is more efficient when accessing the 78 | // thread-specific instance, as long as you actually reuse the reference. 79 | // 80 | // These two are the exact same instance, just accessed via different references. 
81 | let counter1 = RECORDS_PROCESSED.to_rc(); 82 | let counter2 = RECORDS_PROCESSED.to_rc(); 83 | 84 | for _ in 0..INCREMENT_ITERATIONS { 85 | counter1.increment(); 86 | counter2.increment(); 87 | } 88 | 89 | // Again, the exact same instance as above! 90 | let counter3 = RECORDS_PROCESSED.to_rc(); 91 | 92 | println!( 93 | "Thread completed work; thread local count: {}, global count: {}", 94 | counter3.local_count(), 95 | counter3.global_count() 96 | ); 97 | })); 98 | } 99 | 100 | for thread in threads { 101 | thread.join().unwrap(); 102 | } 103 | 104 | let final_count = RECORDS_PROCESSED.to_rc().global_count(); 105 | 106 | println!("All threads completed work; final global count: {final_count}"); 107 | } 108 | -------------------------------------------------------------------------------- /crates/linked/src/__private.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! This module contains logically private things that must be technically public 5 | //! because they are accessed from macro-generated code. 6 | 7 | use std::fmt::{self, Debug, Formatter}; 8 | use std::sync::Arc; 9 | 10 | use crate::{Family, Object}; 11 | 12 | /// Re-export so we can use it via macros in projects that do not have a reference to `paste`. 13 | pub use ::paste::paste; 14 | 15 | /// This is meant to be used via the [`linked::new!`][crate::new] macro, never directly called. 16 | /// 17 | /// Creates a family of linked objects, the instances of which are created using a callback whose 18 | /// captured state connects all members of the linked object family. 19 | /// 20 | /// The instance factory must be thread-safe, which implies that all captured state in this factory 21 | /// function must be `Send` + `Sync` + `'static`. The instances it returns do not need to be thread- 22 | /// safe, however. 
23 | #[inline] 24 | pub fn new(instance_factory: impl Fn(Link) -> T + Send + Sync + 'static) -> T { 25 | Link::new(Arc::new(instance_factory)).into_instance() 26 | } 27 | 28 | /// This is meant to be used via the `#[linked::object]` macro, never directly called. 29 | /// 30 | /// Clones a linked object. They require a specific pattern to clone, so the `#[linked::object]` 31 | /// macro wires up a suitable `Clone` implementation for all such types to avoid mistakes. 32 | #[inline] 33 | pub fn clone(value: &T) -> T 34 | where 35 | T: Object + From>, 36 | { 37 | value.family().into() 38 | } 39 | 40 | pub(crate) type InstanceFactory = Arc) -> T + Send + Sync + 'static>; 41 | 42 | /// An object that connects an instance to other instances in the same linked object family. 43 | /// 44 | /// This type serves the linked object infrastructure and is not meant to be used by user code. 45 | /// It is a private public type because it is used in macro-generated code. 46 | pub struct Link { 47 | pub(super) instance_factory: InstanceFactory, 48 | } 49 | 50 | impl Debug for Link { 51 | #[cfg_attr(test, mutants::skip)] // We have no API contract for this. 52 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 53 | f.debug_struct("Link") 54 | .field( 55 | "instance_factory", 56 | &format_args!( 57 | "Arc) -> {t}>", 58 | t = std::any::type_name::() 59 | ), 60 | ) 61 | .finish() 62 | } 63 | } 64 | 65 | impl Link { 66 | #[must_use] 67 | pub(super) fn new(instance_factory: InstanceFactory) -> Self { 68 | Self { instance_factory } 69 | } 70 | 71 | #[must_use] 72 | pub(super) fn into_instance(self) -> T { 73 | let instance_factory = Arc::clone(&self.instance_factory); 74 | (instance_factory)(self) 75 | } 76 | 77 | // This type deliberately does not implement `Clone` to discourage accidental implementation of 78 | // cloning of type `T` via `#[derive(Clone)]`. 
The expected pattern is to use `#[linked::object]` 79 | // which generates both a `linked::Object` implementation and a specialized `Clone` implementation. 80 | #[must_use] 81 | fn clone(&self) -> Self { 82 | Self { 83 | instance_factory: Arc::clone(&self.instance_factory), 84 | } 85 | } 86 | 87 | #[inline] 88 | #[must_use] 89 | pub fn family(&self) -> Family { 90 | Family::new(self.clone()) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /crates/linked/src/constants.rs: -------------------------------------------------------------------------------- 1 | // A poisoned lock means the process is in an unrecoverable/unsafe state and must exit (we panic). 2 | pub(crate) const ERR_POISONED_LOCK: &str = "encountered poisoned lock - continued execution is not safe because we can no longer ensure that we uphold security and privacy guarantees"; 3 | -------------------------------------------------------------------------------- /crates/linked/src/family.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | use std::any::type_name; 5 | use std::fmt::{self, Debug, Formatter}; 6 | 7 | use crate::__private::{InstanceFactory, Link}; 8 | 9 | /// Represents a family of [linked objects][crate] and allows you to create additional instances 10 | /// in the same family. 11 | /// 12 | /// Clones represent the same family and are functionally equivalent. 13 | /// 14 | /// # When to use this type 15 | /// 16 | /// The family is a low-level primitive for creating instances of linked objects. You will need to 17 | /// use it directly if you are implementing custom instance management patterns. 
Typical usage of 18 | /// linked objects occurs via standard macros/wrappers provided by the crate: 19 | /// 20 | /// * [`linked::instances!`][1] 21 | /// * [`linked::thread_local_rc!`][2] 22 | /// * [`linked::thread_local_arc!`][4] (if `T: Sync`) 23 | /// * [`linked::InstancePerThread`][5] 24 | /// * [`linked::InstancePerThreadSync`][6] (if `T: Sync`) 25 | /// 26 | /// # Example 27 | /// 28 | /// ```rust 29 | /// # use std::sync::{Arc, Mutex}; 30 | /// # #[linked::object] 31 | /// # struct Thing { 32 | /// # value: Arc<Mutex<String>>, 33 | /// # } 34 | /// # impl Thing { 35 | /// # pub fn new(initial_value: String) -> Self { 36 | /// # let shared_value = Arc::new(Mutex::new(initial_value)); 37 | /// # linked::new!(Self { 38 | /// # value: shared_value.clone(), 39 | /// # }) 40 | /// # } 41 | /// # pub fn value(&self) -> String { 42 | /// # self.value.lock().unwrap().clone() 43 | /// # } 44 | /// # pub fn set_value(&self, value: String) { 45 | /// # *self.value.lock().unwrap() = value; 46 | /// # } 47 | /// # } 48 | /// use linked::Object; // This brings .family() into scope. 49 | /// use std::thread; 50 | /// 51 | /// let thing = Thing::new("hello".to_string()); 52 | /// assert_eq!(thing.value(), "hello"); 53 | /// 54 | /// thing.set_value("world".to_string()); 55 | /// 56 | /// thread::spawn({ 57 | /// let thing_family = thing.family(); 58 | /// 59 | /// move || { 60 | /// let thing: Thing = thing_family.into(); 61 | /// assert_eq!(thing.value(), "world"); 62 | /// } 63 | /// }).join().unwrap(); 64 | /// ``` 65 | /// 66 | /// [1]: crate::instances 67 | /// [2]: crate::thread_local_rc 68 | /// [4]: crate::thread_local_arc 69 | /// [5]: crate::InstancePerThread 70 | /// [6]: crate::InstancePerThreadSync 71 | #[derive(Clone)] 72 | pub struct Family { 73 | // For the family, we extract the factory from the `Link` because the `Link` is not thread-safe. 74 | // In other words, a `Link` exists only in interactions with a specific instance of `T`. 
75 | instance_factory: InstanceFactory, 76 | } 77 | 78 | impl Debug for Family { 79 | #[cfg_attr(test, mutants::skip)] // We have no API contract for this. 80 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 81 | f.debug_struct(type_name::()) 82 | .field( 83 | "instance_factory", 84 | &format_args!("Arc) -> {t}>", t = type_name::()), 85 | ) 86 | .finish() 87 | } 88 | } 89 | 90 | impl Family { 91 | #[must_use] 92 | pub(super) fn new(link: Link) -> Self { 93 | Self { 94 | instance_factory: link.instance_factory, 95 | } 96 | } 97 | 98 | // Implementation of `From> for T`, called from macro-generated code for a specific T. 99 | #[doc(hidden)] 100 | #[inline] 101 | #[must_use] 102 | pub fn __private_into(self) -> T { 103 | Link::new(self.instance_factory).into_instance() 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /crates/linked/src/macros.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | /// Defines the template used to create every instance in a linked object family. 5 | /// 6 | /// You are expected to use this in the constructor of a [linked object][crate], 7 | /// except when you want to always express the linked object via trait objects (`dyn Xyz`), 8 | /// in which case you should use [`linked::new_box`][crate::new_box]. 9 | /// 10 | /// The macro body must be a struct-expression of the `Self` type. Any variables the macro body 11 | /// captures must be thread-safe (`Send` + `Sync` + `'static`). The returned object itself does 12 | /// not need to be thread-safe. 
13 | /// 14 | /// # Example 15 | /// 16 | /// ``` 17 | /// use std::sync::{Arc, Mutex}; 18 | /// 19 | /// #[linked::object] 20 | /// struct TokenCache { 21 | /// tokens_created: usize, 22 | /// name: String, 23 | /// master_key: Arc<Mutex<String>>, 24 | /// is_multidimensional: bool, 25 | /// } 26 | /// 27 | /// impl TokenCache { 28 | /// fn new(name: String, is_multidimensional: bool) -> Self { 29 | /// // Any shared data referenced by the macro body must be thread-safe. 30 | /// let master_key = Arc::new(Mutex::new(String::new())); 31 | /// 32 | /// linked::new!(Self { 33 | /// tokens_created: 0, 34 | /// name: name.clone(), 35 | /// master_key: Arc::clone(&master_key), 36 | /// is_multidimensional, 37 | /// }) 38 | /// } 39 | /// } 40 | /// ``` 41 | /// 42 | /// Complex expressions are supported within the `Self` struct-expression: 43 | /// 44 | /// ``` 45 | /// #[linked::object] 46 | /// struct TokenCache { 47 | /// token_sources: Vec<linked::Box<dyn TokenSource>>, 48 | /// } 49 | /// # trait TokenSource {} 50 | /// 51 | /// impl TokenCache { 52 | /// fn new(source_families: Vec<linked::Family<linked::Box<dyn TokenSource>>>) -> Self { 53 | /// linked::new!(Self { 54 | /// token_sources: source_families 55 | /// .iter() 56 | /// .cloned() 57 | /// .map(linked::Family::into) 58 | /// .collect() 59 | /// }) 60 | /// } 61 | /// } 62 | /// ``` 63 | /// 64 | /// For a complete example, see `examples/linked_basic.rs`. 65 | #[macro_export] 66 | macro_rules! new { 67 | // `new!(Self)` is forwarded to `new!(Self {})` 68 | (Self) => { 69 | $crate::new!(Self {}) 70 | }; 71 | // Special case if there are no field initializers (for proper comma handling). 72 | (Self {}) => { 73 | $crate::__private::new(move |__private_linked_link| Self { 74 | __private_linked_link, 75 | }) 76 | }; 77 | // Typical case - struct expression with one or more field initializers (the empty case is handled above). 78 | // Each field initializer is processed as per the `@expand` rules below, 79 | // which essentially does not touch/change them. 80 | (Self { $($field:ident $( : $value:expr )?),* $(,)? 
}) => { 81 | $crate::__private::new(move |__private_linked_link| Self { 82 | $($field: $crate::new!(@expand $field $( : $value )?)),*, 83 | __private_linked_link, 84 | }) 85 | }; 86 | (@expand $field:ident : $value:expr) => { 87 | $value 88 | }; 89 | (@expand $field:ident) => { 90 | $field 91 | }; 92 | } 93 | -------------------------------------------------------------------------------- /crates/linked/src/object.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | use crate::Family; 5 | 6 | /// Operations available on every instance of a [linked object][crate]. 7 | /// 8 | /// The only supported way to implement this is via [`#[linked::object]`][crate::object]. 9 | pub trait Object: From> + Sized + Clone + 'static { 10 | /// The object family that the current instance is linked to. 11 | /// 12 | /// The returned object can be used to create additional instances linked to the same family. 13 | fn family(&self) -> Family; 14 | } 15 | -------------------------------------------------------------------------------- /crates/linked/src/thread_id_hash.rs: -------------------------------------------------------------------------------- 1 | use std::hash::{BuildHasher, Hasher}; 2 | 3 | /// A hasher implementation specialized for thread IDs. 4 | pub(crate) struct ThreadIdHasher { 5 | state: u64, 6 | } 7 | 8 | impl ThreadIdHasher { 9 | pub(crate) fn new() -> Self { 10 | Self { state: 0 } 11 | } 12 | } 13 | 14 | impl Hasher for ThreadIdHasher { 15 | fn finish(&self) -> u64 { 16 | self.state 17 | } 18 | 19 | // No mutation - we avoid hardcoding hash logic into tests, so expectations are minimal. 20 | #[cfg_attr(test, mutants::skip)] 21 | fn write(&mut self, bytes: &[u8]) { 22 | assert_eq!( 23 | bytes.len(), 24 | 8, 25 | "ThreadIdHasher expects exactly 8 bytes (u64) as input" 26 | ); 27 | 28 | // We expect this to only be called once per hash operation. 
29 | // We expect the contents to be a u64 that typically has only 30 | // the low bits set (rare to see more than 16 bits of data, often even 8 bits). 31 | self.state = u64::from_le_bytes(bytes.try_into().expect("expecting ThreadId to be u64")); 32 | 33 | // We copy the low byte into the high byte because HashMap seems to care a lot about 34 | // the high bits (this is used as the control byte for fast comparisons). 35 | self.state ^= u64::from( 36 | *bytes 37 | .first() 38 | .expect("already asserted that we have enough bytes"), 39 | ) << 56; 40 | } 41 | } 42 | 43 | /// A `BuildHasher` that creates `ThreadIdHasher` instances. 44 | pub(crate) struct BuildThreadIdHasher; 45 | 46 | impl BuildHasher for BuildThreadIdHasher { 47 | type Hasher = ThreadIdHasher; 48 | 49 | fn build_hasher(&self) -> Self::Hasher { 50 | ThreadIdHasher::new() 51 | } 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::*; 57 | 58 | #[test] 59 | fn control_byte_is_different() { 60 | // Even for tiny changes in the ID value, we expect the control byte (high byte) to be 61 | // different because the control byte comparison is performance-critical. 62 | let mut hasher = ThreadIdHasher::new(); 63 | hasher.write(&0_u64.to_le_bytes()); 64 | let hash1 = hasher.finish(); 65 | 66 | let mut hasher = ThreadIdHasher::new(); 67 | hasher.write(&1_u64.to_le_bytes()); 68 | let hash2 = hasher.finish(); 69 | 70 | // There has to be at least some difference. 71 | assert_ne!(hash1, hash2); 72 | 73 | // This is the control byte (high byte). 74 | assert_ne!(hash1 & 0xFF00_0000_0000_0000, hash2 & 0xFF00_0000_0000_0000); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /crates/linked/tests/linked_object.rs: -------------------------------------------------------------------------------- 1 | //! Linked object definition under various edge cases. 
2 | 3 | #[test] 4 | fn empty_struct() { 5 | #[linked::object] 6 | struct Empty {} 7 | 8 | impl Empty { 9 | fn new() -> Self { 10 | linked::new!(Self {}) 11 | } 12 | } 13 | 14 | drop(Empty::new()); 15 | } 16 | 17 | #[test] 18 | fn very_empty_struct() { 19 | #[linked::object] 20 | struct Empty {} 21 | 22 | impl Empty { 23 | fn new() -> Self { 24 | linked::new!(Self) 25 | } 26 | } 27 | 28 | drop(Empty::new()); 29 | } 30 | -------------------------------------------------------------------------------- /crates/linked/tests/smoke.rs: -------------------------------------------------------------------------------- 1 | //! Basic operations on linked objects. 2 | 3 | use std::{ 4 | sync::{Arc, Mutex}, 5 | thread, 6 | }; 7 | 8 | use linked::Object; 9 | 10 | #[test] 11 | fn linked_objects_smoke_test() { 12 | #[linked::object] 13 | struct Thing { 14 | local_value: usize, 15 | global_value: Arc>, 16 | } 17 | 18 | impl Thing { 19 | fn new(local_value: usize, global_value: String) -> Self { 20 | let global_value = Arc::new(Mutex::new(global_value)); 21 | 22 | linked::new!(Self { 23 | local_value, 24 | global_value: Arc::clone(&global_value), 25 | }) 26 | } 27 | 28 | fn set_global_value(&self, value: &str) { 29 | let mut global_value = self.global_value.lock().unwrap(); 30 | *global_value = value.to_string(); 31 | } 32 | 33 | fn get_global_value(&self) -> String { 34 | let global_value = self.global_value.lock().unwrap(); 35 | global_value.clone() 36 | } 37 | 38 | fn get_local_value(&self) -> usize { 39 | self.local_value 40 | } 41 | 42 | fn set_local_value(&mut self, value: usize) { 43 | self.local_value = value; 44 | } 45 | } 46 | 47 | let mut linked_object = Thing::new(42, "hello".to_string()); 48 | 49 | assert_eq!(linked_object.get_local_value(), 42); 50 | assert_eq!(linked_object.get_global_value(), "hello"); 51 | 52 | let clone = linked_object.clone(); 53 | 54 | linked_object.set_global_value("world"); 55 | linked_object.set_local_value(43); 56 | 57 | 
assert_eq!(linked_object.get_local_value(), 43); 58 | assert_eq!(linked_object.get_global_value(), "world"); 59 | 60 | assert_eq!(clone.get_local_value(), 42); 61 | assert_eq!(clone.get_global_value(), "world"); 62 | 63 | let handle = linked_object.family(); 64 | 65 | thread::spawn(move || { 66 | let mut linked_object: Thing = handle.into(); 67 | 68 | assert_eq!(linked_object.get_local_value(), 42); 69 | assert_eq!(linked_object.get_global_value(), "world"); 70 | 71 | linked_object.set_global_value("paradise"); 72 | linked_object.set_local_value(45); 73 | }) 74 | .join() 75 | .unwrap(); 76 | 77 | assert_eq!(linked_object.get_local_value(), 43); 78 | assert_eq!(linked_object.get_global_value(), "paradise"); 79 | 80 | assert_eq!(clone.get_local_value(), 42); 81 | assert_eq!(clone.get_global_value(), "paradise"); 82 | } 83 | -------------------------------------------------------------------------------- /crates/linked_macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "linked_macros" 3 | description = "Internal dependency of the 'linked' crate - do not reference directly" 4 | publish = true 5 | version = "0.2.0" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | proc-macro = true 15 | doc = false 16 | 17 | [package.metadata.cargo-machete] 18 | ignored = ["proc-macro2"] 19 | 20 | [dependencies] 21 | linked_macros_impl = { workspace = true } 22 | proc-macro2 = { workspace = true, features = ["proc-macro"] } 23 | 24 | [dev-dependencies] 25 | 26 | [lints] 27 | workspace = true 28 | -------------------------------------------------------------------------------- /crates/linked_macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 
3 | 4 | #![allow( 5 | missing_docs, 6 | reason = "Private API, public API is documented in `linked` crate" 7 | )] 8 | 9 | use proc_macro::TokenStream; 10 | 11 | #[proc_macro_attribute] 12 | pub fn __macro_linked_object(attr: TokenStream, item: TokenStream) -> TokenStream { 13 | linked_macros_impl::linked_object::entrypoint(&attr.into(), &item.into()).into() 14 | } 15 | -------------------------------------------------------------------------------- /crates/linked_macros_impl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "linked_macros_impl" 3 | description = "Internal dependency of the 'linked_macros' crate - do not reference directly" 4 | publish = true 5 | version = "0.2.0" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [dependencies] 17 | proc-macro2 = { workspace = true } 18 | quote = { workspace = true } 19 | syn = { workspace = true, features = ["full", "parsing", "printing"] } 20 | 21 | [dev-dependencies] 22 | 23 | [lints] 24 | workspace = true 25 | -------------------------------------------------------------------------------- /crates/linked_macros_impl/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | #![allow( 5 | missing_docs, 6 | reason = "Private API, public API is documented in `linked` crate" 7 | )] 8 | 9 | pub mod linked_object; 10 | mod syn_helpers; 11 | -------------------------------------------------------------------------------- /crates/linked_macros_impl/src/syn_helpers.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Copyright (c) Folo authors. 3 | 4 | //! 
This module contains helper functions for consuming and producing Rust syntax elements. 5 | 6 | use proc_macro2::TokenStream; 7 | use quote::quote; 8 | 9 | /// Combines a token stream with a syn-originating contextual error message that contains 10 | /// all the necessary metadata to emit rich errors (with red underlines and all that). 11 | /// 12 | /// Also preserves the original token stream, merely appending the error instead of replacing. 13 | pub(crate) fn token_stream_and_error(s: &TokenStream, e: &syn::Error) -> TokenStream { 14 | let error = e.to_compile_error(); 15 | 16 | // We preserve both the original input and emit the compiler error message. 17 | // This ensures that we do not cause extra problems by removing the original input 18 | // from the code file (which would result in "trait not found" and similar errors). 19 | quote! { 20 | #s 21 | #error 22 | } 23 | } 24 | 25 | /// Attempts to identify any compile-time error in the token stream. This is useful for unit 26 | /// testing macros - if the macro is expected to produce a compile-time error, we can check 27 | /// whether one exists. 28 | /// 29 | /// We deliberately do not take an error message as input here. Testing for error messages is 30 | /// fragile and creates maintenance headaches - be satisfied with OK/NOK testing and keep it simple. 31 | #[cfg(test)] 32 | pub(crate) fn contains_compile_error(tokens: &TokenStream) -> bool { 33 | // String-based implementation, so vulnerable to false positives in very unlikely cases. 34 | tokens.to_string().contains(":: core :: compile_error ! {") 35 | } 36 | 37 | #[cfg(test)] 38 | mod tests { 39 | use proc_macro2::Span; 40 | 41 | use super::*; 42 | 43 | #[test] 44 | fn token_stream_and_error_outputs_both() { 45 | // This is a bit tricky because we do not know the specific form the compiler error 46 | // is going to be. However, we know it must contain our error message, so just check that. 
47 | let canary = "nrtfynjcrtupyh6rhdoj85m7yoi"; 48 | 49 | // We also need to ensure it contains this function (that it did not get overwritten). 50 | let s = quote! { 51 | fn gkf5dj8yhuldri58uygdkiluyot() {} 52 | }; 53 | 54 | let e = syn::Error::new(Span::call_site(), canary); 55 | 56 | let merged = token_stream_and_error(&s, &e); 57 | 58 | let merged_str = merged.to_string(); 59 | assert!(merged_str.contains(canary)); 60 | assert!(merged_str.contains("gkf5dj8yhuldri58uygdkiluyot")); 61 | } 62 | 63 | #[test] 64 | fn contains_compile_error_yes_raw() { 65 | let tokens = quote! { 66 | let foo = "Some random stuff may also be here"; 67 | blah! { blah } 68 | ::core::compile_error! { "This is a test error message." }; 69 | let bar = "More random stuff here" 70 | }; 71 | 72 | assert!(contains_compile_error(&tokens)); 73 | } 74 | 75 | #[test] 76 | fn contains_compile_error_yes_generated() { 77 | let tokens = quote! { 78 | let foo = "Some random stuff may also be here"; 79 | blah! { blah } 80 | ::core::compile_error!("This is a test error message."); 81 | let bar = "More random stuff here" 82 | }; 83 | 84 | let tokens = 85 | token_stream_and_error(&tokens, &syn::Error::new(Span::call_site(), "Testing")); 86 | 87 | assert!(contains_compile_error(&tokens)); 88 | } 89 | 90 | #[test] 91 | fn contains_compile_error_no() { 92 | let tokens = quote! { 93 | let foo = "No compile error here!" 
94 | }; 95 | 96 | assert!(!contains_compile_error(&tokens)); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /crates/many_cpus/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "many_cpus" 3 | description = "Efficiently schedule work and inspect the hardware environment on many-processor systems" 4 | publish = true 5 | version = "0.3.1" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | cpulist = { workspace = true } 18 | derive_more = { workspace = true, features = ["as_ref", "display"] } 19 | foldhash = { workspace = true } 20 | folo_utils = { workspace = true } 21 | itertools = { workspace = true } 22 | negative-impl = { workspace = true } 23 | nonempty = { workspace = true } 24 | rand = { workspace = true, features = ["thread_rng"] } 25 | 26 | [target.'cfg(unix)'.dependencies] 27 | cpulist = { workspace = true } 28 | libc = { workspace = true } 29 | 30 | [target.'cfg(windows)'.dependencies] 31 | folo_ffi = { workspace = true } 32 | heapless = { workspace = true } 33 | smallvec = { workspace = true } 34 | windows = { workspace = true, features = [ 35 | "Win32_System_JobObjects", 36 | "Win32_System_Kernel", 37 | "Win32_System_SystemInformation", 38 | "Win32_System_Threading", 39 | ] } 40 | 41 | [dev-dependencies] 42 | benchmark_utils = { workspace = true } 43 | criterion = { workspace = true } 44 | mockall = { workspace = true } 45 | mutants = { workspace = true } 46 | scopeguard = { workspace = true } 47 | static_assertions = { workspace = true } 48 | testing = { workspace = true } 49 | 50 | [target.'cfg(windows)'.dev-dependencies] 51 | windows = { workspace = true, features = ["Win32_Security"] } 52 | 53 | [[bench]] 54 | name = "hardware_info" 55 | harness = false 56 | 
57 | [[bench]] 58 | name = "hardware_tracker" 59 | harness = false 60 | 61 | [[bench]] 62 | name = "pal_windows" 63 | harness = false 64 | 65 | [[bench]] 66 | name = "processor_set_builder" 67 | harness = false 68 | 69 | [lints] 70 | workspace = true 71 | -------------------------------------------------------------------------------- /crates/many_cpus/README.md: -------------------------------------------------------------------------------- 1 | Working on many-processor systems with 100+ logical processors can require you to pay extra 2 | attention to the specifics of the hardware to make optimal use of available compute capacity 3 | and extract the most performance out of the system. 4 | 5 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 6 | high-performance hardware-aware programming in Rust. 7 | 8 | # Why should one care? 9 | 10 | Modern operating systems try to distribute work fairly between all processors. Typical Rust 11 | sync and async task runtimes like Rayon and Tokio likewise try to be efficient in occupying all 12 | processors with work, even moving work between processors if one risks becoming idle. This is fine 13 | but we can do better. 14 | 15 | Taking direct control over the placement of work on specific processors can yield superior 16 | performance by taking advantage of factors under the service author's control, which are not known 17 | to general-purpose tasking runtimes: 18 | 19 | 1. A key insight we can use is that most service apps exist to process requests or execute jobs - each 20 | unit of work being done is related to a specific data set. We can ensure we only process the data 21 | associated with a specific HTTP/gRPC request on a single processor to ensure optimal data locality. 22 | This means the data related to the request is likely to be in the caches of that processor, speeding 23 | up all operations related to that request by avoiding expensive memory accesses. 24 | 1. 
Even when data is intentionally shared across processors (e.g. because one processor is not capable 25 | enough to do the work and parallelization is required), performance differences exist between 26 | different pairs of processors because different processors can be connected to different physical 27 | memory modules. Access to non-cached data is optimal when that data is in the same memory region 28 | as the current processor (i.e. on the physical memory modules directly wired to the current 29 | processor). 30 | 31 | # How does this crate help? 32 | 33 | The `many_cpus` crate provides mechanisms to schedule threads on specific processors and in specific 34 | memory regions, ensuring that work assigned to those threads remains on the same hardware and that 35 | data shared between threads is local to the same memory region, enabling you to achieve high data 36 | locality and processor cache efficiency. 37 | 38 | In addition to thread spawning, this crate enables app logic to observe what processor the current 39 | thread is executing on and in which memory region this processor is located, even if the thread is 40 | not bound to a specific processor. This can be a building block for efficiency improvements even 41 | outside directly controlled work scheduling. 42 | 43 | Other crates from the [Folo project](https://github.com/folo-rs/folo) build upon this hardware- 44 | awareness functionality to provide higher-level primitives such as thread pools, work schedulers, 45 | region-local cells and more. 46 | 47 | # How to use this crate? 48 | 49 | More details in the [crate documentation](https://docs.rs/many_cpus/). -------------------------------------------------------------------------------- /crates/many_cpus/benches/hardware_info.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarking operations exposed by the `HardwareInfo` struct. 
2 | 3 | #![allow( 4 | missing_docs, 5 | reason = "No need for API documentation in benchmark code" 6 | )] 7 | 8 | use criterion::{Criterion, criterion_group, criterion_main}; 9 | use many_cpus::HardwareInfo; 10 | 11 | criterion_group!(benches, entrypoint); 12 | criterion_main!(benches); 13 | 14 | fn entrypoint(c: &mut Criterion) { 15 | let mut group = c.benchmark_group("HardwareInfo"); 16 | 17 | // Mostly pointless since all the accessors just load from a lazily initialized static 18 | // variable. Just here to detect anomalies if we do something strange and it gets slow. 19 | group.bench_function("max_processor_id", |b| { 20 | b.iter(HardwareInfo::max_processor_id); 21 | }); 22 | 23 | group.finish(); 24 | } 25 | -------------------------------------------------------------------------------- /crates/many_cpus/benches/hardware_tracker.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarking operations exposed by the `HardwareTracker` struct. 2 | 3 | #![allow( 4 | missing_docs, 5 | reason = "No need for API documentation in benchmark code" 6 | )] 7 | 8 | use std::{hint::black_box, time::Duration}; 9 | 10 | use criterion::{Criterion, criterion_group, criterion_main}; 11 | use folo_utils::nz; 12 | use many_cpus::{HardwareTracker, ProcessorSet}; 13 | 14 | criterion_group!(benches, entrypoint); 15 | criterion_main!(benches); 16 | 17 | fn entrypoint(c: &mut Criterion) { 18 | let mut group = c.benchmark_group("HardwareTracker"); 19 | 20 | // Results from this are really unstable for whatever reason. Give it more time to stabilize. 21 | group.measurement_time(Duration::from_secs(30)); 22 | 23 | group.bench_function("current_processor_unpinned", |b| { 24 | b.iter(|| { 25 | black_box(HardwareTracker::with_current_processor(|p| { 26 | // We cannot return a reference to the processor itself but this is close enough. 
27 | p.id() 28 | })); 29 | }); 30 | }); 31 | 32 | group.bench_function("current_processor_id_unpinned", |b| { 33 | b.iter(|| { 34 | black_box(HardwareTracker::current_processor_id()); 35 | }); 36 | }); 37 | 38 | group.bench_function("current_memory_region_id_unpinned", |b| { 39 | b.iter(|| { 40 | black_box(HardwareTracker::current_memory_region_id()); 41 | }); 42 | }); 43 | 44 | // Now we pin the current thread and do the whole thing again! 45 | let one_processor = ProcessorSet::builder() 46 | .performance_processors_only() 47 | .take(nz!(1)) 48 | .unwrap(); 49 | 50 | one_processor.pin_current_thread_to(); 51 | 52 | group.bench_function("current_processor_pinned", |b| { 53 | b.iter(|| { 54 | black_box(HardwareTracker::with_current_processor(|p| { 55 | // We cannot return a reference to the processor itself but this is close enough. 56 | p.id() 57 | })); 58 | }); 59 | }); 60 | 61 | group.bench_function("current_processor_id_pinned", |b| { 62 | b.iter(|| { 63 | black_box(HardwareTracker::current_processor_id()); 64 | }); 65 | }); 66 | 67 | group.bench_function("current_memory_region_id_pinned", |b| { 68 | b.iter(|| { 69 | black_box(HardwareTracker::current_memory_region_id()); 70 | }); 71 | }); 72 | 73 | // Don't forget to unpin the thread to avoid affecting future benchmarks! 74 | ProcessorSet::builder() 75 | .ignoring_resource_quota() 76 | .take_all() 77 | .unwrap() 78 | .pin_current_thread_to(); 79 | 80 | group.finish(); 81 | } 82 | -------------------------------------------------------------------------------- /crates/many_cpus/benches/pal_windows.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarking Windows PAL internal logic via private API that bypasses the 2 | //! public API and allows operations to be performed without (full) caching. 
3 | 4 | #![allow( 5 | missing_docs, 6 | reason = "No need for API documentation in benchmark code" 7 | )] 8 | 9 | use criterion::{Criterion, criterion_group, criterion_main}; 10 | 11 | criterion_group!(benches, entrypoint); 12 | criterion_main!(benches); 13 | 14 | #[allow( 15 | clippy::needless_pass_by_ref_mut, 16 | reason = "spurious error on non-Windows" 17 | )] 18 | fn entrypoint(c: &mut Criterion) { 19 | #[cfg(windows)] 20 | windows::entrypoint(c); 21 | 22 | #[cfg(not(windows))] 23 | { 24 | _ = c; 25 | } 26 | } 27 | 28 | #[cfg(windows)] 29 | mod windows { 30 | use std::{hint::black_box, sync::Arc, time::Duration}; 31 | 32 | use benchmark_utils::{ThreadPool, bench_on_threadpool}; 33 | use criterion::Criterion; 34 | use folo_utils::nz; 35 | use many_cpus::{ProcessorSet, pal::BUILD_TARGET_PLATFORM}; 36 | use windows::Win32::System::SystemInformation::GROUP_AFFINITY; 37 | 38 | pub(crate) fn entrypoint(c: &mut Criterion) { 39 | let mut group = c.benchmark_group("Pal_Windows"); 40 | 41 | // The results are quite jittery. Give it some time to stabilize. 
42 | group.measurement_time(Duration::from_secs(30)); 43 | 44 | group.bench_function("current_thread_processors", |b| { 45 | b.iter(|| black_box(BUILD_TARGET_PLATFORM.__private_current_thread_processors())); 46 | }); 47 | 48 | group.bench_function("get_all_processors", |b| { 49 | b.iter(|| BUILD_TARGET_PLATFORM.__private_get_all_processors()); 50 | }); 51 | 52 | group.bench_function("affinity_mask_to_processor_id_1", |b| { 53 | let mask = GROUP_AFFINITY { 54 | Group: 0, 55 | Mask: 1, 56 | ..Default::default() 57 | }; 58 | 59 | b.iter(|| { 60 | black_box(BUILD_TARGET_PLATFORM.__private_affinity_mask_to_processor_id(&mask)) 61 | }); 62 | }); 63 | 64 | group.bench_function("affinity_mask_to_processor_id_16", |b| { 65 | let mask = GROUP_AFFINITY { 66 | Group: 0, 67 | Mask: 0xFF, 68 | ..Default::default() 69 | }; 70 | 71 | b.iter(|| { 72 | black_box(BUILD_TARGET_PLATFORM.__private_affinity_mask_to_processor_id(&mask)) 73 | }); 74 | }); 75 | 76 | group.bench_function("pin_thread_to_default_set", |b| { 77 | let default_processor_set = Arc::new(ProcessorSet::default()); 78 | let one_processor = ProcessorSet::builder().take(nz!(1)).unwrap(); 79 | let one_thread = ThreadPool::new(&one_processor); 80 | 81 | b.iter_custom({ 82 | |iters| { 83 | bench_on_threadpool(&one_thread, iters, || (), { 84 | let default_processor_set = Arc::clone(&default_processor_set); 85 | 86 | move |()| { 87 | default_processor_set.pin_current_thread_to(); 88 | } 89 | }) 90 | } 91 | }); 92 | }); 93 | 94 | group.finish(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /crates/many_cpus/benches/processor_set_builder.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarking operations on the `ProcessorSetBuilder` type. 
2 | 3 | #![allow( 4 | missing_docs, 5 | reason = "No need for API documentation in benchmark code" 6 | )] 7 | 8 | use std::{hint::black_box, time::Duration}; 9 | 10 | use benchmark_utils::{ThreadPool, bench_on_threadpool}; 11 | use criterion::{Criterion, criterion_group, criterion_main}; 12 | use folo_utils::nz; 13 | use many_cpus::ProcessorSet; 14 | 15 | criterion_group!(benches, entrypoint); 16 | criterion_main!(benches); 17 | 18 | fn entrypoint(c: &mut Criterion) { 19 | let thread_pool = ThreadPool::default(); 20 | 21 | let mut group = c.benchmark_group("ProcessorSetBuilder"); 22 | 23 | // Results from this are really unstable for whatever reason. Give it more time to stabilize. 24 | group.measurement_time(Duration::from_secs(30)); 25 | 26 | group.bench_function("all", |b| { 27 | b.iter(|| { 28 | black_box(ProcessorSet::builder().take_all().unwrap()); 29 | }); 30 | }); 31 | 32 | group.bench_function("one", |b| { 33 | b.iter(|| { 34 | black_box(ProcessorSet::builder().take(nz!(1)).unwrap()); 35 | }); 36 | }); 37 | 38 | group.bench_function("only_evens", |b| { 39 | b.iter(|| { 40 | black_box( 41 | ProcessorSet::builder() 42 | .filter(|p| p.id() % 2 == 0) 43 | .take_all() 44 | .unwrap(), 45 | ); 46 | }); 47 | }); 48 | 49 | group.finish(); 50 | 51 | let mut group = c.benchmark_group("ProcessorSetBuilder_MT"); 52 | 53 | // Results from this are really unstable for whatever reason. Give it more time to stabilize. 
54 | group.measurement_time(Duration::from_secs(30)); 55 | 56 | group.bench_function("all", |b| { 57 | b.iter_custom(|iters| { 58 | bench_on_threadpool( 59 | &thread_pool, 60 | iters, 61 | || (), 62 | |()| { 63 | black_box(ProcessorSet::builder().take_all().unwrap()); 64 | }, 65 | ) 66 | }); 67 | }); 68 | 69 | group.bench_function("one", |b| { 70 | b.iter_custom(|iters| { 71 | bench_on_threadpool( 72 | &thread_pool, 73 | iters, 74 | || (), 75 | |()| { 76 | black_box(ProcessorSet::builder().take(nz!(1)).unwrap()); 77 | }, 78 | ) 79 | }); 80 | }); 81 | 82 | group.bench_function("only_evens", |b| { 83 | b.iter_custom(|iters| { 84 | bench_on_threadpool( 85 | &thread_pool, 86 | iters, 87 | || (), 88 | |()| { 89 | black_box( 90 | ProcessorSet::builder() 91 | .filter(|p| p.id() % 2 == 0) 92 | .take_all() 93 | .unwrap(), 94 | ); 95 | }, 96 | ) 97 | }); 98 | }); 99 | 100 | group.finish(); 101 | } 102 | -------------------------------------------------------------------------------- /crates/many_cpus/docs/snippets/changes_at_runtime.md: -------------------------------------------------------------------------------- 1 | # Changes at runtime 2 | 3 | It is possible that a system will have processors added or removed at runtime, or for 4 | constraints enforced by the operating system to change over time. Such changes will not be 5 | represented in an existing processor set - once created, a processor set is static. 6 | 7 | Changes to resource quotas can be applied by creating a new processor set (e.g. if the 8 | processor time quota is lowered, building a new set will by default use the new quota). 9 | 10 | This crate will not detect more fundamental changes such as added/removed processors. Operations 11 | attempted on removed processors may fail with an error or panic or silently misbehave (e.g. 12 | threads never starting). Added processors will not be considered a member of any set. 
-------------------------------------------------------------------------------- /crates/many_cpus/docs/snippets/external_constraints.md: -------------------------------------------------------------------------------- 1 | # External constraints 2 | 3 | The operating system may define constraints that prohibit the application from using all 4 | the available processors (e.g. when the app is containerized and provided limited 5 | hardware resources). 6 | 7 | This crate treats platform constraints as follows: 8 | 9 | * Hard limits on which processors are allowed are respected - forbidden processors are mostly 10 | ignored by this crate and cannot be used to spawn threads, though such processors are still 11 | accounted for when inspecting hardware information such as "max processor ID". 12 | The mechanisms for defining such limits are cgroups on Linux and job objects on Windows. 13 | See `examples/obey_job_affinity_windows.rs` for a Windows-specific example. 14 | * Soft limits on which processors are allowed are ignored by default - specifying a processor 15 | affinity via `taskset` on Linux, `start.exe /affinity 0xff` on Windows or similar mechanisms 16 | does not affect the set of processors this crate will use by default, though you can opt in to 17 | this via [`.where_available_for_current_thread()`][crate::ProcessorSetBuilder::where_available_for_current_thread]. 18 | * Limits on processor time are considered an upper bound on the number of processors that can be 19 | included in a processor set. For example, if you configure a processor time limit of 20 | 10 seconds per second of real time on a 20-processor system, then the builder may return up 21 | to 10 of the processors in the resulting processor set (though it may be a different 10 every 22 | time you create a new processor set from scratch). This limit is optional and may be disabled 23 | by using [`.ignoring_resource_quota()`][crate::ProcessorSetBuilder::ignoring_resource_quota].
24 | See `examples/obey_job_resource_quota_windows.rs` for a Windows-specific example. 25 | 26 | # Working with processor time constraints 27 | 28 | If a process exceeds the processor time limit, the operating system will delay executing the 29 | process further until the "debt is paid off". This is undesirable for most workloads because: 30 | 31 | 1. There will be random latency spikes from when the operating system decides to apply a delay. 32 | 1. The delay may not be evenly applied across all threads of the process, leading to unbalanced 33 | load between worker threads. 34 | 35 | For predictable behavior that does not suffer from delay side-effects, it is important that the 36 | process does not exceed the processor time limit. To keep out of trouble, 37 | follow these guidelines: 38 | 39 | * Ensure that all your concurrently executing thread pools are derived from the same processor 40 | set, so there is a single set of processors (up to the resource quota) that all work of the 41 | process will be executed on. Any new processor sets you create should be subsets of this set, 42 | thereby ensuring that all worker threads combined do not exceed the quota. 43 | * Ensure that the original processor set is constructed while obeying the resource quota (which is 44 | enabled by default). 45 | 46 | If your resource constraints are already applied on process startup, you can use 47 | `ProcessorSet::default()` as the master set from which all other processor sets are derived using 48 | `ProcessorSet::default().to_builder()`. This will ensure the processor time quota is always obeyed 49 | because `ProcessorSet::default()` is guaranteed to obey the resource quota.
50 | 51 | ```rust ignore 52 | let mail_senders = ProcessorSet::default().to_builder().take(MAIL_WORKER_COUNT).unwrap(); 53 | ``` -------------------------------------------------------------------------------- /crates/many_cpus/examples/get_all_processors.rs: -------------------------------------------------------------------------------- 1 | //! We inspect every processor available to the current process and write a 2 | //! human-readable description of it to the terminal. 3 | //! 4 | //! This obeys the operating system enforced processor selection constraints 5 | //! assigned to the current process (which is always the case). 6 | //! 7 | //! However, this does not obey the resource quota available to the current process. This is 8 | //! typically not useful for executing work but may be useful for inspecting available processors. 9 | 10 | use many_cpus::ProcessorSet; 11 | 12 | fn main() { 13 | for processor in ProcessorSet::builder() 14 | .ignoring_resource_quota() 15 | .take_all() 16 | .unwrap() 17 | .processors() 18 | { 19 | println!("{processor:?}"); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/get_default_processors.rs: -------------------------------------------------------------------------------- 1 | //! We inspect every processor in the default set and write a 2 | //! human-readable description of it to the terminal. 3 | //! 4 | //! This obeys the operating system enforced processor selection and resource quota constraints 5 | //! assigned to the current process (which is the default behavior). 
6 | 7 | use many_cpus::ProcessorSet; 8 | 9 | fn main() { 10 | for processor in ProcessorSet::default().processors() { 11 | println!("{processor:?}"); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/obey_job_affinity_windows.rs: -------------------------------------------------------------------------------- 1 | //! The mechanism used in Windows to enforce limits on processes is Job Objects. Processes are 2 | //! assigned to jobs, and jobs can be constrained to only use a limited set of processors. 3 | //! 4 | //! This example proves that the APIs we offer do not "see" the universe outside of the limits 5 | //! of the current process's job object constraints on processor affinity (which processors 6 | //! the process is allowed to use). 7 | //! 8 | //! Job object limits are hard limits, whereas all other mechanisms to define affinity (e.g. CPU 9 | //! sets and legacy "process affinity masks") are just wishes by the process in question. 10 | //! In case of conflicting masks, the intersection is used. 11 | //! 12 | //! This example is Windows-only, as job objects are a Windows-specific feature. 13 | 14 | fn main() { 15 | #[cfg(windows)] 16 | windows::main(); 17 | 18 | #[cfg(not(windows))] 19 | panic!("This example is only supported on Windows."); 20 | } 21 | 22 | #[cfg(windows)] 23 | mod windows { 24 | use folo_utils::nz; 25 | use many_cpus::ProcessorSet; 26 | use testing::Job; 27 | 28 | pub(crate) fn main() { 29 | // Restrict the current process to only use 2 processors. 30 | let _job = Job::builder().with_processor_count(nz!(2)).build(); 31 | 32 | verify_limits_obeyed(); 33 | } 34 | 35 | fn verify_limits_obeyed() { 36 | // The default processor set obeys all the limits that apply to the current process. 
37 | let processor_count = ProcessorSet::default().len(); 38 | println!("Current process is allowed to use {processor_count} processors."); 39 | 40 | assert_eq!(processor_count, 2); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/obey_job_resource_quota_windows.rs: -------------------------------------------------------------------------------- 1 | //! The mechanism used in Windows to enforce limits on processes is Job Objects. Processes are 2 | //! assigned to jobs, and jobs can be constrained to only use a limited set of processors. 3 | //! 4 | //! This example proves that the APIs we offer can accurately judge the resource quota assigned 5 | //! to the process and follow best practices for processor set sizing when a quota is active. 6 | //! 7 | //! We configure the job object to only grant 50% of the system processor time to the process. 8 | //! 9 | //! This example is Windows-only, as job objects are a Windows-specific feature. 10 | 11 | fn main() { 12 | #[cfg(windows)] 13 | windows::main(); 14 | 15 | #[cfg(not(windows))] 16 | panic!("This example is only supported on Windows."); 17 | } 18 | 19 | #[cfg(windows)] 20 | mod windows { 21 | use many_cpus::{HardwareTracker, ProcessorSet}; 22 | use testing::{Job, ProcessorTimePct}; 23 | 24 | pub(crate) fn main() { 25 | // Restrict the current process to only use 50% of the system processor time. 26 | let _job = Job::builder() 27 | .with_max_processor_time_pct(ProcessorTimePct::new_static::<50>()) 28 | .build(); 29 | 30 | verify_limits_obeyed(); 31 | } 32 | 33 | #[expect( 34 | clippy::cast_precision_loss, 35 | reason = "all expected values are in safe range" 36 | )] 37 | fn verify_limits_obeyed() { 38 | // This is "100%". This count may also include processors that are not available to the 39 | // current process (e.g. when job objects already constrain our processors due to 40 | // executing in a container). 
41 | let system_processor_count = HardwareTracker::active_processor_count(); 42 | 43 | let resource_quota = HardwareTracker::resource_quota(); 44 | 45 | // This should say we are allowed to use 50% of the system processor time, which we 46 | // express as processor-seconds per second. 47 | // NB! This can never be higher than our process's max processor time. We rely on the 48 | // example not having process-specific limits that bring it lower than the 50% here. 49 | let max_processor_time = resource_quota.max_processor_time(); 50 | 51 | println!( 52 | "Current process is allowed to use {max_processor_time} seconds of processor time per second of real time." 53 | ); 54 | 55 | let expected_processor_time = system_processor_count as f64 * 0.5; 56 | 57 | assert!( 58 | processor_time_eq(max_processor_time, expected_processor_time), 59 | "The resource quota should be 50% of the available processor time. Expected: {expected_processor_time}, Actual: {max_processor_time}", 60 | ); 61 | 62 | // The default processor set obeys all the limits that apply to the current process. 63 | let quota_limited_processor_count = ProcessorSet::default().len(); 64 | 65 | println!( 66 | "The resource quota allows the current process to use {quota_limited_processor_count} out of a total of {system_processor_count} processors." 67 | ); 68 | 69 | let expected_limited_processor_count = (system_processor_count as f64 * 0.5).floor(); 70 | 71 | assert!( 72 | processor_time_eq( 73 | expected_limited_processor_count, 74 | quota_limited_processor_count as f64 75 | ), 76 | "The resource quota should limit the number of processors to half of the available processors, rounded down. Expected: {expected_limited_processor_count}, Actual: {quota_limited_processor_count}", 77 | ); 78 | } 79 | 80 | fn processor_time_eq(a: f64, b: f64) -> bool { 81 | // Floating point comparison tolerance. 
82 | // https://rust-lang.github.io/rust-clippy/master/index.html#float_cmp 83 | const CLOSE_ENOUGH: f64 = 0.01; 84 | 85 | let diff = (a - b).abs(); 86 | diff < CLOSE_ENOUGH 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/observe_processor.rs: -------------------------------------------------------------------------------- 1 | //! Observe the processor assigned to the entrypoint thread, displaying an update in the 2 | //! terminal once per second, looping forever. 3 | 4 | use std::{thread, time::Duration}; 5 | 6 | use many_cpus::{HardwareInfo, HardwareTracker}; 7 | 8 | fn main() { 9 | let max_processors = HardwareInfo::max_processor_count(); 10 | let max_memory_regions = HardwareInfo::max_memory_region_count(); 11 | println!( 12 | "This system can support up to {max_processors} processors in {max_memory_regions} memory regions" 13 | ); 14 | 15 | loop { 16 | let current_processor_id = HardwareTracker::current_processor_id(); 17 | let current_memory_region_id = HardwareTracker::current_memory_region_id(); 18 | 19 | println!( 20 | "Thread executing on processor {current_processor_id} in memory region {current_memory_region_id}" 21 | ); 22 | 23 | thread::sleep(Duration::from_secs(1)); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/spawn_on_all_processors.rs: -------------------------------------------------------------------------------- 1 | //! Spawns one thread on each processor in the default processor set. 2 | 3 | use many_cpus::ProcessorSet; 4 | 5 | fn main() { 6 | let threads = ProcessorSet::default().spawn_threads(|processor| { 7 | println!("Spawned thread on processor {}", processor.id()); 8 | 9 | // In a real service, you would start some work handler here, e.g. to read 10 | // and process messages from a channel or to spawn a web handler. 
11 | }); 12 | 13 | for thread in threads { 14 | thread.join().unwrap(); 15 | } 16 | 17 | println!("All threads have finished."); 18 | } 19 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/spawn_on_any_processors.rs: -------------------------------------------------------------------------------- 1 | //! Spawns one thread for each processor in the default processor set but allows the OS to decide 2 | //! which processor each thread runs on. This can be used to observe how the OS schedules threads 3 | //! across processors when not provided any constraints. 4 | //! 5 | //! Each thread will do a bit of work (10 seconds of spinning CPU) and then terminate. 6 | 7 | use std::time::Instant; 8 | 9 | use many_cpus::ProcessorSet; 10 | 11 | fn main() { 12 | // We spawn N threads, where N is the number of processors. 13 | // However, we do not pin them to any specific processor. 14 | // This means that the OS can schedule them however it likes. 15 | 16 | let processor_set = ProcessorSet::default(); 17 | 18 | let mut threads = Vec::with_capacity(processor_set.len()); 19 | 20 | for _ in 0..processor_set.len() { 21 | let thread = std::thread::spawn(move || { 22 | let start = Instant::now(); 23 | 24 | let mut x: u64 = 0; 25 | 26 | loop { 27 | for _ in 0..100_000 { 28 | x = x.wrapping_add(1); 29 | } 30 | 31 | // Every thread spins the CPU for 10 seconds. 
32 | if start.elapsed().as_secs() >= 10 { 33 | println!("Thread finished after {x} iterations"); 34 | break; 35 | } 36 | } 37 | }); 38 | 39 | threads.push(thread); 40 | } 41 | 42 | println!("Spawned {} threads", threads.len()); 43 | 44 | for thread in threads { 45 | thread.join().unwrap(); 46 | } 47 | 48 | println!("All threads have finished."); 49 | } 50 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/spawn_on_inherited_processors.rs: -------------------------------------------------------------------------------- 1 | //! Starts one thread on every processor in the system, respecting resource quotas and allowing the 2 | //! set of allowed processors to be inherited from the environment (based on user configuration). 3 | //! 4 | //! The set of processors used here can be adjusted via any suitable OS mechanisms. 5 | //! 6 | //! For example, to select only processors 0 and 1: 7 | //! Linux: `taskset 0x3 target/debug/examples/spawn_on_inherited_processors` 8 | //! Windows: `start /affinity 0x3 target/debug/examples/spawn_on_inherited_processors.exe` 9 | 10 | use std::{thread, time::Duration}; 11 | 12 | use many_cpus::ProcessorSet; 13 | 14 | fn main() { 15 | let inherited_processors = ProcessorSet::builder() 16 | // This causes soft limits on processor affinity to be respected. 17 | .where_available_for_current_thread() 18 | .take_all() 19 | .expect("found no processors usable by the current thread - impossible because the thread is currently running on one"); 20 | 21 | println!( 22 | "After applying soft limits, we are allowed to use {} processors.", 23 | inherited_processors.len() 24 | ); 25 | 26 | let threads = inherited_processors.spawn_threads(|processor| { 27 | println!("Spawned thread on processor {}", processor.id()); 28 | 29 | // In a real service, you would start some work handler here, e.g. to read 30 | // and process messages from a channel or to spawn a web handler.
31 | }); 32 | 33 | for thread in threads { 34 | thread.join().unwrap(); 35 | } 36 | 37 | println!("All threads have finished. Exiting in 10 seconds."); 38 | 39 | // Give some time to exit, as on Windows using "start" will create a new window that would 40 | // otherwise disappear instantly, making it hard to see what happened. 41 | thread::sleep(Duration::from_secs(10)); 42 | } 43 | -------------------------------------------------------------------------------- /crates/many_cpus/examples/spawn_on_selected_processors.rs: -------------------------------------------------------------------------------- 1 | //! Selects a pair of processors and spawns a thread on each of them. 2 | //! This demonstrates arbitrary processor selection logic. 3 | 4 | use std::num::NonZero; 5 | 6 | use many_cpus::ProcessorSet; 7 | 8 | const PROCESSOR_COUNT: NonZero<usize> = NonZero::new(2).unwrap(); 9 | 10 | fn main() { 11 | let selected_processors = ProcessorSet::builder() 12 | .same_memory_region() 13 | .performance_processors_only() 14 | .take(PROCESSOR_COUNT) 15 | .expect("could not find required number of processors that match the selection criteria"); 16 | 17 | let threads = selected_processors.spawn_threads(|processor| { 18 | println!("Spawned thread on processor {}", processor.id()); 19 | 20 | // In a real service, you would start some work handler here, e.g. to read 21 | // and process messages from a channel or to spawn a web handler. 22 | }); 23 | 24 | for thread in threads { 25 | thread.join().unwrap(); 26 | } 27 | 28 | println!("All threads have finished."); 29 | } 30 | -------------------------------------------------------------------------------- /crates/many_cpus/src/clients.rs: -------------------------------------------------------------------------------- 1 | //! Establishes a client-server pattern whereby logic that uses the hardware tracker can 2 | //! be replaced with a mock, breaking any hard dependencies for testing purposes.
3 | 4 | mod hw_tracker_client; 5 | mod hw_tracker_facade; 6 | 7 | pub(crate) use hw_tracker_client::*; 8 | pub(crate) use hw_tracker_facade::*; 9 | -------------------------------------------------------------------------------- /crates/many_cpus/src/clients/hw_tracker_client.rs: -------------------------------------------------------------------------------- 1 | use crate::{CURRENT_TRACKER, MemoryRegionId, ProcessorId}; 2 | 3 | #[cfg_attr(test, mockall::automock)] 4 | pub(crate) trait HardwareTrackerClient { 5 | fn update_pin_status( 6 | &self, 7 | processor_id: Option<ProcessorId>, 8 | memory_region_id: Option<MemoryRegionId>, 9 | ); 10 | } 11 | 12 | #[derive(Debug)] 13 | pub(crate) struct HardwareTrackerClientImpl; 14 | 15 | impl HardwareTrackerClient for HardwareTrackerClientImpl { 16 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 17 | fn update_pin_status( 18 | &self, 19 | processor_id: Option<ProcessorId>, 20 | memory_region_id: Option<MemoryRegionId>, 21 | ) { 22 | CURRENT_TRACKER.with_borrow_mut(|tracker| { 23 | tracker.update_pin_status(processor_id, memory_region_id); 24 | }); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /crates/many_cpus/src/clients/hw_tracker_facade.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use std::sync::Arc; 3 | 4 | use crate::{MemoryRegionId, ProcessorId}; 5 | 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl}; 7 | 8 | #[cfg(test)] 9 | use crate::MockHardwareTrackerClient; 10 | 11 | #[derive(Clone, Debug)] 12 | pub(crate) enum HardwareTrackerClientFacade { 13 | Real(&'static HardwareTrackerClientImpl), 14 | 15 | #[cfg(test)] 16 | Mock(Arc<MockHardwareTrackerClient>), 17 | } 18 | 19 | impl HardwareTrackerClientFacade { 20 | pub(crate) const fn real() -> Self { 21 | Self::Real(&HardwareTrackerClientImpl) 22 | } 23 | 24 | #[cfg(test)] 25 | pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self { 26 | Self::Mock(Arc::new(mock)) 27 | } 28
| 29 | #[cfg(test)] 30 | pub(crate) fn default_mock() -> Self { 31 | Self::Mock(Arc::new(MockHardwareTrackerClient::new())) 32 | } 33 | } 34 | 35 | impl HardwareTrackerClient for HardwareTrackerClientFacade { 36 | fn update_pin_status( 37 | &self, 38 | processor_id: Option<ProcessorId>, 39 | memory_region_id: Option<MemoryRegionId>, 40 | ) { 41 | match self { 42 | Self::Real(real) => { 43 | real.update_pin_status(processor_id, memory_region_id); 44 | } 45 | #[cfg(test)] 46 | Self::Mock(mock) => { 47 | mock.update_pin_status(processor_id, memory_region_id); 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /crates/many_cpus/src/hardware_info.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use crate::{ 4 | MemoryRegionId, ProcessorId, 5 | pal::{BUILD_TARGET_PLATFORM, Platform}, 6 | }; 7 | 8 | /// Reports non-changing information about the system hardware. 9 | /// 10 | /// To inspect information that may change over time, use [`HardwareTracker`][1]. 11 | /// 12 | /// Functions exposed by this type represent the system hardware and are not limited by the 13 | /// current system or process configuration. That is, this type will still count processors and 14 | /// memory regions that are currently inactive (e.g. some processors are physically disconnected) 15 | /// or are not available to this process (e.g. because of cgroups policy).
16 | /// 17 | /// # Example 18 | /// 19 | /// ``` 20 | /// use many_cpus::HardwareInfo; 21 | /// 22 | /// let max_processor_id = HardwareInfo::max_processor_id(); 23 | /// println!("The maximum processor ID is: {max_processor_id}"); 24 | /// ``` 25 | /// 26 | /// [1]: crate::HardwareTracker 27 | #[derive(Debug)] 28 | pub struct HardwareInfo { 29 | _no_ctor: PhantomData<()>, 30 | } 31 | 32 | impl HardwareInfo { 33 | /// Gets the maximum (inclusive) processor ID of any processor that could possibly 34 | /// be present on the system at any point in time. 35 | /// 36 | /// This includes processors that are not currently active and processors that are active 37 | /// but not available to the current process. 38 | #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic. 39 | #[inline] 40 | #[must_use] 41 | pub fn max_processor_id() -> ProcessorId { 42 | BUILD_TARGET_PLATFORM.max_processor_id() 43 | } 44 | 45 | /// Gets the maximum (inclusive) memory region ID of any memory region that could possibly 46 | /// be present on the system at any point in time. 47 | /// 48 | /// This includes memory regions that are not currently active and memory regions that 49 | /// are active but not available to the current process. 50 | #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic. 51 | #[inline] 52 | #[must_use] 53 | pub fn max_memory_region_id() -> MemoryRegionId { 54 | BUILD_TARGET_PLATFORM.max_memory_region_id() 55 | } 56 | 57 | /// Gets the maximum number of processors that could possibly be present on the system 58 | /// at any point in time. 59 | /// 60 | /// This includes processors that are not currently active and processors that are active 61 | /// but not available to the current process. 62 | #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic. 
63 | #[inline] 64 | #[must_use] 65 | pub fn max_processor_count() -> usize { 66 | (Self::max_processor_id() as usize) 67 | .checked_add(1) 68 | .expect("overflow when counting processors - this can only result from a critical error in the PAL") 69 | } 70 | 71 | /// Gets the maximum number of memory regions that could possibly be present on the system 72 | /// at any point in time. 73 | /// 74 | /// This includes memory regions that are not currently active and memory regions that 75 | /// are active but not available to the current process. 76 | #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic. 77 | #[inline] 78 | #[must_use] 79 | pub fn max_memory_region_count() -> usize { 80 | (Self::max_memory_region_id() as usize) 81 | .checked_add(1) 82 | .expect("overflow when counting memory regions - this can only result from a critical error in the PAL") 83 | } 84 | } 85 | 86 | #[cfg(test)] 87 | mod tests { 88 | use super::*; 89 | 90 | #[cfg(not(miri))] // Real platform is not supported under Miri. 91 | #[test] 92 | fn count_is_id_plus_one_real() { 93 | assert_eq!( 94 | HardwareInfo::max_processor_count(), 95 | HardwareInfo::max_processor_id() as usize + 1 96 | ); 97 | assert_eq!( 98 | HardwareInfo::max_memory_region_count(), 99 | HardwareInfo::max_memory_region_id() as usize + 1 100 | ); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal.rs: -------------------------------------------------------------------------------- 1 | //! Platform Abstraction Layer (PAL). This is private API, though `pub` in parts to allow 2 | //! benchmark code to bypass public API layers for more accurate benchmarking. 
3 | 4 | mod abstractions; 5 | pub(crate) use abstractions::*; 6 | 7 | mod facade; 8 | pub(crate) use facade::*; 9 | 10 | #[cfg(target_os = "linux")] 11 | mod linux; 12 | #[cfg(target_os = "linux")] 13 | pub(crate) use linux::*; 14 | 15 | #[cfg(windows)] 16 | mod windows; 17 | #[cfg(windows)] 18 | pub use windows::*; 19 | 20 | #[cfg(test)] 21 | mod mocks; 22 | #[cfg(test)] 23 | pub(crate) use mocks::*; 24 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/abstractions.rs: -------------------------------------------------------------------------------- 1 | mod platform; 2 | mod processor; 3 | 4 | pub(crate) use platform::*; 5 | pub(crate) use processor::*; 6 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/abstractions/platform.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use nonempty::NonEmpty; 4 | 5 | use crate::{MemoryRegionId, ProcessorId, pal::ProcessorFacade}; 6 | 7 | pub(crate) trait Platform: Debug + Send + Sync + 'static { 8 | /// Returns all processors available to the current process. 9 | /// 10 | /// The returned set will exclude processors that are not active or are forbidden from 11 | /// being used due to resource constraints enforced by the operating system. 12 | /// 13 | /// The returned collection of processors is sorted by the processor ID, ascending. 14 | #[must_use] 15 | fn get_all_processors(&self) -> NonEmpty<ProcessorFacade>; 16 | 17 | fn pin_current_thread_to<P>(&self, processors: &NonEmpty<P>) 18 | where 19 | P: AsRef<ProcessorFacade>; 20 | 21 | /// Gets the ID of the processor currently executing this thread. 22 | #[must_use] 23 | fn current_processor_id(&self) -> ProcessorId; 24 | 25 | /// Gets the IDs of all processors that the current thread is allowed to execute on. 26 | /// 27 | /// Note: this may be a superset of `get_all_processors()` because it may include processors 28 | /// that our process is in fact forbidden to use due to resource constraints enforced by 29 | /// the operating system. The filtering to only see what we are allowed to use is performed 30 | /// by `get_all_processors()` but not by this function. 31 | #[must_use] 32 | fn current_thread_processors(&self) -> NonEmpty<ProcessorId>; 33 | 34 | /// Gets the maximum (inclusive) processor ID of any processor that could possibly 35 | /// be present on the system (including processors that are not currently active). 36 | /// 37 | /// The value also covers processors that are not available to the current process 38 | /// due to resource constraints enforced by the operating system. 39 | /// 40 | /// This value is a constant and will not change over time. 41 | #[must_use] 42 | fn max_processor_id(&self) -> ProcessorId; 43 | 44 | /// Gets the maximum (inclusive) memory region ID of any memory region that could possibly 45 | /// be present on the system (including memory regions that are not currently active). 46 | /// 47 | /// The value also covers memory regions that are not available to the current process 48 | /// due to resource constraints enforced by the operating system. 49 | /// 50 | /// This value is a constant and will not change over time. 51 | #[must_use] 52 | fn max_memory_region_id(&self) -> MemoryRegionId; 53 | 54 | /// Gets the maximum processor time that the process is allowed to use per second of real time, 55 | /// in seconds of processor time. This must be a positive number and will never be greater than 56 | /// the number of processors available to the current process.
57 | #[must_use] 58 | fn max_processor_time(&self) -> f64; 59 | 60 | /// Gets the total number of active processors on the system, including ones that are not 61 | /// necessarily available to the current process (if any such are known). 62 | /// 63 | /// We generally avoid relying on system-scoped data like this but because some platform APIs 64 | /// speak in terms of system-scoped data, we occasionally need to access such values. 65 | #[must_use] 66 | fn active_processor_count(&self) -> usize; 67 | } 68 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/abstractions/processor.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt::{Debug, Display}, 3 | hash::Hash, 4 | }; 5 | 6 | use crate::{EfficiencyClass, MemoryRegionId, ProcessorId}; 7 | 8 | pub(crate) trait AbstractProcessor: 9 | Clone + Copy + Debug + Display + Eq + Hash + PartialEq + Send 10 | { 11 | fn id(&self) -> ProcessorId; 12 | fn memory_region_id(&self) -> MemoryRegionId; 13 | fn efficiency_class(&self) -> EfficiencyClass; 14 | } 15 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/facade.rs: -------------------------------------------------------------------------------- 1 | mod platform; 2 | mod processor; 3 | 4 | pub(crate) use platform::*; 5 | pub(crate) use processor::*; 6 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/facade/platform.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | #[cfg(test)] 4 | use std::sync::Arc; 5 | 6 | use crate::pal::{BUILD_TARGET_PLATFORM, BuildTargetPlatform, Platform, ProcessorFacade}; 7 | 8 | #[cfg(test)] 9 | use crate::pal::MockPlatform; 10 | 11 | #[derive(Clone)] 12 | pub(crate) enum PlatformFacade { 13 | Real(&'static BuildTargetPlatform), 14 | 15 | #[cfg(test)] 16 | 
Mock(Arc), 17 | } 18 | 19 | impl PlatformFacade { 20 | pub(crate) fn real() -> Self { 21 | Self::Real(&BUILD_TARGET_PLATFORM) 22 | } 23 | 24 | #[cfg(test)] 25 | pub(crate) fn from_mock(mock: MockPlatform) -> Self { 26 | Self::Mock(Arc::new(mock)) 27 | } 28 | } 29 | 30 | impl Platform for PlatformFacade { 31 | fn get_all_processors(&self) -> nonempty::NonEmpty { 32 | match self { 33 | Self::Real(p) => p.get_all_processors(), 34 | #[cfg(test)] 35 | Self::Mock(p) => p.get_all_processors(), 36 | } 37 | } 38 | 39 | fn pin_current_thread_to

(&self, processors: &nonempty::NonEmpty

) 40 | where 41 | P: AsRef, 42 | { 43 | match self { 44 | Self::Real(p) => p.pin_current_thread_to(processors), 45 | #[cfg(test)] 46 | Self::Mock(p) => p.pin_current_thread_to(processors), 47 | } 48 | } 49 | 50 | fn current_processor_id(&self) -> crate::ProcessorId { 51 | match self { 52 | Self::Real(p) => p.current_processor_id(), 53 | #[cfg(test)] 54 | Self::Mock(p) => p.current_processor_id(), 55 | } 56 | } 57 | 58 | fn max_processor_id(&self) -> crate::ProcessorId { 59 | match self { 60 | Self::Real(p) => p.max_processor_id(), 61 | #[cfg(test)] 62 | Self::Mock(p) => p.max_processor_id(), 63 | } 64 | } 65 | 66 | fn max_memory_region_id(&self) -> crate::MemoryRegionId { 67 | match self { 68 | Self::Real(p) => p.max_memory_region_id(), 69 | #[cfg(test)] 70 | Self::Mock(p) => p.max_memory_region_id(), 71 | } 72 | } 73 | 74 | fn current_thread_processors(&self) -> nonempty::NonEmpty { 75 | match self { 76 | Self::Real(p) => p.current_thread_processors(), 77 | #[cfg(test)] 78 | Self::Mock(p) => p.current_thread_processors(), 79 | } 80 | } 81 | 82 | fn max_processor_time(&self) -> f64 { 83 | match self { 84 | Self::Real(p) => p.max_processor_time(), 85 | #[cfg(test)] 86 | Self::Mock(p) => p.max_processor_time(), 87 | } 88 | } 89 | 90 | fn active_processor_count(&self) -> usize { 91 | match self { 92 | Self::Real(p) => p.active_processor_count(), 93 | #[cfg(test)] 94 | Self::Mock(p) => p.active_processor_count(), 95 | } 96 | } 97 | } 98 | 99 | impl From<&'static BuildTargetPlatform> for PlatformFacade { 100 | fn from(p: &'static BuildTargetPlatform) -> Self { 101 | Self::Real(p) 102 | } 103 | } 104 | 105 | #[cfg(test)] 106 | impl From for PlatformFacade { 107 | fn from(p: MockPlatform) -> Self { 108 | Self::Mock(Arc::new(p)) 109 | } 110 | } 111 | 112 | impl Debug for PlatformFacade { 113 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 114 | match self { 115 | Self::Real(inner) => inner.fmt(f), 116 | #[cfg(test)] 117 | Self::Mock(inner) => 
inner.fmt(f), 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/facade/processor.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use derive_more::derive::Display; 4 | 5 | use crate::pal::{AbstractProcessor, ProcessorImpl}; 6 | 7 | #[cfg(test)] 8 | use crate::pal::FakeProcessor; 9 | 10 | #[derive(Clone, Copy, Display, Eq, Hash, PartialEq)] 11 | pub(crate) enum ProcessorFacade { 12 | Real(ProcessorImpl), 13 | 14 | #[cfg(test)] 15 | Fake(FakeProcessor), 16 | } 17 | 18 | impl ProcessorFacade { 19 | pub(crate) fn as_real(&self) -> &ProcessorImpl { 20 | match self { 21 | Self::Real(p) => p, 22 | #[cfg(test)] 23 | _ => panic!("attempted to dereference facade into wrong type"), 24 | } 25 | } 26 | } 27 | 28 | impl AsRef for ProcessorFacade { 29 | fn as_ref(&self) -> &Self { 30 | self 31 | } 32 | } 33 | 34 | impl AbstractProcessor for ProcessorFacade { 35 | fn id(&self) -> crate::ProcessorId { 36 | match self { 37 | Self::Real(p) => p.id(), 38 | #[cfg(test)] 39 | Self::Fake(p) => p.id(), 40 | } 41 | } 42 | 43 | fn memory_region_id(&self) -> crate::MemoryRegionId { 44 | match self { 45 | Self::Real(p) => p.memory_region_id(), 46 | #[cfg(test)] 47 | Self::Fake(p) => p.memory_region_id(), 48 | } 49 | } 50 | 51 | fn efficiency_class(&self) -> crate::EfficiencyClass { 52 | match self { 53 | Self::Real(p) => p.efficiency_class(), 54 | #[cfg(test)] 55 | Self::Fake(p) => p.efficiency_class(), 56 | } 57 | } 58 | } 59 | 60 | impl From for ProcessorFacade { 61 | fn from(p: ProcessorImpl) -> Self { 62 | Self::Real(p) 63 | } 64 | } 65 | 66 | #[cfg(test)] 67 | impl From for ProcessorFacade { 68 | fn from(p: FakeProcessor) -> Self { 69 | Self::Fake(p) 70 | } 71 | } 72 | 73 | impl Debug for ProcessorFacade { 74 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 75 | match self { 76 | Self::Real(inner) => 
inner.fmt(f), 77 | #[cfg(test)] 78 | Self::Fake(inner) => inner.fmt(f), 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux.rs: -------------------------------------------------------------------------------- 1 | mod bindings; 2 | mod filesystem; 3 | mod platform; 4 | mod processor; 5 | 6 | use bindings::*; 7 | use filesystem::*; 8 | pub(crate) use platform::*; 9 | pub(crate) use processor::*; 10 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/bindings.rs: -------------------------------------------------------------------------------- 1 | mod abstractions; 2 | mod facade; 3 | mod real; 4 | 5 | pub(crate) use abstractions::*; 6 | pub(crate) use facade::*; 7 | pub(crate) use real::*; 8 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/bindings/abstractions.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, io}; 2 | 3 | use libc::cpu_set_t; 4 | 5 | /// Bindings for FFI calls into external libraries (either provided by operating system or not). 6 | /// 7 | /// All PAL FFI calls must go through this trait, enabling them to be mocked. 
8 | #[cfg_attr(test, mockall::automock)] 9 | pub(crate) trait Bindings: Debug + Send + Sync + 'static { 10 | // sched_setaffinity() for the current thread 11 | fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error>; 12 | 13 | // sched_getaffinity() for the current thread 14 | fn sched_getaffinity_current(&self) -> Result; 15 | 16 | fn sched_getcpu(&self) -> i32; 17 | } 18 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/bindings/facade.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, io}; 2 | 3 | #[cfg(test)] 4 | use std::sync::Arc; 5 | 6 | use libc::cpu_set_t; 7 | 8 | use crate::pal::linux::{Bindings, BuildTargetBindings}; 9 | 10 | #[cfg(test)] 11 | use crate::pal::linux::MockBindings; 12 | 13 | /// Enum to hide the real/mock choice behind a single wrapper type. 14 | #[derive(Clone)] 15 | pub(crate) enum BindingsFacade { 16 | Real(&'static BuildTargetBindings), 17 | 18 | #[cfg(test)] 19 | Mock(Arc), 20 | } 21 | 22 | impl BindingsFacade { 23 | pub(crate) const fn real() -> Self { 24 | Self::Real(&BuildTargetBindings) 25 | } 26 | 27 | #[cfg(test)] 28 | pub(crate) fn from_mock(mock: MockBindings) -> Self { 29 | Self::Mock(Arc::new(mock)) 30 | } 31 | } 32 | 33 | impl Bindings for BindingsFacade { 34 | fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error> { 35 | match self { 36 | Self::Real(bindings) => bindings.sched_setaffinity_current(cpuset), 37 | #[cfg(test)] 38 | Self::Mock(mock) => mock.sched_setaffinity_current(cpuset), 39 | } 40 | } 41 | 42 | fn sched_getcpu(&self) -> i32 { 43 | match self { 44 | Self::Real(bindings) => bindings.sched_getcpu(), 45 | #[cfg(test)] 46 | Self::Mock(mock) => mock.sched_getcpu(), 47 | } 48 | } 49 | 50 | fn sched_getaffinity_current(&self) -> Result { 51 | match self { 52 | Self::Real(bindings) => bindings.sched_getaffinity_current(), 53 | #[cfg(test)] 54 
| Self::Mock(mock) => mock.sched_getaffinity_current(), 55 | } 56 | } 57 | } 58 | 59 | impl Debug for BindingsFacade { 60 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 61 | match self { 62 | Self::Real(inner) => inner.fmt(f), 63 | #[cfg(test)] 64 | Self::Mock(inner) => inner.fmt(f), 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/bindings/real.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, io, mem}; 2 | 3 | use libc::cpu_set_t; 4 | 5 | use crate::pal::linux::Bindings; 6 | 7 | /// FFI bindings that target the real operating system that the build is targeting. 8 | /// 9 | /// You would only use different bindings in PAL unit tests that need to use mock bindings. 10 | /// Even then, whenever possible, unit tests should use real bindings for maximum realism. 11 | #[derive(Debug, Default)] 12 | pub(crate) struct BuildTargetBindings; 13 | 14 | impl Bindings for BuildTargetBindings { 15 | fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error> { 16 | // 0 means current thread. 17 | // SAFETY: No safety requirements beyond passing valid arguments. 18 | let result = unsafe { libc::sched_setaffinity(0, size_of::(), cpuset) }; 19 | 20 | if result == 0 { 21 | Ok(()) 22 | } else { 23 | Err(io::Error::last_os_error()) 24 | } 25 | } 26 | 27 | fn sched_getcpu(&self) -> i32 { 28 | // SAFETY: No safety requirements. 29 | unsafe { libc::sched_getcpu() } 30 | } 31 | 32 | fn sched_getaffinity_current(&self) -> Result { 33 | // SAFETY: All zeroes is a valid cpu_set_t. 34 | let mut cpuset: cpu_set_t = unsafe { mem::zeroed() }; 35 | 36 | // 0 means current thread. 37 | // SAFETY: No safety requirements beyond passing valid arguments. 
38 | let result = unsafe { libc::sched_getaffinity(0, size_of::(), &raw mut cpuset) }; 39 | 40 | if result == 0 { 41 | Ok(cpuset) 42 | } else { 43 | Err(io::Error::last_os_error()) 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/filesystem.rs: -------------------------------------------------------------------------------- 1 | mod abstractions; 2 | mod facade; 3 | mod real; 4 | 5 | pub(crate) use abstractions::*; 6 | pub(crate) use facade::*; 7 | pub(crate) use real::*; 8 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/filesystem/abstractions.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | /// Linux has this funny notion of exposing various OS APIs as a virtual filesystem. This trait 4 | /// abstracts this virtual filesystem to allow it to be mocked. 5 | /// 6 | /// The scope of this trait is limited to only the virtual filesystem exposed by the OS. We do not 7 | /// expect to do "real" file I/O in this layer. All I/O is synchronous and blocking because we 8 | /// expect it to hit a fast path in the OS, given the data is never on a real storage device. 9 | #[cfg_attr(test, mockall::automock)] 10 | pub(crate) trait Filesystem: Debug + Send + Sync + 'static { 11 | /// Get the contents of the /proc/cpuinfo file. 12 | /// 13 | /// NB! This file also includes offline processors. To check if a processor is online, you must 14 | /// look in /sys/devices/system/cpu/cpu*/online (which has either 0 or 1 as content). 15 | /// 16 | /// This is a plaintext file with "key : value" pairs, blocks separated by empty lines. 17 | fn get_cpuinfo_contents(&self) -> String; 18 | 19 | /// Get the contents of the /sys/devices/system/node/possible file or `None` if it does 20 | /// not exist. 
21 | /// 22 | /// This lists all NUMA nodes that could possibly exist in the system, even those that are 23 | /// offline. 24 | /// 25 | /// This is a cpulist format file ("0,1,2-4,5-10:2" style list). 26 | fn get_numa_node_possible_contents(&self) -> Option; 27 | 28 | /// Get the contents of the /sys/devices/system/node/node{}/cpulist file. 29 | /// 30 | /// This is a cpulist format file ("0,1,2-4,5-10:2" style list). 31 | fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String; 32 | 33 | /// Gets the contents of the /sys/devices/system/cpu/cpu{}/online file. 34 | /// 35 | /// This is a single line file with either 0 or 1 as content (+ newline). 36 | /// This file may be absent on some Linux flavors, in which case we assume every CPU is online. 37 | fn get_cpu_online_contents(&self, cpu_index: u32) -> Option; 38 | 39 | /// Gets the contents of the /proc/self/status file for the current process. 40 | /// 41 | /// This is a plaintext file with "key: value" pairs. 42 | fn get_proc_self_status_contents(&self) -> String; 43 | 44 | /// Gets the contents of the /proc/self/cgroup file for the current process. 
45 | fn get_proc_self_cgroup(&self) -> Option; 46 | 47 | /// Contents of `/sys/fs/cgroup/cpu/{name}/cpu.cfs_quota_us` 48 | fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option; 49 | 50 | /// Contents of `/sys/fs/cgroup/cpu/{name}/cpu.cfs_period_us` 51 | fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option; 52 | 53 | /// Contents of `/sys/fs/cgroup/{name}/cpu.max` 54 | fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option; 55 | } 56 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/filesystem/facade.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | #[cfg(test)] 4 | use std::sync::Arc; 5 | 6 | use crate::pal::linux::{BuildTargetFilesystem, Filesystem}; 7 | 8 | #[cfg(test)] 9 | use crate::pal::linux::MockFilesystem; 10 | 11 | /// Enum to hide the different filesystem implementations behind a single wrapper type. 12 | #[derive(Clone)] 13 | pub(crate) enum FilesystemFacade { 14 | Real(&'static BuildTargetFilesystem), 15 | 16 | #[cfg(test)] 17 | Mock(Arc), 18 | } 19 | 20 | impl FilesystemFacade { 21 | pub(crate) const fn real() -> Self { 22 | Self::Real(&BuildTargetFilesystem) 23 | } 24 | 25 | #[cfg(test)] 26 | pub(crate) fn from_mock(mock: MockFilesystem) -> Self { 27 | Self::Mock(Arc::new(mock)) 28 | } 29 | } 30 | 31 | impl Filesystem for FilesystemFacade { 32 | fn get_cpuinfo_contents(&self) -> String { 33 | match self { 34 | Self::Real(filesystem) => filesystem.get_cpuinfo_contents(), 35 | #[cfg(test)] 36 | Self::Mock(mock) => mock.get_cpuinfo_contents(), 37 | } 38 | } 39 | 40 | fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String { 41 | match self { 42 | Self::Real(filesystem) => filesystem.get_numa_node_cpulist_contents(node_index), 43 | #[cfg(test)] 44 | Self::Mock(mock) => mock.get_numa_node_cpulist_contents(node_index), 45 | } 46 | } 47 | 48 | fn get_cpu_online_contents(&self, 
cpu_index: u32) -> Option { 49 | match self { 50 | Self::Real(filesystem) => filesystem.get_cpu_online_contents(cpu_index), 51 | #[cfg(test)] 52 | Self::Mock(mock) => mock.get_cpu_online_contents(cpu_index), 53 | } 54 | } 55 | 56 | fn get_numa_node_possible_contents(&self) -> Option { 57 | match self { 58 | Self::Real(filesystem) => filesystem.get_numa_node_possible_contents(), 59 | #[cfg(test)] 60 | Self::Mock(mock) => mock.get_numa_node_possible_contents(), 61 | } 62 | } 63 | 64 | fn get_proc_self_status_contents(&self) -> String { 65 | match self { 66 | Self::Real(filesystem) => filesystem.get_proc_self_status_contents(), 67 | #[cfg(test)] 68 | Self::Mock(mock) => mock.get_proc_self_status_contents(), 69 | } 70 | } 71 | 72 | fn get_proc_self_cgroup(&self) -> Option { 73 | match self { 74 | Self::Real(filesystem) => filesystem.get_proc_self_cgroup(), 75 | #[cfg(test)] 76 | Self::Mock(mock) => mock.get_proc_self_cgroup(), 77 | } 78 | } 79 | 80 | fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option { 81 | match self { 82 | Self::Real(filesystem) => filesystem.get_v1_cgroup_cpu_quota(cgroup_name), 83 | #[cfg(test)] 84 | Self::Mock(mock) => mock.get_v1_cgroup_cpu_quota(cgroup_name), 85 | } 86 | } 87 | 88 | fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option { 89 | match self { 90 | Self::Real(filesystem) => filesystem.get_v1_cgroup_cpu_period(cgroup_name), 91 | #[cfg(test)] 92 | Self::Mock(mock) => mock.get_v1_cgroup_cpu_period(cgroup_name), 93 | } 94 | } 95 | 96 | fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option { 97 | match self { 98 | Self::Real(filesystem) => filesystem.get_v2_cgroup_cpu_quota_and_period(cgroup_name), 99 | #[cfg(test)] 100 | Self::Mock(mock) => mock.get_v2_cgroup_cpu_quota_and_period(cgroup_name), 101 | } 102 | } 103 | } 104 | 105 | impl Debug for FilesystemFacade { 106 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 107 | match self { 108 | Self::Real(inner) => inner.fmt(f), 
109 | #[cfg(test)] 110 | Self::Mock(inner) => inner.fmt(f), 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/filesystem/real.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, fs}; 2 | 3 | use crate::pal::linux::Filesystem; 4 | 5 | /// The virtual filesystem for the real operating system that the build is targeting. 6 | /// 7 | /// You would only use different filesystems in PAL unit tests that need to use a mock filesystem. 8 | /// Even then, whenever possible, unit tests should use the real filesystem for maximum realism. 9 | #[derive(Debug, Default)] 10 | pub(crate) struct BuildTargetFilesystem; 11 | 12 | impl Filesystem for BuildTargetFilesystem { 13 | fn get_cpuinfo_contents(&self) -> String { 14 | fs::read_to_string("/proc/cpuinfo") 15 | .expect("failed to read /proc/cpuinfo - cannot continue execution") 16 | } 17 | 18 | fn get_numa_node_possible_contents(&self) -> Option { 19 | fs::read_to_string("/sys/devices/system/node/possible").ok() 20 | } 21 | 22 | fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String { 23 | fs::read_to_string(format!("/sys/devices/system/node/node{node_index}/cpulist",)) 24 | .expect("failed to read NUMA node cpulist - cannot continue execution") 25 | } 26 | 27 | fn get_cpu_online_contents(&self, cpu_index: u32) -> Option { 28 | fs::read_to_string(format!("/sys/devices/system/cpu/cpu{cpu_index}/online")).ok() 29 | } 30 | 31 | fn get_proc_self_status_contents(&self) -> String { 32 | fs::read_to_string("/proc/self/status") 33 | .expect("failed to read /proc/self/status - cannot continue execution") 34 | } 35 | 36 | fn get_proc_self_cgroup(&self) -> Option { 37 | fs::read_to_string("/proc/self/cgroup").ok() 38 | } 39 | 40 | fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option { 41 | 
fs::read_to_string(format!("/sys/fs/cgroup/cpu/{cgroup_name}/cpu.cfs_quota_us")).ok() 42 | } 43 | 44 | fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option { 45 | fs::read_to_string(format!( 46 | "/sys/fs/cgroup/cpu/{cgroup_name}/cpu.cfs_period_us" 47 | )) 48 | .ok() 49 | } 50 | 51 | fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option { 52 | fs::read_to_string(format!("/sys/fs/cgroup/{cgroup_name}/cpu.max")).ok() 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/linux/processor.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use crate::{EfficiencyClass, MemoryRegionId, ProcessorId, pal::AbstractProcessor}; 4 | 5 | /// A processor present on the system and available to the current process. 6 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 7 | pub(crate) struct ProcessorImpl { 8 | pub(super) id: ProcessorId, 9 | pub(super) memory_region_id: MemoryRegionId, 10 | pub(super) efficiency_class: EfficiencyClass, 11 | 12 | pub(super) is_active: bool, 13 | } 14 | 15 | impl Display for ProcessorImpl { 16 | #[cfg_attr(test, mutants::skip)] // There is no API contract to test here. 
17 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 18 | write!(f, "processor {} [node {}]", self.id, self.memory_region_id) 19 | } 20 | } 21 | 22 | impl AbstractProcessor for ProcessorImpl { 23 | fn id(&self) -> ProcessorId { 24 | self.id 25 | } 26 | 27 | fn memory_region_id(&self) -> MemoryRegionId { 28 | self.memory_region_id 29 | } 30 | 31 | fn efficiency_class(&self) -> EfficiencyClass { 32 | self.efficiency_class 33 | } 34 | } 35 | 36 | impl PartialOrd for ProcessorImpl { 37 | fn partial_cmp(&self, other: &Self) -> Option { 38 | Some(self.cmp(other)) 39 | } 40 | } 41 | 42 | impl Ord for ProcessorImpl { 43 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 44 | self.id.cmp(&other.id) 45 | } 46 | } 47 | 48 | impl AsRef for ProcessorImpl { 49 | fn as_ref(&self) -> &Self { 50 | self 51 | } 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use super::*; 57 | 58 | #[test] 59 | fn smoke_test() { 60 | let processor = ProcessorImpl { 61 | id: 2, 62 | memory_region_id: 3, 63 | efficiency_class: EfficiencyClass::Performance, 64 | is_active: true, 65 | }; 66 | 67 | assert_eq!(processor.id(), 2); 68 | assert_eq!(processor.memory_region_id(), 3); 69 | assert_eq!(processor.efficiency_class(), EfficiencyClass::Performance); 70 | 71 | let processor2 = ProcessorImpl { 72 | id: 2, 73 | memory_region_id: 3, 74 | efficiency_class: EfficiencyClass::Performance, 75 | is_active: true, 76 | }; 77 | 78 | assert_eq!(processor, processor2); 79 | 80 | let processor3 = ProcessorImpl { 81 | id: 4, 82 | memory_region_id: 3, 83 | efficiency_class: EfficiencyClass::Performance, 84 | is_active: true, 85 | }; 86 | 87 | assert_ne!(processor, processor3); 88 | assert!(processor < processor3); 89 | assert!(processor3 > processor); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/mocks.rs: -------------------------------------------------------------------------------- 1 | 
#![expect(clippy::same_name_method, reason = "mock magic")] 2 | 3 | use derive_more::derive::Display; 4 | use mockall::mock; 5 | use nonempty::NonEmpty; 6 | 7 | use crate::{ 8 | EfficiencyClass, MemoryRegionId, ProcessorId, 9 | pal::{AbstractProcessor, Platform, ProcessorFacade}, 10 | }; 11 | 12 | #[derive(Clone, Copy, Debug, Display, Eq, Hash, PartialEq)] 13 | #[display("FakeProcessor({index} in node {memory_region}, {efficiency_class:?})")] 14 | pub(crate) struct FakeProcessor { 15 | pub(crate) index: ProcessorId, 16 | pub(crate) memory_region: MemoryRegionId, 17 | pub(crate) efficiency_class: EfficiencyClass, 18 | } 19 | 20 | impl FakeProcessor { 21 | pub(crate) fn with_index(index: ProcessorId) -> Self { 22 | Self { 23 | index, 24 | memory_region: 0, 25 | efficiency_class: EfficiencyClass::Performance, 26 | } 27 | } 28 | } 29 | 30 | impl AbstractProcessor for FakeProcessor { 31 | fn id(&self) -> ProcessorId { 32 | self.index 33 | } 34 | 35 | fn memory_region_id(&self) -> MemoryRegionId { 36 | self.memory_region 37 | } 38 | 39 | fn efficiency_class(&self) -> EfficiencyClass { 40 | self.efficiency_class 41 | } 42 | } 43 | 44 | // Mockall is not able to express all methods on the trait (due to generics deficiency), so we mock 45 | // similar-enough methods that it does know how to mock and simply call these from a manual 46 | // implementation of the trait that translates between the two forms. 47 | mock! 
{ 48 | #[derive(Debug)] 49 | pub Platform { 50 | pub fn get_all_processors_core(&self) -> NonEmpty; 51 | pub fn pin_current_thread_to_core(&self, processors: Vec); 52 | pub fn current_processor_id(&self) -> ProcessorId; 53 | pub fn max_processor_id(&self) -> ProcessorId; 54 | pub fn max_memory_region_id(&self) -> MemoryRegionId; 55 | pub fn current_thread_processors(&self) -> NonEmpty; 56 | pub fn max_processor_time(&self) -> f64; 57 | pub fn active_processor_count(&self) -> usize; 58 | } 59 | } 60 | 61 | impl Platform for MockPlatform { 62 | fn get_all_processors(&self) -> NonEmpty { 63 | self.get_all_processors_core() 64 | } 65 | 66 | fn pin_current_thread_to

(&self, processors: &NonEmpty

) 67 | where 68 | P: AsRef, 69 | { 70 | let processors = processors.iter().map(|p| *p.as_ref()).collect(); 71 | self.pin_current_thread_to_core(processors); 72 | } 73 | 74 | fn current_processor_id(&self) -> ProcessorId { 75 | self.current_processor_id() 76 | } 77 | 78 | fn max_processor_id(&self) -> ProcessorId { 79 | self.max_processor_id() 80 | } 81 | 82 | fn max_memory_region_id(&self) -> MemoryRegionId { 83 | self.max_memory_region_id() 84 | } 85 | 86 | fn current_thread_processors(&self) -> NonEmpty { 87 | self.current_thread_processors() 88 | } 89 | 90 | fn max_processor_time(&self) -> f64 { 91 | self.max_processor_time() 92 | } 93 | 94 | fn active_processor_count(&self) -> usize { 95 | self.active_processor_count() 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/windows.rs: -------------------------------------------------------------------------------- 1 | mod bindings; 2 | mod group_mask; 3 | mod platform; 4 | mod processor; 5 | 6 | use bindings::*; 7 | pub(crate) use group_mask::*; 8 | pub use platform::*; 9 | pub(crate) use processor::*; 10 | 11 | type ProcessorGroupIndex = u16; 12 | type ProcessorIndexInGroup = u8; 13 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/windows/bindings.rs: -------------------------------------------------------------------------------- 1 | mod abstractions; 2 | mod facade; 3 | mod real; 4 | 5 | pub(crate) use abstractions::*; 6 | pub(crate) use facade::*; 7 | pub(crate) use real::*; 8 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/windows/bindings/abstractions.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use windows::{ 4 | Win32::System::{ 5 | JobObjects::JOBOBJECT_CPU_RATE_CONTROL_INFORMATION, 6 | Kernel::PROCESSOR_NUMBER, 7 | SystemInformation::{ 8 | 
GROUP_AFFINITY, LOGICAL_PROCESSOR_RELATIONSHIP, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, 9 | }, 10 | }, 11 | core::Result, 12 | }; 13 | 14 | /// Bindings for FFI calls into external libraries (either provided by operating system or not). 15 | /// 16 | /// All PAL FFI calls must go through this trait, enabling them to be mocked. 17 | #[cfg_attr(test, mockall::automock)] 18 | pub(crate) trait Bindings: Debug + Send + Sync + 'static { 19 | fn get_active_processor_count(&self, group_number: u16) -> u32; 20 | fn get_maximum_processor_count(&self, group_number: u16) -> u32; 21 | 22 | fn get_maximum_processor_group_count(&self) -> u16; 23 | 24 | fn get_current_processor_number_ex(&self) -> PROCESSOR_NUMBER; 25 | 26 | fn get_numa_highest_node_number(&self) -> u32; 27 | 28 | fn get_current_process_default_cpu_set_masks(&self) -> Vec; 29 | fn get_current_thread_cpu_set_masks(&self) -> Vec; 30 | fn set_current_thread_cpu_set_masks(&self, masks: &[GROUP_AFFINITY]); 31 | 32 | unsafe fn get_logical_processor_information_ex( 33 | &self, 34 | relationship_type: LOGICAL_PROCESSOR_RELATIONSHIP, 35 | buffer: Option<*mut SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>, 36 | returned_length: *mut u32, 37 | ) -> Result<()>; 38 | 39 | // JobObjectGroupInformationEx; may return empty list if not affinitized. 
40 | fn get_current_job_cpu_set_masks(&self) -> Vec; 41 | 42 | fn get_current_job_cpu_rate_control(&self) -> Option; 43 | 44 | fn get_current_thread_legacy_group_affinity(&self) -> GROUP_AFFINITY; 45 | } 46 | -------------------------------------------------------------------------------- /crates/many_cpus/src/pal/windows/processor.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use crate::{ 4 | EfficiencyClass, MemoryRegionId, ProcessorId, 5 | pal::{ 6 | AbstractProcessor, 7 | windows::{ProcessorGroupIndex, ProcessorIndexInGroup}, 8 | }, 9 | }; 10 | 11 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 12 | pub(crate) struct ProcessorImpl { 13 | pub(super) group_index: ProcessorGroupIndex, 14 | pub(super) index_in_group: ProcessorIndexInGroup, 15 | 16 | // Cumulative index when counting across all groups. 17 | pub(super) id: ProcessorId, 18 | 19 | pub(super) memory_region_id: MemoryRegionId, 20 | 21 | pub(super) efficiency_class: EfficiencyClass, 22 | } 23 | 24 | impl ProcessorImpl { 25 | pub(super) fn new( 26 | group_index: ProcessorGroupIndex, 27 | index_in_group: ProcessorIndexInGroup, 28 | id: ProcessorId, 29 | memory_region_id: MemoryRegionId, 30 | efficiency_class: EfficiencyClass, 31 | ) -> Self { 32 | Self { 33 | group_index, 34 | index_in_group, 35 | id, 36 | memory_region_id, 37 | efficiency_class, 38 | } 39 | } 40 | } 41 | 42 | impl Display for ProcessorImpl { 43 | #[cfg_attr(test, mutants::skip)] // There is no API contract to test here. 
44 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 45 | write!( 46 | f, 47 | "processor {} [{}.{}]", 48 | self.id, self.group_index, self.index_in_group 49 | ) 50 | } 51 | } 52 | 53 | impl AbstractProcessor for ProcessorImpl { 54 | fn id(&self) -> ProcessorId { 55 | self.id 56 | } 57 | 58 | fn memory_region_id(&self) -> MemoryRegionId { 59 | self.memory_region_id 60 | } 61 | 62 | fn efficiency_class(&self) -> EfficiencyClass { 63 | self.efficiency_class 64 | } 65 | } 66 | 67 | impl AsRef for ProcessorImpl { 68 | fn as_ref(&self) -> &Self { 69 | self 70 | } 71 | } 72 | 73 | impl PartialOrd for ProcessorImpl { 74 | fn partial_cmp(&self, other: &Self) -> Option { 75 | Some(self.cmp(other)) 76 | } 77 | } 78 | 79 | impl Ord for ProcessorImpl { 80 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 81 | self.id.cmp(&other.id) 82 | } 83 | } 84 | 85 | #[cfg(test)] 86 | mod tests { 87 | use super::*; 88 | 89 | #[test] 90 | fn smoke_test() { 91 | let processor = ProcessorImpl::new(0, 1, 2, 3, EfficiencyClass::Performance); 92 | 93 | assert_eq!(processor.id(), 2); 94 | assert_eq!(processor.memory_region_id(), 3); 95 | assert_eq!(processor.efficiency_class(), EfficiencyClass::Performance); 96 | 97 | let processor2 = ProcessorImpl::new(0, 1, 2, 3, EfficiencyClass::Performance); 98 | assert_eq!(processor, processor2); 99 | 100 | let processor3 = ProcessorImpl::new(0, 1, 4, 3, EfficiencyClass::Performance); 101 | assert_ne!(processor, processor3); 102 | assert!(processor < processor3); 103 | assert!(processor3 > processor); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /crates/many_cpus/src/primitive_types.rs: -------------------------------------------------------------------------------- 1 | /// A processor identifier, used to differentiate processors in the system. This will match 2 | /// the numeric identifier used by standard tooling of the operating system. 
3 | /// 4 | /// It is important to highlight that the values used are not guaranteed to be sequential/contiguous 5 | /// or to start from zero (aspects that are also not guaranteed by operating system tooling). 6 | pub type ProcessorId = u32; 7 | 8 | /// A memory region identifier, used to differentiate memory regions in the system. This will match 9 | /// the numeric identifier used by standard tooling of the operating system. 10 | /// 11 | /// It is important to highlight that the values used are not guaranteed to be sequential/contiguous 12 | /// or to start from zero (aspects that are also not guaranteed by operating system tooling). 13 | pub type MemoryRegionId = u32; 14 | 15 | /// Differentiates processors by their efficiency class, allowing work requiring high 16 | /// performance to be placed on the most performant processors at the expense of energy usage. 17 | /// 18 | /// This is a relative measurement - the most performant processors in a system are always 19 | /// considered performance processors, with less performant ones considered efficiency processors. 20 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] 21 | #[expect( 22 | clippy::exhaustive_enums, 23 | reason = "mirroring two-tier structure of platform APIs" 24 | )] 25 | pub enum EfficiencyClass { 26 | /// A processor that is optimized for energy efficiency at the expense of performance. 27 | Efficiency, 28 | 29 | /// A processor that is optimized for performance at the expense of energy efficiency. 30 | Performance, 31 | } 32 | -------------------------------------------------------------------------------- /crates/many_cpus/src/resource_quota.rs: -------------------------------------------------------------------------------- 1 | /// Information about the resource quota that the operating system enforces for the current process. 2 | /// 3 | /// The active resource quota may change over time. 
You can use [`HardwareTracker`][1] to obtain 4 | /// fresh information about the current resource quota at any time. 5 | /// 6 | /// [1]: crate::HardwareTracker 7 | #[derive(Debug)] 8 | pub struct ResourceQuota { 9 | max_processor_time: f64, 10 | } 11 | 12 | impl ResourceQuota { 13 | pub(crate) fn new(max_processor_time: f64) -> Self { 14 | Self { max_processor_time } 15 | } 16 | 17 | /// How many seconds of processor time the process is allowed to use per second of real time. 18 | /// 19 | /// This will never be more than the number of processors available to the current process. 20 | #[must_use] 21 | #[inline] 22 | pub fn max_processor_time(&self) -> f64 { 23 | self.max_processor_time 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "many_cpus_benchmarking" 3 | description = "Criterion benchmark harness to easily compare different processor configurations" 4 | publish = true 5 | version = "0.1.10" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | cpulist = { workspace = true } 18 | criterion = { workspace = true } 19 | derive_more = { workspace = true, features = ["display"] } 20 | folo_utils = { workspace = true } 21 | itertools = { workspace = true } 22 | many_cpus = { workspace = true } 23 | nonempty = { workspace = true } 24 | rand = { workspace = true } 25 | 26 | [dev-dependencies] 27 | mutants = { workspace = true } 28 | 29 | [[bench]] 30 | name = "many_cpus_harness_demo" 31 | harness = false 32 | 33 | [lints] 34 | workspace = true 35 | -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/README.md: 
-------------------------------------------------------------------------------- 1 | [Criterion][1] benchmark harness designed to compare different modes of distributing work in a 2 | many-processor system with multiple memory regions. This helps highlight the performance impact of 3 | cross-memory-region data transfers, cross-processor data transfers and multi-threaded logic. 4 | 5 | More details in the [crate documentation](https://docs.rs/many_cpus_benchmarking/). 6 | 7 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 8 | high-performance hardware-aware programming in Rust. 9 | 10 | [1]: https://bheisler.github.io/criterion.rs/book/index.html -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/benches/many_cpus_harness_demo.rs: -------------------------------------------------------------------------------- 1 | //! Demonstrates basic usage of the benchmark harness provided by `many_cpus_benchmarking`. 2 | 3 | #![allow( 4 | missing_docs, 5 | reason = "No need for API documentation in benchmark code" 6 | )] 7 | 8 | use std::{hint::black_box, ptr}; 9 | 10 | use criterion::{Criterion, criterion_group, criterion_main}; 11 | use many_cpus_benchmarking::{Payload, WorkDistribution, execute_runs}; 12 | 13 | criterion_group!(benches, entrypoint); 14 | criterion_main!(benches); 15 | 16 | fn entrypoint(c: &mut Criterion) { 17 | // We use a BATCH_SIZE of 10, which means 10 * 64 = 640 MB of memory used per worker pair. 18 | execute_runs::(c, WorkDistribution::all()); 19 | } 20 | 21 | const COPY_BYTES_LEN: usize = 64 * 1024 * 1024; 22 | 23 | /// Sample benchmark scenario that copies bytes between the two paired payloads. 24 | /// 25 | /// The source buffers are allocated in the "prepare" step and become local to the "prepare" worker. 26 | /// The destination buffers are allocated in the "process" step. 
The end result is that we copy 27 | /// from remote memory (allocated in the "prepare" step) to local memory in the "process" step. 28 | /// 29 | /// There is no deep meaning behind this scenario, just a sample benchmark that showcases comparing 30 | /// different work distribution modes to identify performance differences from hardware-awareness. 31 | #[derive(Debug, Default)] 32 | struct CopyBytes { 33 | from: Option>, 34 | } 35 | 36 | impl Payload for CopyBytes { 37 | fn new_pair() -> (Self, Self) { 38 | (Self::default(), Self::default()) 39 | } 40 | 41 | fn prepare(&mut self) { 42 | self.from = Some(vec![99; COPY_BYTES_LEN]); 43 | } 44 | 45 | fn process(&mut self) { 46 | let from = self.from.as_ref().unwrap(); 47 | let mut to = Vec::with_capacity(COPY_BYTES_LEN); 48 | 49 | // SAFETY: The pointers are valid, the length is correct, all is well. 50 | unsafe { 51 | ptr::copy_nonoverlapping(from.as_ptr(), to.as_mut_ptr(), COPY_BYTES_LEN); 52 | } 53 | 54 | // SAFETY: We just filled these bytes, it is all good. 55 | unsafe { 56 | to.set_len(COPY_BYTES_LEN); 57 | } 58 | 59 | // Read from the destination to prevent the compiler from optimizing the copy away. 
60 | _ = black_box(to.first().unwrap()); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/images/work_distribution_comparison.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:28cfbf03e3bf7fa149dc80b13cae85ce595dab4cf8e7bc90afc9117e633dc107 3 | size 65663 4 | -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/src/cache.rs: -------------------------------------------------------------------------------- 1 | #![allow( 2 | dead_code, 3 | reason = "code is conditionally used only in non-test builds" 4 | )] 5 | 6 | use std::{cell::RefCell, hint::black_box, ptr, sync::LazyLock}; 7 | 8 | // Large servers can make hundreds of MBs of L3 cache available to a single core, though it 9 | // depends on the specific model and hardware configuration. We use a sufficiently large data set 10 | // here to have a good chance of evicting the real payload data from the caches. 11 | #[cfg(not(miri))] 12 | const CACHE_CLEANER_LEN_BYTES: usize = 128 * 1024 * 1024; 13 | #[cfg(miri)] 14 | const CACHE_CLEANER_LEN_BYTES: usize = 1024; 15 | 16 | #[expect( 17 | clippy::integer_division, 18 | reason = "we are fine with inaccuracy if the inputs require it - this is a ballpark figure" 19 | )] 20 | const CACHE_CLEANER_LEN_U64: usize = CACHE_CLEANER_LEN_BYTES / size_of::(); 21 | 22 | // We copy the data from a shared immutable source. 23 | static CACHE_CLEANER_SOURCE: LazyLock> = 24 | LazyLock::new(|| vec![0x0102030401020304; CACHE_CLEANER_LEN_U64]); 25 | 26 | // To a thread-specific destination (just to avoid overlap/conflict). 27 | // The existing values here do not matter, we will overwrite them (potentially multiple times). 28 | thread_local! 
{ 29 | static CACHE_CLEANER_DESTINATION: RefCell> = 30 | RefCell::new(vec![0xFFFFFFFFFFFFFFFF; CACHE_CLEANER_LEN_U64]); 31 | } 32 | 33 | /// As the whole point of this benchmark harness is to demonstrate differences when running under 34 | /// different many-processor configurations, we need to ensure that memory actually gets accessed 35 | /// during the benchmark runs - that all data is not simply cached locally. 36 | /// 37 | /// This function will perform a large memory copy operation, which hopefully trashes any caches. 38 | #[cfg_attr(test, mutants::skip)] // Functional testing infeasible; we just check for panic. 39 | pub(crate) fn clean_caches() { 40 | let source_ptr = CACHE_CLEANER_SOURCE.as_ptr(); 41 | let destination_ptr = CACHE_CLEANER_DESTINATION.with_borrow_mut(Vec::as_mut_ptr); 42 | 43 | // SAFETY: Lengths are correct, pointers are valid, we are good to go. 44 | unsafe { 45 | ptr::copy_nonoverlapping(source_ptr, destination_ptr, CACHE_CLEANER_LEN_U64); 46 | } 47 | 48 | // SAFETY: We just filled these bytes, it is all good. 49 | CACHE_CLEANER_DESTINATION.with_borrow_mut(|destination| unsafe { 50 | destination.set_len(CACHE_CLEANER_LEN_U64); 51 | }); 52 | 53 | // Read from the destination to prevent the compiler from optimizing the copy away. 54 | // SAFETY: The pointer is valid, we just used it. 55 | let _ = black_box(unsafe { destination_ptr.read() }); 56 | } 57 | 58 | #[cfg(test)] 59 | mod tests { 60 | use super::*; 61 | 62 | #[test] 63 | fn clean_caches_smoke_test() { 64 | // Just make sure it does not panic and gets a clean bill of health from Miri. 65 | clean_caches(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /crates/many_cpus_benchmarking/src/payload.rs: -------------------------------------------------------------------------------- 1 | /// One benchmark payload, to be processed by each worker involved in each benchmark. 
2 | /// 3 | /// Payloads are created in pairs because the workers are created in pairs. Depending on the 4 | /// benchmark scenario, the pair of payloads may be connected (e.g. reader and writer) or 5 | /// independent (equivalent, two workers doing the same thing). 6 | /// 7 | /// The lifecycle of a payload is: 8 | /// 9 | /// 1. A payload pair is created on the main thread. 10 | /// 1. Each payload in the pair is transferred to a specific thread hosting a specific worker. 11 | /// 1. The `prepare()` method is called to generate any input data. 12 | /// 1. The payload pair is exchanged between the two paired workers. 13 | /// 1. The `process()` method is called to process the data received from the other pair member. 14 | /// 1. The payload pair is dropped. 15 | /// 16 | /// Note that some [work distribution modes][crate::WorkDistribution] (named `*Self`) may skip 17 | /// the payload exchange step. 18 | pub trait Payload: Sized + Send + 'static { 19 | /// Creates the payload pair that will be used to initialize one worker pair in one 20 | /// benchmark iteration. This will be called on the main thread. 21 | fn new_pair() -> (Self, Self); 22 | 23 | /// Performs any initialization required. This will be called before the benchmark time span 24 | /// measurement starts. It will be called on a worker thread but the payload may be moved to 25 | /// a different worker thread before the benchmark starts (as workers by default prepare work 26 | /// for each other, to showcase what happens when the work is transferred between threads). 27 | fn prepare(&mut self) {} 28 | 29 | /// Performs any initialization required on the final worker thread selected. This is not 30 | /// counted as part of the benchmark time span. 31 | fn prepare_local(&mut self) {} 32 | 33 | /// Processes the payload but does not consume it. The iteration is complete when this returns 34 | /// for all payloads. 
The payloads are dropped later, to ensure that the benchmark time is not 35 | /// affected by the time it takes to drop the payload and release the memory. 36 | fn process(&mut self); 37 | } 38 | -------------------------------------------------------------------------------- /crates/region_cached/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "region_cached" 3 | description = "Adds a logical layer of caching between processor L3 cache and main memory" 4 | publish = true 5 | version = "0.1.11" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | arc-swap = { workspace = true } 18 | derive_more = { workspace = true, features = ["debug"] } 19 | folo_utils = { workspace = true } 20 | linked = { workspace = true } 21 | many_cpus = { workspace = true } 22 | rsevents = { workspace = true } 23 | simple-mermaid = { workspace = true } 24 | 25 | [dev-dependencies] 26 | axum = { workspace = true, features = ["http1", "tokio"] } 27 | benchmark_utils = { workspace = true } 28 | criterion = { workspace = true } 29 | mockall = { workspace = true } 30 | mutants = { workspace = true } 31 | static_assertions = { workspace = true } 32 | tokio = { workspace = true, features = ["net", "rt-multi-thread"] } 33 | 34 | [[bench]] 35 | name = "region_cached" 36 | harness = false 37 | 38 | [lints] 39 | workspace = true 40 | -------------------------------------------------------------------------------- /crates/region_cached/README.md: -------------------------------------------------------------------------------- 1 | On many-processor systems with multiple memory regions, there is an extra cost associated with 2 | accessing data in physical memory modules that are in a different memory region than the current 3 | processor: 4 | 5 | * 
Cross-memory-region loads have higher latency (e.g. 100 ns local versus 200 ns remote). 6 | * Cross-memory-region loads have lower throughput (e.g. 50 Gbps local versus 10 Gbps remote). 7 | 8 | This crate provides the capability to cache frequently accessed shared data sets in the local memory 9 | region, speeding up reads when the data is not already in the local processor caches. You can think 10 | of it as an extra level of caching between L3 processor caches and main memory. 11 | 12 | More details in the [crate documentation](https://docs.rs/region_cached/). 13 | 14 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 15 | high-performance hardware-aware programming in Rust. -------------------------------------------------------------------------------- /crates/region_cached/doc/region_cached.mermaid: -------------------------------------------------------------------------------- 1 | graph TD 2 | subgraph Region1[Memory region 1] 3 | Processor1[Processor 1] 4 | Processor2[Processor 2] 5 | 6 | Processor1 -->|read| Region1Value[Regional value] 7 | Processor2 -->|read| Region1Value 8 | end 9 | 10 | subgraph Region2[Memory region 2] 11 | Processor3[Processor 3] 12 | Processor4[Processor 4] 13 | 14 | Processor3 -->|read| Region2Value[Regional value] 15 | Processor4 -->|read| Region2Value 16 | end 17 | 18 | Region1Value -->|weakly consistent
clone from| GlobalValue[Global value] 19 | Region2Value -->|weakly consistent
clone from| GlobalValue 20 | 21 | Processor1 -->|write| GlobalValue -------------------------------------------------------------------------------- /crates/region_cached/examples/region_cached_1gb.rs: -------------------------------------------------------------------------------- 1 | //! Allocates a region-cached variable with 1 GB of data and accesses it from every thread. 2 | //! 3 | //! You can observe memory usage to prove that the data is not being copied an unexpected 4 | //! number of times (one copy per memory region is expected, plus one global primary copy). 5 | 6 | use std::{hint::black_box, thread, time::Duration}; 7 | 8 | use many_cpus::ProcessorSet; 9 | use region_cached::{RegionCachedExt, region_cached}; 10 | 11 | region_cached! { 12 | static DATA: Vec = vec![50; 1024 * 1024 * 1024]; 13 | } 14 | 15 | fn main() { 16 | let processor_set = ProcessorSet::default(); 17 | 18 | processor_set 19 | .spawn_threads(|_| DATA.with_cached(|data| _ = black_box(data.len()))) 20 | .into_iter() 21 | .for_each(|x| x.join().unwrap()); 22 | 23 | println!( 24 | "All {} threads have accessed the region-cached data. Terminating in 60 seconds.", 25 | processor_set.len() 26 | ); 27 | 28 | thread::sleep(Duration::from_secs(60)); 29 | } 30 | -------------------------------------------------------------------------------- /crates/region_cached/examples/region_cached_log_filtering.rs: -------------------------------------------------------------------------------- 1 | //! Showcase basic use of the `region_cached!` macro. 2 | 3 | use std::thread; 4 | 5 | use region_cached::{RegionCachedExt, region_cached}; 6 | 7 | region_cached!(static FILTER_KEYS: Vec = load_initial_filters()); 8 | 9 | /// Returns true if the log line contains any of the filter keys. 10 | fn process_log_line(line: &str) -> bool { 11 | // `.with_cached()` provides an immutable reference to the cached value. 
12 | FILTER_KEYS.with_cached(|keys| keys.iter().any(|key| line.contains(key))) 13 | } 14 | 15 | fn update_filters(new_filters: Vec) { 16 | // `.set_global()` publishes a new value, which will be distributed to all memory regions in an 17 | // eventually consistent manner. 18 | FILTER_KEYS.set_global(new_filters); 19 | } 20 | 21 | fn load_initial_filters() -> Vec { 22 | // For example purposes we only have a trivial data set, which makes little sense to cache. 23 | // In realistic scenarios, you would want to use region-local caching only if your data 24 | // set is too large to naturally fit in processor caches (e.g. 100K+ entries). Other 25 | // considerations also apply - let profiling be your guide in choosing your data structures. 26 | vec!["error".to_string(), "panic".to_string()] 27 | } 28 | 29 | static SAMPLE_LOG_LINES: &[&str] = &[ 30 | "info: everything is fine", 31 | "error: something went wrong", 32 | "warning: this is a warning", 33 | "panic: oh no, we're doomed", 34 | ]; 35 | 36 | fn main() { 37 | // Start a bunch of threads that will process log lines. 38 | let mut threads = Vec::new(); 39 | 40 | for _ in 0..100 { 41 | threads.push(thread::spawn(move || { 42 | for line in SAMPLE_LOG_LINES { 43 | if process_log_line(line) { 44 | println!("Matched filters: {line}"); 45 | } 46 | } 47 | })); 48 | } 49 | 50 | let new_filters = vec![ 51 | "error".to_string(), 52 | "panic".to_string(), 53 | "warning".to_string(), 54 | ]; 55 | 56 | // Update the filters. The update will arrive eventually on all threads in all memory regions. 57 | // In terminal output, you may see the first threads act on the initial data set and later 58 | // threads act on the updated data set, simply because the first threads already finish before 59 | // getting the updated value. 
60 | update_filters(new_filters); 61 | 62 | for thread in threads { 63 | thread.join().unwrap(); 64 | } 65 | 66 | println!("All threads have finished processing log lines."); 67 | } 68 | -------------------------------------------------------------------------------- /crates/region_cached/examples/region_cached_log_filtering_no_statics.rs: -------------------------------------------------------------------------------- 1 | //! This is a variation of the `region_cached_log_filtering.rs` example, but using the `InstancePerThread` 2 | //! runtime wrapper type instead of static variables inside a `region_cached!` block. 3 | 4 | use std::thread; 5 | 6 | use linked::{InstancePerThread, Ref}; 7 | use region_cached::RegionCached; 8 | 9 | /// The current thread's view of the region-cached filter keys instance. 10 | type CachedFilterKeys = Ref>>; 11 | 12 | /// Returns true if the log line contains any of the filter keys. 13 | fn process_log_line(line: &str, filter_keys: &CachedFilterKeys) -> bool { 14 | // `.with_cached()` provides an immutable reference to the cached value. 15 | filter_keys.with_cached(|keys| keys.iter().any(|key| line.contains(key))) 16 | } 17 | 18 | fn update_filters(new_filters: Vec, filter_keys: &CachedFilterKeys) { 19 | // `.set_global()` publishes a new value, which will be distributed to all memory regions in a 20 | // weakly consistent manner. 21 | filter_keys.set_global(new_filters); 22 | } 23 | 24 | fn load_initial_filters() -> Vec { 25 | // For example purposes we only have a trivial data set, which makes little sense to cache. 26 | // In realistic scenarios, you would want to use region-local caching only if your data 27 | // set is too large to naturally fit in processor caches (e.g. 100K+ entries). Other 28 | // considerations also apply - let profiling be your guide in choosing your data structures. 
29 | vec!["error".to_string(), "panic".to_string()] 30 | } 31 | 32 | static SAMPLE_LOG_LINES: &[&str] = &[ 33 | "info: everything is fine", 34 | "error: something went wrong", 35 | "warning: this is a warning", 36 | "panic: oh no, we're doomed", 37 | ]; 38 | 39 | fn main() { 40 | let filters = InstancePerThread::new(RegionCached::new(load_initial_filters())); 41 | 42 | // Start a bunch of threads that will process log lines. 43 | let mut threads = Vec::new(); 44 | 45 | for _ in 0..100 { 46 | threads.push(thread::spawn({ 47 | // We clone the `InstancePerThread` for each thread, so they can all access the filters. 48 | let filters = filters.clone(); 49 | 50 | move || { 51 | // This localizes the `InstancePerThread` instance, giving us the current thread's view. 52 | let filters = filters.acquire(); 53 | 54 | for line in SAMPLE_LOG_LINES { 55 | if process_log_line(line, &filters) { 56 | println!("Matched filters: {line}"); 57 | } 58 | } 59 | } 60 | })); 61 | } 62 | 63 | let new_filters = vec![ 64 | "error".to_string(), 65 | "panic".to_string(), 66 | "warning".to_string(), 67 | ]; 68 | 69 | // Update the filters. The update will arrive eventually on all threads in all memory regions. 70 | // In terminal output, you may see the first threads act on the initial data set and later 71 | // threads act on the updated data set, simply because the first threads already finish before 72 | // getting the updated value. 
2 | 3 | use axum::{Router, routing::get}; 4 | use many_cpus::HardwareInfo; 5 | use region_cached::{RegionCachedCopyExt, RegionCachedExt, region_cached}; 6 | use std::time::{SystemTime, UNIX_EPOCH}; 7 | 8 | // A global variable whose latest value is cached in each memory region for fast local read access. 9 | // Writes to this variable are weakly consistent across all memory regions. 10 | // 11 | // Note: to keep the example simple, the value of this variable is of a trivial size and unlikely 12 | // to actually benefit from region-local caching as it easily fits into local processor caches. 13 | region_cached!(static LAST_UPDATE: u128 = 0); 14 | 15 | #[tokio::main] 16 | async fn main() { 17 | // The beneficial impact will arise only on systems with multiple memory regions. 18 | let memory_region_count = HardwareInfo::max_memory_region_count(); 19 | println!("the current system has {memory_region_count} memory regions"); 20 | 21 | let app = Router::new() 22 | .route("/", get(read)) 23 | .route("/update", get(update)); 24 | let listener = tokio::net::TcpListener::bind("0.0.0.0:1234").await.unwrap(); 25 | axum::serve(listener, app).await.unwrap(); 26 | } 27 | 28 | /// Open `http://localhost:1234/` to read the current value. 29 | async fn read() -> String { 30 | let last_update_timestamp = LAST_UPDATE.get_cached(); 31 | 32 | format!("Last update: {last_update_timestamp}") 33 | } 34 | 35 | /// Open `http://localhost:1234/update` to set a new value. 
36 | async fn update() -> String { 37 | let now = SystemTime::now() 38 | .duration_since(UNIX_EPOCH) 39 | .unwrap() 40 | .as_millis(); 41 | LAST_UPDATE.set_global(now); 42 | format!("Last update time set to: {now}") 43 | } 44 | -------------------------------------------------------------------------------- /crates/region_cached/src/__private.rs: -------------------------------------------------------------------------------- 1 | pub use linked; 2 | -------------------------------------------------------------------------------- /crates/region_cached/src/clients.rs: -------------------------------------------------------------------------------- 1 | mod hw_info_client; 2 | mod hw_info_facade; 3 | mod hw_tracker_client; 4 | mod hw_tracker_facade; 5 | 6 | pub(crate) use hw_info_client::*; 7 | pub(crate) use hw_info_facade::*; 8 | pub(crate) use hw_tracker_client::*; 9 | pub(crate) use hw_tracker_facade::*; 10 | -------------------------------------------------------------------------------- /crates/region_cached/src/clients/hw_info_client.rs: -------------------------------------------------------------------------------- 1 | use many_cpus::HardwareInfo; 2 | 3 | #[cfg_attr(test, mockall::automock)] 4 | pub(crate) trait HardwareInfoClient { 5 | fn max_memory_region_count(&self) -> usize; 6 | } 7 | 8 | #[derive(Debug)] 9 | pub(crate) struct HardwareInfoClientImpl; 10 | 11 | impl HardwareInfoClient for HardwareInfoClientImpl { 12 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 
13 | fn max_memory_region_count(&self) -> usize { 14 | HardwareInfo::max_memory_region_count() 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /crates/region_cached/src/clients/hw_info_facade.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use std::sync::Arc; 3 | 4 | use crate::{HardwareInfoClient, HardwareInfoClientImpl}; 5 | 6 | #[cfg(test)] 7 | use crate::MockHardwareInfoClient; 8 | 9 | #[derive(Clone, Debug)] 10 | pub(crate) enum HardwareInfoClientFacade { 11 | Real(&'static HardwareInfoClientImpl), 12 | 13 | #[cfg(test)] 14 | Mock(Arc), 15 | } 16 | 17 | impl HardwareInfoClientFacade { 18 | pub(crate) const fn real() -> Self { 19 | Self::Real(&HardwareInfoClientImpl) 20 | } 21 | 22 | #[cfg(test)] 23 | pub(crate) fn from_mock(mock: MockHardwareInfoClient) -> Self { 24 | Self::Mock(Arc::new(mock)) 25 | } 26 | } 27 | 28 | impl HardwareInfoClient for HardwareInfoClientFacade { 29 | fn max_memory_region_count(&self) -> usize { 30 | match self { 31 | Self::Real(real) => real.max_memory_region_count(), 32 | #[cfg(test)] 33 | Self::Mock(mock) => mock.max_memory_region_count(), 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/region_cached/src/clients/hw_tracker_client.rs: -------------------------------------------------------------------------------- 1 | use many_cpus::{HardwareTracker, MemoryRegionId}; 2 | 3 | #[cfg_attr(test, mockall::automock)] 4 | pub(crate) trait HardwareTrackerClient { 5 | fn current_memory_region_id(&self) -> MemoryRegionId; 6 | fn is_thread_memory_region_pinned(&self) -> bool; 7 | } 8 | 9 | #[derive(Debug)] 10 | pub(crate) struct HardwareTrackerClientImpl; 11 | 12 | impl HardwareTrackerClient for HardwareTrackerClientImpl { 13 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 
14 | fn current_memory_region_id(&self) -> MemoryRegionId { 15 | HardwareTracker::current_memory_region_id() 16 | } 17 | 18 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 19 | fn is_thread_memory_region_pinned(&self) -> bool { 20 | HardwareTracker::is_thread_memory_region_pinned() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /crates/region_cached/src/clients/hw_tracker_facade.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use std::sync::Arc; 3 | 4 | use many_cpus::MemoryRegionId; 5 | 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl}; 7 | 8 | #[cfg(test)] 9 | use crate::MockHardwareTrackerClient; 10 | 11 | #[derive(Clone, Debug)] 12 | pub(crate) enum HardwareTrackerClientFacade { 13 | Real(&'static HardwareTrackerClientImpl), 14 | 15 | #[cfg(test)] 16 | Mock(Arc), 17 | } 18 | 19 | impl HardwareTrackerClientFacade { 20 | pub(crate) const fn real() -> Self { 21 | Self::Real(&HardwareTrackerClientImpl) 22 | } 23 | 24 | #[cfg(test)] 25 | pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self { 26 | Self::Mock(Arc::new(mock)) 27 | } 28 | } 29 | 30 | impl HardwareTrackerClient for HardwareTrackerClientFacade { 31 | fn current_memory_region_id(&self) -> MemoryRegionId { 32 | match self { 33 | Self::Real(real) => real.current_memory_region_id(), 34 | #[cfg(test)] 35 | Self::Mock(mock) => mock.current_memory_region_id(), 36 | } 37 | } 38 | 39 | fn is_thread_memory_region_pinned(&self) -> bool { 40 | match self { 41 | Self::Real(real) => real.is_thread_memory_region_pinned(), 42 | #[cfg(test)] 43 | Self::Mock(mock) => mock.is_thread_memory_region_pinned(), 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /crates/region_cached/src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Marks 
static variables as region-cached. 2 | /// 3 | /// The static variables are most conveniently used via extension methods on the 4 | /// [`RegionCachedExt`][1] trait. Import this trait when using region-cached static variables. 5 | /// 6 | /// # Example 7 | /// 8 | /// ``` 9 | /// use region_cached::{RegionCachedExt, region_cached}; 10 | /// 11 | /// region_cached! { 12 | /// static ALLOWED_KEYS: Vec = vec![ 13 | /// "error".to_string(), 14 | /// "panic".to_string() 15 | /// ]; 16 | /// static FORBIDDEN_KEYS: Vec = vec![ 17 | /// "info".to_string(), 18 | /// "debug".to_string() 19 | /// ]; 20 | /// } 21 | /// 22 | /// let allowed_key_count = ALLOWED_KEYS.with_cached(|keys| keys.len()); 23 | /// ``` 24 | /// 25 | /// [1]: crate::RegionCachedExt 26 | #[macro_export] 27 | macro_rules! region_cached { 28 | () => {}; 29 | 30 | ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr; $($rest:tt)*) => ( 31 | $crate::region_cached!($(#[$attr])* $vis static $NAME: $t = $initial_value); 32 | $crate::region_cached!($($rest)*); 33 | ); 34 | 35 | ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr) => { 36 | $crate::__private::linked::thread_local_rc! { 37 | $(#[$attr])* $vis static $NAME: $crate::RegionCached<$t> = 38 | $crate::RegionCached::new($initial_value); 39 | } 40 | }; 41 | } 42 | -------------------------------------------------------------------------------- /crates/region_cached/src/region_cached_ext.rs: -------------------------------------------------------------------------------- 1 | use crate::RegionCached; 2 | 3 | /// Extension trait that adds convenience methods to region-cached static variables 4 | /// in a `region_cached!` block. 5 | pub trait RegionCachedExt { 6 | /// Executes the provided function with a reference to the cached value 7 | /// in the current memory region. 
8 | /// 9 | /// # Example 10 | /// 11 | /// ``` 12 | /// use region_cached::{region_cached, RegionCachedExt}; 13 | /// 14 | /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string()); 15 | /// 16 | /// let len = FAVORITE_COLOR.with_cached(|color| color.len()); 17 | /// assert_eq!(len, 4); 18 | /// ``` 19 | fn with_cached<F, R>(&self, f: F) -> R 20 | where 21 | F: FnOnce(&T) -> R; 22 | 23 | /// Publishes a new value to all memory regions. 24 | /// 25 | /// The update will be applied to all memory regions in a [weakly consistent manner][1]. 26 | /// 27 | /// # Example 28 | /// 29 | /// ``` 30 | /// use region_cached::{region_cached, RegionCachedExt}; 31 | /// 32 | /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string()); 33 | /// 34 | /// FAVORITE_COLOR.set_global("red".to_string()); 35 | /// ``` 36 | /// 37 | /// Updating the value is [weakly consistent][1]. Do not expect the update to be 38 | /// immediately visible. Even on the same thread, it is only guaranteed to be 39 | /// immediately visible if the thread is pinned to a specific memory region. 40 | /// 41 | /// ``` 42 | /// use many_cpus::ProcessorSet; 43 | /// use region_cached::{region_cached, RegionCachedExt}; 44 | /// use std::num::NonZero; 45 | /// 46 | /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string()); 47 | /// 48 | /// // We can use this to pin a thread to a specific processor, to demonstrate a 49 | /// // situation where you can rely on consistency guarantees for immediate visibility.
50 | /// let one_processor = ProcessorSet::builder() 51 | /// .take(NonZero::new(1).unwrap()) 52 | /// .unwrap(); 53 | /// 54 | /// one_processor.spawn_thread(move |processor_set| { 55 | /// let processor = processor_set.processors().first(); 56 | /// println!("Thread pinned to processor {} in memory region {}", 57 | /// processor.id(), 58 | /// processor.memory_region_id() 59 | /// ); 60 | /// 61 | /// FAVORITE_COLOR.set_global("red".to_string()); 62 | /// 63 | /// // This thread is pinned to a specific processor, so it is guaranteed to stay 64 | /// // within the same memory region (== on the same physical hardware). This means 65 | /// // that an update to a region-cached value is immediately visible. 66 | /// let color = FAVORITE_COLOR.with_cached(|color| color.clone()); 67 | /// assert_eq!(color, "red"); 68 | /// }).join().unwrap(); 69 | /// ``` 70 | /// 71 | /// [1]: crate#consistency-guarantees 72 | fn set_global(&self, value: T); 73 | } 74 | 75 | /// Extension trait that adds convenience methods to region-cached static variables 76 | /// in a `region_cached!` block, specifically for `Copy` types. 77 | pub trait RegionCachedCopyExt<T> 78 | where 79 | T: Copy, 80 | { 81 | /// Gets a copy of the cached value in the current memory region.
82 | /// 83 | /// # Example 84 | /// 85 | /// ``` 86 | /// use region_cached::{region_cached, RegionCachedCopyExt}; 87 | /// 88 | /// region_cached!(static CURRENT_ACCESS_TOKEN: u128 = 0x123100); 89 | /// 90 | /// let token = CURRENT_ACCESS_TOKEN.get_cached(); 91 | /// assert_eq!(token, 0x123100); 92 | /// ``` 93 | fn get_cached(&self) -> T; 94 | } 95 | 96 | impl<T> RegionCachedExt<T> for linked::StaticInstancePerThread<RegionCached<T>> 97 | where 98 | T: Clone + Send + Sync + 'static, 99 | { 100 | #[inline] 101 | fn with_cached<F, R>(&self, f: F) -> R 102 | where 103 | F: FnOnce(&T) -> R, 104 | { 105 | self.with(|inner| inner.with_cached(f)) 106 | } 107 | 108 | #[inline] 109 | fn set_global(&self, value: T) { 110 | self.with(|inner| inner.set_global(value)); 111 | } 112 | } 113 | 114 | impl<T> RegionCachedCopyExt<T> for linked::StaticInstancePerThread<RegionCached<T>> 115 | where 116 | T: Clone + Copy + Send + Sync + 'static, 117 | { 118 | #[inline] 119 | fn get_cached(&self) -> T { 120 | self.with(|inner| inner.get_cached()) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /crates/region_local/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "region_local" 3 | description = "Isolated variable storage per memory region, similar to `thread_local_rc!`" 4 | publish = true 5 | version = "0.1.11" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [features] 14 | default = [] 15 | 16 | [dependencies] 17 | arc-swap = { workspace = true } 18 | derive_more = { workspace = true, features = ["debug"] } 19 | folo_utils = { workspace = true } 20 | linked = { workspace = true } 21 | many_cpus = { workspace = true } 22 | rsevents = { workspace = true } 23 | simple-mermaid = { workspace = true } 24 | 25 | [dev-dependencies] 26 | axum = { workspace = true, features = ["http1", "tokio"] } 27 |
benchmark_utils = { workspace = true } 28 | criterion = { workspace = true } 29 | mockall = { workspace = true } 30 | mutants = { workspace = true } 31 | static_assertions = { workspace = true } 32 | tokio = { workspace = true, features = ["net", "rt-multi-thread"] } 33 | 34 | [[bench]] 35 | name = "region_local" 36 | harness = false 37 | 38 | [lints] 39 | workspace = true 40 | -------------------------------------------------------------------------------- /crates/region_local/README.md: -------------------------------------------------------------------------------- 1 | On many-processor systems with multiple memory regions, there is an extra cost associated with 2 | accessing data in physical memory modules that are in a different memory region than the current 3 | processor: 4 | 5 | * Cross-memory-region loads have higher latency (e.g. 100 ns local versus 200 ns remote). 6 | * Cross-memory-region loads have lower throughput (e.g. 50 Gbps local versus 10 Gbps remote). 7 | 8 | This crate provides the capability to create static variables that maintain separate storage per 9 | memory region. This may be useful in circumstances where state needs to be shared but only within 10 | each memory region (e.g. because you intentionally want to avoid the overhead of cross-memory-region 11 | transfers and want to isolate the data sets). 12 | 13 | Think of this as an equivalent of `thread_local_rc!`, except operating on the memory region boundary 14 | instead of the thread boundary. 15 | 16 | More details in the [crate documentation](https://docs.rs/region_local/). 17 | 18 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for 19 | high-performance hardware-aware programming in Rust. 
-------------------------------------------------------------------------------- /crates/region_local/doc/region_local.mermaid: -------------------------------------------------------------------------------- 1 | graph TD 2 | subgraph Region2[Memory region 2] 3 | Processor3[Processor 3] 4 | Processor4[Processor 4] 5 | 6 | Processor3 --> Region2Value[Regional value] 7 | Processor4 --> Region2Value 8 | end 9 | 10 | subgraph Region1[Memory region 1] 11 | Processor1[Processor 1] 12 | Processor2[Processor 2] 13 | 14 | Processor1 --> Region1Value[Regional value] 15 | Processor2 --> Region1Value 16 | end -------------------------------------------------------------------------------- /crates/region_local/examples/region_local_1gb.rs: -------------------------------------------------------------------------------- 1 | //! Allocates a region-local variable with 1 GB of data and accesses it from every thread. 2 | //! 3 | //! You can observe memory usage to prove that the data is not being copied an unexpected 4 | //! number of times (one copy per memory region is expected). 5 | 6 | use std::{hint::black_box, thread, time::Duration}; 7 | 8 | use many_cpus::ProcessorSet; 9 | use region_local::{RegionLocalExt, region_local}; 10 | 11 | region_local! { 12 | static DATA: Vec<u8> = vec![50; 1024 * 1024 * 1024]; 13 | } 14 | 15 | fn main() { 16 | let processor_set = ProcessorSet::default(); 17 | 18 | processor_set 19 | .spawn_threads(|_| DATA.with_local(|data| _ = black_box(data.len()))) 20 | .into_iter() 21 | .for_each(|x| x.join().unwrap()); 22 | 23 | println!( 24 | "All {} threads have accessed the region-local data. Terminating in 60 seconds.", 25 | processor_set.len() 26 | ); 27 | 28 | thread::sleep(Duration::from_secs(60)); 29 | } 30 | -------------------------------------------------------------------------------- /crates/region_local/examples/region_local_web.rs: -------------------------------------------------------------------------------- 1 | //!
Showcase basic use of the `region_local!` macro in a multithreaded web app. 2 | //! 3 | use axum::{Router, routing::get}; 4 | use many_cpus::HardwareInfo; 5 | use region_local::{RegionLocalCopyExt, RegionLocalExt, region_local}; 6 | use std::time::{SystemTime, UNIX_EPOCH}; 7 | 8 | // A global variable whose value is unique in each memory region for fast local access. 9 | // Writes to this variable are eventually consistent across all threads in the same memory region. 10 | region_local!(static LAST_UPDATE: u128 = 0); 11 | 12 | #[tokio::main] 13 | async fn main() { 14 | let memory_region_count = HardwareInfo::max_memory_region_count(); 15 | println!("the current system has {memory_region_count} memory regions"); 16 | 17 | let app = Router::new() 18 | .route("/", get(read)) 19 | .route("/update", get(update)); 20 | let listener = tokio::net::TcpListener::bind("0.0.0.0:1234").await.unwrap(); 21 | axum::serve(listener, app).await.unwrap(); 22 | } 23 | 24 | /// Open `http://localhost:1234/` to read the current value. 25 | async fn read() -> String { 26 | let last_update_timestamp = LAST_UPDATE.get_local(); 27 | 28 | format!("Last update: {last_update_timestamp}") 29 | } 30 | 31 | /// Open `http://localhost:1234/update` to set a new value. 32 | /// The new value is only visible to `read()` handlers that run in the same memory region. 
33 | async fn update() -> String { 34 | let now = SystemTime::now() 35 | .duration_since(UNIX_EPOCH) 36 | .unwrap() 37 | .as_millis(); 38 | LAST_UPDATE.set_local(now); 39 | format!("Last update time set to: {now}") 40 | } 41 | -------------------------------------------------------------------------------- /crates/region_local/src/__private.rs: -------------------------------------------------------------------------------- 1 | pub use linked; 2 | -------------------------------------------------------------------------------- /crates/region_local/src/clients.rs: -------------------------------------------------------------------------------- 1 | mod hw_info_client; 2 | mod hw_info_facade; 3 | mod hw_tracker_client; 4 | mod hw_tracker_facade; 5 | 6 | pub(crate) use hw_info_client::*; 7 | pub(crate) use hw_info_facade::*; 8 | pub(crate) use hw_tracker_client::*; 9 | pub(crate) use hw_tracker_facade::*; 10 | -------------------------------------------------------------------------------- /crates/region_local/src/clients/hw_info_client.rs: -------------------------------------------------------------------------------- 1 | use many_cpus::HardwareInfo; 2 | 3 | #[cfg_attr(test, mockall::automock)] 4 | pub(crate) trait HardwareInfoClient { 5 | fn max_memory_region_count(&self) -> usize; 6 | } 7 | 8 | #[derive(Debug)] 9 | pub(crate) struct HardwareInfoClientImpl; 10 | 11 | impl HardwareInfoClient for HardwareInfoClientImpl { 12 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 
13 | fn max_memory_region_count(&self) -> usize { 14 | HardwareInfo::max_memory_region_count() 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /crates/region_local/src/clients/hw_info_facade.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use std::sync::Arc; 3 | 4 | use crate::{HardwareInfoClient, HardwareInfoClientImpl}; 5 | 6 | #[cfg(test)] 7 | use crate::MockHardwareInfoClient; 8 | 9 | #[derive(Clone, Debug)] 10 | pub(crate) enum HardwareInfoClientFacade { 11 | Real(&'static HardwareInfoClientImpl), 12 | 13 | #[cfg(test)] 14 | Mock(Arc<MockHardwareInfoClient>), 15 | } 16 | 17 | impl HardwareInfoClientFacade { 18 | pub(crate) const fn real() -> Self { 19 | Self::Real(&HardwareInfoClientImpl) 20 | } 21 | 22 | #[cfg(test)] 23 | pub(crate) fn from_mock(mock: MockHardwareInfoClient) -> Self { 24 | Self::Mock(Arc::new(mock)) 25 | } 26 | } 27 | 28 | impl HardwareInfoClient for HardwareInfoClientFacade { 29 | fn max_memory_region_count(&self) -> usize { 30 | match self { 31 | Self::Real(real) => real.max_memory_region_count(), 32 | #[cfg(test)] 33 | Self::Mock(mock) => mock.max_memory_region_count(), 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /crates/region_local/src/clients/hw_tracker_client.rs: -------------------------------------------------------------------------------- 1 | use many_cpus::{HardwareTracker, MemoryRegionId}; 2 | 3 | #[cfg_attr(test, mockall::automock)] 4 | pub(crate) trait HardwareTrackerClient { 5 | fn current_memory_region_id(&self) -> MemoryRegionId; 6 | fn is_thread_memory_region_pinned(&self) -> bool; 7 | } 8 | 9 | #[derive(Debug)] 10 | pub(crate) struct HardwareTrackerClientImpl; 11 | 12 | impl HardwareTrackerClient for HardwareTrackerClientImpl { 13 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
14 | fn current_memory_region_id(&self) -> MemoryRegionId { 15 | HardwareTracker::current_memory_region_id() 16 | } 17 | 18 | #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating. 19 | fn is_thread_memory_region_pinned(&self) -> bool { 20 | HardwareTracker::is_thread_memory_region_pinned() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /crates/region_local/src/clients/hw_tracker_facade.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | use std::sync::Arc; 3 | 4 | use many_cpus::MemoryRegionId; 5 | 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl}; 7 | 8 | #[cfg(test)] 9 | use crate::MockHardwareTrackerClient; 10 | 11 | #[derive(Clone, Debug)] 12 | pub(crate) enum HardwareTrackerClientFacade { 13 | Real(&'static HardwareTrackerClientImpl), 14 | 15 | #[cfg(test)] 16 | Mock(Arc<MockHardwareTrackerClient>), 17 | } 18 | 19 | impl HardwareTrackerClientFacade { 20 | pub(crate) const fn real() -> Self { 21 | Self::Real(&HardwareTrackerClientImpl) 22 | } 23 | 24 | #[cfg(test)] 25 | pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self { 26 | Self::Mock(Arc::new(mock)) 27 | } 28 | } 29 | 30 | impl HardwareTrackerClient for HardwareTrackerClientFacade { 31 | fn current_memory_region_id(&self) -> MemoryRegionId { 32 | match self { 33 | Self::Real(real) => real.current_memory_region_id(), 34 | #[cfg(test)] 35 | Self::Mock(mock) => mock.current_memory_region_id(), 36 | } 37 | } 38 | 39 | fn is_thread_memory_region_pinned(&self) -> bool { 40 | match self { 41 | Self::Real(real) => real.is_thread_memory_region_pinned(), 42 | #[cfg(test)] 43 | Self::Mock(mock) => mock.is_thread_memory_region_pinned(), 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /crates/region_local/src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Marks static
variables as region-local. 2 | /// 3 | /// The static variables are most conveniently used via extension methods on the 4 | /// [`RegionLocalExt`][1] trait. Import this trait when using region-local static variables. 5 | /// 6 | /// # Example 7 | /// 8 | /// ``` 9 | /// use region_local::{RegionLocalExt, region_local}; 10 | /// 11 | /// region_local! { 12 | /// static ALLOWED_KEYS: Vec<String> = vec![ 13 | /// "error".to_string(), 14 | /// "panic".to_string() 15 | /// ]; 16 | /// static FORBIDDEN_KEYS: Vec<String> = vec![ 17 | /// "info".to_string(), 18 | /// "debug".to_string() 19 | /// ]; 20 | /// } 21 | /// 22 | /// let allowed_key_count = ALLOWED_KEYS.with_local(|keys| keys.len()); 23 | /// ``` 24 | /// 25 | /// [1]: crate::RegionLocalExt 26 | #[macro_export] 27 | macro_rules! region_local { 28 | () => {}; 29 | 30 | ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr; $($rest:tt)*) => ( 31 | $crate::region_local!($(#[$attr])* $vis static $NAME: $t = $initial_value); 32 | $crate::region_local!($($rest)*); 33 | ); 34 | 35 | ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr) => { 36 | $crate::__private::linked::thread_local_rc!
{ 37 | $(#[$attr])* $vis static $NAME: $crate::RegionLocal<$t> = 38 | $crate::RegionLocal::new(|| $initial_value); 39 | } 40 | }; 41 | } 42 | -------------------------------------------------------------------------------- /crates/testing/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "testing" 3 | description = "Private helpers for testing and examples in Folo crates" 4 | publish = false 5 | version = "0.0.1-never" 6 | 7 | authors.workspace = true 8 | edition.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | doc = false 15 | 16 | [features] 17 | default = [] 18 | 19 | [dependencies] 20 | 21 | [target.'cfg(windows)'.dependencies] 22 | deranged = { workspace = true, features = ["macros"] } 23 | folo_utils = { workspace = true } 24 | windows = { workspace = true, features = [ 25 | "Win32_System_JobObjects", 26 | "Win32_System_Threading", 27 | "Win32_Security", 28 | ] } 29 | 30 | [dev-dependencies] 31 | mutants = { workspace = true } 32 | 33 | [lints] 34 | workspace = true 35 | -------------------------------------------------------------------------------- /crates/testing/examples/spin_cpu_windows.rs: -------------------------------------------------------------------------------- 1 | //! Demonstrates the effect of Windows job object limits by spinning the CPU. 
2 | 3 | fn main() { 4 | #[cfg(windows)] 5 | windows::main(); 6 | 7 | #[cfg(not(windows))] 8 | panic!("This example is only supported on Windows."); 9 | } 10 | 11 | #[cfg(windows)] 12 | mod windows { 13 | use std::{thread, time::Duration}; 14 | 15 | use folo_utils::nz; 16 | use testing::{Job, ProcessorTimePct}; 17 | use windows::Win32::System::Threading::{ 18 | GetCurrentThread, SetThreadPriority, THREAD_PRIORITY_IDLE, 19 | }; 20 | 21 | const SLEEP_TIME_SECS: u64 = 10; 22 | 23 | pub(crate) fn main() { 24 | let job = Job::builder() 25 | .with_processor_count(nz!(8)) 26 | .with_max_processor_time_pct(ProcessorTimePct::new_static::<50>()) 27 | .build(); 28 | 29 | println!("Starting with limit of 8 processors and 50% processor time."); 30 | 31 | // We start a bunch of worker threads, enough to saturate a bunch of processors. 32 | for _ in 0..100 { 33 | start_spinner(); 34 | } 35 | 36 | thread::sleep(Duration::from_secs(SLEEP_TIME_SECS)); 37 | 38 | drop(job); 39 | println!("Switching to 8 processors and 1% processor time."); 40 | 41 | let job = Job::builder() 42 | .with_processor_count(nz!(8)) 43 | .with_max_processor_time_pct(ProcessorTimePct::new_static::<1>()) 44 | .build(); 45 | 46 | thread::sleep(Duration::from_secs(SLEEP_TIME_SECS)); 47 | 48 | drop(job); 49 | println!("Switching to 1 processor and 80% processor time."); 50 | 51 | let job = Job::builder() 52 | .with_processor_count(nz!(1)) 53 | .with_max_processor_time_pct(ProcessorTimePct::new_static::<80>()) 54 | .build(); 55 | 56 | thread::sleep(Duration::from_secs(SLEEP_TIME_SECS)); 57 | 58 | drop(job); 59 | println!("Switching to 4 processors and 75% processor time."); 60 | 61 | let job = Job::builder() 62 | .with_processor_count(nz!(4)) 63 | .with_max_processor_time_pct(ProcessorTimePct::new_static::<75>()) 64 | .build(); 65 | 66 | thread::sleep(Duration::from_secs(SLEEP_TIME_SECS)); 67 | 68 | drop(job); 69 | println!("All done."); 70 | } 71 | 72 | fn start_spinner() { 73 | thread::spawn(|| { 74 | // Avoid the
spinning being troublesome for other threads by lowering thread priority. 75 | 76 | // SAFETY: No safety requirements. 77 | let current_thread = unsafe { GetCurrentThread() }; 78 | 79 | // SAFETY: No safety requirements. 80 | unsafe { 81 | SetThreadPriority(current_thread, THREAD_PRIORITY_IDLE).unwrap(); 82 | } 83 | 84 | // Spin spin spin spin. 85 | 86 | let mut i: usize = 0; 87 | 88 | loop { 89 | i = i.wrapping_add(1); 90 | } 91 | }); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /crates/testing/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Private helpers for testing and examples in Folo crates. 2 | 3 | #[cfg(windows)] 4 | mod windows; 5 | 6 | #[cfg(windows)] 7 | pub use windows::*; 8 | 9 | /// Calculates the difference between two f64 values and considers 10 | /// them equal if the difference is not more than `close_enough`. 11 | /// 12 | /// This is a "correctly performed" floating point equality comparison. 
13 | #[must_use] 14 | pub fn f64_diff_abs(a: f64, b: f64, close_enough: f64) -> f64 { 15 | let diff = (a - b).abs(); 16 | 17 | if diff <= close_enough { 0.0 } else { diff } 18 | } 19 | -------------------------------------------------------------------------------- /crates/testing/src/windows.rs: -------------------------------------------------------------------------------- 1 | mod job; 2 | 3 | pub use job::*; 4 | -------------------------------------------------------------------------------- /just_basics.just: -------------------------------------------------------------------------------- 1 | [group('basics')] 2 | build PROFILE='dev': 3 | cargo build {{ target_package }} --profile {{ PROFILE }} --all-features --all-targets 4 | 5 | [group('basics')] 6 | check PROFILE='dev': 7 | cargo check {{ target_package }} --profile {{ PROFILE }} --all-features --all-targets 8 | 9 | [group('basics')] 10 | clean: 11 | cargo clean 12 | 13 | [group('basics')] 14 | docs: 15 | #!{{ shebang }} 16 | $env:RUSTDOCFLAGS = "-D warnings" 17 | cargo doc {{ target_package }} --no-deps --all-features 18 | 19 | [group('basics')] 20 | docs-open: 21 | #!{{ shebang }} 22 | $env:RUSTDOCFLAGS = "-D warnings" 23 | cargo doc {{ target_package }} --no-deps --all-features --open 24 | -------------------------------------------------------------------------------- /just_quality.just: -------------------------------------------------------------------------------- 1 | [group('quality')] 2 | clippy PROFILE='dev': 3 | cargo clippy {{ target_package }} --profile {{ PROFILE }} --all-targets --all-features --locked -- -D warnings 4 | 5 | [group('quality')] 6 | coverage-measure: 7 | # Before running the tests, we need to clear old test coverage data because the coverage report 8 | # simply sums up all the data in the target folder, even if it is from old builds. 9 | cargo llvm-cov clean --workspace 10 | 11 | # This will run tests and generate test coverage data files, to be analyzed separately. 
12 | cargo llvm-cov nextest {{ target_package }} --all-targets --no-report --all-features --locked 13 | 14 | # This tool needs a different way to specify the package. 15 | coverage-package := if package == "" { "" } else { "-p " + package } 16 | 17 | [group('quality')] 18 | coverage-report: 19 | cargo llvm-cov report {{ coverage-package }} --open 20 | 21 | [group('quality')] 22 | format: 23 | cargo fmt --verbose --all 24 | cargo sort-derives 25 | 26 | [group('quality')] 27 | format-check: 28 | cargo fmt --verbose --all --check 29 | cargo sort-derives --check 30 | 31 | [group('quality')] 32 | hack: 33 | cargo hack check --feature-powerset 34 | 35 | [group('quality')] 36 | machete: 37 | cargo machete --skip-target-dir 38 | 39 | # Separate file because it is a giant script. 40 | import 'just_quality_mutants.just' 41 | 42 | # Full validation of primary factors, as you would do in a build pipeline before a release. 43 | # Skips some potentially very lengthy validation, which you can run separately via `validate-extra`. 44 | # We assume this is executed on Windows, and will also perform the full validation on Linux. 45 | [group('quality')] 46 | validate: validate-local validate-linux 47 | 48 | # Performs the part of the `validate` recipe that must run on Linux, when commanded from Windows. 49 | [group('quality')] 50 | validate-linux: 51 | wsl -e bash -l -c "just validate-local" 52 | 53 | # Full validation of primary factors, as you would do in a build pipeline before a release. 54 | # Performs validation on the current platform, whatever that may be. 
55 | [group('quality')] 56 | validate-local: 57 | cargo generate-lockfile 58 | just machete 59 | just format-check 60 | just clippy dev 61 | just check dev 62 | just test 63 | just test-docs 64 | just test-benches 65 | just docs 66 | just miri 67 | just clippy release 68 | just check release 69 | just build release 70 | just hack 71 | 72 | # Validation of extra factors that take potentially too long to run in regular validation. 73 | [group('quality')] 74 | validate-extra: validate-extra-local validate-extra-linux 75 | 76 | # Performs the part of the `validate-extra` recipe that must run on Linux, when commanded from Windows. 77 | [group('quality')] 78 | validate-extra-linux: 79 | wsl -e bash -l -c "just validate-extra-local" 80 | 81 | # Validation of extra factors that take potentially too long to run in regular validation. 82 | # Performs validation on the current platform, whatever that may be. 83 | [group('quality')] 84 | validate-extra-local: mutants 85 | -------------------------------------------------------------------------------- /just_quality_mutants.just: -------------------------------------------------------------------------------- 1 | [group('quality')] 2 | mutants: 3 | #!{{ shebang }} 4 | 5 | function Escape-Wildcards ($s) { 6 | if (!$IsLinux) { 7 | return $s 8 | } 9 | 10 | # On Linux, PowerShell has built-in globbing that expands wildcards. Unfortunately, 11 | # cargo mutants requires literal input values, globbing just breaks it. We convince 12 | # PowerShell to turn off globbing by single-quoting the arguments we fear may be 13 | # interpreted as wildcard glob expressions. 14 | "'" + $s + "'" 15 | } 16 | 17 | $args = @( 18 | "-e" 19 | # Parts of this crate require Criterion to work and other parts are currently not tested 20 | # as there is no public way to simulate a system topology for `many_cpus`. 21 | "many_cpus_benchmarking", 22 | 23 | "-e" 24 | # Macros are tested via the impl crate, mutations in the middle layer might not be detected. 
25 | "linked_macros" 26 | 27 | # We do not test facades, as they are just trivial code that forwards calls to real impls. 28 | "-e" 29 | (Escape-Wildcards "**/*facade.rs") 30 | "-e" 31 | "facade" 32 | 33 | "-e" 34 | # We have limited coverage of platform bindings because it can be difficult to set up the 35 | # right scenarios for each, given they are platform-dependent. Instead, we test higher 36 | # level code using a mock platform. 37 | "bindings" 38 | 39 | "-e" 40 | # This is just a different type of bindings, skipped for same reason as `bindings` above. 41 | (Escape-Wildcards "crates/many_cpus/src/pal/linux/filesystem/**") 42 | 43 | "-e" 44 | # All this is code only used in tests - we do not test this code itself. 45 | (Escape-Wildcards "crates/testing/**") 46 | ) 47 | 48 | if ($IsLinux) { 49 | $args += "-e" 50 | $args += (Escape-Wildcards "**/*windows.rs") 51 | 52 | $args += "-e" 53 | $args += "windows" 54 | } else { 55 | $args += "-e" 56 | $args += (Escape-Wildcards "**/*linux.rs") 57 | 58 | $args += "-e" 59 | $args += "linux" 60 | } 61 | 62 | # We deliberately do not use nextest here because it cannot run doctests. 63 | 64 | # Multi-job mutation on Linux does not appear to work well - it seems to cause some interference 65 | # between the jobs. Perhaps due to our CARGO_TARGET_DIR being shared between jobs? One job is 66 | # executing tests from the wrong job because they are overwriting each other? Simple fix is 67 | # to just use 1 job on Linux, which is good enough for now. 68 | if ($IsLinux) { 69 | $args += "--jobs" 70 | $args += "1" 71 | } else { 72 | $args += "--jobs" 73 | $args += "4" 74 | } 75 | 76 | # We must use Invoke-Expression to preserve the quotes around the wildcarded arguments on Linux. 
77 | $expanded_args = [String]::join(" ", $args) 78 | Invoke-Expression "cargo mutants {{ target_package }} --profile=mutants $expanded_args" 79 | -------------------------------------------------------------------------------- /just_release.just: -------------------------------------------------------------------------------- 1 | [group('release')] 2 | audit: 3 | cargo audit 4 | 5 | [group('release')] 6 | prepare-release: 7 | release-plz update 8 | 9 | [group('release')] 10 | release: 11 | #!{{ shebang }} 12 | $env:GIT_TOKEN = gh auth token 13 | try { 14 | release-plz release 15 | } finally { 16 | $env:GIT_TOKEN = $null 17 | } -------------------------------------------------------------------------------- /just_setup.just: -------------------------------------------------------------------------------- 1 | [group('setup')] 2 | install-tools: 3 | cargo install cargo-machete cargo-nextest release-plz cargo-semver-checks cargo-audit cargo-hack cargo-mutants cargo-llvm-cov cargo-sort-derives --locked 4 | rustup toolchain install nightly --component miri 5 | -------------------------------------------------------------------------------- /just_testing.just: -------------------------------------------------------------------------------- 1 | [group('testing')] 2 | bench TARGET="": 3 | #!{{ shebang }} 4 | $target_selector = @() 5 | 6 | if ("{{ TARGET }}" -ne "") { 7 | $target_selector += "--bench" 8 | $target_selector += "{{ TARGET }}" 9 | } 10 | 11 | cargo bench {{ target_package }} --all-features $target_selector 12 | 13 | [group('testing')] 14 | miri: 15 | cargo +nightly miri nextest run {{ target_package }} 16 | 17 | [group('testing')] 18 | test FILTER="": 19 | cargo nextest run {{ target_package }} --all-features {{ FILTER }} 20 | 21 | # We run benches separately because they are slow in Nextest multi-process mode, 22 | # probably due to the Gnuplot integration that spawns an external process. 
23 | [group('testing')] 24 | test-benches FILTER="": 25 | cargo test {{ target_package }} --benches --all-features {{ FILTER }} 26 | 27 | [group('testing')] 28 | test-docs FILTER="": 29 | cargo test {{ target_package }} --all-features --doc {{ FILTER }} 30 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | set windows-shell := ["pwsh.exe", "-NoLogo", "-NoProfile", "-NonInteractive", "-Command"] 2 | shebang := if os() == "windows" { "pwsh.exe" } else { "/usr/bin/env pwsh" } 3 | 4 | package := "" 5 | target_package := if package == "" { " --workspace" } else { " -p " + package } 6 | 7 | _default: 8 | @just --list 9 | 10 | import 'just_basics.just' 11 | import 'just_quality.just' 12 | import 'just_release.just' 13 | import 'just_setup.just' 14 | import 'just_testing.just' 15 | -------------------------------------------------------------------------------- /release-plz.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | # TEMP: it currently complains for some unclear reason. 3 | allow_dirty = true 4 | 5 | # At this point in pre-alpha time, changelogs are very messy and useless. 6 | changelog_update = false 7 | 8 | # All we care about is crates.io, Git releases are not used. 9 | git_release_enable = false 10 | 11 | [[package]] 12 | name = "linked" 13 | 14 | # Changelogs of these two are merged into the `linked` crate as the others are "invisible" crates. 15 | changelog_include = ["linked_macros", "linked_macros_impl"] 16 | 17 | # All the `linked*` crates are published under the same version, as they are 18 | # all the "logically same" crate, just separated for cargotechnical reasons. 19 | version_group = "linked" 20 | 21 | [[package]] 22 | name = "linked_macros" 23 | 24 | # This crate is invisible, changes are recorded in `linked` changelog instead.
25 | changelog_update = false 26 | 27 | # All the `linked*` crates are published under the same version, as they are 28 | # all the "logically same" crate, just separated for cargotechnical reasons. 29 | version_group = "linked" 30 | 31 | [[package]] 32 | name = "linked_macros_impl" 33 | 34 | # This crate is invisible, changes are recorded in `linked` changelog instead. 35 | changelog_update = false 36 | 37 | # All the `linked*` crates are published under the same version, as they are 38 | # all the "logically same" crate, just separated for cargotechnical reasons. 39 | version_group = "linked" -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.86" 3 | --------------------------------------------------------------------------------