├── .config
    └── nextest.toml
├── .gitattributes
├── .gitconfig
├── .github
    └── copilot-instructions.md
├── .gitignore
├── .vscode
    ├── settings.json
    └── tasks.json
├── Cargo.toml
├── DEVELOPMENT.md
├── LICENSE
├── README.md
├── RELEASING.md
├── clippy.toml
├── crates
    ├── benchmark_utils
    │   ├── Cargo.toml
    │   └── src
    │   │   ├── lib.rs
    │   │   └── threadpool.rs
    ├── benchmarks
    │   ├── Cargo.toml
    │   ├── benches
    │   │   ├── effects_of_memory.rs
    │   │   ├── effects_of_memory_windows.rs
    │   │   └── variable_access.rs
    │   └── src
    │   │   └── lib.rs
    ├── cpulist
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── examples
    │   │   ├── cpulist_basic.rs
    │   │   └── cpulist_stride.rs
    │   └── src
    │   │   ├── emit.rs
    │   │   ├── error.rs
    │   │   ├── lib.rs
    │   │   └── parse.rs
    ├── folo_ffi
    │   ├── Cargo.toml
    │   ├── README.md
    │   └── src
    │   │   ├── lib.rs
    │   │   └── native_buffer.rs
    ├── folo_utils
    │   ├── Cargo.toml
    │   ├── README.md
    │   └── src
    │   │   └── lib.rs
    ├── linked
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── benches
    │   │   ├── instance_per_thread.rs
    │   │   ├── instance_per_thread_sync.rs
    │   │   ├── instances.rs
    │   │   ├── static_thread_local_arc.rs
    │   │   └── static_thread_local_rc.rs
    │   ├── doc
    │   │   ├── instance_per_thread.mermaid
    │   │   ├── instance_per_thread_sync.mermaid
    │   │   └── linked.mermaid
    │   ├── examples
    │   │   ├── linked_basic.rs
    │   │   ├── linked_box.rs
    │   │   ├── linked_family.rs
    │   │   ├── linked_std_box.rs
    │   │   ├── linked_thread_local_arc.rs
    │   │   └── linked_thread_local_rc.rs
    │   ├── src
    │   │   ├── __private.rs
    │   │   ├── box.rs
    │   │   ├── constants.rs
    │   │   ├── family.rs
    │   │   ├── instance_per_thread.rs
    │   │   ├── instance_per_thread_sync.rs
    │   │   ├── lib.rs
    │   │   ├── macros.rs
    │   │   ├── object.rs
    │   │   ├── static_instance_per_thread.rs
    │   │   ├── static_instance_per_thread_sync.rs
    │   │   ├── static_instances.rs
    │   │   └── thread_id_hash.rs
    │   └── tests
    │   │   ├── linked_object.rs
    │   │   └── smoke.rs
    ├── linked_macros
    │   ├── Cargo.toml
    │   └── src
    │   │   └── lib.rs
    ├── linked_macros_impl
    │   ├── Cargo.toml
    │   └── src
    │   │   ├── lib.rs
    │   │   ├── linked_object.rs
    │   │   └── syn_helpers.rs
    ├── many_cpus
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── benches
    │   │   ├── hardware_info.rs
    │   │   ├── hardware_tracker.rs
    │   │   ├── pal_windows.rs
    │   │   └── processor_set_builder.rs
    │   ├── docs
    │   │   └── snippets
    │   │   │   ├── changes_at_runtime.md
    │   │   │   └── external_constraints.md
    │   ├── examples
    │   │   ├── get_all_processors.rs
    │   │   ├── get_default_processors.rs
    │   │   ├── obey_job_affinity_windows.rs
    │   │   ├── obey_job_resource_quota_windows.rs
    │   │   ├── observe_processor.rs
    │   │   ├── spawn_on_all_processors.rs
    │   │   ├── spawn_on_any_processors.rs
    │   │   ├── spawn_on_inherited_processors.rs
    │   │   └── spawn_on_selected_processors.rs
    │   ├── src
    │   │   ├── clients.rs
    │   │   ├── clients
    │   │   │   ├── hw_tracker_client.rs
    │   │   │   └── hw_tracker_facade.rs
    │   │   ├── hardware_info.rs
    │   │   ├── hardware_tracker.rs
    │   │   ├── lib.rs
    │   │   ├── pal.rs
    │   │   ├── pal
    │   │   │   ├── abstractions.rs
    │   │   │   ├── abstractions
    │   │   │   │   ├── platform.rs
    │   │   │   │   └── processor.rs
    │   │   │   ├── facade.rs
    │   │   │   ├── facade
    │   │   │   │   ├── platform.rs
    │   │   │   │   └── processor.rs
    │   │   │   ├── linux.rs
    │   │   │   ├── linux
    │   │   │   │   ├── bindings.rs
    │   │   │   │   ├── bindings
    │   │   │   │   │   ├── abstractions.rs
    │   │   │   │   │   ├── facade.rs
    │   │   │   │   │   └── real.rs
    │   │   │   │   ├── filesystem.rs
    │   │   │   │   ├── filesystem
    │   │   │   │   │   ├── abstractions.rs
    │   │   │   │   │   ├── facade.rs
    │   │   │   │   │   └── real.rs
    │   │   │   │   ├── platform.rs
    │   │   │   │   └── processor.rs
    │   │   │   ├── mocks.rs
    │   │   │   ├── windows.rs
    │   │   │   └── windows
    │   │   │   │   ├── bindings.rs
    │   │   │   │   ├── bindings
    │   │   │   │       ├── abstractions.rs
    │   │   │   │       ├── facade.rs
    │   │   │   │       └── real.rs
    │   │   │   │   ├── group_mask.rs
    │   │   │   │   ├── platform.rs
    │   │   │   │   └── processor.rs
    │   │   ├── primitive_types.rs
    │   │   ├── processor.rs
    │   │   ├── processor_set.rs
    │   │   ├── processor_set_builder.rs
    │   │   └── resource_quota.rs
    │   └── tests
    │   │   └── job_limits_windows.rs
    ├── many_cpus_benchmarking
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── benches
    │   │   └── many_cpus_harness_demo.rs
    │   ├── images
    │   │   └── work_distribution_comparison.png
    │   └── src
    │   │   ├── cache.rs
    │   │   ├── lib.rs
    │   │   ├── payload.rs
    │   │   ├── run.rs
    │   │   └── work_distribution.rs
    ├── region_cached
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── benches
    │   │   └── region_cached.rs
    │   ├── doc
    │   │   └── region_cached.mermaid
    │   ├── examples
    │   │   ├── region_cached_1gb.rs
    │   │   ├── region_cached_log_filtering.rs
    │   │   ├── region_cached_log_filtering_no_statics.rs
    │   │   └── region_cached_web.rs
    │   └── src
    │   │   ├── __private.rs
    │   │   ├── clients.rs
    │   │   ├── clients
    │   │       ├── hw_info_client.rs
    │   │       ├── hw_info_facade.rs
    │   │       ├── hw_tracker_client.rs
    │   │       └── hw_tracker_facade.rs
    │   │   ├── lib.rs
    │   │   ├── macros.rs
    │   │   ├── region_cached.rs
    │   │   └── region_cached_ext.rs
    ├── region_local
    │   ├── Cargo.toml
    │   ├── README.md
    │   ├── benches
    │   │   └── region_local.rs
    │   ├── doc
    │   │   └── region_local.mermaid
    │   ├── examples
    │   │   ├── region_local_1gb.rs
    │   │   └── region_local_web.rs
    │   └── src
    │   │   ├── __private.rs
    │   │   ├── clients.rs
    │   │   ├── clients
    │   │       ├── hw_info_client.rs
    │   │       ├── hw_info_facade.rs
    │   │       ├── hw_tracker_client.rs
    │   │       └── hw_tracker_facade.rs
    │   │   ├── lib.rs
    │   │   ├── macros.rs
    │   │   ├── region_local.rs
    │   │   └── region_local_ext.rs
    └── testing
    │   ├── Cargo.toml
    │   ├── examples
    │       └── spin_cpu_windows.rs
    │   └── src
    │       ├── lib.rs
    │       ├── windows.rs
    │       └── windows
    │           └── job.rs
├── just_basics.just
├── just_quality.just
├── just_quality_mutants.just
├── just_release.just
├── just_setup.just
├── just_testing.just
├── justfile
├── release-plz.toml
└── rust-toolchain.toml


/.config/nextest.toml:
--------------------------------------------------------------------------------
1 | [profile.default]
2 | # The leak detector is just bad and calls everything a leaky test because it relies on timing
3 | # coincidences to do its job. It is neither an accurate nor a valuable feature and we would rather
4 | # disable it but there is no off button so let's just set a high timeout that avoids it complaining.
5 | leak-timeout = "10s"
6 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Our regular development environment is dual-OS - the same working directory is accessed
2 | # from both Windows and Linux. This means they need to agree on the line endings! All text
3 | # is therefore using Linux line endings (LF).
4 | * text eol=lf
5 | 
6 | # Binary files often seen in our documentation.
7 | *.png filter=lfs diff=lfs merge=lfs -text
8 | *.jpg filter=lfs diff=lfs merge=lfs -text
9 | 


--------------------------------------------------------------------------------
/.gitconfig:
--------------------------------------------------------------------------------
1 | [core]
2 | # We do not use CRLF but if someone (or some automation) happens
3 | # to try to commit a CRLF file, we convert it to LF for storage in the Git index.
4 | autocrlf = input
5 | 
6 | # Scream if files do not match what .gitattributes requires.
7 | safecrlf = true
8 | 


--------------------------------------------------------------------------------
/.github/copilot-instructions.md:
--------------------------------------------------------------------------------
 1 | # Standard commands
 2 | 
 3 | We use the Just command runner for many common commands - look inside *.just files to see the
 4 | list of available commands. Some relevant ones are:
 5 | 
 6 | * `just build` - build the entire workspace
 7 | * `just package=many_cpus build` - build a single package (most commands accept a `package` parameter)
 8 | * `just test` - test the entire workspace
 9 | 
10 | Avoid running `just bench`, as the benchmarks take a lot of time and `just test` will anyway run
11 | a single benchmark iteration to validate they are still working.
12 | 
13 | We generally prefer using Just commands over raw Cargo commands if there is a suitable Just command
14 | defined in one of the *.just files.
15 | 
16 | Do not execute `just release` - this is a critical tool reserved for human use.
17 | 
18 | # Validating changes
19 | 
20 | Use `just test` to verify that the code compiles and tests pass.
21 | 
22 | Use `just clippy` to verify that all linter rules pass. We operate under a "zero warnings allowed"
23 | requirement - fix all warnings that Clippy generates.
24 | 
25 | Use `just format` to apply auto-formatting to code files, ensuring consistent code style.
26 | 
27 | # Multiplatform codebase
28 | 
29 | This is a multiplatform codebase. In some crates you will find folders named `linux` and `windows`,
30 | which contain platform-specific code. When modifying files of one platform, you are also expected
31 | to make the equivalent modifications in the other.
32 | 
33 | By default, we are operating on Windows. However, you can also invoke commands on Linux using the
34 | syntax `wsl -e bash -l -c "command"`. For example, to test on both Windows and Linux, execute:
35 | 
36 | 1. `just test`
37 | 2. `wsl -e bash -l -c "just test"`
38 | 
39 | You are expected to validate all changes on both operating systems.
40 | 
41 | # Facades and abstractions
42 | 
43 | Some crates like `many_cpus` use a platform abstraction layer (PAL), where an abstraction like
44 | `trait Platform` defined in `crates/many_cpus/src/pal/abstractions/**` has multiple different
45 | implementations:
46 | 
47 | 1. A Windows implementation (`crates/many_cpus/src/pal/windows/**`)
48 | 2. A Linux implementation (`crates/many_cpus/src/pal/linux/**`)
49 | 3. A mock implementation (`crates/many_cpus/src/pal/mocks.rs`)
50 | 
51 | Logic code will consume this abstraction via facade types, which can either call into the real
52 | implementation of the build target platform (Windows or Linux) or the mock implementation (only
53 | when building in test mode). The facades are defined in `crates/many_cpus/src/pal/facade/**` and
54 | only exist to be minimal pass-through layers to allow swapping in the mock implementation in tests.
55 | 
56 | When modifying the API of the PAL, you are expected to make the API changes in the
57 | abstraction, facade and implementation types at the same time, as the API surface must match.
58 | 
59 | The same pattern may also be used elsewhere (e.g. inside the PAL implementations as a second layer
60 | of abstraction, or in other crates).
61 | 
62 | # Filesystem structure
63 | 
64 | We prefer many smaller files over few large files, typically only packing implementation details
65 | and unit tests into the same file but keeping separate API-visible types in separate files (even
66 | if only API-visible inside the same crate).
67 | 
68 | We prefer to keep the public API relatively flat - even if we create separate Rust modules for
69 | types, we re-export them all at the parent, so while we have modules like
70 | `crates/many_cpus/src/hardware_tracker.rs` the type itself is exported at the crate root as
71 | `many_cpus::HardwareTracker` instead of at the module as `many_cpus::hardware_tracker::HardwareTracker`.
72 | 
73 | # Scripting
74 | 
75 | You can assume PowerShell is available. Prefer PowerShell over Bash.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | mutants.out
 2 | mutants.out.old
 3 | 
 4 | # Generated by Cargo
 5 | # will have compiled files and executables
 6 | debug/
 7 | target/
 8 | 
 9 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
10 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
11 | Cargo.lock
12 | 
13 | # These are backup files generated by rustfmt
14 | **/*.rs.bk
15 | 
16 | # MSVC Windows builds of rustc generate these, which store debugging information
17 | *.pdb
18 | 
19 | # RustRover
20 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
21 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
22 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
23 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
24 | #.idea/


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "cSpell.words": [
 3 |         "affinitization",
 4 |         "affinitized",
 5 |         "affinitizing",
 6 |         "cpulist",
 7 |         "cpus",
 8 |         "foldhash",
 9 |         "Folo",
10 |         "heapless",
11 |         "JOBOBJECT",
12 |         "metas",
13 |         "miri",
14 |         "nanos",
15 |         "nextest",
16 |         "nonoverlapping",
17 |         "pointee",
18 |         "taskset",
19 |         "withf"
20 |     ],
21 |     "rust-analyzer.cargo.cfgs": [
22 |         "debug_assertions"
23 |     ]
24 | }


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "2.0.0",
 3 |     "tasks": [
 4 |         {
 5 |             "type": "process",
 6 |             "command": "just",
 7 |             "args": [
 8 |                 "build",
 9 |             ],
10 |             "problemMatcher": [
11 |                 "$rustc"
12 |             ],
13 |             "group": {
14 |                 "kind": "build",
15 |                 "isDefault": true
16 |             },
17 |             "label": "just: build"
18 |         },
19 |         {
20 |             "type": "process",
21 |             "command": "just",
22 |             "args": [
23 |                 "test",
24 |             ],
25 |             "problemMatcher": [
26 |                 "$rustc"
27 |             ],
28 |             "group": {
29 |                 "kind": "test",
30 |                 "isDefault": true
31 |             },
32 |             "label": "just: test"
33 |         },
34 |         {
35 |             "type": "process",
36 |             "command": "just",
37 |             "args": [
38 |                 "test-docs",
39 |             ],
40 |             "problemMatcher": [
41 |                 "$rustc"
42 |             ],
43 |             "group": {
44 |                 "kind": "test"
45 |             },
46 |             "label": "just: test-docs"
47 |         },
48 |         {
49 |             "type": "process",
50 |             "command": "just",
51 |             "args": [
52 |                 "docs"
53 |             ],
54 |             "problemMatcher": [
55 |                 "$rustc"
56 |             ],
57 |             "group": "none",
58 |             "label": "just: docs"
59 |         },
60 |         {
61 |             "type": "process",
62 |             "command": "just",
63 |             "args": [
64 |                 "bench"
65 |             ],
66 |             "problemMatcher": [
67 |                 "$rustc"
68 |             ],
69 |             "group": "none",
70 |             "label": "just: bench"
71 |         },
72 |     ]
73 | }


--------------------------------------------------------------------------------
/DEVELOPMENT.md:
--------------------------------------------------------------------------------
 1 | # The basics
 2 | 
 3 | This is a multiplatform project supporting both Windows and Linux. Development of the Linux
 4 | functionality takes place in a Windows Subsystem for Linux (WSL) virtual machine.
 5 | 
 6 | See `rust-toolchain.toml` for the required stable Rust toolchain version. The `nightly` toolchain
 7 | is also required for some development tooling.
 8 | 
 9 | # Development environment setup (Windows)
10 | 
11 | Prerequisites:
12 | 
13 | * Windows 11
14 | * Visual Studio 2022 with workload "Desktop development with C++"
15 | * Visual Studio Code with extensions:
16 |     * C/C++
17 |     * rust-analyzer
18 |     * vscode-just
19 |     * WSL
20 | * PowerShell 7
21 | * Rust development tools for version listed in `rust-toolchain.toml`
22 | * `rustup toolchain install nightly`
23 | * `cargo install just`
24 | * (Only if publishing releases) GitHub CLI + `gh auth login`
25 | 
26 | Setup:
27 | 
28 | 1. Clone the repo to a directory of your choosing.
29 | 1. Open a terminal in the repo root.
30 | 1. Execute `git config --local include.path ./.gitconfig` to attach the repo-specific Git configuration.
31 | 1. Execute `just install-tools` to install development tools.
32 | 
33 | Validation:
34 | 
35 | 1. Open repo directory in Visual Studio Code.
36 | 1. Execute from task palette (F1):
37 |     * `Tasks: Run Build Task`
38 |     * `Tasks: Run Test Task`
39 | 1. Execute `just validate-local` in terminal.
40 | 
41 | # Development environment setup (Linux)
42 | 
43 | Prerequisites:
44 | 
45 | * Ubuntu 24 installed in WSL
46 | * `sudo apt install -y git git-lfs build-essential cmake gcc make curl`
47 | * [PowerShell 7](https://learn.microsoft.com/en-us/powershell/scripting/install/install-ubuntu?view=powershell-7.5)
48 | * Rust development tools for version listed in `rust-toolchain.toml`
49 | * `rustup toolchain install nightly`
50 | * `cargo install just`
51 | * If first time Git setup, execute `git config --global credential.helper "/mnt/c/Program\ Files/Git/mingw64/bin/git-credential-manager.exe"` to setup authentication flow
52 | 
53 | Setup:
54 | 
55 | 1. Navigate to repo shared with Windows host (under `/mnt/c/`). Do not create a separate clone of the repo for Linux.
56 | 1. Execute `just install-tools` to install development tools.
57 | 1. Open Visual Studio Code via `code .`
58 | 1. If first time setup, install required Visual Studio Code extensions:
59 |     * C/C++
60 |     * rust-analyzer
61 | 
62 | Validation:
63 | 
64 | 1. Execute from task palette (F1):
65 |     * `Tasks: Run Build Task`
66 |     * `Tasks: Run Test Task`
67 | 1. Execute `just validate-local` in terminal.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024+ Folo authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Folo
 2 | 
 3 | Mechanisms for high-performance hardware-aware programming in Rust.
 4 | 
 5 | The design tenets this project aims to satisfy are the following:
 6 | 
 7 | * In services, keep the processing of each request on a single processor to ensure both that data
 8 |   is locally cached for fast access and to avoid polluting caches of many processors with data of
 9 |   a single request.
10 | * Be aware of [memory region boundaries](https://www.kernel.org/doc/html/v4.18/vm/numa.html)
11 |   when scheduling work and placing data. Avoid moving data across these boundaries because it can
12 |   be very slow.
13 | * Use single-threaded logic without synchronization - even atomics and "lock-free" synchronization
14 |   primitives are expensive compared to single-threaded logic. Whenever feasible, use `!Send` types
15 |   to avoid accidental multithreading. Maintain separate mutable data sets per thread or memory
16 |   region instead of maintaining global data sets.
17 | * Use asynchronous logic in the app, in library code and when communicating with the operating
18 |   system, ensuring that a thread is never blocked from doing useful work.
19 | 
20 | # Contents
21 | 
22 | This is an umbrella project that covers multiple largely independent crates:
23 | 
24 | | Crate                                                               | Description                                                                                                  |
25 | |---------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------|
26 | | [`linked`](crates/linked/README.md) + siblings                      | Create families of linked objects that can collaborate across threads while being internally single-threaded |
27 | | [`many_cpus`](crates/many_cpus/README.md)                           | Efficiently schedule work and inspect the hardware environment on many-processor systems                     |
28 | | [`many_cpus_benchmarking`](crates/many_cpus_benchmarking/README.md) | Criterion benchmark harness to easily compare different processor configurations                             |
29 | | [`region_cached`](crates/region_cached/README.md)                   | Add a layer of cache between L3 and main memory                                                              |
30 | | [`region_local`](crates/region_local/README.md)                     | Isolate variable storage per memory region, similar to `thread_local_rc!`                                       |
31 | 
32 | Some auxiliary crates are also published because the primary crates above require their
33 | functionality. They only indirectly contribute to the Folo mission, so are listed separately:
34 | 
35 | | Crate                                       | Description                                                                                              |
36 | |---------------------------------------------|----------------------------------------------------------------------------------------------------------|
37 | | [`cpulist`](crates/cpulist/README.md)       | Utilities for parsing and emitting Linux cpulist strings                                                 |
38 | | [`folo_ffi`](crates/folo_ffi/README.md)     | Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface      |
39 | | [`folo_utils`](crates/folo_utils/README.md) | Utilities for internal use in Folo crates; no stable API surface                                         |
40 | 
41 | There are also some development-only crates in this repo, which are not published:
42 | 
43 | | Crate                                       | Description                                                                        |
44 | |---------------------------------------------|------------------------------------------------------------------------------------|
45 | | [`benchmark_utils`](crates/benchmark_utils) | Common benchmarking logic used across the crates in this project                   |
46 | | [`benchmarks`](crates/benchmarks)           | Random pile of benchmarks to explore relevant scenarios and guide Folo development |
47 | | [`testing`](crates/testing)                 | Private helpers for testing and examples in Folo crates                            |
48 | 
49 | # Development environment setup
50 | 
51 | See [DEVELOPMENT.md](DEVELOPMENT.md).


--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | # Guide to releasing a new version
2 | 
3 | 1. Validate everything via `just validate` on Windows (will automatically invoke Linux validation).
4 | 1. Execute `just prepare-release` on `main` branch to increment version numbers and update changelogs.
5 |     * Verify pending changes manually and adjust as necessary.
6 |     * Commit as "chore: prepare for release" when satisfied with the changes.
7 |     * `git push`
8 | 1. Execute `just release` to upload new packages to `crates.io`.
9 | 


--------------------------------------------------------------------------------
/clippy.toml:
--------------------------------------------------------------------------------
1 | # This file contains fine-tuning settings that cannot be specified in Cargo.toml (which only supports "error level").
2 | 
3 | # Absolute paths of length 3 can be useful to emphasize where a particular symbol is coming from,
4 | # e.g. by using "std::sync::Mutex" versus "tokio::sync::Mutex".  Anything beyond 3 segments seems
5 | # excessively verbose, so we limit it to 3 - import or alias to shorten longer symbol paths.
6 | absolute-paths-max-segments = 3


--------------------------------------------------------------------------------
/crates/benchmark_utils/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "benchmark_utils"
 3 | description = "Common benchmarking logic used across the crates in this project"
 4 | publish = false
 5 | version = "0.0.1-never"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [dependencies]
17 | folo_utils = { workspace = true }
18 | many_cpus = { workspace = true }
19 | 
20 | oneshot = { workspace = true }
21 | 
22 | [dev-dependencies]
23 | mutants = { workspace = true }
24 | 
25 | [lints]
26 | workspace = true
27 | 


--------------------------------------------------------------------------------
/crates/benchmarks/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "benchmarks"
 3 | description = "Random pile of benchmarks to explore relevant scenarios and guide Folo development"
 4 | publish = false
 5 | version = "0.0.1-never"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [dev-dependencies]
17 | linked = { workspace = true }
18 | many_cpus = { workspace = true }
19 | many_cpus_benchmarking = { workspace = true }
20 | 
21 | criterion = { workspace = true }
22 | fake_headers = { workspace = true }
23 | frozen-collections = { workspace = true }
24 | http = { workspace = true }
25 | scc = { workspace = true }
26 | 
27 | [target.'cfg(windows)'.dev-dependencies]
28 | windows = { workspace = true, features = ["Win32_System_Memory"] }
29 | 
30 | [[bench]]
31 | name = "variable_access"
32 | harness = false
33 | 
34 | [[bench]]
35 | name = "effects_of_memory"
36 | harness = false
37 | 
38 | [[bench]]
39 | name = "effects_of_memory_windows"
40 | harness = false
41 | 
42 | [lints]
43 | workspace = true
44 | 


--------------------------------------------------------------------------------
/crates/benchmarks/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! This crate contains nothing, it exists just as a container for benchmarks.
2 | 


--------------------------------------------------------------------------------
/crates/cpulist/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "cpulist"
 3 | description = "Parse and emit the Linux 'cpulist' data format used to list processors, memory regions and similar entities"
 4 | publish = true
 5 | version = "0.2.0"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | folo_utils = { workspace = true }
18 | include-doc = { workspace = true }
19 | itertools = { workspace = true }
20 | thiserror = { workspace = true }
21 | 
22 | [dev-dependencies]
23 | 
24 | [lints]
25 | workspace = true
26 | 


--------------------------------------------------------------------------------
/crates/cpulist/README.md:
--------------------------------------------------------------------------------
 1 | Utilities for parsing and emitting strings in the `cpulist` format often used by Linux
 2 | utilities that work with processor IDs, memory region IDs and similar numeric hardware
 3 | identifiers.
 4 | 
 5 | Example cpulist string: `0,1,2-4,5-9:2,6-10:2`
 6 | 
 7 | More details in the [crate documentation](https://docs.rs/cpulist/).
 8 | 
 9 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
10 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/cpulist/examples/cpulist_basic.rs:
--------------------------------------------------------------------------------
 1 | //! Parsing a cpulist string and emitting it back to the terminal.
 2 | 
 3 | fn main() {
 4 |     let selected_processors = cpulist::parse("0-9,32-35,40").unwrap();
 5 |     assert_eq!(
 6 |         selected_processors,
 7 |         vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 32, 33, 34, 35, 40]
 8 |     );
 9 | 
10 |     println!("Selected processors: {selected_processors:?}");
11 |     println!("As cpulist: {}", cpulist::emit(selected_processors));
12 | }
13 | 


--------------------------------------------------------------------------------
/crates/cpulist/examples/cpulist_stride.rs:
--------------------------------------------------------------------------------
 1 | //! The stride operator can be used to divide ranges into any number of individual series.
 2 | 
 3 | fn main() {
 4 |     let evens = cpulist::parse("0-16:2").unwrap(); // Range 0-16 with a stride (step size) of 2.
 5 |     let odds = cpulist::parse("1-16:2").unwrap(); // Range 1-16 with a stride (step size) of 2.
 6 |     // Chain the odd IDs followed by the even IDs into one sequence and emit it as one cpulist string.
 7 |     let all = cpulist::emit(odds.iter().chain(evens.iter()).copied());
 8 | 
 9 |     println!("Evens: {evens:?}");
10 |     println!("Odds: {odds:?}");
11 | 
12 |     println!("All as cpulist: {all}");
13 | }
14 | 


--------------------------------------------------------------------------------
/crates/cpulist/src/error.rs:
--------------------------------------------------------------------------------
 1 | use thiserror::Error;
 2 | 
 3 | /// Errors that can occur when processing cpulist strings.
 4 | #[derive(Debug, Error)]
 5 | #[non_exhaustive] // Variants may be added later; downstream `match`es need a wildcard arm.
 6 | pub enum Error {
 7 |     /// The caller provided a supposed cpulist string but it did not match the expected format.
 8 |     #[error("invalid cpulist syntax: '{invalid_value}' is invalid: {problem}")]
 9 |     InvalidSyntax {
10 |         /// The specific value that was invalid. This may either be the entire cpulist string
11 |         /// or a specific part of it, depending on the problem.
12 |         invalid_value: String,
13 | 
14 |         /// A human-readable description of the problem.
15 |         problem: String,
16 |     },
17 | }
18 | 
19 | /// A specialized `Result` type for cpulist operations, returning the crate's
20 | /// [`Error`] type as the error value.
21 | pub(crate) type Result<T> = std::result::Result<T, Error>; // Crate-private shorthand.
22 | 


--------------------------------------------------------------------------------
/crates/cpulist/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! Utilities for parsing and emitting strings in the `cpulist` format often used by Linux
 2 | //! utilities that work with processor IDs, memory region IDs and similar numeric hardware
 3 | //! identifiers.
 4 | //!
 5 | //! Example cpulist string: `0-9,32-35,40`
 6 | //!
 7 | //! This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
 8 | //! high-performance hardware-aware programming in Rust.
 9 | //!
10 | //! # Format
11 | //!
12 | //! The value is a comma-separated list of zero or more integers or integer ranges, where each item
13 | //! is either:
14 | //!
15 | //! * a single integer (e.g. `1`)
16 | //! * a range of integers (e.g. `2-4`)
17 | //! * a range of integers with a stride (step size) operator (e.g. `5-9:2` which is equivalent to `5,7,9`)
18 | //!
19 | //! Whitespace or extra characters are not allowed anywhere in the string.
20 | //!
21 | //! The identifiers in the list are of size `u32`.
22 | //!
23 | //! # Example
24 | //!
25 | //! Basic conversion from/to strings:
26 | //!
27 | //! ```
28 | #![doc = source_file!("examples/cpulist_basic.rs")]
29 | //! ```
30 | //!
31 | //! The stride operator is also supported for parsing:
32 | //!
33 | //! ```
34 | #![doc = source_file!("examples/cpulist_stride.rs")]
35 | //! ```
36 | 
37 | use include_doc::source_file;
38 | 
39 | mod emit;
40 | mod error;
41 | mod parse;
42 | 
43 | pub use emit::*;
44 | pub use error::*;
45 | pub use parse::*;
46 | 
47 | pub(crate) type Item = u32; // Presumably the type of one identifier in a cpulist (the crate docs state identifiers are `u32`).
48 | 


--------------------------------------------------------------------------------
/crates/folo_ffi/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "folo_ffi"
 3 | description = "Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface"
 4 | publish = true
 5 | version = "0.1.2"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [features]
17 | default = []
18 | 
19 | [dependencies]
20 | 
21 | [dev-dependencies]
22 | mutants = { workspace = true }
23 | 
24 | [lints]
25 | workspace = true
26 | 


--------------------------------------------------------------------------------
/crates/folo_ffi/README.md:
--------------------------------------------------------------------------------
1 | Utilities for working with FFI calls. This exists to serve the internal FFI needs of Folo crates.
2 | Accordingly, the crate has no stable API surface.
3 | 
4 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
5 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/folo_ffi/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Utilities for working with FFI logic; exists for internal use in Folo crates; no stable API surface.
2 | 
3 | mod native_buffer;
4 | 
5 | pub use native_buffer::*;
6 | 


--------------------------------------------------------------------------------
/crates/folo_utils/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "folo_utils"
 3 | description = "Utilities for internal use in Folo crates; no stable API surface"
 4 | publish = true
 5 | version = "0.1.0"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [features]
17 | default = []
18 | 
19 | [dependencies]
20 | 
21 | [dev-dependencies]
22 | mutants = { workspace = true }
23 | 
24 | [lints]
25 | workspace = true
26 | 


--------------------------------------------------------------------------------
/crates/folo_utils/README.md:
--------------------------------------------------------------------------------
1 | Utilities for internal use in Folo crates.
2 | This exists to serve the internal needs of Folo crates.
3 | Accordingly, the crate has no stable API surface.
4 | 
5 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
6 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/folo_utils/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! Utilities for internal use in Folo crates; no stable API surface.
 2 | 
 3 | /// Creates a `NonZero` constant from a literal value; evaluated in a `const` block, so a zero literal fails at compile time.
 4 | #[macro_export]
 5 | macro_rules! nz {
 6 |     ($x:literal) => {
 7 |         const { ::std::num::NonZero::new($x).expect("literal must have non-zero value") }
 8 |     };
 9 | }
10 | 


--------------------------------------------------------------------------------
/crates/linked/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "linked"
 3 | description = "Create families of linked objects that can collaborate across threads while being internally single-threaded"
 4 | publish = true
 5 | version = "0.2.0"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | hash_hasher = { workspace = true }
18 | linked_macros = { workspace = true }
19 | paste = { workspace = true }
20 | simple-mermaid = { workspace = true }
21 | 
22 | [dev-dependencies]
23 | benchmark_utils = { workspace = true }
24 | criterion = { workspace = true }
25 | many_cpus = { workspace = true }
26 | mutants = { workspace = true }
27 | seq-macro = { workspace = true }
28 | 
29 | [[bench]]
30 | name = "instances"
31 | harness = false
32 | 
33 | [[bench]]
34 | name = "instance_per_thread"
35 | harness = false
36 | 
37 | [[bench]]
38 | name = "instance_per_thread_sync"
39 | harness = false
40 | 
41 | [[bench]]
42 | name = "static_thread_local_arc"
43 | harness = false
44 | 
45 | [[bench]]
46 | name = "static_thread_local_rc"
47 | harness = false
48 | 
49 | [lints]
50 | workspace = true
51 | 


--------------------------------------------------------------------------------
/crates/linked/README.md:
--------------------------------------------------------------------------------
 1 | Mechanisms for creating families of linked objects that can collaborate across threads,
 2 | with each instance only used from a single thread.
 3 | 
 4 | The problem this crate solves is that while writing highly efficient lock-free thread-local
 5 | code can yield great performance, it comes with serious drawbacks in terms of usability and
 6 | developer experience.
 7 | 
 8 | This crate bridges the gap by providing patterns and mechanisms that facilitate thread-local
 9 | behavior while presenting a simple and reasonably ergonomic API to user code:
10 | 
11 | * Internally, a linked object can take advantage of lock-free thread-isolated logic for **high
12 |   performance and efficiency** because it operates as a multithreaded family of thread-isolated
13 |   objects, each of which implements local behavior on a single thread.
14 | * Externally, the linked object family can look and act very much like a single Rust object and
15 |   can hide the fact that there is collaboration happening on multiple threads,
16 |   providing **a reasonably simple API with minimal extra complexity** for both the author
17 |   and the user of a type.
18 | 
19 | The patterns and mechanisms provided by this crate are designed to make it easy to create linked
20 | object families and to provide primitives that allow these object families to be used without
21 | the user code having to understand how the objects are wired up inside or keeping track of which
22 | instance is meant to be used on which thread.
23 | 
24 | More details in the [crate documentation](https://docs.rs/linked/).
25 | 
26 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
27 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/linked/benches/instances.rs:
--------------------------------------------------------------------------------
  1 | //! Basic operations on the `instances!` macro and underlying type.
  2 | 
  3 | #![allow(
  4 |     missing_docs,
  5 |     reason = "No need for API documentation in benchmark code"
  6 | )]
  7 | 
  8 | use std::{
  9 |     hint::black_box,
 10 |     sync::{Arc, atomic::AtomicUsize},
 11 | };
 12 | 
 13 | use benchmark_utils::{ThreadPool, bench_on_threadpool};
 14 | use criterion::{BatchSize, Criterion, criterion_group, criterion_main};
 15 | use seq_macro::seq;
 16 | 
 17 | criterion_group!(benches, entrypoint);
 18 | criterion_main!(benches);
 19 | 
 20 | #[expect(
 21 |     dead_code,
 22 |     reason = "We do not care about using all the fields but we want to pay the price of initializing them"
 23 | )]
 24 | #[linked::object]
 25 | struct TestSubject {
 26 |     local_state: AtomicUsize, // Initialized to a fresh zero counter by the instance template in `new()`.
 27 |     shared_state: Arc<AtomicUsize>, // Initialized from a clone of the one `Arc` created in `new()`.
 28 | }
 29 | 
 30 | impl TestSubject {
 31 |     fn new() -> Self {
 32 |         let shared_state = Arc::new(AtomicUsize::new(0));
 33 |         // The `Self` struct-expression below is the instance template handed to `linked::new!`.
 34 |         linked::new!(Self {
 35 |             local_state: AtomicUsize::new(0),
 36 |             shared_state: Arc::clone(&shared_state),
 37 |         })
 38 |     }
 39 | }
 40 | 
 41 | linked::instances!(static TARGET: TestSubject = TestSubject::new()); // Read by the "instances::get" benchmark group.
 42 | 
 43 | fn entrypoint(c: &mut Criterion) {
 44 |     let thread_pool = ThreadPool::default();
 45 |     // Group 1: each iteration calls `TARGET.get()` once and reads the weak count of its shared `Arc`.
 46 |     let mut g = c.benchmark_group("instances::get");
 47 | 
 48 |     g.bench_function("single-threaded", |b| {
 49 |         b.iter(|| black_box(Arc::weak_count(&TARGET.get().shared_state)));
 50 |     });
 51 | 
 52 |     g.bench_function("multi-threaded", |b| {
 53 |         b.iter_custom(|iters| {
 54 |             bench_on_threadpool(
 55 |                 &thread_pool,
 56 |                 iters,
 57 |                 || (),
 58 |                 |()| {
 59 |                     black_box(Arc::weak_count(&TARGET.get().shared_state));
 60 |                 },
 61 |             )
 62 |         });
 63 |     });
 64 | 
 65 |     g.finish();
 66 |     // Group 2: each iteration calls `.get()` on all 1000 `TARGET_MANY_N` variables defined below.
 67 |     let mut g = c.benchmark_group("instances::get_1000");
 68 | 
 69 |     g.bench_function("single-threaded", |b| {
 70 |         b.iter_batched_ref(
 71 |             LinkedVariableClearGuard::default, // Clears the linked variables when created and again when dropped.
 72 |             |_| {
 73 |                 seq!(N in 0..1000 {
 74 |                     black_box(Arc::weak_count(&TARGET_MANY_~N.get().shared_state));
 75 |                 });
 76 |             },
 77 |             BatchSize::SmallInput,
 78 |         );
 79 |     });
 80 | 
 81 |     g.bench_function("multi-threaded", |b| {
 82 |         b.iter_custom(|iters| {
 83 |             let duration = bench_on_threadpool(
 84 |                 &thread_pool,
 85 |                 iters,
 86 |                 || (),
 87 |                 |()| {
 88 |                     seq!(N in 0..1000 {
 89 |                         black_box(Arc::weak_count(&TARGET_MANY_~N.get().shared_state));
 90 |                     });
 91 |                 },
 92 |             );
 93 | 
 94 |             // The other threads were all temporary and have already gone away, so all we care about
 95 |             // is destroying the remains in the global registry, which is fine from this thread.
 96 |             linked::__private_clear_linked_variables();
 97 | 
 98 |             duration
 99 |         });
100 |     });
101 | 
102 |     g.finish();
103 | }
104 | 
105 | // Defines `TARGET_MANY_0` ..= `TARGET_MANY_999`. We manually expand the macro (presumably `linked::instances!`) here because macro-in-macro fails to operate.
106 | seq!(N in 0..1000 {
107 |     #[expect(non_camel_case_types, reason = "manually replicating uglified macro internals for benchmark")]
108 |     struct __lookup_key_~N;
109 |     // Each variable is built from the `TypeId` function of its own marker struct plus the `TestSubject` constructor.
110 |     const TARGET_MANY_~N : ::linked::StaticInstances<TestSubject> =
111 |         ::linked::StaticInstances::new(
112 |             ::std::any::TypeId::of::<__lookup_key_~N>,
113 |             TestSubject::new
114 |         );
115 | });
116 | 
117 | /// Clears all data stored in the shared variable system when created and dropped. Just for testing.
118 | #[derive(Debug)]
119 | struct LinkedVariableClearGuard {}
120 | 
121 | impl Default for LinkedVariableClearGuard {
122 |     fn default() -> Self {
123 |         ::linked::__private_clear_linked_variables(); // Clear once when the guard is created...
124 |         Self {}
125 |     }
126 | }
127 | 
128 | impl Drop for LinkedVariableClearGuard {
129 |     fn drop(&mut self) {
130 |         ::linked::__private_clear_linked_variables(); // ...and once more when the guard is dropped.
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/crates/linked/doc/instance_per_thread.mermaid:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     subgraph Thread1[Thread 1]
 3 |         Task1a[Local task] -->|"::acquire()"| Local1a[Ref]
 4 |         Task1b[Local task] -->|"::acquire()"| Local1b[Ref]
 5 | 
 6 |         Local1a --> SharedOwnership((Shared<br/>ownership))
 7 |         Local1b --> SharedOwnership
 8 | 
 9 |         SharedOwnership --> Instance1[Linked object instance]
10 |     end
11 | 
12 |     subgraph Thread2[Thread 2]
13 |         Task2a[Local task] -->|"::acquire()"| Local2a[Ref]
14 |         Task2b[Local task] -->|"::acquire()"| Local2b[Ref]
15 | 
16 |         Local2a --> SharedOwnership2((Shared<br/>ownership))
17 |         Local2b --> SharedOwnership2
18 | 
19 |         SharedOwnership2 --> Instance2[Linked object instance]
20 |     end
21 | 
22 |     Instance1 --> SharedState[Family state]
23 |     Instance2 --> SharedState


--------------------------------------------------------------------------------
/crates/linked/doc/instance_per_thread_sync.mermaid:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     subgraph Thread1[Thread 1]
 3 |         Task1a[Local task] -->|"::acquire()"| Local1a[RefSync]
 4 |         Task1b[Local task] -->|"::acquire()"| Local1b[RefSync]
 5 | 
 6 |         Local1a --> SharedOwnership((Shared<br/>ownership))
 7 |         Local1b --> SharedOwnership
 8 | 
 9 |         SharedOwnership --> Instance1[Linked object instance]
10 |     end
11 | 
12 |     subgraph Thread2[Thread 2]
13 |         Task2a[Local task] -->|"::acquire()"| Local2a[RefSync]
14 |         Task2b[Local task] -->|"::acquire()"| Local2b[RefSync]
15 | 
16 |         Local2a --> SharedOwnership2((Shared<br/>ownership))
17 |         Local2b --> SharedOwnership2
18 | 
19 |         SharedOwnership2 --> Instance2[Linked object instance]
20 |     end
21 | 
22 |     Instance1 --> SharedState[Family state]
23 |     Instance2 --> SharedState


--------------------------------------------------------------------------------
/crates/linked/doc/linked.mermaid:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     subgraph Thread1[Thread 1]
 3 |         Task1[Local task] -->|thread-agnostic API surface| Instance1[Linked object instance]
 4 |         Instance1 -->|lock-free| Local1[Local state]
 5 |     end
 6 |     
 7 |     subgraph Thread2[Thread 2]
 8 |         Task2[Local task] -->|thread-agnostic API surface| Instance2[Linked object instance]
 9 |         Instance2 -->|lock-free| Local2[Local state]
10 |     end
11 |     
12 |     SS[Family state]
13 | 
14 |     Instance1 ---> SS
15 |     Instance2 ---> SS
16 | 


--------------------------------------------------------------------------------
/crates/linked/examples/linked_basic.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | //! Demonstrates basic usage of the linked object pattern.
 5 | 
 6 | #![allow(clippy::new_without_default, reason = "not relevant for example")]
 7 | 
 8 | use std::thread;
 9 | 
10 | mod counters {
11 |     use std::sync::Arc;
12 |     use std::sync::atomic::{AtomicUsize, Ordering};
13 | 
14 |     /// An event counter that keeps both a local count and a global count across all linked instances.
15 |     #[linked::object] // Activates the linked object pattern on this type.
16 |     pub(crate) struct EventCounter {
17 |         // Each instance has its own local count.
18 |         local_count: usize,
19 | 
20 |         // Each instance also increments a global count shared between all instances.
21 |         global_count: Arc<AtomicUsize>,
22 |     }
23 | 
24 |     impl EventCounter {
25 |         pub(crate) fn new() -> Self {
26 |             // The global count is shared between all instances by cloning this Arc into each one.
27 |             let global_count = Arc::new(AtomicUsize::new(0));
28 | 
29 |             // Instead of just creating a new instance, we must use the `linked::new!` macro.
30 |             // The body of the macro must be a `Self` struct-expression. This
31 |             // struct-expression will be reused to create each linked instance. It may capture any
32 |             // necessary variables as long as they are thread-safe (`Send` + `Sync` + `'static`).
33 |             linked::new!(Self {
34 |                 local_count: 0,
35 |                 global_count: Arc::clone(&global_count),
36 |             })
37 |         }
38 |         /// Records one event in both the local count and the shared global count.
39 |         pub(crate) fn increment(&mut self) {
40 |             self.local_count = self.local_count.saturating_add(1); // Saturates at `usize::MAX` instead of overflowing.
41 |             self.global_count.fetch_add(1, Ordering::Relaxed);
42 |         }
43 |         /// Returns the number of events recorded through this instance.
44 |         pub(crate) fn local_count(&self) -> usize {
45 |             self.local_count
46 |         }
47 |         /// Returns the current value of the global count shared between all instances.
48 |         pub(crate) fn global_count(&self) -> usize {
49 |             self.global_count.load(Ordering::Relaxed)
50 |         }
51 |     }
52 | }
53 | 
54 | use counters::EventCounter;
55 | 
56 | // A static variable provides linked instances of the event counter on any thread.
57 | // The `linked::instances!` macro gives all necessary superpowers to this static variable.
58 | // This is the simplest way to create instances that are linked across threads.
59 | linked::instances!(static RECORDS_PROCESSED: EventCounter = EventCounter::new());
60 | 
61 | fn main() {
62 |     const THREAD_COUNT: usize = 4;
63 |     const RECORDS_PER_THREAD: usize = 1_000;
64 | 
65 |     let mut threads = Vec::with_capacity(THREAD_COUNT);
66 |     // Spawn the workers; each one obtains a counter from the static variable on its own thread.
67 |     for _ in 0..THREAD_COUNT {
68 |         threads.push(thread::spawn(move || {
69 |             let mut counter = RECORDS_PROCESSED.get();
70 | 
71 |             for _ in 0..RECORDS_PER_THREAD {
72 |                 counter.increment();
73 |             }
74 | 
75 |             println!(
76 |                 "Thread completed work; local count: {}, global count: {}",
77 |                 counter.local_count(),
78 |                 counter.global_count()
79 |             );
80 |         }));
81 |     }
82 |     // Wait for every worker to finish before reading the final total.
83 |     for thread in threads {
84 |         thread.join().unwrap();
85 |     }
86 |     // Read the shared global count through a counter obtained on the main thread.
87 |     let final_count = RECORDS_PROCESSED.get().global_count();
88 | 
89 |     println!("All threads completed work; final global count: {final_count}");
90 | }
91 | 


--------------------------------------------------------------------------------
/crates/linked/examples/linked_box.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Copyright (c) Folo authors.
  3 | 
  4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first.
  5 | //!
  6 | //! Demonstrates how to apply the linked object pattern to types exposed via abstractions (traits).
  7 | //! This aims to preserve all the functionality of the linked objects pattern while allowing you
  8 | //! to expose the instances themselves as `dyn SomeTrait` instead of the concrete type.
  9 | //!
 10 | //! This is enabled by `linked::Box` which works like `std::boxed::Box` but with the
 11 | //! necessary extra machinery for linked objects.
 12 | //!
 13 | //! Under this model, **all** instances of a type T must be created as `linked::Box<dyn SomeTrait>`,
 14 | //! starting right from the constructor. If you want to have some instances exist as `T` and only
 15 | //! some as `dyn SomeTrait`, refer to the example `linked_std_box.rs`.
 16 | 
 17 | use std::thread;
 18 | 
 19 | mod counters {
 20 |     use std::sync::Arc;
 21 |     use std::sync::atomic::{AtomicUsize, Ordering};
 22 |     /// The abstraction through which callers use the event counter in this example.
 23 |     pub(crate) trait Counter {
 24 |         fn increment(&mut self);
 25 |         fn local_count(&self) -> usize;
 26 |         fn global_count(&self) -> usize;
 27 |     }
 28 | 
 29 |     // Note the difference from `linked_basic.rs`: there is no `#[linked::object]` attribute.
 30 |     // This is because the `linked::Box` wrapper we use provides the necessary machinery.
 31 |     pub(crate) struct EventCounter {
 32 |         local_count: usize,
 33 |         global_count: Arc<AtomicUsize>,
 34 |     }
 35 | 
 36 |     impl EventCounter {
 37 |         // The desired pattern is to suffix the constructor with "as_<trait_name>" to indicate that
 38 |         // it returns the result as a trait object instead of the concrete type.
 39 |         pub(crate) fn new_as_counter() -> linked::Box<dyn Counter> {
 40 |             let global_count = Arc::new(AtomicUsize::new(0));
 41 | 
 42 |             // Instead of `linked::new!` as we did in `linked_basic.rs`, we use `linked::new_box!`.
 43 |             // The first argument is the trait object that our linked object will be used through.
 44 |             // The second argument is the instance template as a `Self` struct-expression. This
 45 |             // struct-expression will be reused to create each linked instance. It may capture any
 46 |             // necessary variables as long as they are thread-safe (`Send`+`Sync`+`'static`).
 47 |             linked::new_box!(
 48 |                 dyn Counter,
 49 |                 Self {
 50 |                     local_count: 0,
 51 |                     global_count: Arc::clone(&global_count),
 52 |                 }
 53 |             )
 54 |         }
 55 |     }
 56 | 
 57 |     impl Counter for EventCounter {
 58 |         fn increment(&mut self) {
 59 |             self.local_count = self.local_count.saturating_add(1); // Saturates at `usize::MAX` instead of overflowing.
 60 |             self.global_count.fetch_add(1, Ordering::Relaxed);
 61 |         }
 62 | 
 63 |         fn local_count(&self) -> usize {
 64 |             self.local_count
 65 |         }
 66 | 
 67 |         fn global_count(&self) -> usize {
 68 |             self.global_count.load(Ordering::Relaxed)
 69 |         }
 70 |     }
 71 | }
 72 | 
 73 | use counters::{Counter, EventCounter};
 74 | 
 75 | linked::instances!(static RECORDS_PROCESSED: linked::Box<dyn Counter> = EventCounter::new_as_counter());
 76 | 
 77 | fn main() {
 78 |     const THREAD_COUNT: usize = 4;
 79 |     const RECORDS_PER_THREAD: usize = 1_000;
 80 | 
 81 |     let mut threads = Vec::with_capacity(THREAD_COUNT);
 82 |     // Spawn the workers; each one obtains a `linked::Box<dyn Counter>` from the static variable.
 83 |     for _ in 0..THREAD_COUNT {
 84 |         threads.push(thread::spawn(move || {
 85 |             let mut counter = RECORDS_PROCESSED.get();
 86 | 
 87 |             for _ in 0..RECORDS_PER_THREAD {
 88 |                 counter.increment();
 89 |             }
 90 | 
 91 |             println!(
 92 |                 "Thread completed work; local count: {}, global count: {}",
 93 |                 counter.local_count(),
 94 |                 counter.global_count()
 95 |             );
 96 |         }));
 97 |     }
 98 |     // Wait for every worker to finish before reading the final total.
 99 |     for thread in threads {
100 |         thread.join().unwrap();
101 |     }
102 |     // Read the shared global count through a counter obtained on the main thread.
103 |     let final_count = RECORDS_PROCESSED.get().global_count();
104 | 
105 |     println!("All threads completed work; final global count: {final_count}");
106 | }
107 | 


--------------------------------------------------------------------------------
/crates/linked/examples/linked_family.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Copyright (c) Folo authors.
  3 | 
  4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first.
  5 | //!
  6 | //! Demonstrates how to use linked objects across threads by manually establishing the linked
  7 | //! object family relationships via passing a reference to the family across threads and manually
  8 | //! creating instances from the family. This is useful because sometimes it might not be convenient
  9 | //! for you to define a static variable or use one of the standard instance-per-thread mechanisms.
 10 | //!
 11 | //! This example creates linked instances directly from the linked object family. This is
 12 | //! the most flexible approach but also requires the most code from you.
 13 | 
 14 | #![allow(clippy::new_without_default, reason = "Not relevant for example")]
 15 | 
 16 | use std::thread;
 17 | 
 18 | // This trait allows you to access the family of a linked object.
 19 | use linked::Object;
 20 | 
 21 | // Everything in the "counters" module is the same as in `linked_basic.rs`.
 22 | // The difference is all in main() below.
 23 | mod counters {
 24 |     use std::sync::Arc;
 25 |     use std::sync::atomic::{AtomicUsize, Ordering};
 26 |     /// An event counter with a per-instance local count and an `Arc`-shared global count.
 27 |     #[linked::object]
 28 |     pub(crate) struct EventCounter {
 29 |         local_count: usize,
 30 |         global_count: Arc<AtomicUsize>,
 31 |     }
 32 | 
 33 |     impl EventCounter {
 34 |         pub(crate) fn new() -> Self {
 35 |             let global_count = Arc::new(AtomicUsize::new(0));
 36 |             // The struct-expression is the instance template; it captures a clone of the shared `Arc`.
 37 |             linked::new!(Self {
 38 |                 local_count: 0,
 39 |                 global_count: Arc::clone(&global_count),
 40 |             })
 41 |         }
 42 |         /// Records one event in both the local count and the shared global count.
 43 |         pub(crate) fn increment(&mut self) {
 44 |             self.local_count = self.local_count.saturating_add(1);
 45 |             self.global_count.fetch_add(1, Ordering::Relaxed);
 46 |         }
 47 |         /// Returns the number of events recorded through this instance.
 48 |         pub(crate) fn local_count(&self) -> usize {
 49 |             self.local_count
 50 |         }
 51 |         /// Returns the current value of the shared global count.
 52 |         pub(crate) fn global_count(&self) -> usize {
 53 |             self.global_count.load(Ordering::Relaxed)
 54 |         }
 55 |     }
 56 | }
 57 | 
 58 | use counters::EventCounter;
 59 | 
 60 | fn main() {
 61 |     const THREAD_COUNT: usize = 4;
 62 |     const RECORDS_PER_THREAD: usize = 1_000;
 63 | 
 64 |     let mut threads = Vec::with_capacity(THREAD_COUNT);
 65 | 
 66 |     // We create the counter as a local variable here. Linked objects are
 67 |     // regular structs and are not limited to static variables in any way.
 68 |     let counter = EventCounter::new();
 69 | 
 70 |     // Every linked object belongs to a family, which you can access like this.
 71 |     // The family reference this returns is always thread-safe, even if the linked
 72 |     // object instances themselves are not. This allows you to pass it between threads.
 73 |     let counter_family = counter.family();
 74 | 
 75 |     for _ in 0..THREAD_COUNT {
 76 |         threads.push(thread::spawn({
 77 |             // We create a new clone of the family reference for each thread we spawn.
 78 |             let counter_family = counter_family.clone();
 79 | 
 80 |             move || {
 81 |                 // The family reference can be converted to a new instance on demand.
 82 |                 let mut counter: EventCounter = counter_family.into();
 83 | 
 84 |                 for _ in 0..RECORDS_PER_THREAD {
 85 |                     counter.increment();
 86 |                 }
 87 | 
 88 |                 println!(
 89 |                     "Thread completed work; local count: {}, global count: {}",
 90 |                     counter.local_count(),
 91 |                     counter.global_count()
 92 |                 );
 93 |             }
 94 |         }));
 95 |     }
 96 |     // Wait for every worker to finish before reading the final total.
 97 |     for thread in threads {
 98 |         thread.join().unwrap();
 99 |     }
100 |     // Read the shared global count through the original instance created at the top of `main`.
101 |     let final_count = counter.global_count();
102 | 
103 |     println!("All threads completed work; final global count: {final_count}");
104 | }
105 | 


--------------------------------------------------------------------------------
/crates/linked/examples/linked_std_box.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Copyright (c) Folo authors.
  3 | 
  4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first.
  5 | //!
  6 | //! Demonstrates how to expose linked objects via abstractions (traits) on demand while
  7 | //! still using the linked objects via the concrete type itself.
  8 | //!
  9 | //! In this form, there exist two categories of instances for a type T:
 10 | //!
 11 | //! 1. The regular instances of type T, which are ordinary linked objects.
 12 | //! 2. Instances of `std::boxed::Box<dyn Xyz>` where `T: Xyz`. These remain linked internally but
 13 | //!    cannot be used to create additional linked instances (there is no `.clone()` and
 14 | //!    no `.family()` on these objects).
 15 | //!
 16 | //! If you want to be able to create additional linked instances of `dyn Xyz` from an existing
 17 | //! instance of `dyn Xyz`, you must create **all** instances (starting from the constructor) as
 18 | //! `linked::Box<dyn Xyz>` instead of `std::boxed::Box<T>`. See `linked_box.rs` for an example.
 19 | 
 20 | #![allow(clippy::new_without_default, reason = "Not relevant for example")]
 21 | 
 22 | use std::thread;
 23 | 
 24 | mod counters {
 25 |     use std::sync::Arc;
 26 |     use std::sync::atomic::{AtomicUsize, Ordering};
 27 | 
 28 |     /// A trait that defines functions for reporting the results of some counting that happened.
 29 |     pub(crate) trait CountResult {
 30 |         fn local_count(&self) -> usize;
 31 |         fn global_count(&self) -> usize;
 32 |     }
 33 | 
 34 |     // Note how this is a regular linked object type, just like in `linked_basic.rs`.
 35 |     #[linked::object]
 36 |     pub(crate) struct EventCounter {
 37 |         local_count: usize,
 38 |         global_count: Arc<AtomicUsize>,
 39 |     }
 40 | 
 41 |     impl EventCounter {
 42 |         pub(crate) fn new() -> Self {
 43 |             let global_count = Arc::new(AtomicUsize::new(0));
 44 |             // The struct-expression is the instance template; it captures a clone of the shared `Arc`.
 45 |             linked::new!(Self {
 46 |                 local_count: 0,
 47 |                 global_count: Arc::clone(&global_count),
 48 |             })
 49 |         }
 50 |         // Counting is only available on the concrete type; `CountResult` exposes just the read accessors.
 51 |         pub(crate) fn increment(&mut self) {
 52 |             self.local_count = self.local_count.saturating_add(1);
 53 |             self.global_count.fetch_add(1, Ordering::Relaxed);
 54 |         }
 55 |     }
 56 | 
 57 |     impl CountResult for EventCounter {
 58 |         fn local_count(&self) -> usize {
 59 |             self.local_count
 60 |         }
 61 | 
 62 |         fn global_count(&self) -> usize {
 63 |             self.global_count.load(Ordering::Relaxed)
 64 |         }
 65 |     }
 66 | }
 67 | 
 68 | use counters::{CountResult, EventCounter};
 69 | 
 70 | linked::instances!(static RECORDS_PROCESSED: EventCounter = EventCounter::new());
 71 | 
 72 | // Consumes an abstract count result and reports it. Nothing in this small example truly needs
 73 | // the abstraction; we simply pretend there is a reason to hide the concrete counter type.
 74 | #[expect(clippy::needless_pass_by_value, reason = "adding realism to example")]
 75 | fn finalize_counter_processing(result: Box<dyn CountResult>) {
 76 |     let local = result.local_count();
 77 |     let global = result.global_count();
 78 |     println!(
 79 |         "Counter finished counting: local count: {}, global count: {}",
 80 |         local, global
 81 |     );
 82 | }
 83 | 
 84 | fn main() {
 85 |     const THREAD_COUNT: usize = 4;
 86 |     const RECORDS_PER_THREAD: usize = 1_000;
 87 | 
 88 |     // All workers are spawned up front and only joined afterwards.
 89 |     let workers: Vec<_> = (0..THREAD_COUNT)
 90 |         .map(|_| {
 91 |             thread::spawn(move || {
 92 |                 let mut counter = RECORDS_PROCESSED.get();
 93 | 
 94 |                 for _ in 0..RECORDS_PER_THREAD {
 95 |                     counter.increment();
 96 |                 }
 97 | 
 98 |                 // A regular linked object instance can be moved into a Box at any time. Once
 99 |                 // it is behind `Box<dyn CountResult>`, it can no longer be used to create more
100 |                 // linked instances: `.clone()` and `.family()` are not reachable through it.
101 |                 finalize_counter_processing(Box::new(counter));
102 |             })
103 |         })
104 |         .collect();
105 | 
106 |     for worker in workers {
107 |         worker.join().unwrap();
108 |     }
109 | 
110 |     let final_count = RECORDS_PROCESSED.get().global_count();
111 | 
112 |     println!("All threads completed work; final global count: {final_count}");
113 | }
114 | 


--------------------------------------------------------------------------------
/crates/linked/examples/linked_thread_local_rc.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Copyright (c) Folo authors.
  3 | 
  4 | //! This is a variation of `linked_basic.rs` - familiarize yourself with that example first.
  5 | //!
  6 | //! Demonstrates how to share thread-local instances of linked objects, so all callers on a single
  7 | //! thread access the same instance of the linked object.
  8 | 
  9 | #![allow(clippy::new_without_default, reason = "Not relevant for example")]
 10 | 
 11 | use std::thread;
 12 | 
 13 | mod counters {
 14 |     use std::cell::Cell;
 15 |     use std::sync::Arc;
 16 |     use std::sync::atomic::{AtomicUsize, Ordering};
 17 | 
 18 |     #[linked::object]
 19 |     pub(crate) struct EventCounter {
 20 |         // Effectively a per-thread count: each thread only ever touches one instance of the
 21 |         // linked object.
 22 |         //
 23 |         // That one instance is shared by every caller on the thread, so nobody can hold a
 24 |         // `&mut` reference to it and no function in the `impl` block may take `&mut self`.
 25 |         // Local state changes therefore need interior mutability; a `Cell` is enough to
 26 |         // let us bump the local count through `&self`.
 27 |         local_count: Cell<usize>,
 28 | 
 29 |         global_count: Arc<AtomicUsize>,
 30 |     }
 31 | 
 32 |     impl EventCounter {
 33 |         pub(crate) fn new() -> Self {
 34 |             let family_total = Arc::new(AtomicUsize::new(0));
 35 | 
 36 |             linked::new!(Self {
 37 |                 local_count: Cell::new(0),
 38 |                 global_count: Arc::clone(&family_total),
 39 |             })
 40 |         }
 41 | 
 42 |         // Takes `&self`, not `&mut self`: with one instance reused for every operation on a
 43 |         // thread, neither `&mut self` methods nor `mut EventCounter` / `&mut EventCounter`
 44 |         // variables are an option.
 45 |         pub(crate) fn increment(&self) {
 46 |             let bumped = self.local_count.get().saturating_add(1);
 47 |             self.local_count.set(bumped);
 48 |             self.global_count.fetch_add(1, Ordering::Relaxed);
 49 |         }
 50 | 
 51 |         pub(crate) fn local_count(&self) -> usize {
 52 |             self.local_count.get()
 53 |         }
 54 | 
 55 |         pub(crate) fn global_count(&self) -> usize {
 56 |             AtomicUsize::load(&self.global_count, Ordering::Relaxed)
 57 |         }
 58 |     }
 59 | }
 60 | 
 61 | use counters::EventCounter;
 62 | 
 63 | linked::thread_local_rc!(static RECORDS_PROCESSED: EventCounter = EventCounter::new());
 64 | 
 65 | fn main() {
 66 |     const THREAD_COUNT: usize = 4;
 67 |     const INCREMENT_ITERATIONS: usize = 1_000;
 68 | 
 69 |     let workers: Vec<_> = (0..THREAD_COUNT)
 70 |         .map(|_| {
 71 |             thread::spawn(move || {
 72 |                 // The simplest approach: borrow the current thread's instance in place.
 73 |                 RECORDS_PROCESSED.with(|x| x.increment());
 74 | 
 75 |                 // A long-lived reference to the current thread's instance can be obtained
 76 |                 // too. That is the more efficient way to reach the thread-specific instance,
 77 |                 // provided the reference is actually reused.
 78 |                 //
 79 |                 // Both of these refer to the exact same instance.
 80 |                 let first_handle = RECORDS_PROCESSED.to_rc();
 81 |                 let second_handle = RECORDS_PROCESSED.to_rc();
 82 | 
 83 |                 for _ in 0..INCREMENT_ITERATIONS {
 84 |                     first_handle.increment();
 85 |                     second_handle.increment();
 86 |                 }
 87 | 
 88 |                 // And once more, the exact same instance as above!
 89 |                 let report_handle = RECORDS_PROCESSED.to_rc();
 90 |                 let local = report_handle.local_count();
 91 |                 let global = report_handle.global_count();
 92 |                 println!(
 93 |                     "Thread completed work; thread local count: {}, global count: {}",
 94 |                     local, global
 95 |                 );
 96 |             })
 97 |         })
 98 |         .collect();
 99 | 
100 |     for worker in workers {
101 |         worker.join().unwrap();
102 |     }
103 | 
104 |     let final_count = RECORDS_PROCESSED.to_rc().global_count();
105 | 
106 |     println!("All threads completed work; final global count: {final_count}");
107 | }
108 | 


--------------------------------------------------------------------------------
/crates/linked/src/__private.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | //! This module contains logically private things that must be technically public
 5 | //! because they are accessed from macro-generated code.
 6 | 
 7 | use std::fmt::{self, Debug, Formatter};
 8 | use std::sync::Arc;
 9 | 
10 | use crate::{Family, Object};
11 | 
12 | /// Re-export so we can use it via macros in projects that do not have a reference to `paste`.
13 | pub use ::paste::paste;
14 | 
15 | /// This is meant to be used via the [`linked::new!`][crate::new] macro, never directly called.
16 | ///
17 | /// Starts a new family of linked objects and returns its first instance. Every instance in the
18 | /// family is produced by `instance_factory`, whose captured state is what connects the members.
19 | ///
20 | /// The factory must be thread-safe (`Send` + `Sync` + `'static`), which implies the same of
21 | /// everything it captures. The instances it returns do not need to be thread-safe.
22 | #[inline]
23 | pub fn new<T>(instance_factory: impl Fn(Link<T>) -> T + Send + Sync + 'static) -> T {
24 |     let shared_factory: InstanceFactory<T> = Arc::new(instance_factory);
25 |     Link::new(shared_factory).into_instance()
26 | }
27 | 
28 | /// This is meant to be used via the `#[linked::object]` macro, never directly called.
29 | ///
30 | /// Clones a linked object by asking its family for a fresh instance. Linked objects must be
31 | /// cloned this way, so `#[linked::object]` wires this into the `Clone` impl of every such type.
32 | #[inline]
33 | pub fn clone<T>(value: &T) -> T
34 | where
35 |     T: Object + From<Family<T>>,
36 | {
37 |     T::from(value.family())
38 | }
39 | 
40 | pub(crate) type InstanceFactory<T> = Arc<dyn Fn(Link<T>) -> T + Send + Sync + 'static>;
41 | 
42 | /// An object that connects an instance to other instances in the same linked object family.
43 | ///
44 | /// This type serves the linked object infrastructure and is not meant to be used by user code.
45 | /// It is technically public only because macro-generated code needs to name it.
46 | pub struct Link<T> {
47 |     pub(super) instance_factory: InstanceFactory<T>, // Shared by every instance in the family.
48 | }
49 | 
50 | impl<T> Debug for Link<T> {
51 |     #[cfg_attr(test, mutants::skip)] // We have no API contract for this.
52 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
53 |         // The factory is an opaque closure, so we render a description of its type instead.
54 |         let t = std::any::type_name::<T>();
55 | 
56 |         let mut builder = f.debug_struct("Link");
57 |         builder.field(
58 |             "instance_factory",
59 |             &format_args!("Arc<dyn Fn(Link<{t}>) -> {t}>"),
60 |         );
61 |         builder.finish()
62 |     }
63 | }
64 | 
65 | impl<T> Link<T> {
66 |     #[must_use]
67 |     pub(super) fn new(instance_factory: InstanceFactory<T>) -> Self {
68 |         Self { instance_factory }
69 |     }
70 | 
71 |     #[must_use]
72 |     pub(super) fn into_instance(self) -> T {
73 |         // Take our own handle to the factory first, because calling it consumes `self`.
74 |         let factory = Arc::clone(&self.instance_factory);
75 |         factory(self)
76 |     }
77 | 
78 |     // `Link` intentionally has no `Clone` impl, to discourage accidentally implementing cloning
79 |     // of type `T` via `#[derive(Clone)]`. The expected pattern is `#[linked::object]`, which
80 |     // generates both an `Object` implementation and a specialized `Clone` implementation.
81 |     #[must_use]
82 |     fn clone(&self) -> Self {
83 |         Self::new(Arc::clone(&self.instance_factory))
84 |     }
85 | 
86 |     #[inline]
87 |     #[must_use]
88 |     pub fn family(&self) -> Family<T> {
89 |         let link = self.clone();
90 |         Family::new(link)
91 |     }
92 | }
93 | 


--------------------------------------------------------------------------------
/crates/linked/src/constants.rs:
--------------------------------------------------------------------------------
1 | // A poisoned lock means the process is in an unrecoverable/unsafe state and must exit (we panic).
2 | pub(crate) const ERR_POISONED_LOCK: &str = "encountered poisoned lock - continued execution is not safe because we can no longer ensure that we uphold security and privacy guarantees";
3 | 


--------------------------------------------------------------------------------
/crates/linked/src/family.rs:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Microsoft Corporation.
  2 | // Copyright (c) Folo authors.
  3 | 
  4 | use std::any::type_name;
  5 | use std::fmt::{self, Debug, Formatter};
  6 | 
  7 | use crate::__private::{InstanceFactory, Link};
  8 | 
  9 | /// Represents a family of [linked objects][crate] and allows you to create additional instances
 10 | /// in the same family.
 11 | ///
 12 | /// Clones represent the same family and are functionally equivalent.
 13 | ///
 14 | /// # When to use this type
 15 | ///
 16 | /// The family is a low-level primitive for creating instances of linked objects. You will need to
 17 | /// use it directly if you are implementing custom instance management patterns. Typical usage of
 18 | /// linked objects occurs via standard macros/wrappers provided by the crate:
 19 | ///
 20 | /// * [`linked::instances!`][1]
 21 | /// * [`linked::thread_local_rc!`][2]
 22 | /// * [`linked::thread_local_arc!`][4] (if `T: Sync`)
 23 | /// * [`linked::InstancePerThread<T>`][5]
 24 | /// * [`linked::InstancePerThreadSync<T>`][6] (if `T: Sync`)
 25 | ///
 26 | /// # Example
 27 | ///
 28 | /// ```rust
 29 | /// # use std::sync::{Arc, Mutex};
 30 | /// # #[linked::object]
 31 | /// # struct Thing {
 32 | /// #     value: Arc<Mutex<String>>,
 33 | /// # }
 34 | /// # impl Thing {
 35 | /// #     pub fn new(initial_value: String) -> Self {
 36 | /// #         let shared_value = Arc::new(Mutex::new(initial_value));
 37 | /// #         linked::new!(Self {
 38 | /// #             value: shared_value.clone(),
 39 | /// #         })
 40 | /// #     }
 41 | /// #     pub fn value(&self) -> String {
 42 | /// #         self.value.lock().unwrap().clone()
 43 | /// #     }
 44 | /// #     pub fn set_value(&self, value: String) {
 45 | /// #         *self.value.lock().unwrap() = value;
 46 | /// #     }
 47 | /// # }
 48 | /// use linked::Object; // This brings .family() into scope.
 49 | /// use std::thread;
 50 | ///
 51 | /// let thing = Thing::new("hello".to_string());
 52 | /// assert_eq!(thing.value(), "hello");
 53 | ///
 54 | /// thing.set_value("world".to_string());
 55 | ///
 56 | /// thread::spawn({
 57 | ///     let thing_family = thing.family();
 58 | ///
 59 | ///     move || {
 60 | ///         let thing: Thing = thing_family.into();
 61 | ///         assert_eq!(thing.value(), "world");
 62 | ///     }
 63 | /// }).join().unwrap();
 64 | /// ```
 65 | ///
 66 | /// [1]: crate::instances
 67 | /// [2]: crate::thread_local_rc
 68 | /// [4]: crate::thread_local_arc
 69 | /// [5]: crate::InstancePerThread
 70 | /// [6]: crate::InstancePerThreadSync
 71 | #[derive(Clone)]
 72 | pub struct Family<T> {
 73 |     // We keep only the factory (which is `Send + Sync`), not the `Link` it was extracted from,
 74 |     // because a `Link` exists only in interactions with a specific instance of `T`.
 75 |     instance_factory: InstanceFactory<T>,
 76 | }
 77 | 
 78 | impl<T> Debug for Family<T> {
 79 |     #[cfg_attr(test, mutants::skip)] // We have no API contract for this.
 80 |     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 81 |         // The factory is an opaque closure, so we render a description of its type instead.
 82 |         let t = type_name::<T>();
 83 | 
 84 |         let mut builder = f.debug_struct(type_name::<Self>());
 85 |         builder.field("instance_factory", &format_args!("Arc<dyn Fn(Link<{t}>) -> {t}>"));
 86 |         builder.finish()
 87 |     }
 88 | }
 89 | 
 90 | impl<T> Family<T> {
 91 |     #[must_use]
 92 |     pub(super) fn new(link: Link<T>) -> Self {
 93 |         // The link itself is discarded - only its factory lives on in the family.
 94 |         let Link { instance_factory } = link;
 95 |         Self { instance_factory }
 96 |     }
 97 | 
 98 |     // Backs the macro-generated `From<Family<T>> for T` impl of each specific linked type `T`.
 99 |     #[doc(hidden)]
100 |     #[inline]
101 |     #[must_use]
102 |     pub fn __private_into(self) -> T {
103 |         Link::new(self.instance_factory).into_instance()
104 |     }
105 | }
106 | 


--------------------------------------------------------------------------------
/crates/linked/src/macros.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | /// Defines the template used to create every instance in a linked object family.
 5 | ///
 6 | /// You are expected to use this in the constructor of a [linked object][crate],
 7 | /// except when you want to always express the linked object via trait objects (`dyn Xyz`),
 8 | /// in which case you should use [`linked::new_box`][crate::new_box].
 9 | ///
10 | /// The macro body must be a struct-expression of the `Self` type. Any variables the macro body
11 | /// captures must be thread-safe (`Send` + `Sync` + `'static`). The returned object itself does
12 | /// not need to be thread-safe.
13 | ///
14 | /// # Example
15 | ///
16 | /// ```
17 | /// use std::sync::{Arc, Mutex};
18 | ///
19 | /// #[linked::object]
20 | /// struct TokenCache {
21 | ///     tokens_created: usize,
22 | ///     name: String,
23 | ///     master_key: Arc<Mutex<String>>,
24 | ///     is_multidimensional: bool,
25 | /// }
26 | ///
27 | /// impl TokenCache {
28 | ///     fn new(name: String, is_multidimensional: bool) -> Self {
29 | ///         // Any shared data referenced by the macro body must be thread-safe.
30 | ///         let master_key = Arc::new(Mutex::new(String::new()));
31 | ///
32 | ///         linked::new!(Self {
33 | ///             tokens_created: 0,
34 | ///             name: name.clone(),
35 | ///             master_key: Arc::clone(&master_key),
36 | ///             is_multidimensional,
37 | ///         })
38 | ///     }
39 | /// }
40 | /// ```
41 | ///
42 | /// Complex expressions are supported within the `Self` struct-expression:
43 | ///
44 | /// ```
45 | /// #[linked::object]
46 | /// struct TokenCache {
47 | ///     token_sources: Vec<linked::Box<dyn TokenSource>>,
48 | /// }
49 | /// # trait TokenSource {}
50 | ///
51 | /// impl TokenCache {
52 | ///     fn new(source_families: Vec<linked::Family<linked::Box<dyn TokenSource>>>) -> Self {
53 | ///         linked::new!(Self {
54 | ///             token_sources: source_families
55 | ///                 .iter()
56 | ///                 .cloned()
57 | ///                 .map(linked::Family::into)
58 | ///                 .collect()
59 | ///         })
60 | ///     }
61 | /// }
62 | /// ```
63 | ///
64 | /// For a complete example, see `examples/linked_basic.rs`.
65 | #[macro_export]
66 | macro_rules! new {
67 |     // `new!(Self)` is forwarded to `new!(Self {})`.
68 |     (Self) => {
69 |         $crate::new!(Self {})
70 |     };
71 |     // No field initializers: handled separately because the rule below would emit a stray comma.
72 |     (Self {}) => {
73 |         $crate::__private::new(move |__private_linked_link| Self {
74 |             __private_linked_link,
75 |         })
76 |     };
77 |     // Typical case - struct expression with one or more field initializers (the empty case
78 |     // is handled above). Each field initializer is processed as per the `@expand` rules below,
79 |     // which essentially does not touch/change them.
80 |     (Self { $($field:ident $( : $value:expr )?),* $(,)? }) => {
81 |         $crate::__private::new(move |__private_linked_link| Self {
82 |             $($field: $crate::new!(@expand $field $( : $value )?)),*,
83 |             __private_linked_link,
84 |         })
85 |     };
86 |     (@expand $field:ident : $value:expr) => {
87 |         $value
88 |     };
89 |     (@expand $field:ident) => {
90 |         $field
91 |     };
92 | }
93 | 


--------------------------------------------------------------------------------
/crates/linked/src/object.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | use crate::Family;
 5 | 
 6 | /// Operations available on every instance of a [linked object][crate].
 7 | ///
 8 | /// The only supported way to implement this is via [`#[linked::object]`][crate::object].
 9 | pub trait Object: From<Family<Self>> + Sized + Clone + 'static {
10 |     /// The object family that the current instance is linked to.
11 |     ///
12 |     /// Convert the returned family into `Self` to create another instance in the same family.
13 |     fn family(&self) -> Family<Self>;
14 | }
15 | 


--------------------------------------------------------------------------------
/crates/linked/src/thread_id_hash.rs:
--------------------------------------------------------------------------------
 1 | use std::hash::{BuildHasher, Hasher};
 2 | 
 3 | /// A hasher implementation specialized for thread IDs.
 4 | pub(crate) struct ThreadIdHasher {
 5 |     state: u64,
 6 | }
 7 | 
 8 | impl ThreadIdHasher {
 9 |     pub(crate) fn new() -> Self {
10 |         Self { state: 0 }
11 |     }
12 | }
13 | 
14 | impl Hasher for ThreadIdHasher {
15 |     fn finish(&self) -> u64 {
16 |         self.state
17 |     }
18 | 
19 |     // No mutation - we avoid hardcoding hash logic into tests, so expectations are minimal.
20 |     #[cfg_attr(test, mutants::skip)]
21 |     fn write(&mut self, bytes: &[u8]) {
22 |         assert_eq!(
23 |             bytes.len(),
24 |             8,
25 |             "ThreadIdHasher expects exactly 8 bytes (u64) as input"
26 |         );
27 | 
28 |         // We expect this to only be called once per hash operation, with the bytes of a u64
29 |         // that typically has only the low bits set (rare to see more than 16 bits of data,
30 |         // often even 8 bits). A `u64` reaches us via the default `Hasher::write_u64()` (which
31 |         // we do not override) as native-endian bytes, so we decode as native-endian. Decoding
32 |         // as little-endian would byte-swap the value on big-endian targets, leaving the low
33 |         // bits of the hash (almost) constant.
34 |         let id = u64::from_ne_bytes(bytes.try_into().expect("expecting ThreadId to be u64"));
35 | 
36 |         // We copy the low byte into the high byte because HashMap seems to care a lot about
37 |         // the high bits (this is used as the control byte for fast comparisons). The low byte
38 |         // is taken from the decoded value so that it is the low byte regardless of endianness.
39 |         self.state = id ^ ((id & 0xFF) << 56);
40 |     }
41 | }
42 | 
43 | /// A `BuildHasher` whose hashers are freshly constructed `ThreadIdHasher` instances.
44 | pub(crate) struct BuildThreadIdHasher;
45 | 
46 | impl BuildHasher for BuildThreadIdHasher {
47 |     type Hasher = ThreadIdHasher;
48 | 
49 |     fn build_hasher(&self) -> Self::Hasher {
50 |         <Self::Hasher>::new()
51 |     }
52 | }
53 | 
54 | #[cfg(test)]
55 | mod tests {
56 |     use super::*;
57 | 
58 |     fn hash_of(id: u64) -> u64 {
59 |         let mut hasher = ThreadIdHasher::new();
60 |         hasher.write(&id.to_le_bytes());
61 |         hasher.finish()
62 |     }
63 | 
64 |     #[test]
65 |     fn control_byte_is_different() {
66 |         // Even for tiny changes in the ID value, we expect the control byte (high byte) to be
67 |         // different because the control byte comparison is performance-critical.
68 |         let (hash1, hash2) = (hash_of(0), hash_of(1));
69 | 
70 |         // There has to be at least some difference.
71 |         assert_ne!(hash1, hash2);
72 | 
73 |         // This is the control byte (high byte).
74 |         assert_ne!(hash1 & 0xFF00_0000_0000_0000, hash2 & 0xFF00_0000_0000_0000);
75 |     }
76 | }
77 | 


--------------------------------------------------------------------------------
/crates/linked/tests/linked_object.rs:
--------------------------------------------------------------------------------
 1 | //! Linked object definition under various edge cases.
 2 | 
 3 | #[test]
 4 | fn empty_struct() {
 5 |     #[linked::object]
 6 |     struct Empty {}
 7 |     impl Empty {
 8 |         fn new() -> Self {
 9 |             linked::new!(Self {})
10 |         }
11 |     }
12 | 
13 |     let instance = Empty::new();
14 |     drop(instance);
15 | }
16 | 
17 | #[test]
18 | fn very_empty_struct() {
19 |     #[linked::object]
20 |     struct Empty {}
21 |     impl Empty {
22 |         fn new() -> Self {
23 |             linked::new!(Self)
24 |         }
25 |     }
26 | 
27 |     let instance = Empty::new();
28 |     drop(instance);
29 | }
30 | 


--------------------------------------------------------------------------------
/crates/linked/tests/smoke.rs:
--------------------------------------------------------------------------------
 1 | //! Basic operations on linked objects.
 2 | 
 3 | use std::{
 4 |     sync::{Arc, Mutex},
 5 |     thread,
 6 | };
 7 | 
 8 | use linked::Object;
 9 | 
10 | #[test]
11 | fn linked_objects_smoke_test() {
12 |     #[linked::object]
13 |     struct Thing {
14 |         local_value: usize,
15 |         global_value: Arc<Mutex<String>>,
16 |     }
17 | 
18 |     impl Thing {
19 |         fn new(local_value: usize, global_value: String) -> Self {
20 |             let shared_value = Arc::new(Mutex::new(global_value));
21 | 
22 |             linked::new!(Self {
23 |                 local_value,
24 |                 global_value: Arc::clone(&shared_value),
25 |             })
26 |         }
27 | 
28 |         fn set_global_value(&self, value: &str) {
29 |             *self.global_value.lock().unwrap() = value.to_string();
30 |         }
31 | 
32 |         fn get_global_value(&self) -> String {
33 |             self.global_value.lock().unwrap().clone()
34 |         }
35 | 
36 |         fn get_local_value(&self) -> usize {
37 |             self.local_value
38 |         }
39 | 
40 |         fn set_local_value(&mut self, value: usize) {
41 |             self.local_value = value;
42 |         }
43 |     }
44 | 
45 |     // `local_value` belongs to each instance, `global_value` is shared by the whole family.
46 |     let mut original = Thing::new(42, "hello".to_string());
47 | 
48 |     assert_eq!(original.get_local_value(), 42);
49 |     assert_eq!(original.get_global_value(), "hello");
50 | 
51 |     let clone = original.clone();
52 | 
53 |     original.set_global_value("world");
54 |     original.set_local_value(43);
55 | 
56 |     assert_eq!(original.get_local_value(), 43);
57 |     assert_eq!(original.get_global_value(), "world");
58 | 
59 |     assert_eq!(clone.get_local_value(), 42);
60 |     assert_eq!(clone.get_global_value(), "world");
61 | 
62 |     let family = original.family();
63 | 
64 |     // An instance created from the family on another thread starts from the template values.
65 |     let worker = thread::spawn(move || {
66 |         let mut remote: Thing = family.into();
67 | 
68 |         assert_eq!(remote.get_local_value(), 42);
69 |         assert_eq!(remote.get_global_value(), "world");
70 | 
71 |         remote.set_global_value("paradise");
72 |         remote.set_local_value(45);
73 |     });
74 |     worker.join().unwrap();
75 | 
76 |     // Only the shared value changed; the local values of existing instances are untouched.
77 |     assert_eq!(original.get_local_value(), 43);
78 |     assert_eq!(original.get_global_value(), "paradise");
79 | 
80 |     assert_eq!(clone.get_local_value(), 42);
81 |     assert_eq!(clone.get_global_value(), "paradise");
82 | }
83 | 


--------------------------------------------------------------------------------
/crates/linked_macros/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "linked_macros"
 3 | description = "Internal dependency of the 'linked' crate - do not reference directly"
 4 | publish = true
 5 | version = "0.2.0"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | proc-macro = true
15 | doc = false
16 | 
17 | [package.metadata.cargo-machete]
18 | ignored = ["proc-macro2"]
19 | 
20 | [dependencies]
21 | linked_macros_impl = { workspace = true }
22 | proc-macro2 = { workspace = true, features = ["proc-macro"] }
23 | 
24 | [dev-dependencies]
25 | 
26 | [lints]
27 | workspace = true
28 | 


--------------------------------------------------------------------------------
/crates/linked_macros/src/lib.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | #![allow(
 5 |     missing_docs,
 6 |     reason = "Private API, public API is documented in `linked` crate"
 7 | )]
 8 | 
 9 | use proc_macro::TokenStream;
10 | 
11 | #[proc_macro_attribute]
12 | pub fn __macro_linked_object(attr: TokenStream, item: TokenStream) -> TokenStream {
13 |     TokenStream::from(linked_macros_impl::linked_object::entrypoint(&attr.into(), &item.into()))
14 | }
15 | 


--------------------------------------------------------------------------------
/crates/linked_macros_impl/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "linked_macros_impl"
 3 | description = "Internal dependency of the 'linked_macros' crate - do not reference directly"
 4 | publish = true
 5 | version = "0.2.0"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [dependencies]
17 | proc-macro2 = { workspace = true }
18 | quote = { workspace = true }
19 | syn = { workspace = true, features = ["full", "parsing", "printing"] }
20 | 
21 | [dev-dependencies]
22 | 
23 | [lints]
24 | workspace = true
25 | 


--------------------------------------------------------------------------------
/crates/linked_macros_impl/src/lib.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | #![allow(
 5 |     missing_docs,
 6 |     reason = "Private API, public API is documented in `linked` crate"
 7 | )]
 8 | 
 9 | pub mod linked_object;
10 | mod syn_helpers;
11 | 


--------------------------------------------------------------------------------
/crates/linked_macros_impl/src/syn_helpers.rs:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation.
 2 | // Copyright (c) Folo authors.
 3 | 
 4 | //! This module contains helper functions for consuming and producing Rust syntax elements.
 5 | 
 6 | use proc_macro2::TokenStream;
 7 | use quote::quote;
 8 | 
 9 | /// Combines a token stream with a syn-originating contextual error message that contains
10 | /// all the necessary metadata to emit rich errors (with red underlines and all that).
11 | ///
12 | /// Also preserves the original token stream, merely appending the error instead of replacing.
13 | pub(crate) fn token_stream_and_error(s: &TokenStream, e: &syn::Error) -> TokenStream {
14 |     let error = e.to_compile_error();
15 | 
16 |     // We preserve both the original input and emit the compiler error message.
17 |     // This ensures that we do not cause extra problems by removing the original input
18 |     // from the code file (which would result in "trait not found" and similar errors).
19 |     quote! {
20 |         #s
21 |         #error
22 |     }
23 | }
24 | 
25 | /// Attempts to identify any compile-time error in the token stream. This is useful for unit
26 | /// testing macros - if the macro is expected to produce a compile-time error, we can check
27 | /// whether one exists.
28 | ///
29 | /// We deliberately do not take an error message as input here. Testing for error messages is
30 | /// fragile and creates maintenance headaches - be satisfied with OK/NOK testing and keep it simple.
31 | #[cfg(test)]
32 | pub(crate) fn contains_compile_error(tokens: &TokenStream) -> bool {
33 |     // String-based implementation, so vulnerable to false positives in very unlikely cases.
34 |     tokens.to_string().contains(":: core :: compile_error ! {")
35 | }
36 | 
37 | #[cfg(test)]
38 | mod tests {
39 |     use proc_macro2::Span;
40 | 
41 |     use super::*;
42 | 
43 |     #[test]
44 |     fn token_stream_and_error_outputs_both() {
45 |         // This is a bit tricky because we do not know the specific form the compiler error
46 |         // is going to be. However, we know it must contain our error message, so just check that.
47 |         let canary = "nrtfynjcrtupyh6rhdoj85m7yoi";
48 | 
49 |         // We also need to ensure it contains this function (that it did not get overwritten).
50 |         let s = quote! {
51 |             fn gkf5dj8yhuldri58uygdkiluyot() {}
52 |         };
53 | 
54 |         let e = syn::Error::new(Span::call_site(), canary);
55 | 
56 |         let merged = token_stream_and_error(&s, &e);
57 | 
58 |         let merged_str = merged.to_string();
59 |         assert!(merged_str.contains(canary));
60 |         assert!(merged_str.contains("gkf5dj8yhuldri58uygdkiluyot"));
61 |     }
62 | 
63 |     #[test]
64 |     fn contains_compile_error_yes_raw() {
65 |         let tokens = quote! {
66 |             let foo = "Some random stuff may also be here";
67 |             blah! { blah }
68 |             ::core::compile_error! { "This is a test error message." };
69 |             let bar = "More random stuff here"
70 |         };
71 | 
72 |         assert!(contains_compile_error(&tokens));
73 |     }
74 | 
75 |     #[test]
76 |     fn contains_compile_error_yes_generated() {
77 |         let tokens = quote! {
78 |             let foo = "Some random stuff may also be here";
79 |             blah! { blah }
80 |             ::core::compile_error!("This is a test error message.");
81 |             let bar = "More random stuff here"
82 |         };
83 | 
84 |         let tokens =
85 |             token_stream_and_error(&tokens, &syn::Error::new(Span::call_site(), "Testing"));
86 | 
87 |         assert!(contains_compile_error(&tokens));
88 |     }
89 | 
90 |     #[test]
91 |     fn contains_compile_error_no() {
92 |         let tokens = quote! {
93 |             let foo = "No compile error here!"
94 |         };
95 | 
96 |         assert!(!contains_compile_error(&tokens));
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/crates/many_cpus/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "many_cpus"
 3 | description = "Efficiently schedule work and inspect the hardware environment on many-processor systems"
 4 | publish = true
 5 | version = "0.3.1"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | cpulist = { workspace = true }
18 | derive_more = { workspace = true, features = ["as_ref", "display"] }
19 | foldhash = { workspace = true }
20 | folo_utils = { workspace = true }
21 | itertools = { workspace = true }
22 | negative-impl = { workspace = true }
23 | nonempty = { workspace = true }
24 | rand = { workspace = true, features = ["thread_rng"] }
25 | 
26 | [target.'cfg(unix)'.dependencies]
27 | cpulist = { workspace = true }
28 | libc = { workspace = true }
29 | 
30 | [target.'cfg(windows)'.dependencies]
31 | folo_ffi = { workspace = true }
32 | heapless = { workspace = true }
33 | smallvec = { workspace = true }
34 | windows = { workspace = true, features = [
35 |     "Win32_System_JobObjects",
36 |     "Win32_System_Kernel",
37 |     "Win32_System_SystemInformation",
38 |     "Win32_System_Threading",
39 | ] }
40 | 
41 | [dev-dependencies]
42 | benchmark_utils = { workspace = true }
43 | criterion = { workspace = true }
44 | mockall = { workspace = true }
45 | mutants = { workspace = true }
46 | scopeguard = { workspace = true }
47 | static_assertions = { workspace = true }
48 | testing = { workspace = true }
49 | 
50 | [target.'cfg(windows)'.dev-dependencies]
51 | windows = { workspace = true, features = ["Win32_Security"] }
52 | 
53 | [[bench]]
54 | name = "hardware_info"
55 | harness = false
56 | 
57 | [[bench]]
58 | name = "hardware_tracker"
59 | harness = false
60 | 
61 | [[bench]]
62 | name = "pal_windows"
63 | harness = false
64 | 
65 | [[bench]]
66 | name = "processor_set_builder"
67 | harness = false
68 | 
69 | [lints]
70 | workspace = true
71 | 


--------------------------------------------------------------------------------
/crates/many_cpus/README.md:
--------------------------------------------------------------------------------
 1 | Working on many-processor systems with 100+ logical processors can require you to pay extra
 2 | attention to the specifics of the hardware to make optimal use of available compute capacity
 3 | and extract the most performance out of the system.
 4 | 
 5 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
 6 | high-performance hardware-aware programming in Rust.
 7 | 
 8 | # Why should one care?
 9 | 
10 | Modern operating systems try to distribute work fairly between all processors. Typical Rust
11 | sync and async task runtimes like Rayon and Tokio likewise try to be efficient in occupying all
12 | processors with work, even moving work between processors if one risks becoming idle. This is fine
13 | but we can do better.
14 | 
15 | Taking direct control over the placement of work on specific processors can yield superior
16 | performance by taking advantage of factors under the service author's control, which are not known
17 | to general-purpose tasking runtimes:
18 | 
19 | 1. A key insight we can use is that most service apps exist to process requests or execute jobs - each
20 |    unit of work being done is related to a specific data set. We can ensure we only process the data
21 |    associated with a specific HTTP/gRPC request on a single processor to ensure optimal data locality.
22 |    This means the data related to the request is likely to be in the caches of that processor, speeding
23 |    up all operations related to that request by avoiding expensive memory accesses.
24 | 1. Even when data is intentionally shared across processors (e.g. because one processor is not capable
25 |    enough to do the work and parallelization is required), performance differences exist between
26 |    different pairs of processors because different processors can be connected to different physical
27 |    memory modules. Access to non-cached data is optimal when that data is in the same memory region
28 |    as the current processor (i.e. on the physical memory modules directly wired to the current
29 |    processor).
30 | 
31 | # How does this crate help?
32 | 
33 | The `many_cpus` crate provides mechanisms to schedule threads on specific processors and in specific
34 | memory regions, ensuring that work assigned to those threads remains on the same hardware and that
35 | data shared between threads is local to the same memory region, enabling you to achieve high data
36 | locality and processor cache efficiency.
37 | 
38 | In addition to thread spawning, this crate enables app logic to observe what processor the current
39 | thread is executing on and in which memory region this processor is located, even if the thread is
40 | not bound to a specific processor. This can be a building block for efficiency improvements even
41 | outside directly controlled work scheduling.
42 | 
43 | Other crates from the [Folo project](https://github.com/folo-rs/folo) build upon this
44 | hardware-awareness functionality to provide higher-level primitives such as thread pools, work
45 | schedulers, region-local cells and more.
46 | 
47 | # How to use this crate?
48 | 
49 | More details in the [crate documentation](https://docs.rs/many_cpus/).


--------------------------------------------------------------------------------
/crates/many_cpus/benches/hardware_info.rs:
--------------------------------------------------------------------------------
 1 | //! Benchmarking operations exposed by the `HardwareInfo` struct.
 2 | 
 3 | #![allow(
 4 |     missing_docs,
 5 |     reason = "No need for API documentation in benchmark code"
 6 | )]
 7 | 
 8 | use criterion::{Criterion, criterion_group, criterion_main};
 9 | use many_cpus::HardwareInfo;
10 | 
11 | criterion_group!(benches, entrypoint);
12 | criterion_main!(benches);
13 | 
14 | fn entrypoint(c: &mut Criterion) {
15 |     let mut group = c.benchmark_group("HardwareInfo");
16 | 
17 |     // Mostly pointless since all the accessors just load from a lazily initialized static
18 |     // variable. Just here to detect anomalies if we do something strange and it gets slow.
19 |     group.bench_function("max_processor_id", |b| {
20 |         b.iter(HardwareInfo::max_processor_id);
21 |     });
22 | 
23 |     group.finish();
24 | }
25 | 


--------------------------------------------------------------------------------
/crates/many_cpus/benches/hardware_tracker.rs:
--------------------------------------------------------------------------------
 1 | //! Benchmarking operations exposed by the `HardwareTracker` struct.
 2 | 
 3 | #![allow(
 4 |     missing_docs,
 5 |     reason = "No need for API documentation in benchmark code"
 6 | )]
 7 | 
 8 | use std::{hint::black_box, time::Duration};
 9 | 
10 | use criterion::{Criterion, criterion_group, criterion_main};
11 | use folo_utils::nz;
12 | use many_cpus::{HardwareTracker, ProcessorSet};
13 | 
14 | criterion_group!(benches, entrypoint);
15 | criterion_main!(benches);
16 | 
17 | fn entrypoint(c: &mut Criterion) {
18 |     let mut group = c.benchmark_group("HardwareTracker");
19 | 
20 |     // Results from this are really unstable for whatever reason. Give it more time to stabilize.
21 |     group.measurement_time(Duration::from_secs(30));
22 | 
23 |     group.bench_function("current_processor_unpinned", |b| {
24 |         b.iter(|| {
25 |             black_box(HardwareTracker::with_current_processor(|p| {
26 |                 // We cannot return a reference to the processor itself but this is close enough.
27 |                 p.id()
28 |             }));
29 |         });
30 |     });
31 | 
32 |     group.bench_function("current_processor_id_unpinned", |b| {
33 |         b.iter(|| {
34 |             black_box(HardwareTracker::current_processor_id());
35 |         });
36 |     });
37 | 
38 |     group.bench_function("current_memory_region_id_unpinned", |b| {
39 |         b.iter(|| {
40 |             black_box(HardwareTracker::current_memory_region_id());
41 |         });
42 |     });
43 | 
44 |     // Now we pin the current thread and do the whole thing again!
45 |     let one_processor = ProcessorSet::builder()
46 |         .performance_processors_only()
47 |         .take(nz!(1))
48 |         .unwrap();
49 | 
50 |     one_processor.pin_current_thread_to();
51 | 
52 |     group.bench_function("current_processor_pinned", |b| {
53 |         b.iter(|| {
54 |             black_box(HardwareTracker::with_current_processor(|p| {
55 |                 // We cannot return a reference to the processor itself but this is close enough.
56 |                 p.id()
57 |             }));
58 |         });
59 |     });
60 | 
61 |     group.bench_function("current_processor_id_pinned", |b| {
62 |         b.iter(|| {
63 |             black_box(HardwareTracker::current_processor_id());
64 |         });
65 |     });
66 | 
67 |     group.bench_function("current_memory_region_id_pinned", |b| {
68 |         b.iter(|| {
69 |             black_box(HardwareTracker::current_memory_region_id());
70 |         });
71 |     });
72 | 
73 |     // Don't forget to unpin the thread to avoid affecting future benchmarks!
74 |     ProcessorSet::builder()
75 |         .ignoring_resource_quota()
76 |         .take_all()
77 |         .unwrap()
78 |         .pin_current_thread_to();
79 | 
80 |     group.finish();
81 | }
82 | 


--------------------------------------------------------------------------------
/crates/many_cpus/benches/pal_windows.rs:
--------------------------------------------------------------------------------
 1 | //! Benchmarking Windows PAL internal logic via private API that bypasses the
 2 | //! public API and allows operations to be performed without (full) caching.
 3 | 
 4 | #![allow(
 5 |     missing_docs,
 6 |     reason = "No need for API documentation in benchmark code"
 7 | )]
 8 | 
 9 | use criterion::{Criterion, criterion_group, criterion_main};
10 | 
11 | criterion_group!(benches, entrypoint);
12 | criterion_main!(benches);
13 | 
14 | #[allow(
15 |     clippy::needless_pass_by_ref_mut,
16 |     reason = "spurious error on non-Windows"
17 | )]
18 | fn entrypoint(c: &mut Criterion) {
19 |     #[cfg(windows)]
20 |     windows::entrypoint(c);
21 | 
22 |     #[cfg(not(windows))]
23 |     {
24 |         _ = c;
25 |     }
26 | }
27 | 
28 | #[cfg(windows)]
29 | mod windows {
30 |     use std::{hint::black_box, sync::Arc, time::Duration};
31 | 
32 |     use benchmark_utils::{ThreadPool, bench_on_threadpool};
33 |     use criterion::Criterion;
34 |     use folo_utils::nz;
35 |     use many_cpus::{ProcessorSet, pal::BUILD_TARGET_PLATFORM};
36 |     use windows::Win32::System::SystemInformation::GROUP_AFFINITY;
37 | 
38 |     pub(crate) fn entrypoint(c: &mut Criterion) {
39 |         let mut group = c.benchmark_group("Pal_Windows");
40 | 
41 |         // The results are quite jittery. Give it some time to stabilize.
42 |         group.measurement_time(Duration::from_secs(30));
43 | 
44 |         group.bench_function("current_thread_processors", |b| {
45 |             b.iter(|| black_box(BUILD_TARGET_PLATFORM.__private_current_thread_processors()));
46 |         });
47 | 
48 |         group.bench_function("get_all_processors", |b| {
49 |             b.iter(|| BUILD_TARGET_PLATFORM.__private_get_all_processors());
50 |         });
51 | 
52 |         group.bench_function("affinity_mask_to_processor_id_1", |b| {
53 |             let mask = GROUP_AFFINITY {
54 |                 Group: 0,
55 |                 Mask: 1, // Exactly 1 bit set, matching the "_1" benchmark name.
56 |                 ..Default::default()
57 |             };
58 | 
59 |             b.iter(|| {
60 |                 black_box(BUILD_TARGET_PLATFORM.__private_affinity_mask_to_processor_id(&mask))
61 |             });
62 |         });
63 | 
64 |         group.bench_function("affinity_mask_to_processor_id_16", |b| {
65 |             let mask = GROUP_AFFINITY {
66 |                 Group: 0,
67 |                 Mask: 0xFFFF, // 16 bits set, matching the "_16" benchmark name (0xFF is only 8).
68 |                 ..Default::default()
69 |             };
70 | 
71 |             b.iter(|| {
72 |                 black_box(BUILD_TARGET_PLATFORM.__private_affinity_mask_to_processor_id(&mask))
73 |             });
74 |         });
75 | 
76 |         group.bench_function("pin_thread_to_default_set", |b| {
77 |             let default_processor_set = Arc::new(ProcessorSet::default());
78 |             let one_processor = ProcessorSet::builder().take(nz!(1)).unwrap();
79 |             let one_thread = ThreadPool::new(&one_processor);
80 | 
81 |             b.iter_custom({
82 |                 |iters| {
83 |                     bench_on_threadpool(&one_thread, iters, || (), {
84 |                         let default_processor_set = Arc::clone(&default_processor_set);
85 | 
86 |                         move |()| {
87 |                             default_processor_set.pin_current_thread_to();
88 |                         }
89 |                     })
90 |                 }
91 |             });
92 |         });
93 | 
94 |         group.finish();
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/crates/many_cpus/benches/processor_set_builder.rs:
--------------------------------------------------------------------------------
  1 | //! Benchmarking operations on the `ProcessorSetBuilder` type.
  2 | 
  3 | #![allow(
  4 |     missing_docs,
  5 |     reason = "No need for API documentation in benchmark code"
  6 | )]
  7 | 
  8 | use std::{hint::black_box, time::Duration};
  9 | 
 10 | use benchmark_utils::{ThreadPool, bench_on_threadpool};
 11 | use criterion::{Criterion, criterion_group, criterion_main};
 12 | use folo_utils::nz;
 13 | use many_cpus::ProcessorSet;
 14 | 
 15 | criterion_group!(benches, entrypoint);
 16 | criterion_main!(benches);
 17 | 
 18 | fn entrypoint(c: &mut Criterion) {
 19 |     let thread_pool = ThreadPool::default();
 20 | 
 21 |     let mut group = c.benchmark_group("ProcessorSetBuilder");
 22 | 
 23 |     // Results from this are really unstable for whatever reason. Give it more time to stabilize.
 24 |     group.measurement_time(Duration::from_secs(30));
 25 | 
 26 |     group.bench_function("all", |b| {
 27 |         b.iter(|| {
 28 |             black_box(ProcessorSet::builder().take_all().unwrap());
 29 |         });
 30 |     });
 31 | 
 32 |     group.bench_function("one", |b| {
 33 |         b.iter(|| {
 34 |             black_box(ProcessorSet::builder().take(nz!(1)).unwrap());
 35 |         });
 36 |     });
 37 | 
 38 |     group.bench_function("only_evens", |b| {
 39 |         b.iter(|| {
 40 |             black_box(
 41 |                 ProcessorSet::builder()
 42 |                     .filter(|p| p.id() % 2 == 0)
 43 |                     .take_all()
 44 |                     .unwrap(),
 45 |             );
 46 |         });
 47 |     });
 48 | 
 49 |     group.finish();
 50 | 
 51 |     let mut group = c.benchmark_group("ProcessorSetBuilder_MT");
 52 | 
 53 |     // Results from this are really unstable for whatever reason. Give it more time to stabilize.
 54 |     group.measurement_time(Duration::from_secs(30));
 55 | 
 56 |     group.bench_function("all", |b| {
 57 |         b.iter_custom(|iters| {
 58 |             bench_on_threadpool(
 59 |                 &thread_pool,
 60 |                 iters,
 61 |                 || (),
 62 |                 |()| {
 63 |                     black_box(ProcessorSet::builder().take_all().unwrap());
 64 |                 },
 65 |             )
 66 |         });
 67 |     });
 68 | 
 69 |     group.bench_function("one", |b| {
 70 |         b.iter_custom(|iters| {
 71 |             bench_on_threadpool(
 72 |                 &thread_pool,
 73 |                 iters,
 74 |                 || (),
 75 |                 |()| {
 76 |                     black_box(ProcessorSet::builder().take(nz!(1)).unwrap());
 77 |                 },
 78 |             )
 79 |         });
 80 |     });
 81 | 
 82 |     group.bench_function("only_evens", |b| {
 83 |         b.iter_custom(|iters| {
 84 |             bench_on_threadpool(
 85 |                 &thread_pool,
 86 |                 iters,
 87 |                 || (),
 88 |                 |()| {
 89 |                     black_box(
 90 |                         ProcessorSet::builder()
 91 |                             .filter(|p| p.id() % 2 == 0)
 92 |                             .take_all()
 93 |                             .unwrap(),
 94 |                     );
 95 |                 },
 96 |             )
 97 |         });
 98 |     });
 99 | 
100 |     group.finish();
101 | }
102 | 


--------------------------------------------------------------------------------
/crates/many_cpus/docs/snippets/changes_at_runtime.md:
--------------------------------------------------------------------------------
 1 | # Changes at runtime
 2 | 
 3 | It is possible for a system to have processors added or removed at runtime, or for
 4 | constraints enforced by the operating system to change over time. Such changes will not be
 5 | represented in an existing processor set - once created, a processor set is static.
 6 | 
 7 | Changes to resource quotas can be applied by creating a new processor set (e.g. if the
 8 | processor time quota is lowered, building a new set will by default use the new quota).
 9 | 
10 | This crate will not detect more fundamental changes such as added/removed processors. Operations
11 | attempted on removed processors may fail with an error or panic or silently misbehave (e.g.
12 | threads never starting). Added processors will not be considered a member of any set.


--------------------------------------------------------------------------------
/crates/many_cpus/docs/snippets/external_constraints.md:
--------------------------------------------------------------------------------
 1 | # External constraints
 2 | 
 3 | The operating system may define constraints that prohibit the application from using all
 4 | the available processors (e.g. when the app is containerized and provided limited
 5 | hardware resources).
 6 | 
 7 | This crate treats platform constraints as follows:
 8 | 
 9 | * Hard limits on which processors are allowed are respected - forbidden processors are mostly
10 |   ignored by this crate and cannot be used to spawn threads, though such processors are still
11 |   accounted for when inspecting hardware information such as "max processor ID".
12 |   The mechanisms for defining such limits are cgroups on Linux and job objects on Windows.
13 |   See `examples/obey_job_affinity_windows.rs` for a Windows-specific example.
14 | * Soft limits on which processors are allowed are ignored by default - specifying a processor
15 |   affinity via `taskset` on Linux, `start.exe /affinity 0xff` on Windows or similar mechanisms
16 |   does not affect the set of processors this crate will use by default, though you can opt in to
17 |   this via [`.where_available_for_current_thread()`][crate::ProcessorSetBuilder::where_available_for_current_thread].
18 | * Limits on processor time are considered an upper bound on the number of processors that can be
19 |   included in a processor set. For example, if you configure a processor time limit of
20 |   10 seconds per second of real time on a 20-processor system, then the builder may return up
21 |   to 10 of the processors in the resulting processor set (though it may be a different 10 every
22 |   time you create a new processor set from scratch). This limit is optional and may be disabled
23 |   by using [`.ignoring_resource_quota()`][crate::ProcessorSetBuilder::ignoring_resource_quota].
24 |   See `examples/obey_job_resource_quota_windows.rs` for a Windows-specific example.
25 | 
26 | # Working with processor time constraints
27 | 
28 | If a process exceeds the processor time limit, the operating system will delay executing the
29 | process further until the "debt is paid off". This is undesirable for most workloads because:
30 | 
31 | 1. There will be random latency spikes from when the operating system decides to apply a delay.
32 | 1. The delay may not be evenly applied across all threads of the process, leading to unbalanced
33 |    load between worker threads.
34 | 
35 | For predictable behavior that does not suffer from delay side-effects, it is important that the
36 | process does not exceed the processor time limit. To keep out of trouble,
37 | follow these guidelines:
38 | 
39 | * Ensure that all your concurrently executing thread pools are derived from the same processor
40 |   set, so there is a single set of processors (up to the resource quota) that all work of the
41 |   process will be executed on. Any new processor sets you create should be subsets of this set,
42 |   thereby ensuring that all worker threads combined do not exceed the quota.
43 | * Ensure that the original processor set is constructed while obeying the resource quota (which is
44 |   enabled by default).
45 | 
46 | If your resource constraints are already applied on process startup, you can use
47 | `ProcessorSet::default()` as the master set from which all other processor sets are derived using
48 | `ProcessorSet::default().to_builder()`. This will ensure the processor time quota is always obeyed
49 | because `ProcessorSet::default()` is guaranteed to obey the resource quota.
50 | 
51 | ```rust ignore
52 | let mail_senders = ProcessorSet::default().to_builder().take(MAIL_WORKER_COUNT).unwrap();
53 | ```


--------------------------------------------------------------------------------
/crates/many_cpus/examples/get_all_processors.rs:
--------------------------------------------------------------------------------
 1 | //! We inspect every processor available to the current process and write a
 2 | //! human-readable description of it to the terminal.
 3 | //!
 4 | //! This obeys the operating system enforced processor selection constraints
 5 | //! assigned to the current process (which is always the case).
 6 | //!
 7 | //! However, this does not obey the resource quota available to the current process. This is
 8 | //! typically not useful for executing work but may be useful for inspecting available processors.
 9 | 
10 | use many_cpus::ProcessorSet;
11 | 
12 | fn main() {
13 |     for processor in ProcessorSet::builder()
14 |         .ignoring_resource_quota()
15 |         .take_all()
16 |         .unwrap()
17 |         .processors()
18 |     {
19 |         println!("{processor:?}");
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/get_default_processors.rs:
--------------------------------------------------------------------------------
 1 | //! We inspect every processor in the default set and write a
 2 | //! human-readable description of it to the terminal.
 3 | //!
 4 | //! This obeys the operating system enforced processor selection and resource quota constraints
 5 | //! assigned to the current process (which is the default behavior).
 6 | 
 7 | use many_cpus::ProcessorSet;
 8 | 
 9 | fn main() {
10 |     for processor in ProcessorSet::default().processors() {
11 |         println!("{processor:?}");
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/obey_job_affinity_windows.rs:
--------------------------------------------------------------------------------
 1 | //! The mechanism used in Windows to enforce limits on processes is Job Objects. Processes are
 2 | //! assigned to jobs, and jobs can be constrained to only use a limited set of processors.
 3 | //!
 4 | //! This example proves that the APIs we offer do not "see" the universe outside of the limits
 5 | //! of the current process's job object constraints on processor affinity (which processors
 6 | //! the process is allowed to use).
 7 | //!
 8 | //! Job object limits are hard limits, whereas all other mechanisms to define affinity (e.g. CPU
 9 | //! sets and legacy "process affinity masks") are just wishes by the process in question.
10 | //! In case of conflicting masks, the intersection is used.
11 | //!
12 | //! This example is Windows-only, as job objects are a Windows-specific feature.
13 | 
14 | fn main() {
15 |     #[cfg(windows)]
16 |     windows::main();
17 | 
18 |     #[cfg(not(windows))]
19 |     panic!("This example is only supported on Windows.");
20 | }
21 | 
22 | #[cfg(windows)]
23 | mod windows {
24 |     use folo_utils::nz;
25 |     use many_cpus::ProcessorSet;
26 |     use testing::Job;
27 | 
28 |     pub(crate) fn main() {
29 |         // Restrict the current process to only use 2 processors.
30 |         let _job = Job::builder().with_processor_count(nz!(2)).build();
31 | 
32 |         verify_limits_obeyed();
33 |     }
34 | 
35 |     fn verify_limits_obeyed() {
36 |         // The default processor set obeys all the limits that apply to the current process.
37 |         let processor_count = ProcessorSet::default().len();
38 |         println!("Current process is allowed to use {processor_count} processors.");
39 | 
40 |         assert_eq!(processor_count, 2);
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/obey_job_resource_quota_windows.rs:
--------------------------------------------------------------------------------
 1 | //! The mechanism used in Windows to enforce limits on processes is Job Objects. Processes are
 2 | //! assigned to jobs, and jobs can be constrained to only use a limited set of processors.
 3 | //!
 4 | //! This example proves that the APIs we offer can accurately judge the resource quota assigned
 5 | //! to the process and follow best practices for processor set sizing when a quota is active.
 6 | //!
 7 | //! We configure the job object to only grant 50% of the system processor time to the process.
 8 | //!
 9 | //! This example is Windows-only, as job objects are a Windows-specific feature.
10 | 
11 | fn main() {
12 |     #[cfg(windows)]
13 |     windows::main();
14 | 
15 |     #[cfg(not(windows))]
16 |     panic!("This example is only supported on Windows.");
17 | }
18 | 
19 | #[cfg(windows)]
20 | mod windows {
21 |     use many_cpus::{HardwareTracker, ProcessorSet};
22 |     use testing::{Job, ProcessorTimePct};
23 | 
24 |     pub(crate) fn main() {
25 |         // Restrict the current process to only use 50% of the system processor time.
26 |         let _job = Job::builder()
27 |             .with_max_processor_time_pct(ProcessorTimePct::new_static::<50>())
28 |             .build();
29 | 
30 |         verify_limits_obeyed();
31 |     }
32 | 
33 |     #[expect(
34 |         clippy::cast_precision_loss,
35 |         reason = "all expected values are in safe range"
36 |     )]
37 |     fn verify_limits_obeyed() {
38 |         // This is "100%". This count may also include processors that are not available to the
39 |         // current process (e.g. when job objects already constrain our processors due to
40 |         // executing in a container).
41 |         let system_processor_count = HardwareTracker::active_processor_count();
42 | 
43 |         let resource_quota = HardwareTracker::resource_quota();
44 | 
45 |         // This should say we are allowed to use 50% of the system processor time, which we
46 |         // express as processor-seconds per second.
47 |         // NB! This can never be higher than our process's max processor time. We rely on the
48 |         // example not having process-specific limits that bring it lower than the 50% here.
49 |         let max_processor_time = resource_quota.max_processor_time();
50 | 
51 |         println!(
52 |             "Current process is allowed to use {max_processor_time} seconds of processor time per second of real time."
53 |         );
54 | 
55 |         let expected_processor_time = system_processor_count as f64 * 0.5;
56 | 
57 |         assert!(
58 |             processor_time_eq(max_processor_time, expected_processor_time),
59 |             "The resource quota should be 50% of the available processor time. Expected: {expected_processor_time}, Actual: {max_processor_time}",
60 |         );
61 | 
62 |         // The default processor set obeys all the limits that apply to the current process.
63 |         let quota_limited_processor_count = ProcessorSet::default().len();
64 | 
65 |         println!(
66 |             "The resource quota allows the current process to use {quota_limited_processor_count} out of a total of {system_processor_count} processors."
67 |         );
68 | 
69 |         let expected_limited_processor_count = (system_processor_count as f64 * 0.5).floor();
70 | 
71 |         assert!(
72 |             processor_time_eq(
73 |                 expected_limited_processor_count,
74 |                 quota_limited_processor_count as f64
75 |             ),
76 |             "The resource quota should limit the number of processors to half of the available processors, rounded down. Expected: {expected_limited_processor_count}, Actual: {quota_limited_processor_count}",
77 |         );
78 |     }
79 | 
80 |     fn processor_time_eq(a: f64, b: f64) -> bool {
81 |         // Exact float equality is fragile, so values within this tolerance count as equal.
82 |         // https://rust-lang.github.io/rust-clippy/master/index.html#float_cmp
83 |         const TOLERANCE: f64 = 0.01;
84 | 
85 |         // Only the magnitude of the difference matters, so argument order is irrelevant.
86 |         (a - b).abs() < TOLERANCE
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/observe_processor.rs:
--------------------------------------------------------------------------------
 1 | //! Observe the processor assigned to the entrypoint thread, displaying an update in the
 2 | //! terminal once per second, looping forever.
 3 | 
 4 | use std::{thread, time::Duration};
 5 | 
 6 | use many_cpus::{HardwareInfo, HardwareTracker};
 7 | 
 8 | fn main() {
 9 |     // `HardwareInfo` reports non-changing values, so these are read once, outside the loop.
10 |     let max_processors = HardwareInfo::max_processor_count();
11 |     let max_memory_regions = HardwareInfo::max_memory_region_count();
12 |     println!(
13 |         "This system can support up to {max_processors} processors in {max_memory_regions} memory regions"
14 |     );
15 | 
16 |     let poll_interval = Duration::from_secs(1);
17 |     loop {
18 |         let current_processor_id = HardwareTracker::current_processor_id();
19 |         let current_memory_region_id = HardwareTracker::current_memory_region_id();
20 |         println!(
21 |             "Thread executing on processor {current_processor_id} in memory region {current_memory_region_id}"
22 |         );
23 |         thread::sleep(poll_interval);
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/spawn_on_all_processors.rs:
--------------------------------------------------------------------------------
 1 | //! Spawns one thread on each processor in the default processor set.
 2 | 
 3 | use many_cpus::ProcessorSet;
 4 | 
 5 | fn main() {
 6 |     let processors = ProcessorSet::default();
 7 |     let handles = processors.spawn_threads(|processor| {
 8 |         println!("Spawned thread on processor {}", processor.id());
 9 | 
10 |         // A real service would launch its work handler from here, for example a loop that
11 |         // drains messages from a channel or a web request handler.
12 |     });
13 | 
14 |     for handle in handles {
15 |         handle.join().unwrap();
16 |     }
17 |     println!("All threads have finished.");
18 | }
19 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/spawn_on_any_processors.rs:
--------------------------------------------------------------------------------
 1 | //! Spawns one thread for each processor in the default processor set but allows the OS to decide
 2 | //! which processor each thread runs on. This can be used to observe how the OS schedules threads
 3 | //! across processors when not provided any constraints.
 4 | //!
 5 | //! Each thread will do a bit of work (10 seconds of spinning CPU) and then terminate.
 6 | 
 7 | use std::time::Instant;
 8 | 
 9 | use many_cpus::ProcessorSet;
10 | 
11 | fn main() {
12 |     // We spawn N threads, where N is the number of processors in the default processor set.
13 |     // However, we do not pin them to any specific processor.
14 |     // This means that the OS can schedule them however it likes.
15 | 
16 |     let processor_set = ProcessorSet::default();
17 | 
18 |     let mut threads = Vec::with_capacity(processor_set.len());
19 | 
20 |     for _ in 0..processor_set.len() {
21 |         let thread = std::thread::spawn(move || {
22 |             let start = Instant::now();
23 | 
24 |             let mut x: u64 = 0;
25 | 
26 |             loop {
27 |                 for _ in 0..100_000 {
28 |                     x = x.wrapping_add(1);
29 |                 }
30 |                 // Every thread spins the CPU for 10 seconds. `as_secs()` truncates to whole
31 |                 // seconds, so `>= 10` stops after 10 s, whereas `> 10` would only stop at 11 s.
32 |                 if start.elapsed().as_secs() >= 10 {
33 |                     println!("Thread finished after {x} iterations");
34 |                     break;
35 |                 }
36 |             }
37 |         });
38 | 
39 |         threads.push(thread);
40 |     }
41 | 
42 |     println!("Spawned {} threads", threads.len());
43 | 
44 |     for thread in threads {
45 |         thread.join().unwrap();
46 |     }
47 | 
48 |     println!("All threads have finished.");
49 | }
50 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/spawn_on_inherited_processors.rs:
--------------------------------------------------------------------------------
 1 | //! Starts one thread on every processor in the system, respecting resource quotas and allowing the
 2 | //! set of allowed processors to be inherited from the environment (based on user configuration).
 3 | //!
 4 | //! The set of processors used here can be adjusted via any suitable OS mechanisms.
 5 | //!
 6 | //! For example, to select only processors 0 and 1:
 7 | //! Linux: `taskset 0x3 target/debug/examples/spawn_on_inherited_processors`
 8 | //! Windows: `start /affinity 0x3 target/debug/examples/spawn_on_inherited_processors.exe`
 9 | 
10 | use std::{thread, time::Duration};
11 | 
12 | use many_cpus::ProcessorSet;
13 | 
14 | fn main() {
15 |     // Limiting the selection to processors available to the current thread is what makes
16 |     // soft limits on processor affinity take effect here.
17 |     let allowed = ProcessorSet::builder()
18 |         .where_available_for_current_thread()
19 |         .take_all()
20 |         .expect("found no processors usable by the current thread - impossible because the thread is currently running on one");
21 | 
22 |     println!(
23 |         "After applying soft limits, we are allowed to use {} processors.",
24 |         allowed.len()
25 |     );
26 | 
27 |     let handles = allowed.spawn_threads(|processor| {
28 |         println!("Spawned thread on processor {}", processor.id());
29 | 
30 |         // A real service would launch its work handler from here, for example a loop that
31 |         // drains messages from a channel or a web request handler.
32 |     });
33 |     for handle in handles {
34 |         handle.join().unwrap();
35 |     }
36 | 
37 |     println!("All threads have finished. Exiting in 10 seconds.");
38 | 
39 |     // Linger before exiting: on Windows, "start" opens a new window that would otherwise
40 |     // vanish immediately, leaving no chance to read the output.
41 |     thread::sleep(Duration::from_secs(10));
42 | }
43 | 


--------------------------------------------------------------------------------
/crates/many_cpus/examples/spawn_on_selected_processors.rs:
--------------------------------------------------------------------------------
 1 | //! Selects a pair of processors and spawns a thread on each of them.
 2 | //! This demonstrates arbitrary processor selection logic.
 3 | 
 4 | use std::num::NonZero;
 5 | 
 6 | use many_cpus::ProcessorSet;
 7 | 
 8 | const PROCESSOR_COUNT: NonZero<usize> = NonZero::new(2).unwrap();
 9 | 
10 | fn main() {
11 |     // Pick `PROCESSOR_COUNT` performance processors that share a single memory region.
12 |     let pair = ProcessorSet::builder()
13 |         .same_memory_region()
14 |         .performance_processors_only()
15 |         .take(PROCESSOR_COUNT)
16 |         .expect("could not find required number of processors that match the selection criteria");
17 | 
18 |     let handles = pair.spawn_threads(|processor| {
19 |         println!("Spawned thread on processor {}", processor.id());
20 | 
21 |         // A real service would launch its work handler from here, for example a loop that
22 |         // drains messages from a channel or a web request handler.
23 |     });
24 | 
25 |     for handle in handles {
26 |         handle.join().unwrap();
27 |     }
28 |     println!("All threads have finished.");
29 | }
30 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/clients.rs:
--------------------------------------------------------------------------------
1 | //! Establishes a client-server pattern whereby logic that uses the hardware tracker can
2 | //! be replaced with a mock, breaking any hard dependencies for testing purposes.
3 | 
4 | mod hw_tracker_client;
5 | mod hw_tracker_facade;
6 | 
7 | pub(crate) use hw_tracker_client::*;
8 | pub(crate) use hw_tracker_facade::*;
9 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/clients/hw_tracker_client.rs:
--------------------------------------------------------------------------------
 1 | use crate::{CURRENT_TRACKER, MemoryRegionId, ProcessorId};
 2 | 
 3 | #[cfg_attr(test, mockall::automock)] // Test builds also get a generated mock of this trait.
 4 | pub(crate) trait HardwareTrackerClient { // Seam that lets hardware tracker usage be mocked.
 5 |     fn update_pin_status(
 6 |         &self,
 7 |         processor_id: Option<ProcessorId>, // NOTE(review): presumably `None` = no single pinned processor - confirm.
 8 |         memory_region_id: Option<MemoryRegionId>, // NOTE(review): presumably `None` = no single pinned region - confirm.
 9 |     );
10 | }
11 | 
12 | #[derive(Debug)]
13 | pub(crate) struct HardwareTrackerClientImpl;
14 | 
15 | impl HardwareTrackerClient for HardwareTrackerClientImpl {
16 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
17 |     fn update_pin_status(
18 |         &self,
19 |         processor_id: Option<ProcessorId>,
20 |         memory_region_id: Option<MemoryRegionId>,
21 |     ) {
22 |         // Hand both values to the tracker in `CURRENT_TRACKER`, mutably borrowed for the call.
23 |         CURRENT_TRACKER
24 |             .with_borrow_mut(|current| current.update_pin_status(processor_id, memory_region_id));
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/clients/hw_tracker_facade.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | use std::sync::Arc;
 3 | 
 4 | use crate::{MemoryRegionId, ProcessorId};
 5 | 
 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl};
 7 | 
 8 | #[cfg(test)]
 9 | use crate::MockHardwareTrackerClient;
10 | 
11 | #[derive(Clone, Debug)]
12 | pub(crate) enum HardwareTrackerClientFacade { // One wrapper type over the real client or a mock.
13 |     Real(&'static HardwareTrackerClientImpl),
14 |     // The variant below is compiled only into test builds.
15 |     #[cfg(test)]
16 |     Mock(Arc<MockHardwareTrackerClient>),
17 | }
18 | 
19 | impl HardwareTrackerClientFacade {
20 |     pub(crate) const fn real() -> Self {
21 |         Self::Real(&HardwareTrackerClientImpl)
22 |     }
23 |     /// Wraps the given, caller-configured mock (test builds only).
24 |     #[cfg(test)]
25 |     pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self {
26 |         Self::Mock(Arc::new(mock))
27 |     }
28 |     /// Wraps a mock created with `MockHardwareTrackerClient::new()` (test builds only).
29 |     #[cfg(test)]
30 |     pub(crate) fn default_mock() -> Self {
31 |         Self::from_mock(MockHardwareTrackerClient::new())
32 |     }
33 | }
34 | 
35 | impl HardwareTrackerClient for HardwareTrackerClientFacade {
36 |     // Pure dispatch: the arguments are passed through unchanged to whichever client this
37 |     // facade wraps. The facade adds no logic of its own; the mock arm is compiled only
38 |     // into test builds.
39 |     fn update_pin_status(
40 |         &self,
41 |         processor_id: Option<ProcessorId>,
42 |         memory_region_id: Option<MemoryRegionId>,
43 |     ) {
44 |         // Both arms evaluate to `()`, so the match doubles as the tail expression.
45 |         match self {
46 |             Self::Real(client) => client.update_pin_status(processor_id, memory_region_id),
47 |             #[cfg(test)]
48 |             Self::Mock(client) => client.update_pin_status(processor_id, memory_region_id),
49 |         }
50 |     }
51 | }
52 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/hardware_info.rs:
--------------------------------------------------------------------------------
  1 | use std::marker::PhantomData;
  2 | 
  3 | use crate::{
  4 |     MemoryRegionId, ProcessorId,
  5 |     pal::{BUILD_TARGET_PLATFORM, Platform},
  6 | };
  7 | 
  8 | /// Reports non-changing information about the system hardware.
  9 | ///
 10 | /// To inspect information that may change over time, use [`HardwareTracker`][1].
 11 | ///
 12 | /// Functions exposed by this type represent the system hardware and are not limited by the
 13 | /// current system or process configuration. That is, this type will still count processors and
 14 | /// memory regions that are currently inactive (e.g. some processors are physically disconnected)
 15 | /// or are not available to this process (e.g. because of cgroups policy).
 16 | ///
 17 | /// # Example
 18 | ///
 19 | /// ```
 20 | /// use many_cpus::HardwareInfo;
 21 | ///
 22 | /// let max_processor_id = HardwareInfo::max_processor_id();
 23 | /// println!("The maximum processor ID is: {max_processor_id}");
 24 | /// ```
 25 | ///
 26 | /// [1]: crate::HardwareTracker
 27 | #[derive(Debug)]
 28 | pub struct HardwareInfo {
 29 |     _no_ctor: PhantomData<()>, // Private zero-sized field: blocks construction outside this module.
 30 | }
 31 | 
 32 | impl HardwareInfo {
 33 |     /// Returns the highest processor ID (inclusive) that any processor could possibly have
 34 |     /// on this system, at any point in time.
 35 |     ///
 36 |     /// Processors that are not currently active, and active processors that are not
 37 |     /// available to the current process, are covered by this value as well.
 38 |     #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic.
 39 |     #[inline]
 40 |     #[must_use]
 41 |     pub fn max_processor_id() -> ProcessorId {
 42 |         BUILD_TARGET_PLATFORM.max_processor_id()
 43 |     }
 44 | 
 45 |     /// Returns the highest memory region ID (inclusive) that any memory region could possibly
 46 |     /// have on this system, at any point in time.
 47 |     ///
 48 |     /// Memory regions that are not currently active, and active memory regions that are not
 49 |     /// available to the current process, are covered by this value as well.
 50 |     #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic.
 51 |     #[inline]
 52 |     #[must_use]
 53 |     pub fn max_memory_region_id() -> MemoryRegionId {
 54 |         BUILD_TARGET_PLATFORM.max_memory_region_id()
 55 |     }
 56 | 
 57 |     /// Returns the largest number of processors that could possibly be present on the system
 58 |     /// at any point in time.
 59 |     ///
 60 |     /// Processors that are not currently active, and active processors that are not
 61 |     /// available to the current process, are counted as well.
 62 |     #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic.
 63 |     #[inline]
 64 |     #[must_use]
 65 |     pub fn max_processor_count() -> usize {
 66 |         // The count is one more than the highest (inclusive) ID.
 67 |         let highest_id = Self::max_processor_id() as usize;
 68 |         highest_id.checked_add(1).expect("overflow when counting processors - this can only result from a critical error in the PAL")
 69 |     }
 70 | 
 71 |     /// Returns the largest number of memory regions that could possibly be present on the
 72 |     /// system at any point in time.
 73 |     ///
 74 |     /// Memory regions that are not currently active, and active memory regions that are not
 75 |     /// available to the current process, are counted as well.
 76 |     #[cfg_attr(test, mutants::skip)] // Trivial layer, we only test the underlying logic.
 77 |     #[inline]
 78 |     #[must_use]
 79 |     pub fn max_memory_region_count() -> usize {
 80 |         // The count is one more than the highest (inclusive) ID.
 81 |         let highest_id = Self::max_memory_region_id() as usize;
 82 |         highest_id.checked_add(1).expect("overflow when counting memory regions - this can only result from a critical error in the PAL")
 83 |     }
 84 | }
 85 | 
 86 | #[cfg(test)]
 87 | mod tests {
 88 |     use super::*;
 89 | 
 90 |     #[cfg(not(miri))] // Real platform is not supported under Miri.
 91 |     #[test]
 92 |     fn count_is_id_plus_one_real() {
 93 |         // Each count must be exactly one more than the corresponding maximum ID.
 94 |         let processor_count = HardwareInfo::max_processor_count();
 95 |         let expected_processor_count = HardwareInfo::max_processor_id() as usize + 1;
 96 |         assert_eq!(processor_count, expected_processor_count);
 97 | 
 98 |         let region_count = HardwareInfo::max_memory_region_count();
 99 |         let expected_region_count = HardwareInfo::max_memory_region_id() as usize + 1;
100 |         assert_eq!(region_count, expected_region_count);
101 |     }
102 | }
103 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal.rs:
--------------------------------------------------------------------------------
 1 | //! Platform Abstraction Layer (PAL). This is private API, though `pub` in parts to allow
 2 | //! benchmark code to bypass public API layers for more accurate benchmarking.
 3 | 
 4 | mod abstractions;
 5 | pub(crate) use abstractions::*;
 6 | 
 7 | mod facade;
 8 | pub(crate) use facade::*;
 9 | 
10 | #[cfg(target_os = "linux")]
11 | mod linux;
12 | #[cfg(target_os = "linux")]
13 | pub(crate) use linux::*;
14 | 
15 | #[cfg(windows)]
16 | mod windows;
17 | #[cfg(windows)]
18 | pub use windows::*;
19 | 
20 | #[cfg(test)]
21 | mod mocks;
22 | #[cfg(test)]
23 | pub(crate) use mocks::*;
24 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/abstractions.rs:
--------------------------------------------------------------------------------
1 | mod platform;
2 | mod processor;
3 | 
4 | pub(crate) use platform::*;
5 | pub(crate) use processor::*;
6 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/abstractions/platform.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::Debug;
 2 | 
 3 | use nonempty::NonEmpty;
 4 | 
 5 | use crate::{MemoryRegionId, ProcessorId, pal::ProcessorFacade};
 6 | 
 7 | pub(crate) trait Platform: Debug + Send + Sync + 'static {
 8 |     /// Returns all processors available to the current process.
 9 |     ///
10 |     /// The returned set will exclude processors that are not active or are forbidden from
11 |     /// being used due to resource constraints enforced by the operating system.
12 |     ///
13 |     /// The returned collection of processors is sorted by the processor ID, ascending.
14 |     #[must_use]
15 |     fn get_all_processors(&self) -> NonEmpty<ProcessorFacade>;
16 |     // NOTE(review): presumably restricts the current thread to the given processors - confirm.
17 |     fn pin_current_thread_to<P>(&self, processors: &NonEmpty<P>)
18 |     where
19 |         P: AsRef<ProcessorFacade>;
20 | 
21 |     /// Gets the ID of the processor currently executing this thread.
22 |     #[must_use]
23 |     fn current_processor_id(&self) -> ProcessorId;
24 | 
25 |     /// Gets the IDs of all processors that the current thread is allowed to execute on.
26 |     ///
27 |     /// Note: this may be a superset of `get_all_processors()` because it may include processors
28 |     /// that our process is in fact forbidden to use due to resource constraints enforced by
29 |     /// the operating system. The filtering to only see what we are allowed to use is performed
30 |     /// by `get_all_processors()` but not by this function.
31 |     #[must_use]
32 |     fn current_thread_processors(&self) -> NonEmpty<ProcessorId>;
33 | 
34 |     /// Gets the maximum (inclusive) processor ID of any processor that could possibly
35 |     /// be present on the system (including processors that are not currently active).
36 |     ///
37 |     /// The value also covers processors that are not available to the current process
38 |     /// due to resource constraints enforced by the operating system.
39 |     ///
40 |     /// This value is a constant and will not change over time.
41 |     #[must_use]
42 |     fn max_processor_id(&self) -> ProcessorId;
43 | 
44 |     /// Gets the maximum (inclusive) memory region ID of any memory region that could possibly
45 |     /// be present on the system (including memory regions that are not currently active).
46 |     ///
47 |     /// The value also covers memory regions that are not available to the current process
48 |     /// due to resource constraints enforced by the operating system.
49 |     ///
50 |     /// This value is a constant and will not change over time.
51 |     #[must_use]
52 |     fn max_memory_region_id(&self) -> MemoryRegionId;
53 | 
54 |     /// Gets the maximum processor time that the process is allowed to use per second of real time,
55 |     /// in seconds of processor time. This must be a positive number and will never be greater than
56 |     /// the number of processors available to the current process.
57 |     #[must_use]
58 |     fn max_processor_time(&self) -> f64;
59 | 
60 |     /// Gets the total number of active processors on the system, including ones that are not
61 |     /// necessarily available to the current process (if any such are known).
62 |     ///
63 |     /// We generally avoid relying on system-scoped data like this but because some platform APIs
64 |     /// speak in terms of system-scoped data, we occasionally need to access such values.
65 |     #[must_use]
66 |     fn active_processor_count(&self) -> usize;
67 | }
68 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/abstractions/processor.rs:
--------------------------------------------------------------------------------
 1 | use std::{
 2 |     fmt::{Debug, Display},
 3 |     hash::Hash,
 4 | };
 5 | 
 6 | use crate::{EfficiencyClass, MemoryRegionId, ProcessorId};
 7 | 
 8 | pub(crate) trait AbstractProcessor: // Common read-only interface of processor types in the PAL.
 9 |     Clone + Copy + Debug + Display + Eq + Hash + PartialEq + Send
10 | {
11 |     fn id(&self) -> ProcessorId; // ID of this processor.
12 |     fn memory_region_id(&self) -> MemoryRegionId; // NOTE(review): presumably the region this processor belongs to - confirm.
13 |     fn efficiency_class(&self) -> EfficiencyClass; // Efficiency class reported for this processor.
14 | }
15 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/facade.rs:
--------------------------------------------------------------------------------
1 | mod platform;
2 | mod processor;
3 | 
4 | pub(crate) use platform::*;
5 | pub(crate) use processor::*;
6 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/facade/platform.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::Debug;
  2 | 
  3 | #[cfg(test)]
  4 | use std::sync::Arc;
  5 | 
  6 | use crate::pal::{BUILD_TARGET_PLATFORM, BuildTargetPlatform, Platform, ProcessorFacade};
  7 | 
  8 | #[cfg(test)]
  9 | use crate::pal::MockPlatform;
 10 | 
 11 | #[derive(Clone)]
 12 | pub(crate) enum PlatformFacade { // One wrapper type over the real platform or a mock.
 13 |     Real(&'static BuildTargetPlatform),
 14 |     // The variant below is compiled only into test builds.
 15 |     #[cfg(test)]
 16 |     Mock(Arc<MockPlatform>),
 17 | }
 18 | 
 19 | impl PlatformFacade {
 20 |     pub(crate) fn real() -> Self {
 21 |         Self::from(&BUILD_TARGET_PLATFORM)
 22 |     }
 23 |     /// Wraps the given, caller-configured mock platform (test builds only).
 24 |     #[cfg(test)]
 25 |     pub(crate) fn from_mock(mock: MockPlatform) -> Self {
 26 |         Self::from(mock)
 27 |     }
 28 | }
 29 | 
 30 | impl Platform for PlatformFacade {
 31 |     fn get_all_processors(&self) -> nonempty::NonEmpty<ProcessorFacade> {
 32 |         match self {
 33 |             Self::Real(platform) => platform.get_all_processors(),
 34 |             #[cfg(test)]
 35 |             Self::Mock(platform) => platform.get_all_processors(),
 36 |         }
 37 |     }
 38 | 
 39 |     fn pin_current_thread_to<P>(&self, processors: &nonempty::NonEmpty<P>)
 40 |     where
 41 |         P: AsRef<ProcessorFacade>,
 42 |     {
 43 |         match self {
 44 |             Self::Real(platform) => platform.pin_current_thread_to(processors),
 45 |             #[cfg(test)]
 46 |             Self::Mock(platform) => platform.pin_current_thread_to(processors),
 47 |         }
 48 |     }
 49 | 
 50 |     fn current_processor_id(&self) -> crate::ProcessorId {
 51 |         match self {
 52 |             Self::Real(platform) => platform.current_processor_id(),
 53 |             #[cfg(test)]
 54 |             Self::Mock(platform) => platform.current_processor_id(),
 55 |         }
 56 |     }
 57 | 
 58 |     fn max_processor_id(&self) -> crate::ProcessorId {
 59 |         match self {
 60 |             Self::Real(platform) => platform.max_processor_id(),
 61 |             #[cfg(test)]
 62 |             Self::Mock(platform) => platform.max_processor_id(),
 63 |         }
 64 |     }
 65 | 
 66 |     fn max_memory_region_id(&self) -> crate::MemoryRegionId {
 67 |         match self {
 68 |             Self::Real(platform) => platform.max_memory_region_id(),
 69 |             #[cfg(test)]
 70 |             Self::Mock(platform) => platform.max_memory_region_id(),
 71 |         }
 72 |     }
 73 | 
 74 |     fn current_thread_processors(&self) -> nonempty::NonEmpty<crate::ProcessorId> {
 75 |         match self {
 76 |             Self::Real(platform) => platform.current_thread_processors(),
 77 |             #[cfg(test)]
 78 |             Self::Mock(platform) => platform.current_thread_processors(),
 79 |         }
 80 |     }
 81 | 
 82 |     fn max_processor_time(&self) -> f64 {
 83 |         match self {
 84 |             Self::Real(platform) => platform.max_processor_time(),
 85 |             #[cfg(test)]
 86 |             Self::Mock(platform) => platform.max_processor_time(),
 87 |         }
 88 |     }
 89 | 
 90 |     fn active_processor_count(&self) -> usize {
 91 |         match self {
 92 |             Self::Real(platform) => platform.active_processor_count(),
 93 |             #[cfg(test)]
 94 |             Self::Mock(platform) => platform.active_processor_count(),
 95 |         }
 96 |     }
 97 | }
 98 | 
 99 | impl From<&'static BuildTargetPlatform> for PlatformFacade {
100 |     fn from(real_platform: &'static BuildTargetPlatform) -> Self {
101 |         Self::Real(real_platform)
102 |     }
103 | }
104 | 
105 | #[cfg(test)]
106 | impl From<MockPlatform> for PlatformFacade {
107 |     fn from(mock_platform: MockPlatform) -> Self {
108 |         Self::Mock(Arc::new(mock_platform))
109 |     }
110 | }
111 | 
112 | impl Debug for PlatformFacade {
113 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
114 |         match self {
115 |             Self::Real(platform) => Debug::fmt(platform, f),
116 |             #[cfg(test)]
117 |             Self::Mock(platform) => Debug::fmt(platform, f),
118 |         }
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/facade/processor.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::Debug;
 2 | 
 3 | use derive_more::derive::Display;
 4 | 
 5 | use crate::pal::{AbstractProcessor, ProcessorImpl};
 6 | 
 7 | #[cfg(test)]
 8 | use crate::pal::FakeProcessor;
 9 | 
10 | #[derive(Clone, Copy, Display, Eq, Hash, PartialEq)]
11 | pub(crate) enum ProcessorFacade { // One wrapper type over a real or a fake processor.
12 |     Real(ProcessorImpl),
13 |     // The variant below is compiled only into test builds.
14 |     #[cfg(test)]
15 |     Fake(FakeProcessor),
16 | }
17 | 
18 | impl ProcessorFacade {
19 |     pub(crate) fn as_real(&self) -> &ProcessorImpl {
20 |         match self {
21 |             Self::Real(real) => real,
22 |             #[cfg(test)]
23 |             Self::Fake(_) => panic!("attempted to dereference facade into wrong type"),
24 |         }
25 |     }
26 | }
27 | 
28 | impl AsRef<Self> for ProcessorFacade { // Lets a facade satisfy `P: AsRef<ProcessorFacade>` bounds directly.
29 |     fn as_ref(&self) -> &Self {
30 |         self
31 |     }
32 | }
33 | 
34 | impl AbstractProcessor for ProcessorFacade {
35 |     fn id(&self) -> crate::ProcessorId {
36 |         match self {
37 |             Self::Real(processor) => processor.id(),
38 |             #[cfg(test)]
39 |             Self::Fake(processor) => processor.id(),
40 |         }
41 |     }
42 | 
43 |     fn memory_region_id(&self) -> crate::MemoryRegionId {
44 |         match self {
45 |             Self::Real(processor) => processor.memory_region_id(),
46 |             #[cfg(test)]
47 |             Self::Fake(processor) => processor.memory_region_id(),
48 |         }
49 |     }
50 | 
51 |     fn efficiency_class(&self) -> crate::EfficiencyClass {
52 |         match self {
53 |             Self::Real(processor) => processor.efficiency_class(),
54 |             #[cfg(test)]
55 |             Self::Fake(processor) => processor.efficiency_class(),
56 |         }
57 |     }
58 | }
59 | 
60 | impl From<ProcessorImpl> for ProcessorFacade {
61 |     fn from(real_processor: ProcessorImpl) -> Self {
62 |         Self::Real(real_processor)
63 |     }
64 | }
65 | 
66 | #[cfg(test)]
67 | impl From<FakeProcessor> for ProcessorFacade {
68 |     fn from(fake_processor: FakeProcessor) -> Self {
69 |         Self::Fake(fake_processor)
70 |     }
71 | }
72 | 
73 | impl Debug for ProcessorFacade {
74 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 |         match self {
76 |             Self::Real(processor) => Debug::fmt(processor, f),
77 |             #[cfg(test)]
78 |             Self::Fake(processor) => Debug::fmt(processor, f),
79 |         }
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux.rs:
--------------------------------------------------------------------------------
 1 | mod bindings;
 2 | mod filesystem;
 3 | mod platform;
 4 | mod processor;
 5 | 
 6 | use bindings::*;
 7 | use filesystem::*;
 8 | pub(crate) use platform::*;
 9 | pub(crate) use processor::*;
10 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/bindings.rs:
--------------------------------------------------------------------------------
1 | mod abstractions;
2 | mod facade;
3 | mod real;
4 | 
5 | pub(crate) use abstractions::*;
6 | pub(crate) use facade::*;
7 | pub(crate) use real::*;
8 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/bindings/abstractions.rs:
--------------------------------------------------------------------------------
 1 | use std::{fmt::Debug, io};
 2 | 
 3 | use libc::cpu_set_t;
 4 | 
 5 | /// Bindings for FFI calls into external libraries (whether provided by the operating system or not).
 6 | ///
 7 | /// All PAL FFI calls must go through this trait, enabling them to be mocked.
 8 | #[cfg_attr(test, mockall::automock)]
 9 | pub(crate) trait Bindings: Debug + Send + Sync + 'static {
10 |     // sched_setaffinity() for the current thread; a failed call is reported as an `io::Error`.
11 |     fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error>;
12 | 
13 |     // sched_getaffinity() for the current thread; a failed call is reported as an `io::Error`.
14 |     fn sched_getaffinity_current(&self) -> Result<cpu_set_t, io::Error>;
15 | 
16 |     fn sched_getcpu(&self) -> i32; // sched_getcpu(); the raw return value is passed through.
17 | }
18 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/bindings/facade.rs:
--------------------------------------------------------------------------------
 1 | use std::{fmt::Debug, io};
 2 | 
 3 | #[cfg(test)]
 4 | use std::sync::Arc;
 5 | 
 6 | use libc::cpu_set_t;
 7 | 
 8 | use crate::pal::linux::{Bindings, BuildTargetBindings};
 9 | 
10 | #[cfg(test)]
11 | use crate::pal::linux::MockBindings;
12 | 
13 | /// Enum to hide the real/mock choice behind a single wrapper type.
14 | #[derive(Clone)]
15 | pub(crate) enum BindingsFacade {
16 |     Real(&'static BuildTargetBindings),
17 |     // The variant below is compiled only into test builds.
18 |     #[cfg(test)]
19 |     Mock(Arc<MockBindings>),
20 | }
21 | 
22 | impl BindingsFacade {
23 |     pub(crate) const fn real() -> Self {
24 |         Self::Real(&BuildTargetBindings)
25 |     }
26 |     /// Wraps the given, caller-configured mock bindings (test builds only).
27 |     #[cfg(test)]
28 |     pub(crate) fn from_mock(mock: MockBindings) -> Self {
29 |         Self::Mock(Arc::from(mock))
30 |     }
31 | }
32 | 
33 | impl Bindings for BindingsFacade {
34 |     fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error> {
35 |         match self {
36 |             Self::Real(inner) => inner.sched_setaffinity_current(cpuset),
37 |             #[cfg(test)]
38 |             Self::Mock(inner) => inner.sched_setaffinity_current(cpuset),
39 |         }
40 |     }
41 | 
42 |     fn sched_getcpu(&self) -> i32 {
43 |         match self {
44 |             Self::Real(inner) => inner.sched_getcpu(),
45 |             #[cfg(test)]
46 |             Self::Mock(inner) => inner.sched_getcpu(),
47 |         }
48 |     }
49 | 
50 |     fn sched_getaffinity_current(&self) -> Result<cpu_set_t, io::Error> {
51 |         match self {
52 |             Self::Real(inner) => inner.sched_getaffinity_current(),
53 |             #[cfg(test)]
54 |             Self::Mock(inner) => inner.sched_getaffinity_current(),
55 |         }
56 |     }
57 | }
58 | 
59 | impl Debug for BindingsFacade {
60 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 |         match self {
62 |             Self::Real(bindings) => Debug::fmt(bindings, f),
63 |             #[cfg(test)]
64 |             Self::Mock(bindings) => Debug::fmt(bindings, f),
65 |         }
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/bindings/real.rs:
--------------------------------------------------------------------------------
 1 | use std::{fmt::Debug, io, mem};
 2 | 
 3 | use libc::cpu_set_t;
 4 | 
 5 | use crate::pal::linux::Bindings;
 6 | 
 7 | /// FFI bindings that target the real operating system that the build is targeting.
 8 | ///
 9 | /// You would only use different bindings in PAL unit tests that need to use mock bindings.
10 | /// Even then, whenever possible, unit tests should use real bindings for maximum realism.
11 | #[derive(Debug, Default)]
12 | pub(crate) struct BuildTargetBindings;
13 | 
14 | impl Bindings for BuildTargetBindings {
15 |     fn sched_setaffinity_current(&self, cpuset: &cpu_set_t) -> Result<(), io::Error> {
16 |         // 0 means current thread.
17 |         // SAFETY: No safety requirements beyond passing valid arguments.
18 |         let result = unsafe { libc::sched_setaffinity(0, size_of::<cpu_set_t>(), cpuset) };
19 | 
20 |         if result == 0 {
21 |             Ok(())
22 |         } else {
23 |             Err(io::Error::last_os_error())
24 |         }
25 |     }
26 | 
27 |     fn sched_getcpu(&self) -> i32 {
28 |         // SAFETY: No safety requirements.
29 |         unsafe { libc::sched_getcpu() }
30 |     }
31 | 
32 |     fn sched_getaffinity_current(&self) -> Result<cpu_set_t, io::Error> {
33 |         // SAFETY: All zeroes is a valid cpu_set_t.
34 |         let mut cpuset: cpu_set_t = unsafe { mem::zeroed() };
35 | 
36 |         // 0 means current thread.
37 |         // SAFETY: No safety requirements beyond passing valid arguments.
38 |         let result = unsafe { libc::sched_getaffinity(0, size_of::<cpu_set_t>(), &raw mut cpuset) };
39 | 
40 |         if result == 0 {
41 |             Ok(cpuset)
42 |         } else {
43 |             Err(io::Error::last_os_error())
44 |         }
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/filesystem.rs:
--------------------------------------------------------------------------------
1 | mod abstractions;
2 | mod facade;
3 | mod real;
4 | 
5 | pub(crate) use abstractions::*;
6 | pub(crate) use facade::*;
7 | pub(crate) use real::*;
8 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/filesystem/abstractions.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::Debug;
 2 | 
 3 | /// Linux has this funny notion of exposing various OS APIs as a virtual filesystem. This trait
 4 | /// abstracts this virtual filesystem to allow it to be mocked.
 5 | ///
 6 | /// The scope of this trait is limited to only the virtual filesystem exposed by the OS. We do not
 7 | /// expect to do "real" file I/O in this layer. All I/O is synchronous and blocking because we
 8 | /// expect it to hit a fast path in the OS, given the data is never on a real storage device.
 9 | #[cfg_attr(test, mockall::automock)]
10 | pub(crate) trait Filesystem: Debug + Send + Sync + 'static {
11 |     /// Get the contents of the /proc/cpuinfo file.
12 |     ///
13 |     /// NB! This file also includes offline processors. To check if a processor is online, you must
14 |     /// look in /sys/devices/system/cpu/cpu*/online (which has either 0 or 1 as content).
15 |     ///
16 |     /// This is a plaintext file with "key    : value" pairs, blocks separated by empty lines.
17 |     fn get_cpuinfo_contents(&self) -> String;
18 | 
19 |     /// Get the contents of the /sys/devices/system/node/possible file or `None` if it does
20 |     /// not exist.
21 |     ///
22 |     /// This lists all NUMA nodes that could possibly exist in the system, even those that are
23 |     /// offline.
24 |     ///
25 |     /// This is a cpulist format file ("0,1,2-4,5-10:2" style list).
26 |     fn get_numa_node_possible_contents(&self) -> Option<String>;
27 | 
28 |     /// Get the contents of the /sys/devices/system/node/node{}/cpulist file.
29 |     ///
30 |     /// This is a cpulist format file ("0,1,2-4,5-10:2" style list).
31 |     fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String;
32 | 
33 |     /// Gets the contents of the /sys/devices/system/cpu/cpu{}/online file.
34 |     ///
35 |     /// This is a single line file with either 0 or 1 as content (+ newline).
36 |     /// This file may be absent on some Linux flavors, in which case we assume every CPU is online.
37 |     fn get_cpu_online_contents(&self, cpu_index: u32) -> Option<String>;
38 | 
39 |     /// Gets the contents of the /proc/self/status file for the current process.
40 |     ///
41 |     /// This is a plaintext file with "key:     value" pairs.
42 |     fn get_proc_self_status_contents(&self) -> String;
43 | 
44 |     /// Gets the contents of the /proc/self/cgroup file for the current process.
45 |     fn get_proc_self_cgroup(&self) -> Option<String>;
46 | 
47 |     /// Contents of `/sys/fs/cgroup/cpu/{name}/cpu.cfs_quota_us`
48 |     fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option<String>;
49 | 
50 |     /// Contents of `/sys/fs/cgroup/cpu/{name}/cpu.cfs_period_us`
51 |     fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option<String>;
52 | 
53 |     /// Contents of `/sys/fs/cgroup/{name}/cpu.max`
54 |     fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option<String>;
55 | }
56 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/filesystem/facade.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::Debug;
  2 | 
  3 | #[cfg(test)]
  4 | use std::sync::Arc;
  5 | 
  6 | use crate::pal::linux::{BuildTargetFilesystem, Filesystem};
  7 | 
  8 | #[cfg(test)]
  9 | use crate::pal::linux::MockFilesystem;
 10 | 
 11 | /// Enum to hide the different filesystem implementations behind a single wrapper type.
 12 | #[derive(Clone)]
 13 | pub(crate) enum FilesystemFacade {
 14 |     Real(&'static BuildTargetFilesystem),
 15 | 
 16 |     #[cfg(test)]
 17 |     Mock(Arc<MockFilesystem>),
 18 | }
 19 | 
 20 | impl FilesystemFacade {
 21 |     pub(crate) const fn real() -> Self {
 22 |         Self::Real(&BuildTargetFilesystem)
 23 |     }
 24 | 
 25 |     #[cfg(test)]
 26 |     pub(crate) fn from_mock(mock: MockFilesystem) -> Self {
 27 |         Self::Mock(Arc::new(mock))
 28 |     }
 29 | }
 30 | 
 31 | impl Filesystem for FilesystemFacade {
 32 |     fn get_cpuinfo_contents(&self) -> String {
 33 |         match self {
 34 |             Self::Real(filesystem) => filesystem.get_cpuinfo_contents(),
 35 |             #[cfg(test)]
 36 |             Self::Mock(mock) => mock.get_cpuinfo_contents(),
 37 |         }
 38 |     }
 39 | 
 40 |     fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String {
 41 |         match self {
 42 |             Self::Real(filesystem) => filesystem.get_numa_node_cpulist_contents(node_index),
 43 |             #[cfg(test)]
 44 |             Self::Mock(mock) => mock.get_numa_node_cpulist_contents(node_index),
 45 |         }
 46 |     }
 47 | 
 48 |     fn get_cpu_online_contents(&self, cpu_index: u32) -> Option<String> {
 49 |         match self {
 50 |             Self::Real(filesystem) => filesystem.get_cpu_online_contents(cpu_index),
 51 |             #[cfg(test)]
 52 |             Self::Mock(mock) => mock.get_cpu_online_contents(cpu_index),
 53 |         }
 54 |     }
 55 | 
 56 |     fn get_numa_node_possible_contents(&self) -> Option<String> {
 57 |         match self {
 58 |             Self::Real(filesystem) => filesystem.get_numa_node_possible_contents(),
 59 |             #[cfg(test)]
 60 |             Self::Mock(mock) => mock.get_numa_node_possible_contents(),
 61 |         }
 62 |     }
 63 | 
 64 |     fn get_proc_self_status_contents(&self) -> String {
 65 |         match self {
 66 |             Self::Real(filesystem) => filesystem.get_proc_self_status_contents(),
 67 |             #[cfg(test)]
 68 |             Self::Mock(mock) => mock.get_proc_self_status_contents(),
 69 |         }
 70 |     }
 71 | 
 72 |     fn get_proc_self_cgroup(&self) -> Option<String> {
 73 |         match self {
 74 |             Self::Real(filesystem) => filesystem.get_proc_self_cgroup(),
 75 |             #[cfg(test)]
 76 |             Self::Mock(mock) => mock.get_proc_self_cgroup(),
 77 |         }
 78 |     }
 79 | 
 80 |     fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option<String> {
 81 |         match self {
 82 |             Self::Real(filesystem) => filesystem.get_v1_cgroup_cpu_quota(cgroup_name),
 83 |             #[cfg(test)]
 84 |             Self::Mock(mock) => mock.get_v1_cgroup_cpu_quota(cgroup_name),
 85 |         }
 86 |     }
 87 | 
 88 |     fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option<String> {
 89 |         match self {
 90 |             Self::Real(filesystem) => filesystem.get_v1_cgroup_cpu_period(cgroup_name),
 91 |             #[cfg(test)]
 92 |             Self::Mock(mock) => mock.get_v1_cgroup_cpu_period(cgroup_name),
 93 |         }
 94 |     }
 95 | 
 96 |     fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option<String> {
 97 |         match self {
 98 |             Self::Real(filesystem) => filesystem.get_v2_cgroup_cpu_quota_and_period(cgroup_name),
 99 |             #[cfg(test)]
100 |             Self::Mock(mock) => mock.get_v2_cgroup_cpu_quota_and_period(cgroup_name),
101 |         }
102 |     }
103 | }
104 | 
105 | impl Debug for FilesystemFacade {
106 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107 |         match self {
108 |             Self::Real(inner) => inner.fmt(f),
109 |             #[cfg(test)]
110 |             Self::Mock(inner) => inner.fmt(f),
111 |         }
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/filesystem/real.rs:
--------------------------------------------------------------------------------
 1 | use std::{fmt::Debug, fs};
 2 | 
 3 | use crate::pal::linux::Filesystem;
 4 | 
 5 | /// The virtual filesystem for the real operating system that the build is targeting.
 6 | ///
 7 | /// You would only use different filesystems in PAL unit tests that need to use a mock filesystem.
 8 | /// Even then, whenever possible, unit tests should use the real filesystem for maximum realism.
 9 | #[derive(Debug, Default)]
10 | pub(crate) struct BuildTargetFilesystem;
11 | 
12 | impl Filesystem for BuildTargetFilesystem {
13 |     fn get_cpuinfo_contents(&self) -> String {
14 |         fs::read_to_string("/proc/cpuinfo")
15 |             .expect("failed to read /proc/cpuinfo - cannot continue execution")
16 |     }
17 | 
18 |     fn get_numa_node_possible_contents(&self) -> Option<String> {
19 |         fs::read_to_string("/sys/devices/system/node/possible").ok()
20 |     }
21 | 
22 |     fn get_numa_node_cpulist_contents(&self, node_index: u32) -> String {
23 |         fs::read_to_string(format!("/sys/devices/system/node/node{node_index}/cpulist",))
24 |             .expect("failed to read NUMA node cpulist - cannot continue execution")
25 |     }
26 | 
27 |     fn get_cpu_online_contents(&self, cpu_index: u32) -> Option<String> {
28 |         fs::read_to_string(format!("/sys/devices/system/cpu/cpu{cpu_index}/online")).ok()
29 |     }
30 | 
31 |     fn get_proc_self_status_contents(&self) -> String {
32 |         fs::read_to_string("/proc/self/status")
33 |             .expect("failed to read /proc/self/status - cannot continue execution")
34 |     }
35 | 
36 |     fn get_proc_self_cgroup(&self) -> Option<String> {
37 |         fs::read_to_string("/proc/self/cgroup").ok()
38 |     }
39 | 
40 |     fn get_v1_cgroup_cpu_quota(&self, cgroup_name: &str) -> Option<String> {
41 |         fs::read_to_string(format!("/sys/fs/cgroup/cpu/{cgroup_name}/cpu.cfs_quota_us")).ok()
42 |     }
43 | 
44 |     fn get_v1_cgroup_cpu_period(&self, cgroup_name: &str) -> Option<String> {
45 |         fs::read_to_string(format!(
46 |             "/sys/fs/cgroup/cpu/{cgroup_name}/cpu.cfs_period_us"
47 |         ))
48 |         .ok()
49 |     }
50 | 
51 |     fn get_v2_cgroup_cpu_quota_and_period(&self, cgroup_name: &str) -> Option<String> {
52 |         fs::read_to_string(format!("/sys/fs/cgroup/{cgroup_name}/cpu.max")).ok()
53 |     }
54 | }
55 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/linux/processor.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::Display;
 2 | 
 3 | use crate::{EfficiencyClass, MemoryRegionId, ProcessorId, pal::AbstractProcessor};
 4 | 
 5 | /// A processor present on the system and available to the current process.
 6 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 7 | pub(crate) struct ProcessorImpl {
 8 |     pub(super) id: ProcessorId,
 9 |     pub(super) memory_region_id: MemoryRegionId,
10 |     pub(super) efficiency_class: EfficiencyClass,
11 | 
12 |     pub(super) is_active: bool,
13 | }
14 | 
15 | impl Display for ProcessorImpl {
16 |     #[cfg_attr(test, mutants::skip)] // There is no API contract to test here.
17 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18 |         write!(f, "processor {} [node {}]", self.id, self.memory_region_id)
19 |     }
20 | }
21 | 
22 | impl AbstractProcessor for ProcessorImpl {
23 |     fn id(&self) -> ProcessorId {
24 |         self.id
25 |     }
26 | 
27 |     fn memory_region_id(&self) -> MemoryRegionId {
28 |         self.memory_region_id
29 |     }
30 | 
31 |     fn efficiency_class(&self) -> EfficiencyClass {
32 |         self.efficiency_class
33 |     }
34 | }
35 | 
36 | impl PartialOrd for ProcessorImpl {
37 |     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
38 |         Some(self.cmp(other))
39 |     }
40 | }
41 | 
42 | impl Ord for ProcessorImpl {
43 |     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
44 |         self.id.cmp(&other.id)
45 |     }
46 | }
47 | 
48 | impl AsRef<Self> for ProcessorImpl {
49 |     fn as_ref(&self) -> &Self {
50 |         self
51 |     }
52 | }
53 | 
54 | #[cfg(test)]
55 | mod tests {
56 |     use super::*;
57 | 
58 |     #[test]
59 |     fn smoke_test() {
60 |         let processor = ProcessorImpl {
61 |             id: 2,
62 |             memory_region_id: 3,
63 |             efficiency_class: EfficiencyClass::Performance,
64 |             is_active: true,
65 |         };
66 | 
67 |         assert_eq!(processor.id(), 2);
68 |         assert_eq!(processor.memory_region_id(), 3);
69 |         assert_eq!(processor.efficiency_class(), EfficiencyClass::Performance);
70 | 
71 |         let processor2 = ProcessorImpl {
72 |             id: 2,
73 |             memory_region_id: 3,
74 |             efficiency_class: EfficiencyClass::Performance,
75 |             is_active: true,
76 |         };
77 | 
78 |         assert_eq!(processor, processor2);
79 | 
80 |         let processor3 = ProcessorImpl {
81 |             id: 4,
82 |             memory_region_id: 3,
83 |             efficiency_class: EfficiencyClass::Performance,
84 |             is_active: true,
85 |         };
86 | 
87 |         assert_ne!(processor, processor3);
88 |         assert!(processor < processor3);
89 |         assert!(processor3 > processor);
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/mocks.rs:
--------------------------------------------------------------------------------
 1 | #![expect(clippy::same_name_method, reason = "mock magic")]
 2 | 
 3 | use derive_more::derive::Display;
 4 | use mockall::mock;
 5 | use nonempty::NonEmpty;
 6 | 
 7 | use crate::{
 8 |     EfficiencyClass, MemoryRegionId, ProcessorId,
 9 |     pal::{AbstractProcessor, Platform, ProcessorFacade},
10 | };
11 | 
12 | #[derive(Clone, Copy, Debug, Display, Eq, Hash, PartialEq)]
13 | #[display("FakeProcessor({index} in node {memory_region}, {efficiency_class:?})")]
14 | pub(crate) struct FakeProcessor {
15 |     pub(crate) index: ProcessorId,
16 |     pub(crate) memory_region: MemoryRegionId,
17 |     pub(crate) efficiency_class: EfficiencyClass,
18 | }
19 | 
20 | impl FakeProcessor {
21 |     pub(crate) fn with_index(index: ProcessorId) -> Self {
22 |         Self {
23 |             index,
24 |             memory_region: 0,
25 |             efficiency_class: EfficiencyClass::Performance,
26 |         }
27 |     }
28 | }
29 | 
30 | impl AbstractProcessor for FakeProcessor {
31 |     fn id(&self) -> ProcessorId {
32 |         self.index
33 |     }
34 | 
35 |     fn memory_region_id(&self) -> MemoryRegionId {
36 |         self.memory_region
37 |     }
38 | 
39 |     fn efficiency_class(&self) -> EfficiencyClass {
40 |         self.efficiency_class
41 |     }
42 | }
43 | 
44 | // Mockall is not able to express all methods on the trait (due to generics deficiency), so we mock
45 | // similar-enough methods that it does know how to mock and simply call these from a manual
46 | // implementation of the trait that translates between the two forms.
47 | mock! {
48 |     #[derive(Debug)]
49 |     pub Platform {
50 |         pub fn get_all_processors_core(&self) -> NonEmpty<ProcessorFacade>;
51 |         pub fn pin_current_thread_to_core(&self, processors: Vec<ProcessorFacade>);
52 |         pub fn current_processor_id(&self) -> ProcessorId;
53 |         pub fn max_processor_id(&self) -> ProcessorId;
54 |         pub fn max_memory_region_id(&self) -> MemoryRegionId;
55 |         pub fn current_thread_processors(&self) -> NonEmpty<ProcessorId>;
56 |         pub fn max_processor_time(&self) -> f64;
57 |         pub fn active_processor_count(&self) -> usize;
58 |     }
59 | }
60 | 
61 | impl Platform for MockPlatform {
62 |     fn get_all_processors(&self) -> NonEmpty<ProcessorFacade> {
63 |         self.get_all_processors_core()
64 |     }
65 | 
66 |     fn pin_current_thread_to<P>(&self, processors: &NonEmpty<P>)
67 |     where
68 |         P: AsRef<ProcessorFacade>,
69 |     {
70 |         let processors = processors.iter().map(|p| *p.as_ref()).collect();
71 |         self.pin_current_thread_to_core(processors);
72 |     }
73 | 
74 |     fn current_processor_id(&self) -> ProcessorId {
75 |         self.current_processor_id()
76 |     }
77 | 
78 |     fn max_processor_id(&self) -> ProcessorId {
79 |         self.max_processor_id()
80 |     }
81 | 
82 |     fn max_memory_region_id(&self) -> MemoryRegionId {
83 |         self.max_memory_region_id()
84 |     }
85 | 
86 |     fn current_thread_processors(&self) -> NonEmpty<ProcessorId> {
87 |         self.current_thread_processors()
88 |     }
89 | 
90 |     fn max_processor_time(&self) -> f64 {
91 |         self.max_processor_time()
92 |     }
93 | 
94 |     fn active_processor_count(&self) -> usize {
95 |         self.active_processor_count()
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/windows.rs:
--------------------------------------------------------------------------------
 1 | mod bindings;
 2 | mod group_mask;
 3 | mod platform;
 4 | mod processor;
 5 | 
 6 | use bindings::*;
 7 | pub(crate) use group_mask::*;
 8 | pub use platform::*;
 9 | pub(crate) use processor::*;
10 | 
11 | type ProcessorGroupIndex = u16;
12 | type ProcessorIndexInGroup = u8;
13 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/windows/bindings.rs:
--------------------------------------------------------------------------------
1 | mod abstractions;
2 | mod facade;
3 | mod real;
4 | 
5 | pub(crate) use abstractions::*;
6 | pub(crate) use facade::*;
7 | pub(crate) use real::*;
8 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/windows/bindings/abstractions.rs:
--------------------------------------------------------------------------------
 1 | use std::fmt::Debug;
 2 | 
 3 | use windows::{
 4 |     Win32::System::{
 5 |         JobObjects::JOBOBJECT_CPU_RATE_CONTROL_INFORMATION,
 6 |         Kernel::PROCESSOR_NUMBER,
 7 |         SystemInformation::{
 8 |             GROUP_AFFINITY, LOGICAL_PROCESSOR_RELATIONSHIP, SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
 9 |         },
10 |     },
11 |     core::Result,
12 | };
13 | 
14 | /// Bindings for FFI calls into external libraries (either provided by operating system or not).
15 | ///
16 | /// All PAL FFI calls must go through this trait, enabling them to be mocked.
17 | #[cfg_attr(test, mockall::automock)]
18 | pub(crate) trait Bindings: Debug + Send + Sync + 'static {
19 |     fn get_active_processor_count(&self, group_number: u16) -> u32;
20 |     fn get_maximum_processor_count(&self, group_number: u16) -> u32;
21 | 
22 |     fn get_maximum_processor_group_count(&self) -> u16;
23 | 
24 |     fn get_current_processor_number_ex(&self) -> PROCESSOR_NUMBER;
25 | 
26 |     fn get_numa_highest_node_number(&self) -> u32;
27 | 
28 |     fn get_current_process_default_cpu_set_masks(&self) -> Vec<GROUP_AFFINITY>;
29 |     fn get_current_thread_cpu_set_masks(&self) -> Vec<GROUP_AFFINITY>;
30 |     fn set_current_thread_cpu_set_masks(&self, masks: &[GROUP_AFFINITY]);
31 | 
32 |     unsafe fn get_logical_processor_information_ex(
33 |         &self,
34 |         relationship_type: LOGICAL_PROCESSOR_RELATIONSHIP,
35 |         buffer: Option<*mut SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>,
36 |         returned_length: *mut u32,
37 |     ) -> Result<()>;
38 | 
39 |     // JobObjectGroupInformationEx; may return empty list if not affinitized.
40 |     fn get_current_job_cpu_set_masks(&self) -> Vec<GROUP_AFFINITY>;
41 | 
42 |     fn get_current_job_cpu_rate_control(&self) -> Option<JOBOBJECT_CPU_RATE_CONTROL_INFORMATION>;
43 | 
44 |     fn get_current_thread_legacy_group_affinity(&self) -> GROUP_AFFINITY;
45 | }
46 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/pal/windows/processor.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::Display;
  2 | 
  3 | use crate::{
  4 |     EfficiencyClass, MemoryRegionId, ProcessorId,
  5 |     pal::{
  6 |         AbstractProcessor,
  7 |         windows::{ProcessorGroupIndex, ProcessorIndexInGroup},
  8 |     },
  9 | };
 10 | 
 11 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 12 | pub(crate) struct ProcessorImpl {
 13 |     pub(super) group_index: ProcessorGroupIndex,
 14 |     pub(super) index_in_group: ProcessorIndexInGroup,
 15 | 
 16 |     // Cumulative index when counting across all groups.
 17 |     pub(super) id: ProcessorId,
 18 | 
 19 |     pub(super) memory_region_id: MemoryRegionId,
 20 | 
 21 |     pub(super) efficiency_class: EfficiencyClass,
 22 | }
 23 | 
 24 | impl ProcessorImpl {
 25 |     pub(super) fn new(
 26 |         group_index: ProcessorGroupIndex,
 27 |         index_in_group: ProcessorIndexInGroup,
 28 |         id: ProcessorId,
 29 |         memory_region_id: MemoryRegionId,
 30 |         efficiency_class: EfficiencyClass,
 31 |     ) -> Self {
 32 |         Self {
 33 |             group_index,
 34 |             index_in_group,
 35 |             id,
 36 |             memory_region_id,
 37 |             efficiency_class,
 38 |         }
 39 |     }
 40 | }
 41 | 
 42 | impl Display for ProcessorImpl {
 43 |     #[cfg_attr(test, mutants::skip)] // There is no API contract to test here.
 44 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 45 |         write!(
 46 |             f,
 47 |             "processor {} [{}.{}]",
 48 |             self.id, self.group_index, self.index_in_group
 49 |         )
 50 |     }
 51 | }
 52 | 
 53 | impl AbstractProcessor for ProcessorImpl {
 54 |     fn id(&self) -> ProcessorId {
 55 |         self.id
 56 |     }
 57 | 
 58 |     fn memory_region_id(&self) -> MemoryRegionId {
 59 |         self.memory_region_id
 60 |     }
 61 | 
 62 |     fn efficiency_class(&self) -> EfficiencyClass {
 63 |         self.efficiency_class
 64 |     }
 65 | }
 66 | 
 67 | impl AsRef<Self> for ProcessorImpl {
 68 |     fn as_ref(&self) -> &Self {
 69 |         self
 70 |     }
 71 | }
 72 | 
 73 | impl PartialOrd for ProcessorImpl {
 74 |     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
 75 |         Some(self.cmp(other))
 76 |     }
 77 | }
 78 | 
 79 | impl Ord for ProcessorImpl {
 80 |     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
 81 |         self.id.cmp(&other.id)
 82 |     }
 83 | }
 84 | 
 85 | #[cfg(test)]
 86 | mod tests {
 87 |     use super::*;
 88 | 
 89 |     #[test]
 90 |     fn smoke_test() {
 91 |         let processor = ProcessorImpl::new(0, 1, 2, 3, EfficiencyClass::Performance);
 92 | 
 93 |         assert_eq!(processor.id(), 2);
 94 |         assert_eq!(processor.memory_region_id(), 3);
 95 |         assert_eq!(processor.efficiency_class(), EfficiencyClass::Performance);
 96 | 
 97 |         let processor2 = ProcessorImpl::new(0, 1, 2, 3, EfficiencyClass::Performance);
 98 |         assert_eq!(processor, processor2);
 99 | 
100 |         let processor3 = ProcessorImpl::new(0, 1, 4, 3, EfficiencyClass::Performance);
101 |         assert_ne!(processor, processor3);
102 |         assert!(processor < processor3);
103 |         assert!(processor3 > processor);
104 |     }
105 | }
106 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/primitive_types.rs:
--------------------------------------------------------------------------------
 1 | /// A processor identifier, used to differentiate processors in the system. This will match
 2 | /// the numeric identifier used by standard tooling of the operating system.
 3 | ///
 4 | /// It is important to highlight that the values used are not guaranteed to be sequential/contiguous
 5 | /// or to start from zero (aspects that are also not guaranteed by operating system tooling).
 6 | pub type ProcessorId = u32;
 7 | 
 8 | /// A memory region identifier, used to differentiate memory regions in the system. This will match
 9 | /// the numeric identifier used by standard tooling of the operating system.
10 | ///
11 | /// It is important to highlight that the values used are not guaranteed to be sequential/contiguous
12 | /// or to start from zero (aspects that are also not guaranteed by operating system tooling).
13 | pub type MemoryRegionId = u32;
14 | 
15 | /// Differentiates processors by their efficiency class, allowing work requiring high
16 | /// performance to be placed on the most performant processors at the expense of energy usage.
17 | ///
18 | /// This is a relative measurement - the most performant processors in a system are always
19 | /// considered performance processors, with less performant ones considered efficiency processors.
20 | #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
21 | #[expect(
22 |     clippy::exhaustive_enums,
23 |     reason = "mirroring two-tier structure of platform APIs"
24 | )]
25 | pub enum EfficiencyClass {
26 |     /// A processor that is optimized for energy efficiency at the expense of performance.
27 |     Efficiency,
28 | 
29 |     /// A processor that is optimized for performance at the expense of energy efficiency.
30 |     Performance,
31 | }
32 | 


--------------------------------------------------------------------------------
/crates/many_cpus/src/resource_quota.rs:
--------------------------------------------------------------------------------
 1 | /// Information about the resource quota that the operating system enforces for the current process.
 2 | ///
 3 | /// The active resource quota may change over time. You can use [`HardwareTracker`][1] to obtain
 4 | /// fresh information about the current resource quota at any time.
 5 | ///
 6 | /// [1]: crate::HardwareTracker
 7 | #[derive(Debug)]
 8 | pub struct ResourceQuota {
 9 |     max_processor_time: f64,
10 | }
11 | 
12 | impl ResourceQuota {
13 |     pub(crate) fn new(max_processor_time: f64) -> Self {
14 |         Self { max_processor_time }
15 |     }
16 | 
17 |     /// How many seconds of processor time the process is allowed to use per second of real time.
18 |     ///
19 |     /// This will never be more than the number of processors available to the current process.
20 |     #[must_use]
21 |     #[inline]
22 |     pub fn max_processor_time(&self) -> f64 {
23 |         self.max_processor_time
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "many_cpus_benchmarking"
 3 | description = "Criterion benchmark harness to easily compare different processor configurations"
 4 | publish = true
 5 | version = "0.1.10"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | cpulist = { workspace = true }
18 | criterion = { workspace = true }
19 | derive_more = { workspace = true, features = ["display"] }
20 | folo_utils = { workspace = true }
21 | itertools = { workspace = true }
22 | many_cpus = { workspace = true }
23 | nonempty = { workspace = true }
24 | rand = { workspace = true }
25 | 
26 | [dev-dependencies]
27 | mutants = { workspace = true }
28 | 
29 | [[bench]]
30 | name = "many_cpus_harness_demo"
31 | harness = false
32 | 
33 | [lints]
34 | workspace = true
35 | 


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/README.md:
--------------------------------------------------------------------------------
 1 | [Criterion][1] benchmark harness designed to compare different modes of distributing work in a
 2 | many-processor system with multiple memory regions. This helps highlight the performance impact of
 3 | cross-memory-region data transfers, cross-processor data transfers and multi-threaded logic.
 4 | 
 5 | More details in the [crate documentation](https://docs.rs/many_cpus_benchmarking/).
 6 | 
 7 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
 8 | high-performance hardware-aware programming in Rust.
 9 | 
10 | [1]: https://bheisler.github.io/criterion.rs/book/index.html


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/benches/many_cpus_harness_demo.rs:
--------------------------------------------------------------------------------
 1 | //! Demonstrates basic usage of the benchmark harness provided by `many_cpus_benchmarking`.
 2 | 
 3 | #![allow(
 4 |     missing_docs,
 5 |     reason = "No need for API documentation in benchmark code"
 6 | )]
 7 | 
 8 | use std::{hint::black_box, ptr};
 9 | 
10 | use criterion::{Criterion, criterion_group, criterion_main};
11 | use many_cpus_benchmarking::{Payload, WorkDistribution, execute_runs};
12 | 
13 | criterion_group!(benches, entrypoint);
14 | criterion_main!(benches);
15 | 
16 | fn entrypoint(c: &mut Criterion) {
17 |     // We use a BATCH_SIZE of 10, which means 10 * 64 = 640 MB of memory used per worker pair.
18 |     execute_runs::<CopyBytes, 10>(c, WorkDistribution::all());
19 | }
20 | 
21 | const COPY_BYTES_LEN: usize = 64 * 1024 * 1024;
22 | 
23 | /// Sample benchmark scenario that copies bytes between the two paired payloads.
24 | ///
25 | /// The source buffers are allocated in the "prepare" step and become local to the "prepare" worker.
26 | /// The destination buffers are allocated in the "process" step. The end result is that we copy
27 | /// from remote memory (allocated in the "prepare" step) to local memory in the "process" step.
28 | ///
29 | /// There is no deep meaning behind this scenario, just a sample benchmark that showcases comparing
30 | /// different work distribution modes to identify performance differences from hardware-awareness.
31 | #[derive(Debug, Default)]
32 | struct CopyBytes {
33 |     from: Option<Vec<u8>>,
34 | }
35 | 
36 | impl Payload for CopyBytes {
37 |     fn new_pair() -> (Self, Self) {
38 |         (Self::default(), Self::default())
39 |     }
40 | 
41 |     fn prepare(&mut self) {
42 |         self.from = Some(vec![99; COPY_BYTES_LEN]);
43 |     }
44 | 
45 |     fn process(&mut self) {
46 |         let from = self.from.as_ref().unwrap();
47 |         let mut to = Vec::with_capacity(COPY_BYTES_LEN);
48 | 
49 |         // SAFETY: `prepare()` sized `from` to COPY_BYTES_LEN, `to` has that capacity, no overlap.
50 |         unsafe {
51 |             ptr::copy_nonoverlapping(from.as_ptr(), to.as_mut_ptr(), COPY_BYTES_LEN);
52 |         }
53 | 
54 |         // SAFETY: The copy above initialized exactly the first COPY_BYTES_LEN bytes of `to`.
55 |         unsafe {
56 |             to.set_len(COPY_BYTES_LEN);
57 |         }
58 | 
59 |         // Read from the destination to prevent the compiler from optimizing the copy away.
60 |         _ = black_box(to.first().unwrap());
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/images/work_distribution_comparison.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:28cfbf03e3bf7fa149dc80b13cae85ce595dab4cf8e7bc90afc9117e633dc107
3 | size 65663
4 | 


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/src/cache.rs:
--------------------------------------------------------------------------------
 1 | #![allow(
 2 |     dead_code,
 3 |     reason = "code is conditionally used only in non-test builds"
 4 | )]
 5 | 
 6 | use std::{cell::RefCell, hint::black_box, ptr, sync::LazyLock};
 7 | 
 8 | // Large servers can make hundreds of MBs of L3 cache available to a single core, though it
 9 | // depends on the specific model and hardware configuration. We use a sufficiently large data set
10 | // here to have a good chance of evicting the real payload data from the caches.
11 | #[cfg(not(miri))]
12 | const CACHE_CLEANER_LEN_BYTES: usize = 128 * 1024 * 1024;
13 | #[cfg(miri)]
14 | const CACHE_CLEANER_LEN_BYTES: usize = 1024;
15 | 
16 | #[expect(
17 |     clippy::integer_division,
18 |     reason = "we are fine with inaccuracy if the inputs require it - this is a ballpark figure"
19 | )]
20 | const CACHE_CLEANER_LEN_U64: usize = CACHE_CLEANER_LEN_BYTES / size_of::<u64>();
21 | 
22 | // We copy the data from a shared immutable source.
23 | static CACHE_CLEANER_SOURCE: LazyLock<Vec<u64>> =
24 |     LazyLock::new(|| vec![0x0102030401020304; CACHE_CLEANER_LEN_U64]);
25 | 
26 | // To a thread-specific destination (just to avoid overlap/conflict).
27 | // The existing values here do not matter, we will overwrite them (potentially multiple times).
28 | thread_local! {
29 |     static CACHE_CLEANER_DESTINATION: RefCell<Vec<u64>> =
30 |         RefCell::new(vec![0xFFFFFFFFFFFFFFFF; CACHE_CLEANER_LEN_U64]);
31 | }
32 | 
33 | /// As the whole point of this benchmark harness is to demonstrate differences when running under
34 | /// different many-processor configurations, we need to ensure that memory actually gets accessed
35 | /// during the benchmark runs - that all data is not simply cached locally.
36 | ///
37 | /// This function will perform a large memory copy operation, which hopefully trashes any caches.
38 | #[cfg_attr(test, mutants::skip)] // Functional testing infeasible; we just check for panic.
39 | pub(crate) fn clean_caches() {
40 |     let source_ptr = CACHE_CLEANER_SOURCE.as_ptr();
41 |     let destination_ptr = CACHE_CLEANER_DESTINATION.with_borrow_mut(Vec::as_mut_ptr);
42 | 
43 |     // SAFETY: Both vectors hold CACHE_CLEANER_LEN_U64 elements and are separate allocations.
44 |     unsafe {
45 |         ptr::copy_nonoverlapping(source_ptr, destination_ptr, CACHE_CLEANER_LEN_U64);
46 |     }
47 | 
48 |     // SAFETY: The vector already has this length and the copy above initialized every element.
49 |     CACHE_CLEANER_DESTINATION.with_borrow_mut(|destination| unsafe {
50 |         destination.set_len(CACHE_CLEANER_LEN_U64);
51 |     });
52 | 
53 |     // Read from the destination to prevent the compiler from optimizing the copy away.
54 |     // SAFETY: The pointer still targets the first element of the thread-local destination.
55 |     let _ = black_box(unsafe { destination_ptr.read() });
56 | }
57 | 
58 | #[cfg(test)]
59 | mod tests {
60 |     use super::*;
61 | 
62 |     #[test]
63 |     fn clean_caches_smoke_test() {
64 |         // Just make sure it does not panic and gets a clean bill of health from Miri.
65 |         clean_caches();
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/crates/many_cpus_benchmarking/src/payload.rs:
--------------------------------------------------------------------------------
 1 | /// One benchmark payload, to be processed by each worker involved in each benchmark.
 2 | ///
 3 | /// Payloads are created in pairs because the workers are created in pairs. Depending on the
 4 | /// benchmark scenario, the pair of payloads may be connected (e.g. reader and writer) or
 5 | /// independent (equivalent, two workers doing the same thing).
 6 | ///
 7 | /// The lifecycle of a payload is:
 8 | ///
 9 | /// 1. A payload pair is created on the main thread.
10 | /// 1. Each payload in the pair is transferred to a specific thread hosting a specific worker.
11 | /// 1. The `prepare()` method is called to generate any input data.
12 | /// 1. The payload pair is exchanged between the two paired workers.
13 | /// 1. The `process()` method is called to process the data received from the other pair member.
14 | /// 1. The payload pair is dropped.
15 | ///
16 | /// Note that some [work distribution modes][crate::WorkDistribution] (named `*Self`) may skip
17 | /// the payload exchange step.
18 | pub trait Payload: Sized + Send + 'static {
19 |     /// Creates the payload pair that will be used to initialize one worker pair in one
20 |     /// benchmark iteration. This will be called on the main thread.
21 |     fn new_pair() -> (Self, Self);
22 | 
23 |     /// Performs any initialization required. This will be called before the benchmark time span
24 |     /// measurement starts. It will be called on a worker thread but the payload may be moved to
25 |     /// a different worker thread before the benchmark starts (as workers by default prepare work
26 |     /// for each other, to showcase what happens when the work is transferred between threads).
27 |     fn prepare(&mut self) {}
28 | 
29 |     /// Performs any initialization required on the final worker thread selected. This is not
30 |     /// counted as part of the benchmark time span.
31 |     fn prepare_local(&mut self) {}
32 | 
33 |     /// Processes the payload but does not consume it. The iteration is complete when this returns
34 |     /// for all payloads. The payloads are dropped later, to ensure that the benchmark time is not
35 |     /// affected by the time it takes to drop the payload and release the memory.
36 |     fn process(&mut self);
37 | }
38 | 


--------------------------------------------------------------------------------
/crates/region_cached/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "region_cached"
 3 | description = "Adds a logical layer of caching between processor L3 cache and main memory"
 4 | publish = true
 5 | version = "0.1.11"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | arc-swap = { workspace = true }
18 | derive_more = { workspace = true, features = ["debug"] }
19 | folo_utils = { workspace = true }
20 | linked = { workspace = true }
21 | many_cpus = { workspace = true }
22 | rsevents = { workspace = true }
23 | simple-mermaid = { workspace = true }
24 | 
25 | [dev-dependencies]
26 | axum = { workspace = true, features = ["http1", "tokio"] }
27 | benchmark_utils = { workspace = true }
28 | criterion = { workspace = true }
29 | mockall = { workspace = true }
30 | mutants = { workspace = true }
31 | static_assertions = { workspace = true }
32 | tokio = { workspace = true, features = ["net", "rt-multi-thread"] }
33 | 
34 | [[bench]]
35 | name = "region_cached"
36 | harness = false
37 | 
38 | [lints]
39 | workspace = true
40 | 


--------------------------------------------------------------------------------
/crates/region_cached/README.md:
--------------------------------------------------------------------------------
 1 | On many-processor systems with multiple memory regions, there is an extra cost associated with
 2 | accessing data in physical memory modules that are in a different memory region than the current
 3 | processor:
 4 | 
 5 | * Cross-memory-region loads have higher latency (e.g. 100 ns local versus 200 ns remote).
 6 | * Cross-memory-region loads have lower throughput (e.g. 50 Gbps local versus 10 Gbps remote).
 7 | 
 8 | This crate provides the capability to cache frequently accessed shared data sets in the local memory
 9 | region, speeding up reads when the data is not already in the local processor caches. You can think
10 | of it as an extra level of caching between L3 processor caches and main memory.
11 | 
12 | More details in the [crate documentation](https://docs.rs/region_cached/).
13 | 
14 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
15 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/region_cached/doc/region_cached.mermaid:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     subgraph Region1[Memory region 1]
 3 |         Processor1[Processor 1]
 4 |         Processor2[Processor 2]
 5 | 
 6 |         Processor1 -->|read| Region1Value[Regional value]
 7 |         Processor2 -->|read| Region1Value
 8 |     end
 9 | 
10 |     subgraph Region2[Memory region 2]
11 |         Processor3[Processor 3]
12 |         Processor4[Processor 4]
13 | 
14 |         Processor3 -->|read| Region2Value[Regional value]
15 |         Processor4 -->|read| Region2Value
16 |     end
17 | 
18 |     Region1Value -->|weakly consistent<br/>clone from| GlobalValue[Global value]
19 |     Region2Value -->|weakly consistent<br/>clone from| GlobalValue
20 | 
21 |     Processor1 -->|write| GlobalValue


--------------------------------------------------------------------------------
/crates/region_cached/examples/region_cached_1gb.rs:
--------------------------------------------------------------------------------
 1 | //! Allocates a region-cached variable with 1 GB of data and accesses it from every thread.
 2 | //!
 3 | //! You can observe memory usage to prove that the data is not being copied an unexpected
 4 | //! number of times (one copy per memory region is expected, plus one global primary copy).
 5 | 
 6 | use std::{hint::black_box, thread, time::Duration};
 7 | 
 8 | use many_cpus::ProcessorSet;
 9 | use region_cached::{RegionCachedExt, region_cached};
10 | 
11 | region_cached! {
12 |     static DATA: Vec<u8> = vec![50; 1024 * 1024 * 1024];
13 | }
14 | 
15 | fn main() {
16 |     let processor_set = ProcessorSet::default();
17 | 
18 |     processor_set
19 |         .spawn_threads(|_| DATA.with_cached(|data| _ = black_box(data.len())))
20 |         .into_iter()
21 |         .for_each(|x| x.join().unwrap());
22 | 
23 |     println!(
24 |         "All {} threads have accessed the region-cached data. Terminating in 60 seconds.",
25 |         processor_set.len()
26 |     );
27 | 
28 |     thread::sleep(Duration::from_secs(60));
29 | }
30 | 


--------------------------------------------------------------------------------
/crates/region_cached/examples/region_cached_log_filtering.rs:
--------------------------------------------------------------------------------
 1 | //! Showcase basic use of the `region_cached!` macro.
 2 | 
 3 | use std::thread;
 4 | 
 5 | use region_cached::{RegionCachedExt, region_cached};
 6 | 
 7 | region_cached!(static FILTER_KEYS: Vec<String> = load_initial_filters());
 8 | 
 9 | /// Returns true if the log line contains any of the filter keys.
10 | fn process_log_line(line: &str) -> bool {
11 |     // `.with_cached()` provides an immutable reference to the cached value.
12 |     FILTER_KEYS.with_cached(|keys| keys.iter().any(|key| line.contains(key)))
13 | }
14 | 
15 | fn update_filters(new_filters: Vec<String>) {
16 |     // `.set_global()` publishes a new value, which will be distributed to all memory regions
17 |     // in a weakly consistent manner.
18 |     FILTER_KEYS.set_global(new_filters);
19 | }
20 | 
21 | fn load_initial_filters() -> Vec<String> {
22 |     // For example purposes we only have a trivial data set, which makes little sense to cache.
23 |     // In realistic scenarios, you would want to use region-local caching only if your data
24 |     // set is too large to naturally fit in processor caches (e.g. 100K+ entries). Other
25 |     // considerations also apply - let profiling be your guide in choosing your data structures.
26 |     vec!["error".to_string(), "panic".to_string()]
27 | }
28 | 
29 | static SAMPLE_LOG_LINES: &[&str] = &[
30 |     "info: everything is fine",
31 |     "error: something went wrong",
32 |     "warning: this is a warning",
33 |     "panic: oh no, we're doomed",
34 | ];
35 | 
36 | fn main() {
37 |     // Start a bunch of threads that will process log lines.
38 |     let mut threads = Vec::new();
39 | 
40 |     for _ in 0..100 {
41 |         threads.push(thread::spawn(move || {
42 |             for line in SAMPLE_LOG_LINES {
43 |                 if process_log_line(line) {
44 |                     println!("Matched filters: {line}");
45 |                 }
46 |             }
47 |         }));
48 |     }
49 | 
50 |     let new_filters = vec![
51 |         "error".to_string(),
52 |         "panic".to_string(),
53 |         "warning".to_string(),
54 |     ];
55 | 
56 |     // Update the filters. The update will arrive eventually on all threads in all memory regions.
57 |     // In terminal output, you may see the first threads act on the initial data set and later
58 |     // threads act on the updated data set, simply because the first threads already finish before
59 |     // getting the updated value.
60 |     update_filters(new_filters);
61 | 
62 |     for thread in threads {
63 |         thread.join().unwrap();
64 |     }
65 | 
66 |     println!("All threads have finished processing log lines.");
67 | }
68 | 


--------------------------------------------------------------------------------
/crates/region_cached/examples/region_cached_log_filtering_no_statics.rs:
--------------------------------------------------------------------------------
 1 | //! This is a variation of the `region_cached_log_filtering.rs` example, but using the
 2 | //! `InstancePerThread` runtime wrapper type instead of statics inside a `region_cached!` block.
 3 | 
 4 | use std::thread;
 5 | 
 6 | use linked::{InstancePerThread, Ref};
 7 | use region_cached::RegionCached;
 8 | 
 9 | /// The current thread's view of the region-cached filter keys instance.
10 | type CachedFilterKeys = Ref<RegionCached<Vec<String>>>;
11 | 
12 | /// Returns true if the log line contains any of the filter keys.
13 | fn process_log_line(line: &str, filter_keys: &CachedFilterKeys) -> bool {
14 |     // `.with_cached()` provides an immutable reference to the cached value.
15 |     filter_keys.with_cached(|keys| keys.iter().any(|key| line.contains(key)))
16 | }
17 | 
18 | fn update_filters(new_filters: Vec<String>, filter_keys: &CachedFilterKeys) {
19 |     // `.set_global()` publishes a new value, which will be distributed to all memory regions
20 |     // in a weakly consistent manner.
21 |     filter_keys.set_global(new_filters);
22 | }
23 | 
24 | fn load_initial_filters() -> Vec<String> {
25 |     // For example purposes we only have a trivial data set, which makes little sense to cache.
26 |     // In realistic scenarios, you would want to use region-local caching only if your data
27 |     // set is too large to naturally fit in processor caches (e.g. 100K+ entries). Other
28 |     // considerations also apply - let profiling be your guide in choosing your data structures.
29 |     vec!["error".to_string(), "panic".to_string()]
30 | }
31 | 
32 | static SAMPLE_LOG_LINES: &[&str] = &[
33 |     "info: everything is fine",
34 |     "error: something went wrong",
35 |     "warning: this is a warning",
36 |     "panic: oh no, we're doomed",
37 | ];
38 | 
39 | fn main() {
40 |     let filters = InstancePerThread::new(RegionCached::new(load_initial_filters()));
41 | 
42 |     // Start a bunch of threads that will process log lines.
43 |     let mut threads = Vec::new();
44 | 
45 |     for _ in 0..100 {
46 |         threads.push(thread::spawn({
47 |             // We clone the `InstancePerThread` for each thread, so they can all access the filters.
48 |             let filters = filters.clone();
49 | 
50 |             move || {
51 |                 // This localizes the `InstancePerThread`, giving us the current thread's view.
52 |                 let filters = filters.acquire();
53 | 
54 |                 for line in SAMPLE_LOG_LINES {
55 |                     if process_log_line(line, &filters) {
56 |                         println!("Matched filters: {line}");
57 |                     }
58 |                 }
59 |             }
60 |         }));
61 |     }
62 | 
63 |     let new_filters = vec![
64 |         "error".to_string(),
65 |         "panic".to_string(),
66 |         "warning".to_string(),
67 |     ];
68 | 
69 |     // Update the filters. The update will arrive eventually on all threads in all memory regions.
70 |     // In terminal output, you may see the first threads act on the initial data set and later
71 |     // threads act on the updated data set, simply because the first threads already finish before
72 |     // getting the updated value.
73 |     update_filters(new_filters, &filters.acquire());
74 | 
75 |     for thread in threads {
76 |         thread.join().unwrap();
77 |     }
78 | 
79 |     println!("All threads have finished processing log lines.");
80 | }
81 | 


--------------------------------------------------------------------------------
/crates/region_cached/examples/region_cached_web.rs:
--------------------------------------------------------------------------------
 1 | //! Showcase basic use of the `region_cached!` macro in a multithreaded web app.
 2 | 
 3 | use axum::{Router, routing::get};
 4 | use many_cpus::HardwareInfo;
 5 | use region_cached::{RegionCachedCopyExt, RegionCachedExt, region_cached};
 6 | use std::time::{SystemTime, UNIX_EPOCH};
 7 | 
 8 | // A global variable whose latest value is cached in each memory region for fast local read access.
 9 | // Writes to this variable are weakly consistent across all memory regions.
10 | //
11 | // Note: to keep the example simple, the value of this variable is of a trivial size and unlikely
12 | // to actually benefit from region-local caching as it easily fits into local processor caches.
13 | region_cached!(static LAST_UPDATE: u128 = 0);
14 | 
15 | #[tokio::main]
16 | async fn main() {
17 |     // The beneficial impact will arise only on systems with multiple memory regions.
18 |     let memory_region_count = HardwareInfo::max_memory_region_count();
19 |     println!("the current system has {memory_region_count} memory regions");
20 | 
21 |     let app = Router::new()
22 |         .route("/", get(read))
23 |         .route("/update", get(update));
24 |     let listener = tokio::net::TcpListener::bind("0.0.0.0:1234").await.unwrap();
25 |     axum::serve(listener, app).await.unwrap();
26 | }
27 | 
28 | /// Open `http://localhost:1234/` to read the current value.
29 | async fn read() -> String {
30 |     let last_update_timestamp = LAST_UPDATE.get_cached();
31 | 
32 |     format!("Last update: {last_update_timestamp}")
33 | }
34 | 
35 | /// Open `http://localhost:1234/update` to set a new value.
36 | async fn update() -> String {
37 |     let now = SystemTime::now()
38 |         .duration_since(UNIX_EPOCH)
39 |         .unwrap()
40 |         .as_millis();
41 |     LAST_UPDATE.set_global(now);
42 |     format!("Last update time set to: {now}")
43 | }
44 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/__private.rs:
--------------------------------------------------------------------------------
1 | pub use linked;
2 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/clients.rs:
--------------------------------------------------------------------------------
 1 | mod hw_info_client;
 2 | mod hw_info_facade;
 3 | mod hw_tracker_client;
 4 | mod hw_tracker_facade;
 5 | 
 6 | pub(crate) use hw_info_client::*;
 7 | pub(crate) use hw_info_facade::*;
 8 | pub(crate) use hw_tracker_client::*;
 9 | pub(crate) use hw_tracker_facade::*;
10 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/clients/hw_info_client.rs:
--------------------------------------------------------------------------------
 1 | use many_cpus::HardwareInfo;
 2 | 
 3 | #[cfg_attr(test, mockall::automock)]
 4 | pub(crate) trait HardwareInfoClient {
 5 |     fn max_memory_region_count(&self) -> usize;
 6 | }
 7 | 
 8 | #[derive(Debug)]
 9 | pub(crate) struct HardwareInfoClientImpl;
10 | 
11 | impl HardwareInfoClient for HardwareInfoClientImpl {
12 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
13 |     fn max_memory_region_count(&self) -> usize {
14 |         HardwareInfo::max_memory_region_count()
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/clients/hw_info_facade.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | use std::sync::Arc;
 3 | 
 4 | use crate::{HardwareInfoClient, HardwareInfoClientImpl};
 5 | 
 6 | #[cfg(test)]
 7 | use crate::MockHardwareInfoClient;
 8 | 
 9 | #[derive(Clone, Debug)]
10 | pub(crate) enum HardwareInfoClientFacade {
11 |     Real(&'static HardwareInfoClientImpl),
12 | 
13 |     #[cfg(test)]
14 |     Mock(Arc<MockHardwareInfoClient>),
15 | }
16 | 
17 | impl HardwareInfoClientFacade {
18 |     pub(crate) const fn real() -> Self {
19 |         Self::Real(&HardwareInfoClientImpl)
20 |     }
21 | 
22 |     #[cfg(test)]
23 |     pub(crate) fn from_mock(mock: MockHardwareInfoClient) -> Self {
24 |         Self::Mock(Arc::new(mock))
25 |     }
26 | }
27 | 
28 | impl HardwareInfoClient for HardwareInfoClientFacade {
29 |     fn max_memory_region_count(&self) -> usize {
30 |         match self {
31 |             Self::Real(real) => real.max_memory_region_count(),
32 |             #[cfg(test)]
33 |             Self::Mock(mock) => mock.max_memory_region_count(),
34 |         }
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/clients/hw_tracker_client.rs:
--------------------------------------------------------------------------------
 1 | use many_cpus::{HardwareTracker, MemoryRegionId};
 2 | 
 3 | #[cfg_attr(test, mockall::automock)]
 4 | pub(crate) trait HardwareTrackerClient {
 5 |     fn current_memory_region_id(&self) -> MemoryRegionId;
 6 |     fn is_thread_memory_region_pinned(&self) -> bool;
 7 | }
 8 | 
 9 | #[derive(Debug)]
10 | pub(crate) struct HardwareTrackerClientImpl;
11 | 
12 | impl HardwareTrackerClient for HardwareTrackerClientImpl {
13 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
14 |     fn current_memory_region_id(&self) -> MemoryRegionId {
15 |         HardwareTracker::current_memory_region_id()
16 |     }
17 | 
18 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
19 |     fn is_thread_memory_region_pinned(&self) -> bool {
20 |         HardwareTracker::is_thread_memory_region_pinned()
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/clients/hw_tracker_facade.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | use std::sync::Arc;
 3 | 
 4 | use many_cpus::MemoryRegionId;
 5 | 
 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl};
 7 | 
 8 | #[cfg(test)]
 9 | use crate::MockHardwareTrackerClient;
10 | 
11 | #[derive(Clone, Debug)]
12 | pub(crate) enum HardwareTrackerClientFacade {
13 |     Real(&'static HardwareTrackerClientImpl),
14 | 
15 |     #[cfg(test)]
16 |     Mock(Arc<MockHardwareTrackerClient>),
17 | }
18 | 
19 | impl HardwareTrackerClientFacade {
20 |     pub(crate) const fn real() -> Self {
21 |         Self::Real(&HardwareTrackerClientImpl)
22 |     }
23 | 
24 |     #[cfg(test)]
25 |     pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self {
26 |         Self::Mock(Arc::new(mock))
27 |     }
28 | }
29 | 
30 | impl HardwareTrackerClient for HardwareTrackerClientFacade {
31 |     fn current_memory_region_id(&self) -> MemoryRegionId {
32 |         match self {
33 |             Self::Real(real) => real.current_memory_region_id(),
34 |             #[cfg(test)]
35 |             Self::Mock(mock) => mock.current_memory_region_id(),
36 |         }
37 |     }
38 | 
39 |     fn is_thread_memory_region_pinned(&self) -> bool {
40 |         match self {
41 |             Self::Real(real) => real.is_thread_memory_region_pinned(),
42 |             #[cfg(test)]
43 |             Self::Mock(mock) => mock.is_thread_memory_region_pinned(),
44 |         }
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/macros.rs:
--------------------------------------------------------------------------------
 1 | /// Marks static variables as region-cached.
 2 | ///
 3 | /// The static variables are most conveniently used via extension methods on the
 4 | /// [`RegionCachedExt`][1] trait. Import this trait when using region-cached static variables.
 5 | ///
 6 | /// # Example
 7 | ///
 8 | /// ```
 9 | /// use region_cached::{RegionCachedExt, region_cached};
10 | ///
11 | /// region_cached! {
12 | ///     static ALLOWED_KEYS: Vec<String> = vec![
13 | ///         "error".to_string(),
14 | ///         "panic".to_string()
15 | ///     ];
16 | ///     static FORBIDDEN_KEYS: Vec<String> = vec![
17 | ///         "info".to_string(),
18 | ///         "debug".to_string()
19 | ///     ];
20 | /// }
21 | ///
22 | /// let allowed_key_count = ALLOWED_KEYS.with_cached(|keys| keys.len());
23 | /// ```
24 | ///
25 | /// [1]: crate::RegionCachedExt
26 | #[macro_export]
27 | macro_rules! region_cached {
28 |     () => {};
29 | 
30 |     ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr; $($rest:tt)*) => (
31 |         $crate::region_cached!($(#[$attr])* $vis static $NAME: $t = $initial_value);
32 |         $crate::region_cached!($($rest)*);
33 |     );
34 | 
35 |     ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr) => {
36 |         $crate::__private::linked::thread_local_rc! {
37 |             $(#[$attr])* $vis static $NAME: $crate::RegionCached<$t> =
38 |                 $crate::RegionCached::new($initial_value);
39 |         }
40 |     };
41 | }
42 | 


--------------------------------------------------------------------------------
/crates/region_cached/src/region_cached_ext.rs:
--------------------------------------------------------------------------------
  1 | use crate::RegionCached;
  2 | 
  3 | /// Extension trait that adds convenience methods to region-cached static variables
  4 | /// in a `region_cached!` block.
  5 | pub trait RegionCachedExt<T> {
  6 |     /// Executes the provided function with a reference to the cached value
  7 |     /// in the current memory region.
  8 |     ///
  9 |     /// # Example
 10 |     ///
 11 |     /// ```
 12 |     /// use region_cached::{region_cached, RegionCachedExt};
 13 |     ///
 14 |     /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string());
 15 |     ///
 16 |     /// let len = FAVORITE_COLOR.with_cached(|color| color.len());
 17 |     /// assert_eq!(len, 4);
 18 |     /// ```
 19 |     fn with_cached<F, R>(&self, f: F) -> R
 20 |     where
 21 |         F: FnOnce(&T) -> R;
 22 | 
 23 |     /// Publishes a new value to all memory regions.
 24 |     ///
 25 |     /// The update will be applied to all memory regions in a [weakly consistent manner][1].
 26 |     ///
 27 |     /// # Example
 28 |     ///
 29 |     /// ```
 30 |     /// use region_cached::{region_cached, RegionCachedExt};
 31 |     ///
 32 |     /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string());
 33 |     ///
 34 |     /// FAVORITE_COLOR.set_global("red".to_string());
 35 |     /// ```
 36 |     ///
 37 |     /// Updating the value is [weakly consistent][1]. Do not expect the update to be
 38 |     /// immediately visible. Even on the same thread, it is only guaranteed to be
 39 |     /// immediately visible if the thread is pinned to a specific memory region.
 40 |     ///
 41 |     /// ```
 42 |     /// use many_cpus::ProcessorSet;
 43 |     /// use region_cached::{region_cached, RegionCachedExt};
 44 |     /// use std::num::NonZero;
 45 |     ///
 46 |     /// region_cached!(static FAVORITE_COLOR: String = "blue".to_string());
 47 |     ///
 48 |     /// // We can use this to pin a thread to a specific processor, to demonstrate a
 49 |     /// // situation where you can rely on consistency guarantees for immediate visibility.
 50 |     /// let one_processor = ProcessorSet::builder()
 51 |     ///     .take(NonZero::new(1).unwrap())
 52 |     ///     .unwrap();
 53 |     ///
 54 |     /// one_processor.spawn_thread(move |processor_set| {
 55 |     ///     let processor = processor_set.processors().first();
 56 |     ///     println!("Thread pinned to processor {} in memory region {}",
 57 |     ///         processor.id(),
 58 |     ///         processor.memory_region_id()
 59 |     ///     );
 60 |     ///
 61 |     ///     FAVORITE_COLOR.set_global("red".to_string());
 62 |     ///
 63 |     ///     // This thread is pinned to a specific processor, so it is guaranteed to stay
 64 |     ///     // within the same memory region (== on the same physical hardware). This means
 65 |     ///     // that an update to a region-cached value is immediately visible.
 66 |     ///     let color = FAVORITE_COLOR.with_cached(|color| color.clone());
 67 |     ///     assert_eq!(color, "red");
 68 |     /// }).join().unwrap();
 69 |     /// ```
 70 |     ///
 71 |     /// [1]: crate#consistency-guarantees
 72 |     fn set_global(&self, value: T);
 73 | }
 74 | 
 75 | /// Extension trait that adds convenience methods to region-cached static variables
 76 | /// in a `region_cached!` block, specifically for `Copy` types.
 77 | pub trait RegionCachedCopyExt<T>
 78 | where
 79 |     T: Copy,
 80 | {
 81 |     /// Gets a copy of the cached value in the current memory region.
 82 |     ///
 83 |     /// # Example
 84 |     ///
 85 |     /// ```
 86 |     /// use region_cached::{region_cached, RegionCachedCopyExt};
 87 |     ///
 88 |     /// region_cached!(static CURRENT_ACCESS_TOKEN: u128 = 0x123100);
 89 |     ///
 90 |     /// let token = CURRENT_ACCESS_TOKEN.get_cached();
 91 |     /// assert_eq!(token, 0x123100);
 92 |     /// ```
 93 |     fn get_cached(&self) -> T;
 94 | }
 95 | 
 96 | impl<T> RegionCachedExt<T> for linked::StaticInstancePerThread<RegionCached<T>>
 97 | where
 98 |     T: Clone + Send + Sync + 'static,
 99 | {
100 |     #[inline]
101 |     fn with_cached<F, R>(&self, f: F) -> R
102 |     where
103 |         F: FnOnce(&T) -> R,
104 |     {
105 |         self.with(|inner| inner.with_cached(f))
106 |     }
107 | 
108 |     #[inline]
109 |     fn set_global(&self, value: T) {
110 |         self.with(|inner| inner.set_global(value));
111 |     }
112 | }
113 | 
114 | impl<T> RegionCachedCopyExt<T> for linked::StaticInstancePerThread<RegionCached<T>>
115 | where
116 |     T: Clone + Copy + Send + Sync + 'static,
117 | {
118 |     #[inline]
119 |     fn get_cached(&self) -> T {
120 |         self.with(|inner| inner.get_cached())
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/crates/region_local/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "region_local"
 3 | description = "Isolated variable storage per memory region, similar to `thread_local_rc!`"
 4 | publish = true
 5 | version = "0.1.11"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [features]
14 | default = []
15 | 
16 | [dependencies]
17 | arc-swap = { workspace = true }
18 | derive_more = { workspace = true, features = ["debug"] }
19 | folo_utils = { workspace = true }
20 | linked = { workspace = true }
21 | many_cpus = { workspace = true }
22 | rsevents = { workspace = true }
23 | simple-mermaid = { workspace = true }
24 | 
25 | [dev-dependencies]
26 | axum = { workspace = true, features = ["http1", "tokio"] }
27 | benchmark_utils = { workspace = true }
28 | criterion = { workspace = true }
29 | mockall = { workspace = true }
30 | mutants = { workspace = true }
31 | static_assertions = { workspace = true }
32 | tokio = { workspace = true, features = ["net", "rt-multi-thread"] }
33 | 
34 | [[bench]]
35 | name = "region_local"
36 | harness = false
37 | 
38 | [lints]
39 | workspace = true
40 | 


--------------------------------------------------------------------------------
/crates/region_local/README.md:
--------------------------------------------------------------------------------
 1 | On many-processor systems with multiple memory regions, there is an extra cost associated with
 2 | accessing data in physical memory modules that are in a different memory region than the current
 3 | processor:
 4 | 
 5 | * Cross-memory-region loads have higher latency (e.g. 100 ns local versus 200 ns remote).
 6 | * Cross-memory-region loads have lower throughput (e.g. 50 Gbps local versus 10 Gbps remote).
 7 | 
 8 | This crate provides the capability to create static variables that maintain separate storage per
 9 | memory region. This may be useful in circumstances where state needs to be shared but only within
10 | each memory region (e.g. because you intentionally want to avoid the overhead of cross-memory-region
11 | transfers and want to isolate the data sets).
12 | 
13 | Think of this as an equivalent of `thread_local_rc!`, except operating on the memory region boundary
14 | instead of the thread boundary.
15 | 
16 | More details in the [crate documentation](https://docs.rs/region_local/).
17 | 
18 | This is part of the [Folo project](https://github.com/folo-rs/folo) that provides mechanisms for
19 | high-performance hardware-aware programming in Rust.


--------------------------------------------------------------------------------
/crates/region_local/doc/region_local.mermaid:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     subgraph Region2[Memory region 2]
 3 |         Processor3[Processor 3]
 4 |         Processor4[Processor 4]
 5 | 
 6 |         Processor3 --> Region2Value[Regional value]
 7 |         Processor4 --> Region2Value
 8 |     end
 9 | 
10 |     subgraph Region1[Memory region 1]
11 |         Processor1[Processor 1]
12 |         Processor2[Processor 2]
13 | 
14 |         Processor1 --> Region1Value[Regional value]
15 |         Processor2 --> Region1Value
16 |     end


--------------------------------------------------------------------------------
/crates/region_local/examples/region_local_1gb.rs:
--------------------------------------------------------------------------------
 1 | //! Allocates a region-local variable with 1 GB of data and accesses it from every thread.
 2 | //!
 3 | //! You can observe memory usage to prove that the data is not being copied an unexpected
 4 | //! number of times (one copy per memory region is expected).
 5 | 
 6 | use std::{hint::black_box, thread, time::Duration};
 7 | 
 8 | use many_cpus::ProcessorSet;
 9 | use region_local::{RegionLocalExt, region_local};
10 | 
11 | region_local! {
12 |     static DATA: Vec<u8> = vec![50; 1024 * 1024 * 1024];
13 | }
14 | 
15 | fn main() {
16 |     let processor_set = ProcessorSet::default();
17 | 
18 |     processor_set
19 |         .spawn_threads(|_| DATA.with_local(|data| _ = black_box(data.len())))
20 |         .into_iter()
21 |         .for_each(|x| x.join().unwrap());
22 | 
23 |     println!(
24 |         "All {} threads have accessed the region-local data. Terminating in 60 seconds.",
25 |         processor_set.len()
26 |     );
27 | 
28 |     thread::sleep(Duration::from_secs(60));
29 | }
30 | 


--------------------------------------------------------------------------------
/crates/region_local/examples/region_local_web.rs:
--------------------------------------------------------------------------------
 1 | //! Showcase basic use of the `region_local!` macro in a multithreaded web app.
 2 | //!
 3 | use axum::{Router, routing::get};
 4 | use many_cpus::HardwareInfo;
 5 | use region_local::{RegionLocalCopyExt, RegionLocalExt, region_local};
 6 | use std::time::{SystemTime, UNIX_EPOCH};
 7 | 
 8 | // A global variable whose value is unique in each memory region for fast local access.
 9 | // Writes to this variable are eventually consistent across all threads in the same memory region.
10 | region_local!(static LAST_UPDATE: u128 = 0);
11 | 
12 | #[tokio::main]
13 | async fn main() {
14 |     let memory_region_count = HardwareInfo::max_memory_region_count();
15 |     println!("the current system has {memory_region_count} memory regions");
16 | 
17 |     let app = Router::new()
18 |         .route("/", get(read))
19 |         .route("/update", get(update));
20 |     let listener = tokio::net::TcpListener::bind("0.0.0.0:1234").await.unwrap();
21 |     axum::serve(listener, app).await.unwrap();
22 | }
23 | 
24 | /// Open `http://localhost:1234/` to read the current value.
25 | async fn read() -> String {
26 |     let last_update_timestamp = LAST_UPDATE.get_local();
27 | 
28 |     format!("Last update: {last_update_timestamp}")
29 | }
30 | 
31 | /// Open `http://localhost:1234/update` to set a new value.
32 | /// The new value is only visible to `read()` handlers that run in the same memory region.
33 | async fn update() -> String {
34 |     let now = SystemTime::now()
35 |         .duration_since(UNIX_EPOCH)
36 |         .unwrap()
37 |         .as_millis();
38 |     LAST_UPDATE.set_local(now);
39 |     format!("Last update time set to: {now}")
40 | }
41 | 


--------------------------------------------------------------------------------
/crates/region_local/src/__private.rs:
--------------------------------------------------------------------------------
1 | pub use linked;
2 | 


--------------------------------------------------------------------------------
/crates/region_local/src/clients.rs:
--------------------------------------------------------------------------------
 1 | mod hw_info_client;
 2 | mod hw_info_facade;
 3 | mod hw_tracker_client;
 4 | mod hw_tracker_facade;
 5 | 
 6 | pub(crate) use hw_info_client::*;
 7 | pub(crate) use hw_info_facade::*;
 8 | pub(crate) use hw_tracker_client::*;
 9 | pub(crate) use hw_tracker_facade::*;
10 | 


--------------------------------------------------------------------------------
/crates/region_local/src/clients/hw_info_client.rs:
--------------------------------------------------------------------------------
 1 | use many_cpus::HardwareInfo;
 2 | 
 3 | #[cfg_attr(test, mockall::automock)]
 4 | pub(crate) trait HardwareInfoClient {
 5 |     fn max_memory_region_count(&self) -> usize;
 6 | }
 7 | 
 8 | #[derive(Debug)]
 9 | pub(crate) struct HardwareInfoClientImpl;
10 | 
11 | impl HardwareInfoClient for HardwareInfoClientImpl {
12 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
13 |     fn max_memory_region_count(&self) -> usize {
14 |         HardwareInfo::max_memory_region_count()
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/crates/region_local/src/clients/hw_info_facade.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | use std::sync::Arc;
 3 | 
 4 | use crate::{HardwareInfoClient, HardwareInfoClientImpl};
 5 | 
 6 | #[cfg(test)]
 7 | use crate::MockHardwareInfoClient;
 8 | 
 9 | #[derive(Clone, Debug)]
10 | pub(crate) enum HardwareInfoClientFacade {
11 |     Real(&'static HardwareInfoClientImpl),
12 | 
13 |     #[cfg(test)]
14 |     Mock(Arc<MockHardwareInfoClient>),
15 | }
16 | 
17 | impl HardwareInfoClientFacade {
18 |     pub(crate) const fn real() -> Self {
19 |         Self::Real(&HardwareInfoClientImpl)
20 |     }
21 | 
22 |     #[cfg(test)]
23 |     pub(crate) fn from_mock(mock: MockHardwareInfoClient) -> Self {
24 |         Self::Mock(Arc::new(mock))
25 |     }
26 | }
27 | 
28 | impl HardwareInfoClient for HardwareInfoClientFacade {
29 |     fn max_memory_region_count(&self) -> usize {
30 |         match self {
31 |             Self::Real(real) => real.max_memory_region_count(),
32 |             #[cfg(test)]
33 |             Self::Mock(mock) => mock.max_memory_region_count(),
34 |         }
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/crates/region_local/src/clients/hw_tracker_client.rs:
--------------------------------------------------------------------------------
 1 | use many_cpus::{HardwareTracker, MemoryRegionId};
 2 | 
 3 | #[cfg_attr(test, mockall::automock)]
 4 | pub(crate) trait HardwareTrackerClient {
 5 |     fn current_memory_region_id(&self) -> MemoryRegionId;
 6 |     fn is_thread_memory_region_pinned(&self) -> bool;
 7 | }
 8 | 
 9 | #[derive(Debug)]
10 | pub(crate) struct HardwareTrackerClientImpl;
11 | 
12 | impl HardwareTrackerClient for HardwareTrackerClientImpl {
13 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
14 |     fn current_memory_region_id(&self) -> MemoryRegionId {
15 |         HardwareTracker::current_memory_region_id()
16 |     }
17 | 
18 |     #[cfg_attr(test, mutants::skip)] // Trivial fn, tested on lower levels - skip mutating.
19 |     fn is_thread_memory_region_pinned(&self) -> bool {
20 |         HardwareTracker::is_thread_memory_region_pinned()
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/crates/region_local/src/clients/hw_tracker_facade.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | use std::sync::Arc;
 3 | 
 4 | use many_cpus::MemoryRegionId;
 5 | 
 6 | use crate::{HardwareTrackerClient, HardwareTrackerClientImpl};
 7 | 
 8 | #[cfg(test)]
 9 | use crate::MockHardwareTrackerClient;
10 | 
11 | #[derive(Clone, Debug)]
12 | pub(crate) enum HardwareTrackerClientFacade {
13 |     Real(&'static HardwareTrackerClientImpl),
14 | 
15 |     #[cfg(test)]
16 |     Mock(Arc<MockHardwareTrackerClient>),
17 | }
18 | 
19 | impl HardwareTrackerClientFacade {
20 |     pub(crate) const fn real() -> Self {
21 |         Self::Real(&HardwareTrackerClientImpl)
22 |     }
23 | 
24 |     #[cfg(test)]
25 |     pub(crate) fn from_mock(mock: MockHardwareTrackerClient) -> Self {
26 |         Self::Mock(Arc::new(mock))
27 |     }
28 | }
29 | 
30 | impl HardwareTrackerClient for HardwareTrackerClientFacade {
31 |     fn current_memory_region_id(&self) -> MemoryRegionId {
32 |         match self {
33 |             Self::Real(real) => real.current_memory_region_id(),
34 |             #[cfg(test)]
35 |             Self::Mock(mock) => mock.current_memory_region_id(),
36 |         }
37 |     }
38 | 
39 |     fn is_thread_memory_region_pinned(&self) -> bool {
40 |         match self {
41 |             Self::Real(real) => real.is_thread_memory_region_pinned(),
42 |             #[cfg(test)]
43 |             Self::Mock(mock) => mock.is_thread_memory_region_pinned(),
44 |         }
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/crates/region_local/src/macros.rs:
--------------------------------------------------------------------------------
 1 | /// Marks static variables as region-local.
 2 | ///
 3 | /// The static variables are most conveniently used via extension methods on the
 4 | /// [`RegionLocalExt`][1] trait. Import this trait when using region-local static variables.
 5 | ///
 6 | /// # Example
 7 | ///
 8 | /// ```
 9 | /// use region_local::{RegionLocalExt, region_local};
10 | ///
11 | /// region_local! {
12 | ///     static ALLOWED_KEYS: Vec<String> = vec![
13 | ///         "error".to_string(),
14 | ///         "panic".to_string()
15 | ///     ];
16 | ///     static FORBIDDEN_KEYS: Vec<String> = vec![
17 | ///         "info".to_string(),
18 | ///         "debug".to_string()
19 | ///     ];
20 | /// }
21 | ///
22 | /// let allowed_key_count = ALLOWED_KEYS.with_local(|keys| keys.len());
23 | /// ```
24 | ///
25 | /// [1]: crate::RegionLocalExt
26 | #[macro_export]
27 | macro_rules! region_local {
28 |     () => {};
29 | 
30 |     ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr; $($rest:tt)*) => (
31 |         $crate::region_local!($(#[$attr])* $vis static $NAME: $t = $initial_value);
32 |         $crate::region_local!($($rest)*);
33 |     );
34 | 
35 |     ($(#[$attr:meta])* $vis:vis static $NAME:ident: $t:ty = $initial_value:expr) => {
36 |         $crate::__private::linked::thread_local_rc! {
37 |             $(#[$attr])* $vis static $NAME: $crate::RegionLocal<$t> =
38 |                 $crate::RegionLocal::new(|| $initial_value);
39 |         }
40 |     };
41 | }
42 | 


--------------------------------------------------------------------------------
/crates/testing/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "testing"
 3 | description = "Private helpers for testing and examples in Folo crates"
 4 | publish = false
 5 | version = "0.0.1-never"
 6 | 
 7 | authors.workspace = true
 8 | edition.workspace = true
 9 | license.workspace = true
10 | repository.workspace = true
11 | rust-version.workspace = true
12 | 
13 | [lib]
14 | doc = false
15 | 
16 | [features]
17 | default = []
18 | 
19 | [dependencies]
20 | 
21 | [target.'cfg(windows)'.dependencies]
22 | deranged = { workspace = true, features = ["macros"] }
23 | folo_utils = { workspace = true }
24 | windows = { workspace = true, features = [
25 |     "Win32_System_JobObjects",
26 |     "Win32_System_Threading",
27 |     "Win32_Security",
28 | ] }
29 | 
30 | [dev-dependencies]
31 | mutants = { workspace = true }
32 | 
33 | [lints]
34 | workspace = true
35 | 


--------------------------------------------------------------------------------
/crates/testing/examples/spin_cpu_windows.rs:
--------------------------------------------------------------------------------
 1 | //! Demonstrates the effect of Windows job object limits by spinning the CPU.
 2 | 
 3 | fn main() {
 4 |     #[cfg(windows)]
 5 |     windows::main();
 6 | 
 7 |     #[cfg(not(windows))]
 8 |     panic!("This example is only supported on Windows.");
 9 | }
10 | 
11 | #[cfg(windows)]
12 | mod windows {
13 |     use std::{thread, time::Duration};
14 | 
15 |     use folo_utils::nz;
16 |     use testing::{Job, ProcessorTimePct};
17 |     use windows::Win32::System::Threading::{
18 |         GetCurrentThread, SetThreadPriority, THREAD_PRIORITY_IDLE,
19 |     };
20 | 
21 |     const SLEEP_TIME_SECS: u64 = 10;
22 | 
23 |     pub(crate) fn main() {
24 |         let job = Job::builder()
25 |             .with_processor_count(nz!(8))
26 |             .with_max_processor_time_pct(ProcessorTimePct::new_static::<50>())
27 |             .build();
28 | 
29 |         println!("Starting with limit of 8 processors and 50% processor time.");
30 | 
31 |         // We start a bunch of worker threads, enough to saturate a bunch of processors.
32 |         for _ in 0..100 {
33 |             start_spinner();
34 |         }
35 | 
36 |         thread::sleep(Duration::from_secs(SLEEP_TIME_SECS));
37 | 
38 |         drop(job);
39 |         println!("Switching to 8 processors and 1% processor time.");
40 |         // Keep 8 processors here so that only the processor time limit changes in this step.
41 |         let job = Job::builder()
42 |             .with_processor_count(nz!(8))
43 |             .with_max_processor_time_pct(ProcessorTimePct::new_static::<1>())
44 |             .build();
45 | 
46 |         thread::sleep(Duration::from_secs(SLEEP_TIME_SECS));
47 | 
48 |         drop(job);
49 |         println!("Switching to 1 processor and 80% processor time.");
50 | 
51 |         let job = Job::builder()
52 |             .with_processor_count(nz!(1))
53 |             .with_max_processor_time_pct(ProcessorTimePct::new_static::<80>())
54 |             .build();
55 | 
56 |         thread::sleep(Duration::from_secs(SLEEP_TIME_SECS));
57 | 
58 |         drop(job);
59 |         println!("Switching to 4 processors and 75% processor time.");
60 | 
61 |         let job = Job::builder()
62 |             .with_processor_count(nz!(4))
63 |             .with_max_processor_time_pct(ProcessorTimePct::new_static::<75>())
64 |             .build();
65 | 
66 |         thread::sleep(Duration::from_secs(SLEEP_TIME_SECS));
67 | 
68 |         drop(job);
69 |         println!("All done.");
70 |     }
71 | 
72 |     fn start_spinner() {
73 |         thread::spawn(|| {
74 |             // Avoid the spinning being troublesome for other threads by lowering thread priority.
75 | 
76 |             // SAFETY: No safety requirements.
77 |             let current_thread = unsafe { GetCurrentThread() };
78 | 
79 |             // SAFETY: No safety requirements.
80 |             unsafe {
81 |                 SetThreadPriority(current_thread, THREAD_PRIORITY_IDLE).unwrap();
82 |             }
83 | 
84 |             // Spin spin spin spin.
85 | 
86 |             let mut i: usize = 0;
87 | 
88 |             loop {
89 |                 i = i.wrapping_add(1);
90 |             }
91 |         });
92 |     }
93 | }
94 | 


--------------------------------------------------------------------------------
/crates/testing/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! Private helpers for testing and examples in Folo crates.
 2 | 
 3 | #[cfg(windows)]
 4 | mod windows;
 5 | 
 6 | #[cfg(windows)]
 7 | pub use windows::*;
 8 | 
 9 | /// Returns the absolute difference between two f64 values, or `0.0` when that
10 | /// difference is not more than `close_enough` (the values are then considered equal).
11 | ///
12 | /// This is a "correctly performed" floating point equality comparison.
13 | #[must_use]
14 | pub fn f64_diff_abs(a: f64, b: f64, close_enough: f64) -> f64 {
15 |     let diff = (a - b).abs();
16 | 
17 |     if diff <= close_enough { 0.0 } else { diff }
18 | }
19 | 


--------------------------------------------------------------------------------
/crates/testing/src/windows.rs:
--------------------------------------------------------------------------------
1 | mod job;
2 | 
3 | pub use job::*;
4 | 


--------------------------------------------------------------------------------
/just_basics.just:
--------------------------------------------------------------------------------
 1 | [group('basics')]
 2 | build PROFILE='dev':
 3 |     cargo build {{ target_package }} --profile {{ PROFILE }} --all-features --all-targets
 4 | 
 5 | [group('basics')]
 6 | check PROFILE='dev':
 7 |     cargo check {{ target_package }} --profile {{ PROFILE }} --all-features --all-targets
 8 | 
 9 | [group('basics')]
10 | clean:
11 |     cargo clean
12 | 
13 | [group('basics')]
14 | docs:
15 |     #!{{ shebang }}
16 |     $env:RUSTDOCFLAGS = "-D warnings"
17 |     cargo doc {{ target_package }} --no-deps --all-features
18 | 
19 | [group('basics')]
20 | docs-open:
21 |     #!{{ shebang }}
22 |     $env:RUSTDOCFLAGS = "-D warnings"
23 |     cargo doc {{ target_package }} --no-deps --all-features --open
24 | 


--------------------------------------------------------------------------------
/just_quality.just:
--------------------------------------------------------------------------------
 1 | [group('quality')]
 2 | clippy PROFILE='dev':
 3 |     cargo clippy {{ target_package }} --profile {{ PROFILE }} --all-targets --all-features --locked -- -D warnings
 4 | 
 5 | [group('quality')]
 6 | coverage-measure:
 7 |     # Before running the tests, we need to clear old test coverage data because the coverage report
 8 |     # simply sums up all the data in the target folder, even if it is from old builds.
 9 |     cargo llvm-cov clean --workspace
10 | 
11 |     # This will run tests and generate test coverage data files, to be analyzed separately.
12 |     cargo llvm-cov nextest {{ target_package }} --all-targets --no-report --all-features --locked
13 | 
14 | # This tool needs a different way to specify the package.
15 | coverage-package := if package == "" { "" } else { "-p " + package }
16 | 
17 | [group('quality')]
18 | coverage-report:
19 |     cargo llvm-cov report {{ coverage-package }} --open
20 | 
21 | [group('quality')]
22 | format:
23 |     cargo fmt --verbose --all
24 |     cargo sort-derives
25 | 
26 | [group('quality')]
27 | format-check:
28 |     cargo fmt --verbose --all --check
29 |     cargo sort-derives --check
30 | 
31 | [group('quality')]
32 | hack:
33 |     cargo hack check --feature-powerset
34 | 
35 | [group('quality')]
36 | machete:
37 |     cargo machete --skip-target-dir
38 | 
39 | # Separate file because it is a giant script.
40 | import 'just_quality_mutants.just'
41 | 
42 | # Full validation of primary factors, as you would do in a build pipeline before a release.
43 | # Skips some potentially very lengthy validation, which you can run separately via `validate-extra`.
44 | # We assume this is executed on Windows, and will also perform the full validation on Linux.
45 | [group('quality')]
46 | validate: validate-local validate-linux
47 | 
48 | # Performs the part of the `validate` recipe that must run on Linux, when commanded from Windows.
49 | [group('quality')]
50 | validate-linux:
51 |     wsl -e bash -l -c "just validate-local"
52 | 
53 | # Full validation of primary factors, as you would do in a build pipeline before a release.
54 | # Performs validation on the current platform, whatever that may be.
55 | [group('quality')]
56 | validate-local:
57 |     cargo generate-lockfile
58 |     just machete
59 |     just format-check
60 |     just clippy dev
61 |     just check dev
62 |     just test
63 |     just test-docs
64 |     just test-benches
65 |     just docs
66 |     just miri
67 |     just clippy release
68 |     just check release
69 |     just build release
70 |     just hack
71 | 
72 | # Validation of extra factors that take potentially too long to run in regular validation.
73 | [group('quality')]
74 | validate-extra: validate-extra-local validate-extra-linux
75 | 
76 | # Performs the part of the `validate-extra` recipe that must run on Linux, when commanded from Windows.
77 | [group('quality')]
78 | validate-extra-linux:
79 |     wsl -e bash -l -c "just validate-extra-local"
80 | 
81 | # Validation of extra factors that take potentially too long to run in regular validation.
82 | # Performs validation on the current platform, whatever that may be.
83 | [group('quality')]
84 | validate-extra-local: mutants
85 | 


--------------------------------------------------------------------------------
/just_quality_mutants.just:
--------------------------------------------------------------------------------
 1 | [group('quality')]
 2 | mutants:
 3 |     #!{{ shebang }}
 4 | 
 5 |     function Escape-Wildcards ($s) {
 6 |         if (!$IsLinux) {
 7 |             return $s
 8 |         }
 9 | 
10 |         # On Linux, PowerShell has built-in globbing that expands wildcards. Unfortunately,
11 |         # cargo mutants requires literal input values, globbing just breaks it. We convince
12 |         # PowerShell to turn off globbing by single-quoting the arguments we fear may be
13 |         # interpreted as wildcard glob expressions.
14 |         "'" + $s + "'"
15 |     }
16 | 
17 |     $args = @(
18 |         "-e"
19 |         # Parts of this crate require Criterion to work and other parts are currently not tested
20 |         # as there is no public way to simulate a system topology for `many_cpus`.
21 |         "many_cpus_benchmarking",
22 | 
23 |         "-e"
24 |         # Macros are tested via the impl crate, mutations in the middle layer might not be detected.
25 |         "linked_macros"
26 | 
27 |         # We do not test facades, as they are just trivial code that forwards calls to real impls.
28 |         "-e"
29 |         (Escape-Wildcards "**/*facade.rs")
30 |         "-e"
31 |         "facade"
32 | 
33 |         "-e"
34 |         # We have limited coverage of platform bindings because it can be difficult to set up the
35 |         # right scenarios for each, given they are platform-dependent. Instead, we test higher
36 |         # level code using a mock platform.
37 |         "bindings"
38 | 
39 |         "-e"
40 |         # This is just a different type of bindings, skipped for same reason as `bindings` above.
41 |         (Escape-Wildcards "crates/many_cpus/src/pal/linux/filesystem/**")
42 |         
43 |         "-e"
44 |         # All this is code only used in tests - we do not test this code itself.
45 |         (Escape-Wildcards "crates/testing/**")
46 |     )
47 | 
48 |     if ($IsLinux) {
49 |         $args += "-e"
50 |         $args += (Escape-Wildcards "**/*windows.rs")
51 | 
52 |         $args += "-e"
53 |         $args += "windows"
54 |     } else {
55 |         $args += "-e"
56 |         $args += (Escape-Wildcards "**/*linux.rs")
57 | 
58 |         $args += "-e"
59 |         $args += "linux"
60 |     }
61 | 
62 |     # We deliberately do not use nextest here because it cannot run doctests.
63 | 
64 |     # Multi-job mutation on Linux does not appear to work well - it seems to cause some interference
65 |     # between the jobs. Perhaps due to our CARGO_TARGET_DIR being shared between jobs? One job is
66 |     # executing tests from the wrong job because they are overwriting each other? Simple fix is
67 |     # to just use 1 job on Linux, which is good enough for now.
68 |     if ($IsLinux) {
69 |         $args += "--jobs"
70 |         $args += "1"
71 |     } else {
72 |         $args += "--jobs"
73 |         $args += "4"
74 |     }
75 | 
76 |     # We must use Invoke-Expression to preserve the quotes around the wildcarded arguments on Linux.
77 |     $expanded_args = [String]::join(" ", $args)
78 |     Invoke-Expression "cargo mutants {{ target_package }} --profile=mutants $expanded_args"
79 | 


--------------------------------------------------------------------------------
/just_release.just:
--------------------------------------------------------------------------------
 1 | [group('release')]
 2 | audit:
 3 |     cargo audit
 4 |     
 5 | [group('release')]
 6 | prepare-release:
 7 |     release-plz update
 8 | 
 9 | [group('release')]
10 | release:
11 |     #!{{ shebang }}
12 |     $env:GIT_TOKEN = gh auth token
13 |     try {
14 |         release-plz release
15 |     } finally {
16 |         $env:GIT_TOKEN = $null
17 |     }


--------------------------------------------------------------------------------
/just_setup.just:
--------------------------------------------------------------------------------
1 | [group('setup')]
2 | install-tools:
3 |     cargo install cargo-machete cargo-nextest release-plz cargo-semver-checks cargo-audit cargo-hack cargo-mutants cargo-llvm-cov cargo-sort-derives --locked
4 |     rustup toolchain install nightly --component miri
5 | 


--------------------------------------------------------------------------------
/just_testing.just:
--------------------------------------------------------------------------------
 1 | [group('testing')]
 2 | bench TARGET="":
 3 |     #!{{ shebang }}
 4 |     $target_selector = @()
 5 | 
 6 |     if ("{{ TARGET }}" -ne "") {
 7 |         $target_selector += "--bench"
 8 |         $target_selector += "{{ TARGET }}"
 9 |     }
10 | 
11 |     cargo bench {{ target_package }} --all-features $target_selector
12 | 
13 | [group('testing')]
14 | miri:
15 |     cargo +nightly miri nextest run {{ target_package }}
16 | 
17 | [group('testing')]
18 | test FILTER="":
19 |     cargo nextest run {{ target_package }} --all-features {{ FILTER }}
20 | 
21 | # We run benches separately because they are slow in Nextest multi-process mode,
22 | # probably due to the Gnuplot integration that spawns an external process.
23 | [group('testing')]
24 | test-benches FILTER="":
25 |     cargo test {{ target_package }} --benches --all-features {{ FILTER }}
26 | 
27 | [group('testing')]
28 | test-docs FILTER="":
29 |     cargo test {{ target_package }} --all-features --doc {{ FILTER }}
30 | 


--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
 1 | set windows-shell := ["pwsh.exe", "-NoLogo", "-NoProfile", "-NonInteractive", "-Command"]
 2 | shebang := if os() == "windows" { "pwsh.exe" } else { "/usr/bin/env pwsh" }
 3 | 
 4 | package := ""
 5 | target_package := if package == "" { " --workspace" } else { " -p " + package }
 6 | 
 7 | _default:
 8 |     @just --list
 9 | 
10 | import 'just_basics.just'
11 | import 'just_quality.just'
12 | import 'just_release.just'
13 | import 'just_setup.just'
14 | import 'just_testing.just'
15 | 


--------------------------------------------------------------------------------
/release-plz.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | # TEMP: it currently complains for some unclear reason.
 3 | allow_dirty = true
 4 | 
 5 | # At this point in pre-alpha time, changelogs are very messy and useless.
 6 | changelog_update = false
 7 | 
 8 | # All we care about is crates.io, Git releases are not used.
 9 | git_release_enable = false
10 | 
11 | [[package]]
12 | name = "linked"
13 | 
14 | # Changelogs of these two crates are merged into the `linked` changelog because they are "invisible" crates.
15 | changelog_include = ["linked_macros", "linked_macros_impl"]
16 | 
17 | # All the `linked*` crates are published under the same version, as they are
18 | # all the "logically same" crate, just separated for cargotechnical reasons.
19 | version_group = "linked"
20 | 
21 | [[package]]
22 | name = "linked_macros"
23 | 
24 | # This crate is invisible, changes are recorded in `linked` changelog instead.
25 | changelog_update = false
26 | 
27 | # All the `linked*` crates are published under the same version, as they are
28 | # all the "logically same" crate, just separated for cargotechnical reasons.
29 | version_group = "linked"
30 | 
31 | [[package]]
32 | name = "linked_macros_impl"
33 | 
34 | # This crate is invisible, changes are recorded in `linked` changelog instead.
35 | changelog_update = false
36 | 
37 | # All the `linked*` crates are published under the same version, as they are
38 | # all the "logically same" crate, just separated for cargotechnical reasons.
39 | version_group = "linked"


--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "1.86"
3 | 


--------------------------------------------------------------------------------